From 098666fc68075705f7c27af535e8bbbe452f6725 Mon Sep 17 00:00:00 2001 From: Ian Culp <57159226+icculp@users.noreply.github.com> Date: Fri, 11 Jun 2021 21:25:11 -0500 Subject: [PATCH] Counts the words in each chapter, ignoring code blocks and digits --- word_count.ipynb | 266 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 word_count.ipynb diff --git a/word_count.ipynb b/word_count.ipynb new file mode 100644 index 0000000..7a19a21 --- /dev/null +++ b/word_count.ipynb @@ -0,0 +1,266 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Untitled25.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyOJqhnf6/YgfQMx6pfh6Fth", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kT4wyWuy2szs" + }, + "source": [ + "import numpy as np\n", + "import os\n", + "import pandas as pd" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9J62Uh4H2Y9g", + "outputId": "f4147b98-f72a-4ad2-8b5b-c3bb9f0c86db" + }, + "source": [ + "!git clone https://github.com/icculp/Learning-Bitcoin-from-the-Command-Line.git" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'Learning-Bitcoin-from-the-Command-Line'...\n", + "remote: Enumerating objects: 6634, done.\u001b[K\n", + "remote: Counting objects: 100% (238/238), done.\u001b[K\n", + "remote: Compressing objects: 100% (196/196), done.\u001b[K\n", + "remote: Total 6634 (delta 109), reused 82 (delta 42), pack-reused 6396\u001b[K\n", + "Receiving objects: 100% (6634/6634), 7.53 MiB | 13.87 MiB/s, done.\n", + "Resolving deltas: 100% (4068/4068), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k_g4-Fvn2yPy" + }, + "source": [ + "def count_words():\n", + " \"\"\" Counts words ignoring code blocks and digits \"\"\"\n", + " columns=['Chapter', 'Word Count']\n", + " counts = []\n", + " repo_path = '/content/Learning-Bitcoin-from-the-Command-Line/'\n", + " for chapter in os.listdir(repo_path):\n", + " if not chapter.endswith('.md'):\n", + " continue\n", + " count = 0\n", + " flag = 0 # ignores words between ``` code markdown\n", + " with open(repo_path + chapter) as ch:\n", + " for line in ch.readlines():\n", + " if flag:\n", + " if '```' in line:\n", + " flag = 0\n", + " continue\n", + " continue\n", + " if '```' in line:\n", + " flag = 1\n", + " continue\n", + " for word in line.split():\n", + " if any(ch.isdigit() for ch in word):\n", + " continue\n", + " count += 1\n", + " counts.append((chapter, count))\n", + " # print(chapter, count)\n", + " return pd.DataFrame(counts, columns=columns)" + ], + "execution_count": 35, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2-pkEaYF3Uxm" + }, + "source": [ + "chapter_word_counts = count_words()\n", + "chapter_word_counts.sort_values(by=['Chapter'], inplace=True)" + ], + "execution_count": 38, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 631 + }, + "id": "IPkG4oQJ6f1e", + "outputId": "d057cf19-4d40-477a-964c-d2b6154e1e73" + }, + "source": [ + "from google.colab import data_table\n", + "data_table.DataTable(chapter_word_counts, include_index=False)" + ], + "execution_count": 39, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/a6224c040fa35dcf/data_table.js\";\n\n window.createDataTable({\n data: [[\"01_0_Introduction.md\",\n{\n 'v': 1128,\n 'f': \"1128\",\n }],\n [\"01_1_Introducing_Bitcoin.md\",\n{\n 'v': 2784,\n 'f': \"2784\",\n }],\n [\"02_0_Setting_Up_a_Bitcoin-Core_VPS.md\",\n{\n 'v': 221,\n 'f': \"221\",\n }],\n [\"02_1_Setting_Up_a_Bitcoin-Core_VPS_with_StackScript.md\",\n{\n 'v': 1937,\n 'f': \"1937\",\n }],\n [\"02_2_Setting_Up_Bitcoin_Core_Other.md\",\n{\n 'v': 245,\n 'f': \"245\",\n }],\n [\"03_0_Understanding_Your_Bitcoin_Setup.md\",\n{\n 'v': 225,\n 'f': \"225\",\n }],\n [\"03_1_Verifying_Your_Bitcoin_Setup.md\",\n{\n 'v': 801,\n 'f': \"801\",\n }],\n [\"03_2_Knowing_Your_Bitcoin_Setup.md\",\n{\n 'v': 540,\n 'f': \"540\",\n }],\n [\"03_3_Setting_Up_Your_Wallet.md\",\n{\n 'v': 1741,\n 'f': \"1741\",\n }],\n [\"03_3__Interlude_Using_Command-Line_Variables.md\",\n{\n 'v': 354,\n 'f': \"354\",\n }],\n [\"03_4_Receiving_a_Transaction.md\",\n{\n 'v': 1521,\n 'f': \"1521\",\n }],\n [\"03_5_Understanding_the_Descriptor.md\",\n{\n 'v': 1393,\n 'f': \"1393\",\n }],\n [\"04_0_Sending_Bitcoin_Transactions.md\",\n{\n 'v': 153,\n 'f': \"153\",\n }],\n [\"04_1_Sending_Coins_The_Easy_Way.md\",\n{\n 'v': 1087,\n 'f': \"1087\",\n }],\n [\"04_2_Creating_a_Raw_Transaction.md\",\n{\n 'v': 1840,\n 'f': \"1840\",\n }],\n [\"04_2__Interlude_Using_JQ.md\",\n{\n 'v': 2034,\n 'f': \"2034\",\n }],\n [\"04_3_Creating_a_Raw_Transaction_with_Named_Arguments.md\",\n{\n 'v': 428,\n 'f': \"428\",\n }],\n [\"04_4_Sending_Coins_with_a_Raw_Transaction.md\",\n{\n 'v': 1011,\n 'f': \"1011\",\n }],\n [\"04_4__Interlude_Using_Curl.md\",\n{\n 'v': 1247,\n 'f': \"1247\",\n }],\n [\"04_5_Sending_Coins_with_Automated_Raw_Transactions.md\",\n{\n 'v': 608,\n 'f': \"608\",\n }],\n [\"04_6_Creating_a_Segwit_Transaction.md\",\n{\n 'v': 1203,\n 'f': \"1203\",\n }],\n [\"05_0_Controlling_Bitcoin_Transactions.md\",\n{\n 'v': 144,\n 'f': \"144\",\n }],\n [\"05_1_Watching_for_Stuck_Transactions.md\",\n{\n 'v': 588,\n 'f': \"588\",\n }],\n [\"05_2_Resending_a_Transaction_with_RBF.md\",\n{\n 'v': 1412,\n 'f': \"1412\",\n }],\n [\"05_3_Funding_a_Transaction_with_CPFP.md\",\n{\n 'v': 846,\n 'f': \"846\",\n }],\n [\"06_0_Expanding_Bitcoin_Transactions_Multisigs.md\",\n{\n 'v': 148,\n 'f': \"148\",\n }],\n [\"06_1_Sending_a_Transaction_to_a_Multisig.md\",\n{\n 'v': 1810,\n 'f': \"1810\",\n }],\n [\"06_2_Spending_a_Transaction_to_a_Multisig.md\",\n{\n 'v': 1131,\n 'f': \"1131\",\n }],\n [\"06_3_Sending_an_Automated_Multisig.md\",\n{\n 'v': 625,\n 'f': \"625\",\n }],\n [\"07_0_Expanding_Bitcoin_Transactions_PSBTs.md\",\n{\n 'v': 163,\n 'f': \"163\",\n }],\n [\"07_1_Creating_a_Partially_Signed_Bitcoin_Transaction.md\",\n{\n 'v': 1512,\n 'f': \"1512\",\n }],\n [\"07_2_Using_a_Partially_Signed_Bitcoin_Transaction.md\",\n{\n 'v': 1383,\n 'f': \"1383\",\n }],\n [\"07_3_Integrating_with_Hardware_Wallets.md\",\n{\n 'v': 2183,\n 'f': \"2183\",\n }],\n [\"08_0_Expanding_Bitcoin_Transactions_Other.md\",\n{\n 'v': 131,\n 'f': \"131\",\n }],\n [\"08_1_Sending_a_Transaction_with_a_Locktime.md\",\n{\n 'v': 1508,\n 'f': \"1508\",\n }],\n [\"08_2_Sending_a_Transaction_with_Data.md\",\n{\n 'v': 596,\n 'f': \"596\",\n }],\n [\"09_0_Introducing_Bitcoin_Scripts.md\",\n{\n 'v': 187,\n 'f': \"187\",\n }],\n [\"09_1_Understanding_the_Foundation_of_Transactions.md\",\n{\n 'v': 993,\n 'f': \"993\",\n }],\n [\"09_2_Running_a_Bitcoin_Script.md\",\n{\n 'v': 887,\n 'f': \"887\",\n }],\n [\"09_3_Testing_a_Bitcoin_Script.md\",\n{\n 'v': 1023,\n 'f': \"1023\",\n }],\n [\"09_4_Scripting_a_P2PKH.md\",\n{\n 'v': 889,\n 'f': \"889\",\n }],\n [\"09_5_Scripting_a_P2WPKH.md\",\n{\n 'v': 881,\n 'f': \"881\",\n }],\n [\"10_0_Embedding_Bitcoin_Scripts_in_P2SH_Transactions.md\",\n{\n 'v': 152,\n 'f': \"152\",\n }],\n [\"10_1_Understanding_the_Foundation_of_P2SH.md\",\n{\n 'v': 1179,\n 'f': \"1179\",\n }],\n [\"10_2_Building_the_Structure_of_P2SH.md\",\n{\n 'v': 1318,\n 'f': \"1318\",\n }],\n [\"10_3_Running_a_Bitcoin_Script_with_P2SH.md\",\n{\n 'v': 340,\n 'f': \"340\",\n }],\n [\"10_4_Scripting_a_Multisig.md\",\n{\n 'v': 1043,\n 'f': \"1043\",\n }],\n [\"10_5_Scripting_a_Segwit_Script.md\",\n{\n 'v': 751,\n 'f': \"751\",\n }],\n [\"10_6_Spending_a_P2SH_Transaction.md\",\n{\n 'v': 393,\n 'f': \"393\",\n }],\n [\"11_0_Empowering_Timelock_with_Bitcoin_Scripts.md\",\n{\n 'v': 99,\n 'f': \"99\",\n }],\n [\"11_1_Understanding_Timelock_Options.md\",\n{\n 'v': 571,\n 'f': \"571\",\n }],\n [\"11_2_Using_CLTV_in_Scripts.md\",\n{\n 'v': 1241,\n 'f': \"1241\",\n }],\n [\"11_3_Using_CSV_in_Scripts.md\",\n{\n 'v': 1544,\n 'f': \"1544\",\n }],\n [\"12_0_Expanding_Bitcoin_Scripts.md\",\n{\n 'v': 99,\n 'f': \"99\",\n }],\n [\"12_1_Using_Script_Conditionals.md\",\n{\n 'v': 1216,\n 'f': \"1216\",\n }],\n [\"12_2_Using_Other_Script_Commands.md\",\n{\n 'v': 483,\n 'f': \"483\",\n }],\n [\"13_0_Designing_Real_Bitcoin_Scripts.md\",\n{\n 'v': 112,\n 'f': \"112\",\n }],\n [\"13_1_Writing_Puzzle_Scripts.md\",\n{\n 'v': 1032,\n 'f': \"1032\",\n }],\n [\"13_2_Writing_Complex_Multisig_Scripts.md\",\n{\n 'v': 1031,\n 'f': \"1031\",\n }],\n [\"13_3_Empowering_Bitcoin_with_Scripts.md\",\n{\n 'v': 1493,\n 'f': \"1493\",\n }],\n [\"14_0_Using_Tor.md\",\n{\n 'v': 110,\n 'f': \"110\",\n }],\n [\"14_1_Verifying_Your_Tor_Setup.md\",\n{\n 'v': 1632,\n 'f': \"1632\",\n }],\n [\"14_2_Changing_Your_Bitcoin_Hidden_Services.md\",\n{\n 'v': 460,\n 'f': \"460\",\n }],\n [\"14_3_Adding_SSH_Hidden_Services.md\",\n{\n 'v': 339,\n 'f': \"339\",\n }],\n [\"15_0_Talking_to_Bitcoind.md\",\n{\n 'v': 251,\n 'f': \"251\",\n }],\n [\"15_1_Accessing_Bitcoind_with_C.md\",\n{\n 'v': 1315,\n 'f': \"1315\",\n }],\n [\"15_2_Programming_Bitcoind_with_C.md\",\n{\n 'v': 1484,\n 'f': \"1484\",\n }],\n [\"15_3_Receiving_Bitcoind_Notifications_with_C.md\",\n{\n 'v': 669,\n 'f': \"669\",\n }],\n [\"16_0_Programming_with_Libwally.md\",\n{\n 'v': 317,\n 'f': \"317\",\n }],\n [\"16_1_Setting_Up_Libwally.md\",\n{\n 'v': 583,\n 'f': \"583\",\n }],\n [\"16_2_Using_BIP39_in_Libwally.md\",\n{\n 'v': 958,\n 'f': \"958\",\n }],\n [\"16_3_Using_BIP32_in_Libwally.md\",\n{\n 'v': 978,\n 'f': \"978\",\n }],\n [\"16_4_Using_PSBTs_in_Libwally.md\",\n{\n 'v': 1005,\n 'f': \"1005\",\n }],\n [\"16_5_Using_Scripts_in_Libwally.md\",\n{\n 'v': 789,\n 'f': \"789\",\n }],\n [\"16_6_Using_Other_Functions_in_Libwally.md\",\n{\n 'v': 746,\n 'f': \"746\",\n }],\n [\"16_7_Integrating_Libwally_and_Bitcoin-CLI.md\",\n{\n 'v': 1401,\n 'f': \"1401\",\n }],\n [\"17_0_Talking_to_Bitcoind_Other.md\",\n{\n 'v': 272,\n 'f': \"272\",\n }],\n [\"17_1_Accessing_Bitcoind_with_Go.md\",\n{\n 'v': 697,\n 'f': \"697\",\n }],\n [\"17_2_Accessing_Bitcoind_with_Java.md\",\n{\n 'v': 782,\n 'f': \"782\",\n }],\n [\"17_3_Accessing_Bitcoind_with_NodeJS.md\",\n{\n 'v': 476,\n 'f': \"476\",\n }],\n [\"17_4_Accessing_Bitcoind_with_Python.md\",\n{\n 'v': 1246,\n 'f': \"1246\",\n }],\n [\"17_5_Accessing_Bitcoind_with_Rust.md\",\n{\n 'v': 905,\n 'f': \"905\",\n }],\n [\"17_6_Accessing_Bitcoind_with_Swift.md\",\n{\n 'v': 1588,\n 'f': \"1588\",\n }],\n [\"18_0_Understanding_Your_Lightning_Setup.md\",\n{\n 'v': 185,\n 'f': \"185\",\n }],\n [\"18_1_Verifying_Your_Lightning_Setup.md\",\n{\n 'v': 1347,\n 'f': \"1347\",\n }],\n [\"18_2_Knowing_Your_lightning_Setup.md\",\n{\n 'v': 421,\n 'f': \"421\",\n }],\n [\"18_2__Interlude_Accessing_a_Second_Lightning_Node.md\",\n{\n 'v': 882,\n 'f': \"882\",\n }],\n [\"18_3_Setting_Up_a_Channel.md\",\n{\n 'v': 1219,\n 'f': \"1219\",\n }],\n [\"19_0_Using_Lightning.md\",\n{\n 'v': 145,\n 'f': \"145\",\n }],\n [\"19_1_Generate_a_Payment_Request.md\",\n{\n 'v': 995,\n 'f': \"995\",\n }],\n [\"19_2_Paying_a_Invoice.md\",\n{\n 'v': 614,\n 'f': \"614\",\n }],\n [\"19_3_Closing_a_Channel.md\",\n{\n 'v': 866,\n 'f': \"866\",\n }],\n [\"19_4_Lightning_Network_Review.md\",\n{\n 'v': 677,\n 'f': \"677\",\n }],\n [\"A0_Appendices.md\",\n{\n 'v': 110,\n 'f': \"110\",\n }],\n [\"A1_0_Understanding_Bitcoin_Standup.md\",\n{\n 'v': 412,\n 'f': \"412\",\n }],\n [\"A2_0_Compiling_Bitcoin_from_Source.md\",\n{\n 'v': 414,\n 'f': \"414\",\n }],\n [\"A3_0_Using_Bitcoin_Regtest.md\",\n{\n 'v': 995,\n 'f': \"995\",\n }],\n [\"CLA.md\",\n{\n 'v': 512,\n 'f': \"512\",\n }],\n [\"CONTRIBUTING.md\",\n{\n 'v': 555,\n 'f': \"555\",\n }],\n [\"LICENSE-CC-BY-4.0.md\",\n{\n 'v': 2734,\n 'f': \"2734\",\n }],\n [\"README.md\",\n{\n 'v': 1366,\n 'f': \"1366\",\n }],\n [\"TODO-30.md\",\n{\n 'v': 122,\n 'f': \"122\",\n }],\n [\"TODO.md\",\n{\n 'v': 734,\n 'f': \"734\",\n }]],\n columns: [[\"string\", \"Chapter\"], [\"number\", \"Word Count\"]],\n columnOptions: [],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n ", + "text/plain": [ + "" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ChapterWord Count
4401_0_Introduction.md1128
6901_1_Introducing_Bitcoin.md2784
9202_0_Setting_Up_a_Bitcoin-Core_VPS.md221
3302_1_Setting_Up_a_Bitcoin-Core_VPS_with_StackS...1937
6602_2_Setting_Up_Bitcoin_Core_Other.md245
.........
45CONTRIBUTING.md555
34LICENSE-CC-BY-4.0.md2734
0README.md1366
52TODO-30.md122
29TODO.md734
\n", + "

103 rows × 2 columns

\n", + "
" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4dFD792BBy0S", + "outputId": "945da188-2d3b-4061-8f23-08e39e6f97f4" + }, + "source": [ + "total_count = chapter_word_counts['Word Count'].sum()\n", + "total_count" + ], + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "89946" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 44 + } + ] + } + ] +} \ No newline at end of file