{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Untitled25.ipynb", "provenance": [], "authorship_tag": "ABX9TyNHQ/J52I0G+daf1wNCNdAI", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "X7D8JEIgy9fV" }, "source": [ "This notebook counts through the translatable words of each chapter, including chapter links; ignores code blocks, markdown characters, and tokens containing digits." ] }, { "cell_type": "markdown", "metadata": { "id": "TQUe8ZZed-Bp" }, "source": [ "Run in colab by clicking the link above to view the results as a paginated table with word counts for each chapter near the bottom of the notebook. Total word count at the very bottom." ] }, { "cell_type": "code", "metadata": { "id": "kT4wyWuy2szs" }, "source": [ "import os\n", "import pandas as pd" ], "execution_count": 1, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9J62Uh4H2Y9g", "outputId": "3295f8b2-5255-4eac-bc78-3da247037c41" }, "source": [ "!git clone https://github.com/icculp/Learning-Bitcoin-from-the-Command-Line.git" ], "execution_count": 2, "outputs": [ { "output_type": "stream", "text": [ "Cloning into 'Learning-Bitcoin-from-the-Command-Line'...\n", "remote: Enumerating objects: 6715, done.\u001b[K\n", "remote: Counting objects: 100% (319/319), done.\u001b[K\n", "remote: Compressing objects: 100% (193/193), done.\u001b[K\n", "remote: Total 6715 (delta 157), reused 277 (delta 125), pack-reused 6396\u001b[K\n", "Receiving objects: 100% (6715/6715), 7.56 MiB | 22.84 MiB/s, done.\n", "Resolving deltas: 100% (4116/4116), done.\n" ], "name": "stdout" } ] },
{ "cell_type": "code", "metadata": { "id": "k_g4-Fvn2yPy" }, "source": [
"# Default location of the repository cloned by the cell above.\n",
"REPO_PATH = '/content/Learning-Bitcoin-from-the-Command-Line/'\n",
"CODE_FENCE = '```'\n",
"# Files in the repository root that are never counted.\n",
"IGNORED_FILES = ('bitcoin.conf-annotated.txt', 'TODO.md', 'TODO-30.md')\n",
"# Stand-alone tokens that are rejected outright.\n",
"IGNORED_TOKENS = frozenset(['*', '**', '#', '##', '###', '####',\n",
"                            '-', '—', '>', '`', '/', '&', '|', '~'])\n",
"# A token containing any of these substrings is rejected.\n",
"IGNORED_FRAGMENTS = ('`', '~/', '/.', '|-')\n",
"\n",
"\n",
"def _count_line_words(line):\n",
"    \"\"\" Returns the number of accepted word tokens on one line of text.\n",
"\n",
"        The line is split on whitespace. A token containing '.md' is treated\n",
"        as a trailing chapter link; every other token is counted unless it\n",
"        contains a digit, is in IGNORED_TOKENS, contains one of\n",
"        IGNORED_FRAGMENTS, or both starts and ends with ':' or with '\"'.\n",
"    \"\"\"\n",
"    count = 0\n",
"    for word in line.split():\n",
"        if '.md' in word:  # indicates trailing link with chapter name\n",
"            if ']' in word:  # last word of the link text, fused to the link target\n",
"                count += 1\n",
"            count += len(word.split('_')[2:])  # ignoring chapter numbers\n",
"            continue\n",
"        if (word in IGNORED_TOKENS\n",
"                or any(char.isdigit() for char in word)\n",
"                or any(fragment in word for fragment in IGNORED_FRAGMENTS)\n",
"                or (word[0] == ':' and word[-1] == ':')\n",
"                or (word[0] == '\"' and word[-1] == '\"')):\n",
"            continue\n",
"        count += 1\n",
"    return count\n",
"\n",
"\n",
"def _count_chapter_words(lines):\n",
"    \"\"\" Returns the accepted word count for an iterable of markdown lines,\n",
"        skipping everything between code fences.\n",
"    \"\"\"\n",
"    count = 0\n",
"    in_code_block = False\n",
"    for line in lines:\n",
"        if in_code_block:\n",
"            # Only whitespace may precede a closing fence. The indentation is\n",
"            # stripped BEFORE testing, otherwise an indented closing fence\n",
"            # (e.g. inside a list item) is never seen and the rest of the\n",
"            # chapter is silently skipped.\n",
"            if line.lstrip().startswith(CODE_FENCE):\n",
"                in_code_block = False\n",
"            continue\n",
"        if CODE_FENCE in line:\n",
"            in_code_block = True\n",
"            continue\n",
"        count += _count_line_words(line)\n",
"    return count\n",
"\n",
"\n",
"def count_words(repo_path=REPO_PATH):\n",
"    \"\"\" Counts words ignoring code blocks and digits\n",
"\n",
"        Every regular file directly inside repo_path whose name ends with\n",
"        '.md' and is not listed in IGNORED_FILES is read as UTF-8 and counted.\n",
"\n",
"        Args:\n",
"            repo_path: directory holding the chapter files; defaults to the\n",
"                       Colab clone location REPO_PATH.\n",
"\n",
"        Returns:\n",
"            DataFrame with one row per chapter and the columns\n",
"            'Chapter' (file name) and 'Word Count', in os.listdir order.\n",
"\n",
"        To test for quality, call _count_chapter_words() on the lines of a\n",
"        single chapter, or _count_line_words() on a single line.\n",
"    \"\"\"\n",
"    counts = []\n",
"    for chapter in os.listdir(repo_path):\n",
"        chapter_path = os.path.join(repo_path, chapter)\n",
"        if (chapter in IGNORED_FILES\n",
"                or not chapter.endswith('.md')\n",
"                or not os.path.isfile(chapter_path)):\n",
"            continue\n",
"        with open(chapter_path, encoding='utf-8') as chapter_file:\n",
"            counts.append((chapter, _count_chapter_words(chapter_file)))\n",
"    return pd.DataFrame(counts, columns=['Chapter', 'Word Count'])"
], "execution_count": 3, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "2-pkEaYF3Uxm" }, "source": [ "chapter_word_counts = count_words().sort_values(by=['Chapter'])" ], "execution_count": 4, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ZIUumQnh9C_1" }, "source": [ "View in colab for paginated table" ] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 631 }, "id": "K73GH7UFmwI5", "outputId": "246c0b17-0854-4811-c67f-8081d7950272" }, "source": [ "from google.colab import data_table\n", "data_table.DataTable(chapter_word_counts, include_index=False)" ], "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/a6224c040fa35dcf/data_table.js\";\n\n window.createDataTable({\n data: [[\"01_0_Introduction.md\",\n{\n 'v': 1144,\n 'f': \"1144\",\n }],\n [\"01_1_Introducing_Bitcoin.md\",\n{\n 'v': 2735,\n 'f': \"2735\",\n }],\n [\"02_0_Setting_Up_a_Bitcoin-Core_VPS.md\",\n{\n 'v': 226,\n 'f': \"226\",\n }],\n [\"02_1_Setting_Up_a_Bitcoin-Core_VPS_with_StackScript.md\",\n{\n 'v': 2746,\n 'f': \"2746\",\n }],\n [\"02_2_Setting_Up_Bitcoin_Core_Other.md\",\n{\n 'v': 254,\n 'f': \"254\",\n }],\n [\"03_0_Understanding_Your_Bitcoin_Setup.md\",\n{\n 'v': 248,\n 'f': \"248\",\n }],\n
[\"03_1_Verifying_Your_Bitcoin_Setup.md\",\n{\n 'v': 773,\n 'f': \"773\",\n }],\n [\"03_2_Knowing_Your_Bitcoin_Setup.md\",\n{\n 'v': 517,\n 'f': \"517\",\n }],\n [\"03_3_Setting_Up_Your_Wallet.md\",\n{\n 'v': 1699,\n 'f': \"1699\",\n }],\n [\"03_3__Interlude_Using_Command-Line_Variables.md\",\n{\n 'v': 347,\n 'f': \"347\",\n }],\n [\"03_4_Receiving_a_Transaction.md\",\n{\n 'v': 1479,\n 'f': \"1479\",\n }],\n [\"03_5_Understanding_the_Descriptor.md\",\n{\n 'v': 1349,\n 'f': \"1349\",\n }],\n [\"04_0_Sending_Bitcoin_Transactions.md\",\n{\n 'v': 176,\n 'f': \"176\",\n }],\n [\"04_1_Sending_Coins_The_Easy_Way.md\",\n{\n 'v': 1195,\n 'f': \"1195\",\n }],\n [\"04_2_Creating_a_Raw_Transaction.md\",\n{\n 'v': 1720,\n 'f': \"1720\",\n }],\n [\"04_2__Interlude_Using_JQ.md\",\n{\n 'v': 1956,\n 'f': \"1956\",\n }],\n [\"04_3_Creating_a_Raw_Transaction_with_Named_Arguments.md\",\n{\n 'v': 413,\n 'f': \"413\",\n }],\n [\"04_4_Sending_Coins_with_a_Raw_Transaction.md\",\n{\n 'v': 1024,\n 'f': \"1024\",\n }],\n [\"04_4__Interlude_Using_Curl.md\",\n{\n 'v': 1643,\n 'f': \"1643\",\n }],\n [\"04_5_Sending_Coins_with_Automated_Raw_Transactions.md\",\n{\n 'v': 614,\n 'f': \"614\",\n }],\n [\"04_6_Creating_a_Segwit_Transaction.md\",\n{\n 'v': 1172,\n 'f': \"1172\",\n }],\n [\"05_0_Controlling_Bitcoin_Transactions.md\",\n{\n 'v': 149,\n 'f': \"149\",\n }],\n [\"05_1_Watching_for_Stuck_Transactions.md\",\n{\n 'v': 595,\n 'f': \"595\",\n }],\n [\"05_2_Resending_a_Transaction_with_RBF.md\",\n{\n 'v': 1372,\n 'f': \"1372\",\n }],\n [\"05_3_Funding_a_Transaction_with_CPFP.md\",\n{\n 'v': 827,\n 'f': \"827\",\n }],\n [\"06_0_Expanding_Bitcoin_Transactions_Multisigs.md\",\n{\n 'v': 155,\n 'f': \"155\",\n }],\n [\"06_1_Sending_a_Transaction_to_a_Multisig.md\",\n{\n 'v': 1764,\n 'f': \"1764\",\n }],\n [\"06_2_Spending_a_Transaction_to_a_Multisig.md\",\n{\n 'v': 1079,\n 'f': \"1079\",\n }],\n [\"06_3_Sending_an_Automated_Multisig.md\",\n{\n 'v': 613,\n 'f': \"613\",\n }],\n 
[\"07_0_Expanding_Bitcoin_Transactions_PSBTs.md\",\n{\n 'v': 169,\n 'f': \"169\",\n }],\n [\"07_1_Creating_a_Partially_Signed_Bitcoin_Transaction.md\",\n{\n 'v': 1470,\n 'f': \"1470\",\n }],\n [\"07_2_Using_a_Partially_Signed_Bitcoin_Transaction.md\",\n{\n 'v': 1393,\n 'f': \"1393\",\n }],\n [\"07_3_Integrating_with_Hardware_Wallets.md\",\n{\n 'v': 2150,\n 'f': \"2150\",\n }],\n [\"08_0_Expanding_Bitcoin_Transactions_Other.md\",\n{\n 'v': 139,\n 'f': \"139\",\n }],\n [\"08_1_Sending_a_Transaction_with_a_Locktime.md\",\n{\n 'v': 1483,\n 'f': \"1483\",\n }],\n [\"08_2_Sending_a_Transaction_with_Data.md\",\n{\n 'v': 580,\n 'f': \"580\",\n }],\n [\"09_0_Introducing_Bitcoin_Scripts.md\",\n{\n 'v': 196,\n 'f': \"196\",\n }],\n [\"09_1_Understanding_the_Foundation_of_Transactions.md\",\n{\n 'v': 989,\n 'f': \"989\",\n }],\n [\"09_2_Running_a_Bitcoin_Script.md\",\n{\n 'v': 863,\n 'f': \"863\",\n }],\n [\"09_3_Testing_a_Bitcoin_Script.md\",\n{\n 'v': 1000,\n 'f': \"1000\",\n }],\n [\"09_4_Scripting_a_P2PKH.md\",\n{\n 'v': 838,\n 'f': \"838\",\n }],\n [\"09_5_Scripting_a_P2WPKH.md\",\n{\n 'v': 845,\n 'f': \"845\",\n }],\n [\"10_0_Embedding_Bitcoin_Scripts_in_P2SH_Transactions.md\",\n{\n 'v': 170,\n 'f': \"170\",\n }],\n [\"10_1_Understanding_the_Foundation_of_P2SH.md\",\n{\n 'v': 1164,\n 'f': \"1164\",\n }],\n [\"10_2_Building_the_Structure_of_P2SH.md\",\n{\n 'v': 1284,\n 'f': \"1284\",\n }],\n [\"10_3_Running_a_Bitcoin_Script_with_P2SH.md\",\n{\n 'v': 323,\n 'f': \"323\",\n }],\n [\"10_4_Scripting_a_Multisig.md\",\n{\n 'v': 1016,\n 'f': \"1016\",\n }],\n [\"10_5_Scripting_a_Segwit_Script.md\",\n{\n 'v': 750,\n 'f': \"750\",\n }],\n [\"10_6_Spending_a_P2SH_Transaction.md\",\n{\n 'v': 384,\n 'f': \"384\",\n }],\n [\"11_0_Empowering_Timelock_with_Bitcoin_Scripts.md\",\n{\n 'v': 108,\n 'f': \"108\",\n }],\n [\"11_1_Understanding_Timelock_Options.md\",\n{\n 'v': 557,\n 'f': \"557\",\n }],\n [\"11_2_Using_CLTV_in_Scripts.md\",\n{\n 'v': 1197,\n 'f': \"1197\",\n }],\n 
[\"11_3_Using_CSV_in_Scripts.md\",\n{\n 'v': 1470,\n 'f': \"1470\",\n }],\n [\"12_0_Expanding_Bitcoin_Scripts.md\",\n{\n 'v': 99,\n 'f': \"99\",\n }],\n [\"12_1_Using_Script_Conditionals.md\",\n{\n 'v': 1120,\n 'f': \"1120\",\n }],\n [\"12_2_Using_Other_Script_Commands.md\",\n{\n 'v': 407,\n 'f': \"407\",\n }],\n [\"13_0_Designing_Real_Bitcoin_Scripts.md\",\n{\n 'v': 116,\n 'f': \"116\",\n }],\n [\"13_1_Writing_Puzzle_Scripts.md\",\n{\n 'v': 998,\n 'f': \"998\",\n }],\n [\"13_2_Writing_Complex_Multisig_Scripts.md\",\n{\n 'v': 996,\n 'f': \"996\",\n }],\n [\"13_3_Empowering_Bitcoin_with_Scripts.md\",\n{\n 'v': 1467,\n 'f': \"1467\",\n }],\n [\"14_0_Using_Tor.md\",\n{\n 'v': 116,\n 'f': \"116\",\n }],\n [\"14_1_Verifying_Your_Tor_Setup.md\",\n{\n 'v': 1568,\n 'f': \"1568\",\n }],\n [\"14_2_Changing_Your_Bitcoin_Hidden_Services.md\",\n{\n 'v': 434,\n 'f': \"434\",\n }],\n [\"14_3_Adding_SSH_Hidden_Services.md\",\n{\n 'v': 330,\n 'f': \"330\",\n }],\n [\"15_0_Talking_to_Bitcoind.md\",\n{\n 'v': 254,\n 'f': \"254\",\n }],\n [\"15_1_Accessing_Bitcoind_with_C.md\",\n{\n 'v': 1238,\n 'f': \"1238\",\n }],\n [\"15_2_Programming_Bitcoind_with_C.md\",\n{\n 'v': 1427,\n 'f': \"1427\",\n }],\n [\"15_3_Receiving_Bitcoind_Notifications_with_C.md\",\n{\n 'v': 650,\n 'f': \"650\",\n }],\n [\"16_0_Programming_with_Libwally.md\",\n{\n 'v': 333,\n 'f': \"333\",\n }],\n [\"16_1_Setting_Up_Libwally.md\",\n{\n 'v': 559,\n 'f': \"559\",\n }],\n [\"16_2_Using_BIP39_in_Libwally.md\",\n{\n 'v': 939,\n 'f': \"939\",\n }],\n [\"16_3_Using_BIP32_in_Libwally.md\",\n{\n 'v': 959,\n 'f': \"959\",\n }],\n [\"16_4_Using_PSBTs_in_Libwally.md\",\n{\n 'v': 989,\n 'f': \"989\",\n }],\n [\"16_5_Using_Scripts_in_Libwally.md\",\n{\n 'v': 785,\n 'f': \"785\",\n }],\n [\"16_6_Using_Other_Functions_in_Libwally.md\",\n{\n 'v': 655,\n 'f': \"655\",\n }],\n [\"16_7_Integrating_Libwally_and_Bitcoin-CLI.md\",\n{\n 'v': 1380,\n 'f': \"1380\",\n }],\n [\"17_0_Talking_to_Bitcoind_Other.md\",\n{\n 'v': 286,\n 'f': 
\"286\",\n }],\n [\"17_1_Accessing_Bitcoind_with_Go.md\",\n{\n 'v': 547,\n 'f': \"547\",\n }],\n [\"17_2_Accessing_Bitcoind_with_Java.md\",\n{\n 'v': 821,\n 'f': \"821\",\n }],\n [\"17_3_Accessing_Bitcoind_with_NodeJS.md\",\n{\n 'v': 393,\n 'f': \"393\",\n }],\n [\"17_4_Accessing_Bitcoind_with_Python.md\",\n{\n 'v': 1158,\n 'f': \"1158\",\n }],\n [\"17_5_Accessing_Bitcoind_with_Rust.md\",\n{\n 'v': 829,\n 'f': \"829\",\n }],\n [\"17_6_Accessing_Bitcoind_with_Swift.md\",\n{\n 'v': 1503,\n 'f': \"1503\",\n }],\n [\"18_0_Understanding_Your_Lightning_Setup.md\",\n{\n 'v': 192,\n 'f': \"192\",\n }],\n [\"18_1_Verifying_Your_Lightning_Setup.md\",\n{\n 'v': 1294,\n 'f': \"1294\",\n }],\n [\"18_2_Knowing_Your_lightning_Setup.md\",\n{\n 'v': 399,\n 'f': \"399\",\n }],\n [\"18_2__Interlude_Accessing_a_Second_Lightning_Node.md\",\n{\n 'v': 886,\n 'f': \"886\",\n }],\n [\"18_3_Setting_Up_a_Channel.md\",\n{\n 'v': 1173,\n 'f': \"1173\",\n }],\n [\"19_0_Using_Lightning.md\",\n{\n 'v': 146,\n 'f': \"146\",\n }],\n [\"19_1_Generate_a_Payment_Request.md\",\n{\n 'v': 968,\n 'f': \"968\",\n }],\n [\"19_2_Paying_a_Invoice.md\",\n{\n 'v': 604,\n 'f': \"604\",\n }],\n [\"19_3_Closing_a_Channel.md\",\n{\n 'v': 848,\n 'f': \"848\",\n }],\n [\"19_4_Lightning_Network_Review.md\",\n{\n 'v': 626,\n 'f': \"626\",\n }],\n [\"A0_Appendices.md\",\n{\n 'v': 112,\n 'f': \"112\",\n }],\n [\"A1_0_Understanding_Bitcoin_Standup.md\",\n{\n 'v': 420,\n 'f': \"420\",\n }],\n [\"A2_0_Compiling_Bitcoin_from_Source.md\",\n{\n 'v': 412,\n 'f': \"412\",\n }],\n [\"A3_0_Using_Bitcoin_Regtest.md\",\n{\n 'v': 980,\n 'f': \"980\",\n }],\n [\"CLA.md\",\n{\n 'v': 495,\n 'f': \"495\",\n }],\n [\"CONTRIBUTING.md\",\n{\n 'v': 529,\n 'f': \"529\",\n }],\n [\"LICENSE-CC-BY-4.0.md\",\n{\n 'v': 2716,\n 'f': \"2716\",\n }],\n [\"README.md\",\n{\n 'v': 1705,\n 'f': \"1705\",\n }],\n [\"TRANSLATING.md\",\n{\n 'v': 686,\n 'f': \"686\",\n }]],\n columns: [[\"string\", \"Chapter\"], [\"number\", \"Word Count\"]],\n 
columnOptions: [],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n ", "text/plain": [ "" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ChapterWord Count
6601_0_Introduction.md1144
6401_1_Introducing_Bitcoin.md2735
8202_0_Setting_Up_a_Bitcoin-Core_VPS.md226
7102_1_Setting_Up_a_Bitcoin-Core_VPS_with_StackS...2746
5902_2_Setting_Up_Bitcoin_Core_Other.md254
.........
5CLA.md495
0CONTRIBUTING.md529
34LICENSE-CC-BY-4.0.md2716
29README.md1705
22TRANSLATING.md686
\n", "

102 rows × 2 columns

\n", "
" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] }, { "cell_type": "markdown", "metadata": { "id": "14kdusmNzcfz" }, "source": [ "To convert the table to a markdown format and save as 'Chapter_word_counts.md'" ] },
{ "cell_type": "code", "metadata": { "id": "uc7eKo9TkidD" }, "source": [
"from IPython.display import Markdown, display\n",
"from tabulate import tabulate\n",
"\n",
"\n",
"# borrowed from https://stackoverflow.com/questions/33181846/programmatically-convert-pandas-dataframe-to-markdown-table\n",
"\n",
"def pandas_df_to_markdown_table(df):\n",
"    \"\"\" Returns df as pipe-delimited text wrapped in an IPython Markdown object.\n",
"\n",
"        A row holding '---' in every column is placed ahead of the data rows,\n",
"        so it lands directly under the header line written by to_csv.\n",
"    \"\"\"\n",
"    separator_row = pd.DataFrame([['---'] * len(df.columns)], columns=df.columns)\n",
"    table_text = pd.concat([separator_row, df]).to_csv(sep=\"|\", index=False)\n",
"    return Markdown(table_text)\n",
"\n",
"def df_to_markdown(df, y_index=False):\n",
"    \"\"\" Returns df as a 'pipe' format table string built by tabulate.\n",
"\n",
"        Unless y_index is true, the text up to the second '|' of every row\n",
"        (the leading index cell) is cut off and replaced by '| '.\n",
"    \"\"\"\n",
"    blob = tabulate(df, headers='keys', tablefmt='pipe')\n",
"    if y_index:\n",
"        return blob\n",
"    trimmed_rows = []\n",
"    for row in blob.split('\\n'):\n",
"        trimmed_rows.append('| {}'.format(row.split('|', 2)[-1]))\n",
"    return '\\n'.join(trimmed_rows)"
], "execution_count": 6, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "Y0QVAGKqzWX6" }, "source": [ "mkdt = pandas_df_to_markdown_table(chapter_word_counts)\n", "\n", "with open('Chapter_word_counts.md', 'w') as m:\n", "    m.write(str(mkdt.data))" ], "execution_count": 7, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_gGsbGDckIrC", "outputId": "8d4bbec6-aa6e-465b-fec7-4251f7f4cecf" }, "source": [ "total_count_translatable = chapter_word_counts['Word Count'].sum()\n", "total_count_translatable" ], "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "89069" ] }, "metadata": { "tags": [] }, "execution_count": 8 } ] } ] }