Learning-Bitcoin-from-the-C.../Chapter_word_counts.ipynb
2021-06-16 23:02:24 -05:00

388 lines
25 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled25.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyMjw6y4q6XdjW6h80beORDK",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/icculp/Learning-Bitcoin-from-the-Command-Line/blob/master/Chapter_word_counts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X7D8JEIgy9fV"
},
"source": [
"This notebook counts through the translatable words of each chapter, including chapter links; ignores code blocks, markdown characters, and other non-translatable characters."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "TQUe8ZZed-Bp"
},
"source": [
"Run in colab by clicking the link above to view the results as a paginated table with word counts for each chapter near the bottom of the notebook. Total word count at the very bottom. Markdown friendly table created in Chapter_word_counts.md"
]
},
{
"cell_type": "code",
"metadata": {
"id": "kT4wyWuy2szs"
},
"source": [
"import os\n",
"import pandas as pd"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9J62Uh4H2Y9g",
"outputId": "05d36385-659c-4f88-a82c-6dd877efd645"
},
"source": [
"!git clone https://github.com/BlockchainCommons/Learning-Bitcoin-from-the-Command-Line.git"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'Learning-Bitcoin-from-the-Command-Line'...\n",
"remote: Enumerating objects: 6705, done.\u001b[K\n",
"remote: Counting objects: 100% (309/309), done.\u001b[K\n",
"remote: Compressing objects: 100% (192/192), done.\u001b[K\n",
"remote: Total 6705 (delta 147), reused 267 (delta 112), pack-reused 6396\u001b[K\n",
"Receiving objects: 100% (6705/6705), 7.55 MiB | 25.61 MiB/s, done.\n",
"Resolving deltas: 100% (4106/4106), done.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "k_g4-Fvn2yPy"
},
"source": [
"def count_words():\n",
" \"\"\" Counts words ignoring code blocks and digits\n",
" To test for quality:\n",
" lines 16-18 to test a single chapter\n",
" uncomment line 31 to view skipped code block sections\n",
" uncomment line 55 to view rejected word tokens (not including code blocks)\n",
" uncomment line 57 to view accepted word tokens\n",
" but not at the same time, and best one chapter at a time\n",
" \"\"\"\n",
" counts = []\n",
" repo_path = '/content/Learning-Bitcoin-from-the-Command-Line/'\n",
" for chapter in os.listdir(repo_path):\n",
" ''' uncomment lines 16-18 to test a single chapter, replacing\n",
" ch_name with the name you want to test\n",
" '''\n",
" #ch_name = '04_2__Interlude_Using_JQ.md'\n",
" #if chapter != ch_name:\n",
" # continue\n",
" ignore_list = ['bitcoin.conf-annotated.txt', 'TODO.md', 'TODO-30.md']\n",
" if chapter in ignore_list or\\\n",
" not chapter.endswith('md'):\n",
" continue\n",
" count = 0\n",
" flag = 0 # ignores words between code markdown ```\n",
" with open(repo_path + chapter) as ch:\n",
" for line in ch.readlines():\n",
" if flag:\n",
" if '```' in line[:3].replace(' ', ''): # chars can't precede code closing markdown\n",
" flag = 0\n",
" continue\n",
" # print(line) # view uncounted code blocks\n",
" continue\n",
" if '```' in line:\n",
" flag = 1\n",
" continue\n",
" for word in line.split():\n",
" if '.md' in word: # indicates trailing link with chapter name;\n",
" ch_link_tokens = word.split('_')\n",
" if ']' in word: # counts last word of trailing link before chapter name\n",
" count += 1\n",
" link_tokens_count = len(ch_link_tokens[2:]) # ignoring chapter numbers\n",
" count += link_tokens_count\n",
" # print(word, '[TOK]', link_tokens_count, end='[SEPTOK]')\n",
" continue\n",
" ignore = ['*', '**', '#', '##', '###', '####',\n",
" '-', '—', '>', '`', '/', '&', '|', '~']\n",
" if any(ch.isdigit() for ch in word) or\\\n",
" word in ignore or\\\n",
" '`' in word or\\\n",
" '~/' in word or\\\n",
" '/.' in word or\\\n",
" '|-' in word or\\\n",
" (word[0] == ':' and word[-1] == ':') or\\\n",
" (word[0] == '\"' and word[-1] == '\"'):\n",
" # print(word) # , end='[SEP]') # view rejected tokens\n",
" continue\n",
" # print(word, count) # , end='[SEP]') # view accepted tokens\n",
" count += 1\n",
" counts.append((chapter, count))\n",
" # print(chapter, count)\n",
" return pd.DataFrame(counts, columns=['Chapter', 'Word Count'])"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "2-pkEaYF3Uxm"
},
"source": [
"chapter_word_counts = count_words()\n",
"# view accepted or rejected tokens below if line 55 or 53 uncommented in count_words(), respectively"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Tc9IsNpC-vwR"
},
"source": [
"chapter_word_counts.sort_values(by=['Chapter'], inplace=True)\n",
"total_count_translatable = chapter_word_counts['Word Count'].sum()\n",
"chapter_word_counts.loc[len(chapter_word_counts.index)] = ['TOTAL', total_count_translatable] "
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZIUumQnh9C_1"
},
"source": [
"View in colab for paginated table"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 631
},
"id": "K73GH7UFmwI5",
"outputId": "a4361d36-caf3-4bb7-ac6d-4cdc71f49afc"
},
"source": [
"from google.colab import data_table\n",
"data_table.DataTable(chapter_word_counts, include_index=False)"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/a6224c040fa35dcf/data_table.js\";\n\n window.createDataTable({\n data: [[\"01_0_Introduction.md\",\n{\n 'v': 1144,\n 'f': \"1144\",\n }],\n [\"01_1_Introducing_Bitcoin.md\",\n{\n 'v': 2735,\n 'f': \"2735\",\n }],\n [\"02_0_Setting_Up_a_Bitcoin-Core_VPS.md\",\n{\n 'v': 226,\n 'f': \"226\",\n }],\n [\"02_1_Setting_Up_a_Bitcoin-Core_VPS_with_StackScript.md\",\n{\n 'v': 2746,\n 'f': \"2746\",\n }],\n [\"02_2_Setting_Up_Bitcoin_Core_Other.md\",\n{\n 'v': 254,\n 'f': \"254\",\n }],\n [\"03_0_Understanding_Your_Bitcoin_Setup.md\",\n{\n 'v': 248,\n 'f': \"248\",\n }],\n [\"03_1_Verifying_Your_Bitcoin_Setup.md\",\n{\n 'v': 773,\n 'f': \"773\",\n }],\n [\"03_2_Knowing_Your_Bitcoin_Setup.md\",\n{\n 'v': 517,\n 'f': \"517\",\n }],\n [\"03_3_Setting_Up_Your_Wallet.md\",\n{\n 'v': 1699,\n 'f': \"1699\",\n }],\n [\"03_3__Interlude_Using_Command-Line_Variables.md\",\n{\n 'v': 347,\n 'f': \"347\",\n }],\n [\"03_4_Receiving_a_Transaction.md\",\n{\n 'v': 1479,\n 'f': \"1479\",\n }],\n [\"03_5_Understanding_the_Descriptor.md\",\n{\n 'v': 1349,\n 'f': \"1349\",\n }],\n [\"04_0_Sending_Bitcoin_Transactions.md\",\n{\n 'v': 176,\n 'f': \"176\",\n }],\n [\"04_1_Sending_Coins_The_Easy_Way.md\",\n{\n 'v': 1195,\n 'f': \"1195\",\n }],\n [\"04_2_Creating_a_Raw_Transaction.md\",\n{\n 'v': 1720,\n 'f': \"1720\",\n }],\n [\"04_2__Interlude_Using_JQ.md\",\n{\n 'v': 1956,\n 'f': \"1956\",\n }],\n [\"04_3_Creating_a_Raw_Transaction_with_Named_Arguments.md\",\n{\n 'v': 413,\n 'f': \"413\",\n }],\n [\"04_4_Sending_Coins_with_a_Raw_Transaction.md\",\n{\n 'v': 1024,\n 'f': \"1024\",\n }],\n [\"04_4__Interlude_Using_Curl.md\",\n{\n 'v': 1643,\n 'f': \"1643\",\n }],\n [\"04_5_Sending_Coins_with_Automated_Raw_Transactions.md\",\n{\n 'v': 614,\n 'f': \"614\",\n }],\n [\"04_6_Creating_a_Segwit_Transaction.md\",\n{\n 'v': 1172,\n 'f': \"1172\",\n }],\n [\"05_0_Controlling_Bitcoin_Transactions.md\",\n{\n 'v': 149,\n 'f': \"149\",\n }],\n [\"05_1_Watching_for_Stuck_Transactions.md\",\n{\n 'v': 595,\n 'f': \"595\",\n }],\n [\"05_2_Resending_a_Transaction_with_RBF.md\",\n{\n 'v': 1372,\n 'f': \"1372\",\n }],\n [\"05_3_Funding_a_Transaction_with_CPFP.md\",\n{\n 'v': 827,\n 'f': \"827\",\n }],\n [\"06_0_Expanding_Bitcoin_Transactions_Multisigs.md\",\n{\n 'v': 155,\n 'f': \"155\",\n }],\n [\"06_1_Sending_a_Transaction_to_a_Multisig.md\",\n{\n 'v': 1764,\n 'f': \"1764\",\n }],\n [\"06_2_Spending_a_Transaction_to_a_Multisig.md\",\n{\n 'v': 1079,\n 'f': \"1079\",\n }],\n [\"06_3_Sending_an_Automated_Multisig.md\",\n{\n 'v': 613,\n 'f': \"613\",\n }],\n [\"07_0_Expanding_Bitcoin_Transactions_PSBTs.md\",\n{\n 'v': 169,\n 'f': \"169\",\n }],\n [\"07_1_Creating_a_Partially_Signed_Bitcoin_Transaction.md\",\n{\n 'v': 1470,\n 'f': \"1470\",\n }],\n [\"07_2_Using_a_Partially_Signed_Bitcoin_Transaction.md\",\n{\n 'v': 1393,\n 'f': \"1393\",\n }],\n [\"07_3_Integrating_with_Hardware_Wallets.md\",\n{\n 'v': 2150,\n 'f': \"2150\",\n }],\n [\"08_0_Expanding_Bitcoin_Transactions_Other.md\",\n{\n 'v': 139,\n 'f': \"139\",\n }],\n [\"08_1_Sending_a_Transaction_with_a_Locktime.md\",\n{\n 'v': 1483,\n 'f': \"1483\",\n }],\n [\"08_2_Sending_a_Transaction_with_Data.md\",\n{\n 'v': 580,\n 'f': \"580\",\n }],\n [\"09_0_Introducing_Bitcoin_Scripts.md\",\n{\n 'v': 196,\n 'f': \"196\",\n }],\n [\"09_1_Understanding_the_Foundation_of_Transactions.md\",\n{\n 'v': 989,\n 'f': \"989\",\n }],\n [\"09_2_Running_a_Bitcoin_Script.md\",\n{\n 'v': 863,\n 'f': \"863\",\n }],\n [\"09_3_Testing_a_Bitcoin_Script.md\",\n{\n 'v': 1000,\n 'f': \"1000\",\n }],\n [\"09_4_Scripting_a_P2PKH.md\",\n{\n 'v': 838,\n 'f': \"838\",\n }],\n [\"09_5_Scripting_a_P2WPKH.md\",\n{\n 'v': 845,\n 'f': \"845\",\n }],\n [\"10_0_Embedding_Bitcoin_Scripts_in_P2SH_Transactions.md\",\n{\n 'v': 170,\n 'f': \"170\",\n }],\n [\"10_1_Understanding_the_Foundation_of_P2SH.md\",\n{\n 'v': 1164,\n 'f': \"1164\",\n }],\n [\"10_2_Building_the_Structure_of_P2SH.md\",\n{\n 'v': 1284,\n 'f': \"1284\",\n }],\n [\"10_3_Running_a_Bitcoin_Script_with_P2SH.md\",\n{\n 'v': 323,\n 'f': \"323\",\n }],\n [\"10_4_Scripting_a_Multisig.md\",\n{\n 'v': 1016,\n 'f': \"1016\",\n }],\n [\"10_5_Scripting_a_Segwit_Script.md\",\n{\n 'v': 750,\n 'f': \"750\",\n }],\n [\"10_6_Spending_a_P2SH_Transaction.md\",\n{\n 'v': 384,\n 'f': \"384\",\n }],\n [\"11_0_Empowering_Timelock_with_Bitcoin_Scripts.md\",\n{\n 'v': 108,\n 'f': \"108\",\n }],\n [\"11_1_Understanding_Timelock_Options.md\",\n{\n 'v': 557,\n 'f': \"557\",\n }],\n [\"11_2_Using_CLTV_in_Scripts.md\",\n{\n 'v': 1197,\n 'f': \"1197\",\n }],\n [\"11_3_Using_CSV_in_Scripts.md\",\n{\n 'v': 1470,\n 'f': \"1470\",\n }],\n [\"12_0_Expanding_Bitcoin_Scripts.md\",\n{\n 'v': 99,\n 'f': \"99\",\n }],\n [\"12_1_Using_Script_Conditionals.md\",\n{\n 'v': 1120,\n 'f': \"1120\",\n }],\n [\"12_2_Using_Other_Script_Commands.md\",\n{\n 'v': 407,\n 'f': \"407\",\n }],\n [\"13_0_Designing_Real_Bitcoin_Scripts.md\",\n{\n 'v': 116,\n 'f': \"116\",\n }],\n [\"13_1_Writing_Puzzle_Scripts.md\",\n{\n 'v': 998,\n 'f': \"998\",\n }],\n [\"13_2_Writing_Complex_Multisig_Scripts.md\",\n{\n 'v': 996,\n 'f': \"996\",\n }],\n [\"13_3_Empowering_Bitcoin_with_Scripts.md\",\n{\n 'v': 1467,\n 'f': \"1467\",\n }],\n [\"14_0_Using_Tor.md\",\n{\n 'v': 116,\n 'f': \"116\",\n }],\n [\"14_1_Verifying_Your_Tor_Setup.md\",\n{\n 'v': 1568,\n 'f': \"1568\",\n }],\n [\"14_2_Changing_Your_Bitcoin_Hidden_Services.md\",\n{\n 'v': 434,\n 'f': \"434\",\n }],\n [\"14_3_Adding_SSH_Hidden_Services.md\",\n{\n 'v': 330,\n 'f': \"330\",\n }],\n [\"15_0_Talking_to_Bitcoind.md\",\n{\n 'v': 254,\n 'f': \"254\",\n }],\n [\"15_1_Accessing_Bitcoind_with_C.md\",\n{\n 'v': 1238,\n 'f': \"1238\",\n }],\n [\"15_2_Programming_Bitcoind_with_C.md\",\n{\n 'v': 1427,\n 'f': \"1427\",\n }],\n [\"15_3_Receiving_Bitcoind_Notifications_with_C.md\",\n{\n 'v': 650,\n 'f': \"650\",\n }],\n [\"16_0_Programming_with_Libwally.md\",\n{\n 'v': 333,\n 'f': \"333\",\n }],\n [\"16_1_Setting_Up_Libwally.md\",\n{\n 'v': 559,\n 'f': \"559\",\n }],\n [\"16_2_Using_BIP39_in_Libwally.md\",\n{\n 'v': 939,\n 'f': \"939\",\n }],\n [\"16_3_Using_BIP32_in_Libwally.md\",\n{\n 'v': 959,\n 'f': \"959\",\n }],\n [\"16_4_Using_PSBTs_in_Libwally.md\",\n{\n 'v': 989,\n 'f': \"989\",\n }],\n [\"16_5_Using_Scripts_in_Libwally.md\",\n{\n 'v': 785,\n 'f': \"785\",\n }],\n [\"16_6_Using_Other_Functions_in_Libwally.md\",\n{\n 'v': 655,\n 'f': \"655\",\n }],\n [\"16_7_Integrating_Libwally_and_Bitcoin-CLI.md\",\n{\n 'v': 1380,\n 'f': \"1380\",\n }],\n [\"17_0_Talking_to_Bitcoind_Other.md\",\n{\n 'v': 286,\n 'f': \"286\",\n }],\n [\"17_1_Accessing_Bitcoind_with_Go.md\",\n{\n 'v': 547,\n 'f': \"547\",\n }],\n [\"17_2_Accessing_Bitcoind_with_Java.md\",\n{\n 'v': 821,\n 'f': \"821\",\n }],\n [\"17_3_Accessing_Bitcoind_with_NodeJS.md\",\n{\n 'v': 393,\n 'f': \"393\",\n }],\n [\"17_4_Accessing_Bitcoind_with_Python.md\",\n{\n 'v': 1158,\n 'f': \"1158\",\n }],\n [\"17_5_Accessing_Bitcoind_with_Rust.md\",\n{\n 'v': 829,\n 'f': \"829\",\n }],\n [\"17_6_Accessing_Bitcoind_with_Swift.md\",\n{\n 'v': 1503,\n 'f': \"1503\",\n }],\n [\"18_0_Understanding_Your_Lightning_Setup.md\",\n{\n 'v': 192,\n 'f': \"192\",\n }],\n [\"18_1_Verifying_Your_Lightning_Setup.md\",\n{\n 'v': 1294,\n 'f': \"1294\",\n }],\n [\"18_2_Knowing_Your_lightning_Setup.md\",\n{\n 'v': 399,\n 'f': \"399\",\n }],\n [\"18_2__Interlude_Accessing_a_Second_Lightning_Node.md\",\n{\n 'v': 886,\n 'f': \"886\",\n }],\n [\"18_3_Setting_Up_a_Channel.md\",\n{\n 'v': 1173,\n 'f': \"1173\",\n }],\n [\"19_0_Using_Lightning.md\",\n{\n 'v': 146,\n 'f': \"146\",\n }],\n [\"19_1_Generate_a_Payment_Request.md\",\n{\n 'v': 968,\n 'f': \"968\",\n }],\n [\"19_2_Paying_a_Invoice.md\",\n{\n 'v': 604,\n 'f': \"604\",\n }],\n [\"19_3_Closing_a_Channel.md\",\n{\n 'v': 848,\n 'f': \"848\",\n }],\n [\"19_4_Lightning_Network_Review.md\",\n{\n 'v': 626,\n 'f': \"626\",\n }],\n [\"A0_Appendices.md\",\n{\n 'v': 112,\n 'f': \"112\",\n }],\n [\"A1_0_Understanding_Bitcoin_Standup.md\",\n{\n 'v': 420,\n 'f': \"420\",\n }],\n [\"A2_0_Compiling_Bitcoin_from_Source.md\",\n{\n 'v': 412,\n 'f': \"412\",\n }],\n [\"A3_0_Using_Bitcoin_Regtest.md\",\n{\n 'v': 980,\n 'f': \"980\",\n }],\n [\"CLA.md\",\n{\n 'v': 495,\n 'f': \"495\",\n }],\n [\"CONTRIBUTING.md\",\n{\n 'v': 529,\n 'f': \"529\",\n }],\n [\"LICENSE-CC-BY-4.0.md\",\n{\n 'v': 2716,\n 'f': \"2716\",\n }],\n [\"README.md\",\n{\n 'v': 1705,\n 'f': \"1705\",\n }],\n [\"TRANSLATING.md\",\n{\n 'v': 686,\n 'f': \"686\",\n }],\n [\"TOTAL\",\n{\n 'v': 89069,\n 'f': \"89069\",\n }]],\n columns: [[\"string\", \"Chapter\"], [\"number\", \"Word Count\"]],\n columnOptions: [],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n ",
"text/plain": [
"<google.colab.data_table.DataTable object>"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Chapter</th>\n",
" <th>Word Count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>01_0_Introduction.md</td>\n",
" <td>1144</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>01_1_Introducing_Bitcoin.md</td>\n",
" <td>2735</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>02_0_Setting_Up_a_Bitcoin-Core_VPS.md</td>\n",
" <td>226</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>02_1_Setting_Up_a_Bitcoin-Core_VPS_with_StackS...</td>\n",
" <td>2746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>02_2_Setting_Up_Bitcoin_Core_Other.md</td>\n",
" <td>254</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CONTRIBUTING.md</td>\n",
" <td>529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>LICENSE-CC-BY-4.0.md</td>\n",
" <td>2716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>README.md</td>\n",
" <td>1705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>TRANSLATING.md</td>\n",
" <td>686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>TOTAL</td>\n",
" <td>89069</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>103 rows × 2 columns</p>\n",
"</div>"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "14kdusmNzcfz"
},
"source": [
"Converts the table to a markdown format and save as 'Chapter_word_counts.md'"
]
},
{
"cell_type": "code",
"metadata": {
"id": "uc7eKo9TkidD"
},
"source": [
"from IPython.display import Markdown, display\n",
"from tabulate import tabulate\n",
"\n",
"\n",
"# borrowed from https://stackoverflow.com/questions/33181846/programmatically-convert-pandas-dataframe-to-markdown-table\n",
"\n",
"def pandas_df_to_markdown_table(df):\n",
" fmt = ['---' for i in range(len(df.columns))]\n",
" df_fmt = pd.DataFrame([fmt], columns=df.columns)\n",
" df_formatted = pd.concat([df_fmt, df])\n",
" return Markdown(df_formatted.to_csv(sep=\"|\", index=False))\n",
"\n",
"def df_to_markdown(df, y_index=False):\n",
" blob = tabulate(df, headers='keys', tablefmt='pipe')\n",
" if not y_index:\n",
" return '\\n'.join(['| {}'.format(row.split('|', 2)[-1]) for row in blob.split('\\n')])\n",
" return blob"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Y0QVAGKqzWX6"
},
"source": [
"mkdt = pandas_df_to_markdown_table(chapter_word_counts)\n",
"\n",
"with open('Chapter_word_counts.md', 'w') as m:\n",
" m.write(str(mkdt.data))"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kRKJUNcj_Fxl",
"outputId": "d826e833-2f4a-4f8f-b78f-cd07712c0f28"
},
"source": [
"total_count_translatable"
],
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"89069"
]
},
"metadata": {
"tags": []
},
"execution_count": 9
}
]
}
]
}