{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Word Count Analysis\n", "This notebook describes the distribution of word loss caused by preprocessing, comparing per-record word counts before and after preprocessing." ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from os.path import join\n", "from constants import (\n", " PROCESSED_RECORDS_FILENAME,\n", " INPUT_DATA_FILENAME)\n", "from utils import get_data_path\n", "from preprocessing import preprocessing_initial_text_clean" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\t-wilson\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3057: DtypeWarning: Columns (2,3,7,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,40,41,46,47,48,49,50,51,52,54,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ] } ], "source": [ "df = pd.read_csv(join(get_data_path(), INPUT_DATA_FILENAME), encoding=\"iso-8859-1\")\n", "preprocessed_df = pd.read_csv(join(get_data_path(), PROCESSED_RECORDS_FILENAME), encoding=\"iso-8859-1\")" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "df = df[['iati.identifier','description', 'title']]" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "# Remove records from the full dataset whose iati.identifier is blank (whitespace-only)\n", "df = df[~df[\"iati.identifier\"].str.isspace()]\n", "\n", "# If both description and title are not NA, concatenate them into the description column\n", "df.loc[~df[\"description\"].isna() & ~df[\"title\"].isna(), [\"description\"]] = (\n", " df[\"title\"] + \" \" + df[\"description\"]\n", ")\n", "\n", "# If description is NA, replace it with the title\n",
"df.loc[df[\"description\"].isna(), [\"description\"]] = df[\"title\"]" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "df = preprocessing_initial_text_clean(df, 'description')" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "df['words'] = df['description'].str.count(' ').add(1)\n", "preprocessed_df['words'] = preprocessed_df['description'].str.count(' ').add(1) " ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [], "source": [ "df = df[df['iati.identifier'].isin(preprocessed_df['iati.identifier'])]" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial DF word stats \n", "count 783035.000000\n", "mean 48.899398\n", "std 77.993783\n", "min 1.000000\n", "25% 14.000000\n", "50% 26.000000\n", "75% 49.000000\n", "max 12135.000000\n", "Name: words, dtype: float64\n" ] } ], "source": [ "print(\"Initial DF word stats \\n{0}\".format(df['words'].describe()))" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pre-processed DF word stats \n", "count 783035.000000\n", "mean 12.717107\n", "std 21.629170\n", "min 1.000000\n", "25% 2.000000\n", "50% 6.000000\n", "75% 15.000000\n", "max 4110.000000\n", "Name: words, dtype: float64\n" ] } ], "source": [ "print(\"Pre-processed DF word stats \\n{0}\".format(preprocessed_df['words'].describe()))" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABKUAAAJNCAYAAADgesaeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdebiXZZ0/8PctCIkpi+ASipCapU2pY7kxCZr7mOiYWzbgMk6by8xYo9OClJb9phnTFpeUoMYlsxRN0xyEoUVLQimNJjc0ilREXLJQ8P79cb6cATnCYTkPhK/XdZ3rfL+f537u5/OcyD/e1/3cT6m1BgAAAACatN6abgAAAACA1x6hFAAAAACNE0oBAAAA0DihFAAAAACNE0oBAAAA0DihFAAAAACN676mG1hb9O/fvw4ePHhNtwEAAACwzvj5z38+p9Y6oKNjQqmWwYMHZ+rUqWu6DQAAAIB1Rinl0Vc75vE9AAAAABonlAIAAACgcUIpAAAAABonlAIAAACgcUIpAAAAABonlAIAAACgcUIpAAAAABonlAIAAACgcd3XdAMAAACs/ebPn5+5c+fmueeey8KFC9d0O8Aa0K1bt2y00Ubp169fevbsucrzCaUAAABYpvnz5+exxx5L3759M3jw4Ky//voppazptoAG1Vrz0ksv5dlnn81jjz2WQYMGrXIw5fE9AAAAlmnu3Lnp27dv+vfvnx49egik4DWolJIePXqkf//+6du3b+bOnbvKcwqlAAAAWKbnnnsuG2+88ZpuA1hLbLzxxnnuuedWeR6hFAAAAMu0cOHCrL/++mu6DWAtsf7666+WveWEUgAAACyXR/aARVbXfw+EUgAAAAA0TigFAAAAQOOEUgAAALAcM2fOTCklo0aN6vJrlVIybNiwVZqjyX5hZXVf0w0AAADwl+2C23+zpltYpn/a701ruoUlDB48OElbcLS2mDx5coYPH57Ro0fnnHPOafz6M2fOzJAhQzJy5MiMGzeu8euzZgilAAAAYDkGDhyYGTNmpHfv3l1+rRkzZqRXr15dfh1Y04RSAAAAsBzrr79+3vzmNzdyraauA2uaPaUAAABgOV5tj6ZRo0allJKZM2fm0ksvzV/91V/lda97XTbbbLOccsopeeaZZ9rHTp48OaWUPProo3n00UdTSmn/WXzejvaU+v3vf59Pf/rT2WuvvbL55punR48eecMb3pDjjjsuM2bMWKV7GzVqVIYPH54kGTNmzBJ9TZ48eYmxV199dYYPH56+ffvmda97Xd7ylrfk3HPPzfz585cYd9ppp6WUkn/5l39Z6npXXHFFSinZb7/98vLLL+ecc87JkCFDkiTjx49f4voe5Vu3WSkFAAAAq+hjH/tYbrvtthx66KHZf//9M2nSpHzta1/Lgw8+mDvuuCNJ215So0ePzhe/+MUkyRlnnNF+/k477bTM+adMmZLzzz8/w4cPz9/93d/l9a9/fR544IFcd911ufHGG/PjH/84b3/721eq9xEjRiRpC4T23nvvJQKxRftfJclJJ52UsWPHZsstt8wRRxyRPn365K677sonP/nJTJw4Mbfffnu6d2+LGb7whS/kJz/5SS644ILss88+OeSQQ5Ikv/rVr3Laaadls802y3/9139lvfXWy7BhwzJv3rxceOGFefvb397eT2f+LvxlE0oBAADAKrrrrrvyy1/+MoMGDUqSLFiwIPvss08mTZqUn/3sZ3nnO9+ZwYMH55xzzmlf/bMiG4rvs88+efzxx7PRRhstUZ8+fXr22muvnHXWWfn+97+/Ur2PGDEiffr0yfjx4zNs2LAO+xo3blzGjh2bww8/PFdeeWU22GCD9mPnnHNOxowZk6985Ss5/fTTkyQ9evTINddck1122SWjRo3Kvffem379+uWoo47Kn//850yYMCGbbbZZkmTYsGEZPHhwLrzwwuy0005rZKN11gyP7wEAAMAq+tSnPtUeSCV
J9+7dc8IJJyRJfvazn63y/JtuuulSgVSSvP3tb28Pv1566aVVvs6rufDCC9O9e/eMHTt2iUAqST75yU9mk002yZVXXrlEfdttt81ll12WOXPm5LjjjsuHP/zh3H///Tn77LPz7ne/u8t65S+HlVKvZZM+17lxw8/u2j4AAAD+wu26665L1bbaaqskydNPP71arnHzzTfnkksuydSpUzNnzpwsWLBgieNz5szJFltssVqutbgXXngh06dPT//+/dsfPXylnj17dri31THHHJOJEyfm8ssvz5QpUzJ06NCMGTNmtffIXyahFAAAAKyiPn36LFVbtL/SwoULV3n+iy66KKeffnr69u2b/fbbL4MGDUqvXr1SSskNN9yQ6dOnL7XZ+Ory9NNPp9aaJ598cqUCpSOPPDKXX355kuTUU09Nt27dVneL/IXqslCqlDI2yd8meaLW+tbF6qcm+UiSBUlurrV+rFU/O8lJSRYmOa3WelurfmCSC5N0S3J5rfX8Vn1IkmuS9EsyLcn7a60vllJ6JvlGkr9O8lSSo2utM7vqPgEAAKArLViwIKNHj87mm2+eadOmLbUa6s477+zS6/fu3TtJsvPOO2fatGkrdO6cOXNy0kknpVevXknaNncfPnx4BgwYsNr75C9PV+4pNS7JgYsXSinDkxyW5G211h2TfKFV3yHJMUl2bJ3z1VJKt1JKtyRfSXJQkh2SHNsamySfT3JBrXW7JE+nLdBK6/fTtdZtk1zQGgcAAABrhW7duq3Q6qk5c+Zk3rx52XPPPZcKpJ5//vkVDoperaek41Vdr3/967Pjjjvm/vvvz9y5czs9Z601o0aNyu9+97tceOGFufDCCzN79uz8/d//fWqtnb4+664uC6VqrVOSvPJf6weTnF9rnd8a80SrfliSa2qt82utjyR5MMk7Wz8P1lofrrW+mLaVUYeVUkqSfZJc1zp/fJIRi801vvX5uiT7tsYDAADAGrfJJpvkySefzJ/+9KdOjd90003Tq1ev/PznP8/zzz/fXn/ppZdy+umnZ86cOaulpyR57LHHOjz+z//8z3nxxRdz4oknZt68eUsdf/rpp5cKx/7zP/8zN998c4466qicfPLJOfnkk3PMMcfk1ltvzb//+78vMbZv374ppbzq9Vk3Nb2n1JuS/E0p5bwkf05yZq317iQDk9y12LhZrVqS/PYV9d2SbJJkXq11QQfjBy46p9a6oJTyTGv8qv+/FAAAAFbRvvvum7vvvjsHHnhg3vWud6Vnz555+9vfnkMPPbTD8eutt15OO+20nH/++fmrv/qrHHbYYXnxxRczadKkzJ07N8OHD8+kSZNWqaftt98+AwcOzDXXXJMePXpk0KBBKaXk/e9/f7beeuuceOKJ+fnPf56vfvWr2WabbXLAAQdk0KBBmTt3bh555JFMmTIlJ5xwQi655JIkyd13352zzz47Q4YMyWWXXdZ+nUsvvTR33313Pv7xj+dd73pXdt999yRtq7F22223/PCHP8z73ve+vOlNb0q3bt3ynve8J29729tW6d5YezUdSnVP0jfJ7knekeTaUsobk3S0kqmm45VcdRnjs5xjSyilnJLklCRLvLoTAAAAusonPvGJzJs3LzfddFN+/OMfZ+HChRk5cuSrhlJJ8pnPfCYDBgzI5ZdfnksvvTS9e/fOfvvtl3PPPTejR49e5Z66deuW66+/PmeddVauvfbaPPfcc6m1ZujQodl6662TJF/5yldy0EEH5ZJLLsl///d/Z968eenXr18GDRqUj370ozn++OOTJM8880yOPvroJMk111zTvidVkmy88ca55pprstdee+WYY47Jvffe275J/De/+c380z/9U2699dZcffXVqbVmyy23FEqtw8orn+NcrZOXMjjJ9xZtdF5KuTVtj+9Nbn1/KG0B1clJUmv9XKt+W5JzWtOcU2s9oFU/u1U7P8mTSTZvrYbaY9G4RefWWu8spXRP8ockA+pybnTXXXe
tU6dOXS33/Rdj0uc6N2742csfAwAArLNmzJiRt7zlLWu6DWAt0tn/LpRSfl5r3bWjY1250XlHbkjbXlAppbwpSY+0PVZ3Y5JjSik9W2/V2y7Jz5LcnWS7UsqQUkqPtG2GfmMrYJqU5MjWvCOTTGh9vrH1Pa3jdywvkAIAAACgWV32+F4p5eokw5L0L6XMSjI6ydgkY0sp9yV5McnIVmB0fynl2iS/SrIgyYdrrQtb83wkyW1JuiUZW2u9v3WJf01yTSnl3CT3JLmiVb8iyTdLKQ+mbaP1Y7rqHgEAAABYOV0WStVaj32VQ8e/yvjzkpzXQf2WJLd0UH84bW/ne2X9z0neu0LNAgAAANCoph/fAwAAAAChFAAAAADNE0oBAAAA0DihFAAAAACNE0oBAAAA0DihFAAAAACNE0oBAAAA0DihFAAAAACNE0oBAAAA0DihFAAAAACNE0oBAAAArAaTJ09OKSXnnHPOmm7lL0L3Nd0AAAAAf+EmfW5Nd7Bsw89e0x1Ah8aNG5cTTjghX//61zNq1KjGrz958uQMHz48o0ePXiNBmpVSAAAAADROKAUAAABA44RSAAAAsBwzZ85MKSWjRo3Kr3/964wYMSL9+vXLhhtumKFDh+YHP/jBUueMGzcupZSMGzcut956a4YNG5bevXunlLLEuF//+tcZNWpUttpqq/Ts2TObbbZZjjvuuPzv//7vCvW4+PVuvvnm7Lnnntlwww3Tt2/fHHnkkXnggQeWOmfUqFEppeThhx/Ol770pbztbW/LBhtskGHDhrWPefnll3PJJZfkHe94R17/+tdnww03zDve8Y5cfPHFefnllzvs5de//nVOPPHEDB48OD179symm26av/mbv8nFF1/c4djO3v/jjz+eM888M9tvv3023HDD9OnTJ9tvv31GjRqVhx9+uH1crTXjx4/PnnvumQEDBuR1r3tdttpqqxxwwAH51re+tdS8s2bNykc+8pG88Y1vTM+ePbPJJpvkPe95T+6+++4O7+/xxx/PSSedlM022ywbbLBBdtppp4wfP77Dsa9m2LBhOeGEE5IkJ5xwQkop7T8zZ85sH7dgwYJ89atfze67756NN944vXr1ys4775wvf/nLS/39R4wYkVJKvvSlLy11vU9+8pMppeTkk09O0va//fDhw5MkY8aMWeL6kydPXqF7WVn2lAIAAIBOeuSRR7LHHnvkrW99a/7xH/8xs2fPzre+9a0cdNBBueqqq3L00Ucvdc51112XW2+9NQcddFA+8IEPLBE43HrrrTniiCPy0ksv5dBDD822226bWbNm5bvf/W5uvvnmTJo0KbvssssK9fjd73433//+93P44Ydn2LBhuffee/Od73wnkyZNyk9+8pNsv/32S51z+umn54c//GEOOeSQHHzwwenWrVv7sfe///256qqrstVWW+Xkk09OKSXXX399PvShD+VHP/pRrrzyyiXmuvnmm/Pe97438+fPz4EHHphjjz028+bNy/Tp0/P//t//ywc/+MGVuv8XXnghe+21Vx566KHst99+OfTQQ1NrzaOPPpoJEybkyCOPzBvf+MYkycc//vF87nOfy5AhQ3LUUUeld+/emT17du6+++58+9vfXuJ/p2nTpmX//ffP3Llzc8ABB+SII47InDlzcsMNN2To0KG5/vrrc/DBB7ePf+qpp7Lnnnvm4YcfztChQzN06NDMnj07H/jAB7L//vt3+n+nUaNGpU+fPpkwYUIOO+yw7LTTTu3H+vTpkyTtf5fbbrst22+/fY477ri87nWvy6RJk3Lqqafmpz/9ab75zW+2nzd27NjsvPPO+ehHP5qhQ4dm5513TpJMnDgxn/3sZ7PDDjvkoosuStIWYCXJ+PHjs/feey8RRA4ePLjT97EqhFIAAADQSVOmTMmZZ56Zf//3f2+vfeQjH8kee+yRD3zgAznooIOy8cYbL3HOLbfckltuuSUHHnjgEvWnn346xx57bHr16pUpU6Z
khx12aD92//33Z7fddsvJJ5+cadOmrVCPN910U2666ab87d/+bXvtwgsvzBlnnJEPfehDmThx4lLnTJs2Lffcc0+GDBmyRP3qq6/OVVddlZ133jlTpkzJ61//+iTJueeem7333jtXXXVVDjnkkBx33HFJkjlz5uS4447LggULcscdd2TvvfdeYr5Zs2at9P1PnDgxDz30UM4444xccMEFS8z74osvZv78+e3fL7300gwcODD33XdfevXqtcTYOXPmtH9esGBBjjrqqDz//POZNGnSEv3+/ve/zzve8Y6cdNJJmTlzZnr27JkkOfvss/Pwww8v1ceifwedtWhj8wkTJmTEiBEdbnR+3nnn5bbbbstHPvKRfPGLX2wPCxcuXJhTTjklY8eOzZFHHpnDDjssSdKvX79cffXV2XvvvXP00Udn2rRpeeGFF3L88cenZ8+eufbaa9v/HiNGjEifPn0yfvz4DBs2zEbnAAAAsDbr3bt3PvWpTy1R23XXXfO+970v8+bNy/XXX7/UOYcddthSgVSSfOMb38i8efMyZsyYJQKZJNlxxx3zD//wD7nnnnvyq1/9aoV63GeffZYIpJK2wGSbbbbJHXfckUcffXSpcz72sY8tFUglbStvkuT8889vD6SSZMMNN8znP//5JMnll1/eXh8/fnyeffbZfPCDH1wqkEqSLbfcsv3zyt7/BhtssNS8PXr0yEYbbbREbf31119ixdci/fv3b/98880356GHHsqpp566VL9veMMb8rGPfSx/+MMf2oO8l156KVdeeWU22mijpUKcRf8OVpeXX345X/7yl7P55pvnggsuWOJeunXrlv/4j/9IKWWplWp77rlnPvOZz+SBBx7IP/7jP+b444/PH/7wh1x00UXZcccdV1t/q4OVUgAAANBJu+yyy1LhR9K2P9D48eNzzz33ZOTIkUsce+c739nhXHfeeWeSZPr06R2uUvnNb36TJJkxY0Z22GGHTJ48eam9fgYPHrzUCpuOwqBu3bpl6NCheeihh3LPPfdk66237lSP06ZNy3rrrbfEo12LX6dbt26555572mt33XVXkuSggw7qcL7Frej977333hk4cGDOP//8TJs2LQcffHD22muv7LTTTkuFT+973/vypS99KTvuuGPe+973Zu+9984ee+yR3r17d9jDo48+2mEPi/bhmjFjRg4++OD8+te/zgsvvJC/+Zu/WWqu5P/+HawOv/nNb/LUU09lu+22y7nnntvhmA022CAzZsxYqv6v//qvmTx5cq666qokybHHHtu+l9TaRCgFAAAAnbTZZpt1WN98882TJM8888yrHnulp556Kknyta99bZnXfP7555MkkydPzpgxY5Y4tvfeey8VSq3OHp955pn069cvPXr0WOpY9+7d079//zzxxBPttXnz5iVJBg4c+Cp3839W9P433njj3HXXXRk9enRuvPHG3HbbbUnaVj596EMfyic+8Ymsv/76SZILLrgg22yzTcaOHZvzzz8/559/frp3756DDz44//Ef/5Ftt912iR6+/e1vd6qHRX+75f2NV4dFvT3wwANL/e/eUW+LK6Xk8MMPb/8bnXHGGautr9XJ43sAAADQSY8//niH9T/84Q9J0uHqmVe+bW+RRWOnT5+eWuur/ixaeXXOOecsdayjt6St7h7nzp2bl156aaljCxYsyJw5c5bYQ2vRBt2/+93vOpzvlXMnnb//pO3xvyuuuCJPPPFE7rvvvlx00UXZZJNN8ulPfzqf/vSn28d169Ytp59+eqZPn57HH3883/nOd3L44YfnxhtvzIEHHti+/9SiHiZMmLDMHkaPHr3E+OX9jVeHRdc6/PDDl9nbI488stS5DzzwQM4888z07ds36623Xk4++eT8+c9/Xm29rS5CKQAAAOikadOm5bnnnluqvigcWvS2s87YfffdkyQ//OEPV0tvi/zP//zPUrWFCxfmRz/6UZIV63HnnXfOyy+/nClTpix1bMq
UKVm4cOESbwdcdE/f//73lzv3qtx/KSU77rhjTj311Nx+++1JkhtuuKHDsZtuummOOOKIXHvttdlnn33y0EMP5b777lupHt785jenV69euffeeztccdZRSLgsi29c3tG1+vTpk7vuuqvDUPDVzJ8/P0cffXT++Mc/5pprrsnZZ5+dX/7ylx2ullrW9ZsglAIAAIBOeuaZZ5ZYkZMkU6dOzZVXXpnevXvn8MMP7/RcJ5xwQvr06ZMxY8bkZz/72VLHX3755RUOOZLkjjvuyPe+970lal/+8pfz0EMPZfjw4UvtJ7UsJ554YpK2N8698MIL7fUXXnghZ511VpLkpJNOaq+PHDkyG2+8cS6++OIOg6zF3763ovd/3333ZebMmUuNW7RqadFb5ebPn5+JEyem1rrEuJdeeilz585dYuxhhx2WbbbZJl/5yldyyy23dPg3uPPOO9vvff3118/73ve+PPfcc0vtQbXo38GK2GSTTZIkjz322FLHunfvnlNPPTWzZ8/Oaaedlj/96U9LjZk9e/ZSG8GfeeaZueeee/Kxj30s+++/f8aMGZO99torl156aa699tpOX78J9pQCAACATnrXu96Vyy+/PD/96U+z1157Zfbs2fnWt76Vl19+OZdeeukSj7ItzyabbJLrrrsuhx9+eHbffffsu+++2XHHHbPeeuvlsccey5133pmnnnpqhR+7OvTQQ3P44Yfn8MMPz7bbbpvp06fnlltuSb9+/fLVr351heY67rjjMmHChFx77bXZcccdM2LEiJRScsMNN+SRRx7JUUcdtcQb5/r375+rrroqRx55ZIYPH56DDjoob3vb2/Lss8/mF7/4RX7729+2P262ovf/3//93/nnf/7n7Lnnnnnzm9+cTTfdNLNmzcqECROy3nrr5aMf/WiS5E9/+lPe/e53Z/Dgwdltt92y9dZb589//nNuv/32zJgxI+95z3vylre8JUlbyPTd7343BxxwQA455JDsueee2WmnndKrV6/89re/zd13352HH344s2fPbg+yPvvZz2bixIn54he/mKlTp2bo0KHt/w4OPvjg3HjjjZ3+++6xxx7p1atXvvjFL2bu3Lnte1Wdeuqp6d27dz75yU9m+vTpueSSS3LTTTdln332ycCBA/PEE0/kgQceyI9//OOcd9557W8vvOGGG/LlL385u+22W/vm6N26dcvVV1+dnXbaKf/wD/+QXXfdNW984xuTJNtvv30GDhyYa665Jj169MigQYNSSsn73//+FQovV5ZQCgAAADppyJAhueSSS3LWWWflkksuyfz587PLLrvkU5/6VA444IAVnm/ffffNL37xi3zhC1/Ibbfdlh/+8Ifp0aNH3vCGN2SfffbJ3/3d363wnEcccUROOeWUnHfeebn55puz/vrr54gjjsjnPve5vOlNb1rh+a6++ursvffeGTt2bC699NIkyVve8pb8y7/8Sz74wQ8uNf6QQw7J1KlT8/nPfz4TJ07MD37wg/Tt2zdvfvObc/bZZ6/0/R9wwAE544wzMmXKlEyYMCHPPvtstthii+y3337tYVWSbLjhhvn85z+fSZMm5Sc/+UluuOGGbLTRRtlmm21y8cUXt6/+WuRtb3tbpk+fnv/8z//M9773vXz961/Peuutly222CI777xzxowZk/79+7eP79+/f3784x/n3/7t33LTTTdl6tSp2X777XPxxRdn8ODBKxRK9e3bN9/5zncyZsyYfP3rX88f//jHJMnxxx+f3r17Z/31188NN9yQ//qv/8q4cePyve99L88//3wGDBiQIUOG5DOf+Ux7KPjYY4/lxBNPTO/evXP11Vene/f/i3y22mqrjB07NiNGjMgxxxyTH/3oR+nRo0e6deuW66+/PmeddVauvfbaPPfcc6m1ZujQoY2EUuWVy9leq3bdddc6derUNd1GsyZ9rnPjhp+9/DEAAMA6a8aMGe0rS16rZs6cmSFDhmTkyJEZN27cmm6nQ+P
GjcsJJ5yQr3/960u9kQ9Wt87+d6GU8vNa664dHbOnFAAAAACNE0oBAAAA0DihFAAAAACNs9E5AAAALMfgwYOztu/JPGrUKHtJ8RfFSikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAYLnW9v2UgOasrv8eCKUAAABYpm7duuWll15a020Aa4mXXnop3bp1W+V5hFIAAAAs00YbbZRnn312TbcBrCWeffbZbLTRRqs8j1AKAACAZerXr1+efvrpzJkzJy+++KJH+eA1qNaaF198MXPmzMnTTz+dfv36rfKc3VdDXwAAAKzDevbsmUGDBmXu3LmZOXNmFi5cuKZbAtaAbt26ZaONNsqgQYPSs2fPVZ5PKAUAAMBy9ezZM1tssUW22GKLNd0KsI7w+B4AAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANC4LgulSiljSylPlFLu6+DYmaWUWkrp3/peSikXlVIeLKX8opSyy2JjR5ZSHmj9jFys/tellF+2zrmolFJa9X6llNtb428vpfTtqnsEAAAAYOV05UqpcUkOfGWxlLJVkv2SPLZY+aAk27V+TklycWtsvySjk+yW5J1JRi8WMl3cGrvovEXXOivJxFrrdkkmtr4DAAAAsBbpslCq1jolydwODl2Q5GNJ6mK1w5J8o7a5K0mfUsoWSQ5IcnutdW6t9ekktyc5sHVs41rrnbXWmuQbSUYsNtf41ufxi9UBAAAAWEs0uqdUKeU9SX5Xa53+ikMDk/x2se+zWrVl1Wd1UE+SzWqts5Ok9XvT1XYDAAAAAKwW3Zu6UCmlV5KPJ9m/o8Md1OpK1Fe0p1PS9ghgBg0atKKnAwAAALCSmlwptU2SIUmml1JmJtkyybRSyuZpW+m01WJjt0zy++XUt+ygniSPtx7vS+v3E6/WUK31slrrrrXWXQcMGLAKtwYAAADAimgslKq1/rLWummtdXCtdXDagqVdaq1/SHJjkr9vvYVv9yTPtB69u1gGNtAAACAASURBVC3J/qWUvq0NzvdPclvr2HOllN1bb937+yQTWpe6Mcmit/SNXKwOAAAAwFqiy0KpUsrVSe5Msn0pZVYp5aRlDL8lycNJHkzytSQfSpJa69wkn0lyd+vn061aknwwyeWtcx5K8v1W/fwk+5VSHkjbW/7OX533BQAAAMCq67I9pWqtxy7n+ODFPtckH36VcWOTjO2gPjXJWzuoP5Vk3xVsFwAAAIAGNfr2PQAAAABIhFIAAAAArAFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAaJ5QCAAAAoHFCKQAAAAAa131NN0AXmPS5Nd0BAAAAwDJZKQUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA47oslCqljC2lPFFKuW+x2r+XUn5dSvlFKeX6UkqfxY6dXUp5sJTyv6WUAxarH9iqPVhKOWux+pBSyk9LKQ+
UUr5VSunRqvdsfX+wdXxwV90jAAAAACunK1dKjUty4Ctqtyd5a631bUl+k+TsJCml7JDkmCQ7ts75aimlWymlW5KvJDkoyQ5Jjm2NTZLPJ7mg1rpdkqeTnNSqn5Tk6VrrtkkuaI0DAAAAYC3SZaFUrXVKkrmvqP2g1rqg9fWuJFu2Ph+W5Jpa6/xa6yNJHkzyztbPg7XWh2utLya5JslhpZSSZJ8k17XOH59kxGJzjW99vi7Jvq3xAAAAAKwl1uSeUicm+X7r88Akv13s2KxW7dXqmySZt1jAtai+xFyt48+0xgMAAACwllgjoVQp5eNJFiS5clGpg2F1JerLmqujPk4ppUwtpUx98sknl900AAAAAKtN46FUKWVkkr9N8r5a66KwaFaSrRYbtmWS3y+jPidJn1JK91fUl5irdbx3XvEY4SK11stqrbvWWncdMGDAqt4aAAAAAJ3UaChVSjkwyb8meU+t9YXFDt2Y5JjWm/OGJNkuyc+S3J1ku9ab9nqkbTP0G1th1qQkR7bOH5lkwmJzjWx9PjLJHYuFXwAAAACsBbovf8jKKaVcnWRYkv6llFlJRqftbXs9k9ze2nv8rlrrB2qt95dSrk3yq7Q91vfhWuvC1jwfSXJbkm5JxtZa729d4l+TXFNKOTfJPUmuaNWvSPLNUsqDaVshdUxX3SMAAAAAK6fLQqla67EdlK/ooLZo/HlJzuugfkuSWzqoP5y2t/O9sv7nJO9doWYBAAAAaNSafPseAAAAAK9RQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBxQikAAAAAGieUAgAAAKBx3btq4lLK2CR/m+SJWutbW7V+Sb6VZHCSmUmOqrU+XUopSS5McnCSF5KMqrVOa50zMsknWtOeW2sd36r/dZJxSTZIckuS02ut9dWu0VX3+Zow6XOdGzf87K7tAwAAAFhndOVKqXFJDnxF7awkE2ut2yWZ2PqeJAcl2a71c0qSi5P2EGt0kt2SvDPJ6FJK39Y5F7fGLjrvwOVcAwAAAIC1RJeFUrXWKUnmvqJ8WJLxrc/jk4xYrP6N2uauJH1KKVskOSDJ7bXWua3VTrcnObB1bONa65211prkG6+Yq6NrAAAAALCWaHpPqc1qrbOTpPV701Z9YJLfLjZuVqu2rPqsDurLugYAAAAAa4m1ZaPz0kGtrkR9xS5ayimllKmllKlPPvnkip4OAAAAwEpqOpR6vPXoXVq/n2jVZyXZarFxWyb5/XLqW3ZQX9Y1llJrvazWumutddcBAwas9E0BAAAAsGKaDqVuTDKy9XlkkgmL1f++tNk9yTOtR+9uS7J/KaVva4Pz/ZPc1jr2XCll99ab+/7+FXN1dA0AAAAA1hLdu2riUsrVSYYl6V9KmZW2t+idn+TaUspJSR5L8t7W8FuSHJzkwSQvJDkhSWqtc0spn0lyd2vcp2utizZP/2Da3vC3QZLvt36yjGsAAAAAsJboslCq1nrsqxzat4OxNcmHX2WesUnGdlCfmuS
tHdSf6ugaAAAAAKw91paNzgEAAAB4DRFKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjRNKAQAAANA4oRQAAAAAjVtuKFVK6ddEIwAAAAC8dnRmpdRPSynfLqUcXEopXd4RAAAAAOu8zoRSb0pyWZL3J3mwlPLZUsqburYtAAAAANZlyw2lapvba63HJjk5ycgkPyul/E8pZY8u7xAAAACAdU735Q0opWyS5Pi0rZR6PMmpSW5MslOSbycZ0pUNAgAAALDuWW4oleTOJN9MMqLWOmux+tRSyiVd0xYAAAAA67LOhFLb11prRwdqrZ9fzf0AAAAA8BrQmY3Of1BK6bPoSymlbynlti7sCQAAAIB1XGdCqQG11nmLvtRan06yade1BAAAAMC6rjOh1MJSyqBFX0opWyfp8HE+AAAAAOiMzuwp9fEkPyql/E/r+7uSnNJ1LQEAAACwrltuKFVrvbWUskuS3ZOUJP9Ua53T5Z0BAAAAsM7qzEqpJOmZZG5r/A6llNRap3RdWwAAAACsy5YbSpVSPp/k6CT3J3m5Va5JhFIAAAAArJTOrJQakWT7Wuv8rm4GAAAAgNeGzrx97+Ek63d1IwAAAAC8dnRmpdQLSe4tpUxM0r5aqtZ6Wpd1BQAAAMA6rTOh1I2tHwAAAABYLZYbStVax5dSNkgyqNb6vw30BAAAAMA6brl7SpVSDk1yb5JbW993KqVYOQUAAADASuvMRufnJHlnknlJUmu9N8mQLuwJAAAAgHVcZ0KpBbXWZ15Rq13RDAAAAACvDZ3Z6Py+UspxSbqVUrZLclqSn3RtWwAAAACsyzqzUurUJDsmmZ/k6iTPJjmjK5sCAAAAYN3WmbfvvZDk460fAAAAAFhlyw2lSimT0sEeUrXWfbqkIwAAAADWeZ3ZU+rMxT6/LsnfJVnQNe0AAAAA8FrQmcf3fv6K0o9LKf/TRf0AAAAA8BrQmcf3+i32db0kf51k8y7rCAAAAIB1Xmce3/t52vaUKml7bO+RJCd1ZVMAAAAArNs68/jekCYaAQAAAOC1ozOP7x2xrOO11u+uvnYAAAAAeC3ozON7JyXZM8kdre/Dk0xO8kzaHusTSgEAAACwQjoTStUkO9RaZydJKWWLJF+ptZ7QpZ0BAAAAsM5arxNjBi8KpFoeT/KmLuoHAAAAgNeAzqyUmlxKuS3J1WlbNXVMkkld2hUAAAAA67TlrpSqtX4kySVJ3p5kpySX1VpPXZWLllL+qZRyfynlvlLK1aWU15VShpRSflpKeaCU8q1SSo/W2J6t7w+2jg9ebJ6zW/X/LaUcsFj9wFbtwVLKWavSKwAAAACrX2ce30uSaUlurrX+U5LbSikbrewFSykDk5yWZNda61uTdEvb6qvPJ7mg1rpdkqfTtsF6Wr+frrVum+SC1riUUnZonbdjkgOTfLWU0q2U0i3JV5IclGSHJMe2xgIAAACwllhuKFVK+Yck1yW5tFUamOSGVbxu9yQblFK6J+mVZHaSfVrXSZLxSUa0Ph/W+p7W8X1LKaVVv6bWOr/W+kiSB5O8s/XzYK314Vrri0muaY0FAAAAYC3RmZVSH06yV5Jnk6TW+kCSTVf2grXW3yX5QpLH0hZGPZPk50nm1VoXtIbNSlv4ldbv37bOXdAav8ni9Vec82p1AAAAANYSnQml5rdWHCVJWqub6spesJTSN20rl4YkeUOSDdP2qN0rLbpGeZVjK1rvqJdTSilTSylTn3zyyeW1DgAAAMBq0plQ6n9KKf+Wtsft9kvy7SQ3rcI1353kkVrrk7XWl5J8N8meSfq0Aq8k2TLJ71ufZyXZKmkPxHonmbt4/RXnvFp9KbXWy2qtu9Zadx0wYMAq3BIAAAAAK6IzodR
ZSZ5M8ssk/5jkliSfWIVrPpZk91JKr9beUPsm+VWSSUmObI0ZmWRC6/ONre9pHb+j1lpb9WNab+cbkmS7JD9LcneS7Vpv8+uRts3Qb1yFfgEAAABYzbov62DrTXbja63HJ/na6rhgrfWnpZTr0vZGvwVJ7klyWZKbk1xTSjm3VbuidcoVSb5ZSnkwbSukjmnNc38p5dq0BVoLkny41rqw1fdHktyWtjf7ja213r86egcAAABg9VhmKFVrXVhKGVBK6bH4vlKrqtY6OsnoV5QfTtub81459s9J3vsq85yX5LwO6rekbUUXAAAAAGuhZYZSLTOT/LiUcmOSPy4q1lr/s6uaAgAAAGDd9qp7SpVSvtn6eHSS77XGbrTYDwAAAACslGWtlPrrUsrWaduY/EsN9QMAAADAa8CyQqlLktyaZEiSqYvVS5Ka5I1d2BcAAAAA67BXfXyv1npRrfUtSb5ea33jYj9Daq0CKQAAAABW2quGUovUWj/YRCMAAAAAvHYsN5QCAAAAgNVNKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA44RSAAAAADROKAUAAABA47qv6QZYh0z6XOfGDT+7a/sAAAAA1npWSgEAAADQOKEUAAAAAI0TSgEAAADQOKEUAAAAAI0TSgEAAADQOKEUAAAAAI0TSgEAAADQOKEUAAAAAI0TSgEAAADQOKEUAAAAAI1bI6FUKaVPKeW6UsqvSykzSil7lFL6lVJuL6U80PrdtzW2lFIuKqU8WEr5RSlll8XmGdka/0ApZeRi9b8upfyydc5FpZSyJu4TAAAAgI6tqZVSFya5tdb65iRvTzIjyVlJJtZat0sysfU9SQ5Ksl3r55QkFydJKaVfktFJdkvyziSjFwVZrTGnLHbegQ3cEwAAAACd1HgoVUrZOMm7klyRJLXWF2ut85IclmR8a9j4JCNanw9L8o3a5q4kfUopWyQ5IMnttda5tdank9ye5MDWsY1rrXfWWmuSbyw2FwAAAABrgTWxUuqNSZ5M8vVSyj2llMtLKRsm2azWOjtJWr83bY0fmOS3i50/q1VbVn1WB3UAAAAA1hJrIpTqnmSXJBfXWndO8sf836N6HeloP6i6EvWlJy7llFLK1FLK1CeffHLZXQMAAACw2qyJUGpWklm11p+2vl+XtpDq8dajd2n9fmKx8Vstdv6WSX6/nPqWHdSXUmu9rNa6a6111wEDBqzSTQEAAADQeY2HUrXWPyT5bSll+1Zp3yS/SnJjkkVv0BuZZELr843/v727j7asrO8D/v2VUZOoEV+INbwUNIypsS3qKFhrYjUg2lRsoi3EpdSwOtGgVVIbJe1aWq0JasxUs1ItESpGIpoYlyyDwWkU3wqEFxFEdBiRwEQKKha1Ri3k1z/OntXreO9wh7lnn3vP+XzWmnX3ec6z9/4dHvY9937vs5+T5AXDp/Adk+SO4fa+C5McV1UPHBY4Py7JhcNz36qqY4ZP3XvBkmMBAAAAsA5smtF5X5rk3Kq6d5Ibkrwwk4DsfVV1SpKbkjx36HtBkmcm2ZnkO0PfdPftVfW6JJcN/V7b3bcP2y9O8s4kP5rkw8M/AAAAANaJmYRS3X1Vki3LPPW0Zfp2klNXOM7ZSc5epv3yJI/ezzIBAAAAmJJZrCkFAAAAwIITSgEAAAAwOqEUAAAAAKOb1UL
nsKa2bd+xqn6nHbt5ypUAAAAAq2GmFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACj2zTrAmBM27bvWFW/047dPOVKAAAAYLGZKQUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxu06wLgJVs275j1iUAAAAAUzKzmVJVdUBVfaaqPjQ8PqKqLq2q66vqvVV176H9PsPjncPzhy85xulD+xer6ulL2o8f2nZW1avGfm0AAAAA7N0sb997WZLrljx+Q5Jt3X1kkm8kOWVoPyXJN7r7p5JsG/qlqh6V5MQkP5Pk+CT/dQi6Dkjy+0mekeRRSU4a+gIAAACwTswklKqqQ5L8syTvGB5Xkqcm+ZOhyzlJnj1snzA8zvD804b+JyQ5r7u/191fTrIzyROGfzu7+4bu/n6S84a+AAAAAKwTs1pT6r8k+Y0k9x8ePzjJ/+7uO4fHu5IcPGwfnOTmJOnuO6vqjqH/wUkuWXLMpfvcvEf70Wv9AtgPH/vtVXb8pamWAQAAAMzO6DOlquoXktzW3VcsbV6ma9/Nc/vavlwtW6vq8qq6/Ktf/epeqgYAAABgLc3i9r0nJXlWVd2Yya11T81k5tSBVbV75tYhSb4ybO9KcmiSDM8/IMntS9v32Gel9h/S3Wd295bu3nLQQQft/ysDAAAAYFVGD6W6+/TuPqS7D89kofKPdvfzknwsyXOGbicn+eCwff7wOMPzH+3uHtpPHD6d74gkRyb5yySXJTly+DS/ew/nOH+ElwYAAADAKs1qTanlvDLJeVX1n5N8JslZQ/tZSf6wqnZmMkPqxCTp7mur6n1JPp/kziSndvddSVJVL0lyYZIDkpzd3deO+koAAAAA2KuZhlLdfVGSi4btGzL55Lw9+3w3yXNX2P/1SV6/TPsFSS5Yw1IBAAAAWEOzWFMKAAAAgAUnlAIAAABgdEIpAAAAAEYnlAIAAABgdEIpAAAAAEYnlAIAAABgdEIpAAAAAEYnlAIAAABgdEIpAAAAAEa3adYFAKylbdt3rLrvacdunmIlAAAA7I2ZUgAAAACMTigFAAAAwOiEUgAAAACMTigFAAAAwOgsdM5COeamM1fVb9v2ravqZ6FsAAAAuGfMlAIAAABgdGZKwX7Ytn3HqvqZUQUAAAA/yEwpAAAAAEZnptQCu/iGr6+q3xMf/uApVwIAAAAsGqEU69ZqFyVPkksOW93C5AAAAMD64PY9AAAAAEZnphSsIxZOBwAAYFGYKQUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIxu06wLgLVwzE1nzroEAAAAYB+YKQUAAADA6IRSAAAAAIzO7XvAwtq2fceq+p127OYpVwIAALB4zJQCAAAAYHRCKQAAAABGJ5QCAAAAYHTWlIJlHHPTmavqd8lhW6dcCQAAAMwnM6UAAAAAGJ1QCgAAAIDRCaUAAAAAGJ1QCgAAAIDRCaUAAAAAGJ1QCgAAAIDRbZp1AQCrsW37jlmXAAAAwBoyUwoAAACA0QmlAAAAABidUAoAAACA0VlTCkZgPSQAAAD4QWZKAQAAADA6M6VgA9qXmVe
nHbt5ipUAAADAPWOmFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMDqhFAAAAACjE0oBAAAAMLrRQ6mqOrSqPlZV11XVtVX1sqH9QVW1vaquH74+cGivqnprVe2sqqur6rFLjnXy0P/6qjp5SfvjquqaYZ+3VlWN/ToBAAAAWNksZkrdmeTfdfffT3JMklOr6lFJXpXkL7r7yCR/MTxOkmckOXL4tzXJ25JJiJXk1UmOTvKEJK/eHWQNfbYu2e/4EV4XAAAAAKs0eijV3bd095XD9reSXJfk4CQnJDln6HZOkmcP2yckeVdPXJLkwKp6WJKnJ9ne3bd39zeSbE9y/PDcj3f3xd3dSd615FgAAAAArAMzXVOqqg5P8pgklyZ5aHffkkyCqyQ/MXQ7OMnNS3bbNbTtrX3XMu0AAAAArBObZnXiqrpfkvcneXl3f3Mvyz4t90Tfg/blatiayW1+Oeyww+6u5IV18Q1fX9PjPfHhD17T4wEAAAAbz0xmSlXVvTIJpM7t7j8dmm8dbr3L8PW2oX1XkkOX7H5Ikq/cTfshy7T/kO4+s7u3dPeWgw46aP9eFAAAAACrNvpMqeGT8M5Kcl13/+6Sp85PcnKSM4avH1zS/pKqOi+TRc3v6O5bqurCJL+1ZHHz45Kc3t23V9W3quqYTG4LfEGS35v6C2PV1nrmFQAAALDxzOL2vScleX6Sa6rqqqHtNzMJo95XVackuSnJc4fnLkjyzCQ7k3wnyQuTZAifXpfksqHfa7v79mH7xUnemeRHk3x4+AcAAADAOjF6KNXdn8ry6z4lydOW6d9JTl3hWGcnOXuZ9suTPHo/ygQAAABgima20DkskmNuOnNV/S45bOuUKwEAAID1YSYLnQMAAACw2IRSAAAAAIxOKAUAAADA6KwpBczUtu07Zl0CAAAAM2CmFAAAAACjE0oBAAAAMDq378F+OOamM2ddAuvIam9FPO3YzVOuBAAAYP0TSsGcE5QAAACwHrl9DwAAAIDRmSkFG9C+3DZ4yWFbp1gJAAAA3DNmSgEAAAAwOqEUAAAAAKMTSgEAAAAwOqEUAAAAAKMTSgEAAAAwOqEUAAAAAKPbNOsCgPVh2/Yda3q8047dvKbHAwAAYL6YKQUAAADA6IRSAAAAAIxOKAUAAADA6IRSAAAAAIzOQuewjhxz05mzLoERrHZReYvFAwAA88xMKQAAAABGJ5QCAAAAYHRu3wOmYrW3qAEAALCYzJQCAAAAYHRmSgH7ZLWLsV9y2NYpVzIes74AAADWnlAK5twihkgAAACsf27fAwAAAGB0ZkoBSVY/owoAAADWgplSAAAAAIxOKAUAAADA6IRSAAAAAIxOKAUAAADA6Cx0DkzFahdOv+SwrVOuBAAAgPXITCkAAAAARieUAgAAAGB0bt8DWKe2bd+xqn6nHbt5ypUAAACsPaEUMFPWntp/0wivBGLMI/9fAwCsL0IpAO6xRfwlfxFfMwAATINQCtgQzKgCAACYL0IpgAWx2hk+rE9rPX5mcgEAMGtCKQBgRfsShgm6AADYF0IpAKZuva/DNI1ZZOt9ZtosX7PwCgCARCgFzJnVrj21L9Z6nSrrY+2/9R74AAAAd08oBcC6IWxaDGZUAQCQCKUAgHVKeAUAMN+EUgB3w+12sL6ZYQcAsDEJpQDWyDTWswIAAJhXQimADc5MLlhb+zLzyq2D689GuO1zrWf3+f8QgI1KKAWwTs1y5pWgCwAAmDahFMCCcHshrL31PitnvdcHACw2oRQA99haB11mXjGv5mUx9mnc2rjeg7P1Xh8AbGRCKQDWjY0wm0twxjyaRmi21sdc78cDAPbd3IZSVXV8krckOSDJO7r7jBmXBMAcsN4WsJxZhlzzFLCt9Qy7tTZPM+LMAgTWg7kMparqgCS/n+TYJLuSXFZV53f352db2TguvuHrsy4BYOFthFlf651gDxbPeg/Y1nt9s7QRQq6NUCP7xyfobjzV3bOuYc1V1RO
TvKa7nz48Pj1Juvu3V9pny5Ytffnll49U4XRdfNYrZl0CADBFAjsAmG/zDxg1dgAACz5JREFUFJpV1RXdvWW55+ZyplSSg5PcvOTxriRHz6gWAIA1ZSYe82q1gatbqQHmw7yGUrVM2w9NCauqrUl2v1N9u6q+ONWqxvOQJF+bdRGMzrgvLmO/uIz94jL2i2vOx/7N6/x4MzXnY88KjPuC+vX5Gvu/t9IT8xpK7Upy6JLHhyT5yp6duvvMJHP3p8aqunylqXHML+O+uIz94jL2i8vYLy5jv7iM/WIy7otrUcb+78y6gCm5LMmRVXVEVd07yYlJzp9xTQAAAAAM5nKmVHffWVUvSXJhkgOSnN3d1864LAAAAAAGcxlKJUl3X5DkglnXMSNzd0siq2LcF5exX1zGfnEZ+8Vl7BeXsV9Mxn1xLcTYV/cPrf8NAAAAAFM1r2tKAQAAALCOCaXmSFUdX1VfrKqdVfWqWdfD9FTVoVX1saq6rqquraqXDe2vqaq/rqqrhn/PnHWtrL2qurGqrhnG+PKh7UFVtb2qrh++PnDWdbK2quqRS67tq6rqm1X1ctf9fKqqs6vqtqr63JK2Za/zmnjr8P5/dVU9dnaVsz9WGPc3VdUXhrH9QFUdOLQfXlV/s+Taf/vsKmd/rTD2K35/r6rTh2v+i1X19NlUzVpYYezfu2Tcb6yqq4Z21/0c2cvvdAv1fu/2vTlRVQck2ZHk2CS7MvkEwpO6+/MzLYypqKqHJXlYd19ZVfdPckWSZyf5l0m+3d2/M9MCmaqqujHJlu7+2pK2Nya5vbvPGELpB3b3K2dVI9M1fM//6yRHJ3lhXPdzp6p+Nsm3k7yrux89tC17nQ+/qL40yTMz+X/iLd199Kxq555bYdyPS/LR4YN83pAkw7gfnuRDu/uxsa0w9q/JMt/fq+pRSd6T5AlJfjLJ/0iyubvvGrVo1sRyY7/H829Ockd3v9Z1P1/28jvdv84Cvd+bKTU/npBkZ3ff0N3fT3JekhNmXBNT0t23dPeVw/a3klyX5ODZVsWMnZDknGH7nEze0JhfT0vype7+q1kXwnR09yeS3L5H80rX+QmZ/DLT3X1JkgOHH3TZYJYb9+7+SHffOTy8JMkhoxfG1K1wza/khCTndff3uvvLSXZm8rsAG9Dexr6qKpM/Or9n1KIYxV5+p1uo93uh1Pw4OMnNSx7vipBiIQx/MXlMkkuHppcM0znPdgvX3OokH6mqK6pq69D20O6+JZm8wSX5iZlVxxhOzA/+gOq6XwwrXed+Blgcv5Lkw0seH1FVn6mqj1fVk2dVFFO13Pd31/zieHKSW7v7+iVtrvs5tMfvdAv1fi+Umh+1TJt7M+dcVd0vyfuTvLy7v5nkbUkekeSoJLckefMMy2N6ntTdj03yjCSnDtO+WRBVde8kz0ryx0OT6x4/AyyAqvoPSe5Mcu7QdEuSw7r7MUl+PckfVdWPz6o+pmKl7++u+cVxUn7wj1Cu+zm0zO90K3Zdpm3DX/tCqfmxK8mhSx4fkuQrM6qFEVTVvTL55nVud/9pknT3rd19V3f/bZI/iKncc6m7vzJ8vS3JBzIZ51t3T98dvt42uwqZsmckubK7b01c9wtmpevczwBzrqpOTvILSZ7Xw4Kww61bXx+2r0jypSSbZ1cla20v399d8wugqjYl+cUk793d5rqfP8v9TpcFe78XSs2Py5IcWVVHDH9FPzHJ+TOuiSkZ7i8/K8l13f27S9qX3lP8L5J8bs992diq6r7DQoipqvsmOS6TcT4/yclDt5OTfHA2FTKCH/irqet+oax0nZ+f5AXDp/Ick8mCuLfMokDWXlUdn+SVSZ7V3d9Z0n7Q8KEHqaqHJzkyyQ2zqZJp2Mv39/OTnFhV96mqIzIZ+78cuz6m7ueTfKG7d+1ucN3Pl5V+p8uCvd9vmnUBrI3hE1lekuTCJAckObu7r51xWUzPk5I8P8k1uz8iNslvJjmpqo7KZBrnjUl+dTb
lMUUPTfKByXtYNiX5o+7+86q6LMn7quqUJDclee4Ma2RKqurHMvmU1aXX9htd9/Onqt6T5ClJHlJVu5K8OskZWf46vyCTT+LZmeQ7mXwiIxvQCuN+epL7JNk+fO+/pLtflORnk7y2qu5McleSF3X3ahfKZp1ZYeyfstz39+6+tqrel+TzmdzSeapP3tu4lhv77j4rP7x+ZOK6nzcr/U63UO/3NcwABgAAAIDRuH0PAAAAgNEJpQAAAAAYnVAKAAAAgNEJpQAAAAAYnVAKAAAAgNEJpQAA9kFVPaWqPjSD8x5YVb829nnHVFXfnnUNAMB4hFIAAHtRVQfMuobBgUmmEkpV1aZ78ty0zgkALAahFAAwl6rqN6rq3w7b26rqo8P206rq3cP2SVV1TVV9rqresGTfb1fVa6vq0iRPrKrjq+oLVfWpJL+4wvkOqKrfGY53dVW9dMn5PjO0n11V9xnab6yqhwzbW6rqomH7NUO/i6rqht2vIckZSR5RVVdV1Zv2OPfhQ33nDOf+k6r6seG5x1XVx6vqiqq6sKoeNrRfVFW/VVUfT/KyPY73mqo6s6o+kuRdw2t7U1VdNhz/V/f473xNVX22qs4Y2o6qqkuGvh+oqgcud86qOqKqLh6O+7p9G2EAYKMTSgEA8+oTSZ48bG9Jcr+quleSf5Lkk1X1k0nekOSpSY5K8viqevbQ/75JPtfdRye5PMkfJPnnw/H+7grn25rkiCSP6e5/mOTcqvqRJO9M8q+6+x8k2ZTkxauo/aeTPD3JE5K8eqj7VUm+1N1Hdfe/X2afRyY5czj3N5P82rDf7yV5Tnc/LsnZSV6/ZJ8Du/vnuvvNyxzvcUlO6O5fTnJKkju6+/FJHp/k3wyB0jOSPDvJ0d39j5K8cdj3XUleOdRyTZJXr3DOtyR523Dc/7WK/y4AwBwRSgEA8+qKJI+rqvsn+V6SizMJp56c5JOZhCsXdfdXu/vOJOcm+dlh37uSvH/Y/ukkX+7u67u7k7x7hfP9fJK3D8dKd9+eSVD05e7eMfQ5Z8k59ubPuvt73f21JLcleegq9rm5uz89bL87k/DtkUkenWR7VV2V5D8mOWTJPu/dy/HO7+6/GbaPS/KC4RiXJnlwkiMzec3/vbu/k0xec1U9IJPg6ePDvnu+5qXnfFKS9wzbf7iK1wgAzBH38gMAc6m7/29V3ZjkhUn+Z5Krk/zTJI9Icl2SzXvZ/bvdfdfSw63ilLVMv9pL/zvz//9A+CN7PPe9Jdt3ZXU/s+157h7Of213P3GFff7PXo639LlK8tLuvnBph6o6fpnz3p09z7mv+wMAc8JMKQBgnn0iySuGr59M8qIkVw0zni5N8nNV9ZBhMfOTknx8mWN8IckRVfWI4fFJK5zrI0letHsB76p60LDv4VX1U0Of5y85x42Z3CKXJL+0itfyrST338vzh1XV7vDppCSfSvLFJAftbq+qe1XVz6ziXHu6MMmLh9sBU1Wbq+q+mbzmX1myftWDuvuOJN+oqt23Ti59zXv6dJITh+3n3YO6AIANTCgFAMyzTyZ5WJKLu/vWJN8d2tLdtyQ5PcnHknw2yZXd/cE9D9Dd381kvag/GxY6/6sVzvWOJDclubqqPpvkl4d9X5jkj6vqmiR/m+TtQ///lOQtVfXJTGZD7VV3fz3Jp4dF2d+0TJfrkpxcVVcneVAmazV9P8lzkrxhqOmqJP/47s61wmv7fJIrq+pzSf5bkk3d/edJzk9y+XBr3yuG/icnedNQy1FJXrvCcV+W5NSquizJA+5BXQDABlaTPxQCALBRVdXhST7U3Y+ecSkAAKtmphQAAAAAozNTCgAAAIDRmSkFAAAAwOiEUgAAAACMTigFAAAAwOiEUgAAAACMTigFAAAAwOiEUgAAAACM7v8BHVRFULPBlccAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(20,10))\n", "plt.hist(df['words'], bins=100, range=[0,200], alpha=0.5, label='intial text')\n", "plt.hist(preprocessed_df['words'], bins=100, range=[0,200],alpha=0.5, label='pre-processed text')\n", "plt.legend(prop={'size':20})\n", "plt.xlabel('word count per record')\n", "plt.ylabel('frequency')\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }