{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Source Comparison\n",
    "This document describes how the data changed when moving from the old data source to the new IATI data source."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('/home/jovyan/iati-partner-search/ips_python')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from os.path import join\n",
    "from constants import (\n",
    "    PROCESSED_RECORDS_FILENAME,\n",
    "    INPUT_DATA_FILENAME)\n",
    "from utils import get_data_path\n",
    "from preprocessing import preprocess_pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#read new data\n",
    "df_new = pd.read_csv(join(get_data_path(), INPUT_DATA_FILENAME.replace('.csv', '_new.csv')), encoding=\"iso-8859-1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Check description_narrative and description_narrative_text columns contain identical number of characters (expected: no rows):\n",
    "df_new[df_new['description_narrative'].map(str).apply(len) != df_new['description_narrative_text'].map(str).apply(len)].shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Rename columns to match old data\n",
    "df_new = df_new.rename(columns={'description_narrative_text': 'description', 'title_narrative_text': 'title', 'iati_identifier':'iati.identifier'})\n",
    "df_new = df_new[['iati.identifier', 'title', 'description']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3058: DtypeWarning: Columns (2,3,7,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,40,41,46,47,48,49,50,51,52,54,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      " interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "#read old data\n",
    "df_old = pd.read_csv(join(get_data_path(), INPUT_DATA_FILENAME.replace('.csv', '_old.csv')), encoding=\"iso-8859-1\")\n",
    "df_old = df_old[['iati.identifier', 'title', 'description']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "new df row count before clean: 962449\n",
      "new df row count after clean: 959355\n",
      "old df row count before clean: 1020000\n",
      "old df row count after clean: 1009067\n"
     ]
    }
   ],
   "source": [
    "# clean empty text and merge title\n",
    "# The title is prepended to the description COLUMN via assign(). The earlier\n",
    "# `df.loc[\"description\"] = ...` addressed a row label, so titles were never merged.\n",
    "# NOTE(review): rows with a missing title become NaN here and are dropped by the\n",
    "# next filter; rows whose description was back-filled from the title get it twice.\n",
    "# Outputs stored in this notebook may predate this fix - re-run to refresh them.\n",
    "print(\"new df row count before clean: {0}\".format(df_new.shape[0]))\n",
    "df_new = df_new.loc[~df_new[\"iati.identifier\"].isna()]\n",
    "df_new = df_new[~df_new[\"iati.identifier\"].str.isspace()]\n",
    "df_new.loc[df_new[\"description\"].isna(), [\"description\"]] = df_new[\"title\"]\n",
    "df_new = df_new.assign(description=df_new[\"title\"] + \" \" + df_new[\"description\"])\n",
    "df_new =df_new.loc[~df_new[\"description\"].isna()]\n",
    "df_new =df_new.loc[~df_new[\"description\"].str.isspace()]\n",
    "print(\"new df row count after clean: {0}\".format(df_new.shape[0]))\n",
    "print(\"old df row count before clean: {0}\".format(df_old.shape[0]))\n",
    "df_old = df_old.loc[~df_old[\"iati.identifier\"].isna()]\n",
    "df_old = df_old[~df_old[\"iati.identifier\"].str.isspace()]\n",
    "df_old.loc[df_old[\"description\"].isna(), [\"description\"]] = df_old[\"title\"]\n",
    "df_old = df_old.assign(description=df_old[\"title\"] + \" \" + df_old[\"description\"])\n",
    "df_old =df_old.loc[~df_old[\"description\"].isna()]\n",
    "df_old =df_old.loc[~df_old[\"description\"].str.isspace()]\n",
    "print(\"old df row count after clean: {0}\".format(df_old.shape[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "count of iati_identifier in new, not in old: 320662\n",
      "count of iati_identifier in old, not in new: 370374\n"
     ]
    }
   ],
   "source": [
    "#Number of IATI Identifier values in new, not old and in old, not new\n",
    "print(\"count of iati_identifier in new, not in old: {0}\".format(df_new[~df_new['iati.identifier'].isin(df_old['iati.identifier'])].shape[0]))\n",
    "\n",
    "print(\"count of iati_identifier in old, not in new: {0}\".format(df_old[~df_old['iati.identifier'].isin(df_new['iati.identifier'])].shape[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initial NEW DF word stats \n",
      "count 959355.000000\n",
      "mean 51.589194\n",
      "std 109.683399\n",
      "min 1.000000\n",
      "25% 10.000000\n",
      "50% 16.000000\n",
      "75% 51.000000\n",
      "max 12524.000000\n",
      "Name: words, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "df_new['words'] = df_new['description'].str.count(' ').add(1)\n",
    "\n",
    "print(\"Initial NEW DF word stats \\n{0}\".format(df_new['words'].describe()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initial OLD DF word stats \n",
      "count 1009067.000000\n",
      "mean 36.744806\n",
      "std 74.189388\n",
      "min 1.000000\n",
      "25% 8.000000\n",
      "50% 14.000000\n",
      "75% 39.000000\n",
      "max 12524.000000\n",
      "Name: words, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "df_old['words'] = df_old['description'].str.count(' ').add(1)\n",
    "pd.set_option('float_format', '{:f}'.format)\n",
    "print(\"Initial OLD DF word stats \\n{0}\".format(df_old['words'].describe()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_new = preprocess_pipeline(df_new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessed NEW DF word stats \n",
      "count 845945.000000\n",
      "mean 17.848165\n",
      "std 33.220827\n",
      "min 1.000000\n",
      "25% 3.000000\n",
      "50% 8.000000\n",
      "75% 18.000000\n",
      "max 4402.000000\n",
      "Name: words, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "df_new['words'] = df_new['description'].str.count(' ').add(1)\n",
    "print(\"Preprocessed NEW DF word stats \\n{0}\".format(df_new['words'].describe()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_old = preprocess_pipeline(df_old)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessed OLD DF word stats \n",
      "count 804690.000000\n",
      "mean 13.786120\n",
      "std 22.610445\n",
      "min 1.000000\n",
      "25% 3.000000\n",
      "50% 6.000000\n",
      "75% 16.000000\n",
      "max 4427.000000\n",
      "Name: words, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "df_old['words'] = df_old['description'].str.count(' ').add(1)\n",
    "print(\"Preprocessed OLD DF word stats \\n{0}\".format(df_old['words'].describe()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png":
"iVBORw0KGgoAAAANSUhEUgAABKUAAAJNCAYAAADgesaeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdeXSV1b3/8feWSAAFBAFFCAYFB6AyCqhQGVTAVQGFClhbKCoOeB2qtuotJZQqWEVxwv6qIg7FYrmIomUSSSC3jHVoHSEtlotDQSapVkDYvz9ykpuQAAHJEy68X2udxcl+9t7n+yRnuZaftfd+QowRSZIkSZIkKUlHVHQBkiRJkiRJOvwYSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxaRVdwMGiTp06MTMzs6LLkCRJkiRJOmT8+c9//jzGWLe0a4ZSKZmZmSxfvryiy5AkSZIkSTpkhBD+sbtrbt+TJEmSJElS4gylJEmSJEmSlDhDKUmSJEmSJCXOUEqSJEmSJEmJM5SSJEmSJElS4gylJEmSJEmSlDhDKUmSJEmSJCXOUEqSJEmSJEmJS6voAiRJkiRJB7etW7eyYcMGtmzZwo4dOyq6HEkVpFKlSlSvXp3atWuTnp7+reczlJIkSZIk7dbWrVtZvXo1tWrVIjMzkyOPPJIQQkWXJSlhMUa2b9/OF198werVq2nUqNG3DqbcvidJkiRJ2q0NGzZQq1Yt6tSpQ+XKlQ2kpMNUCIHKlStTp04datWqxYYNG771nIZSkiRJkqTd2rJlCzVq1KjoMiQdRGrUqMGWLVu+9TyGUpIkSZKk3dqxYwdHHnlkRZch6SBy5JFHHpDz5QylJEmSJEl75JY9SUUdqP8mGEpJkiRJkiQpcYZSkiRJkiRJSpyhlCRJkiRJ0mFu0qRJhBCYNGlSYp+ZltgnSZIkSZIOOQ/MXVHRJezRzeefUtElSNoNV0pJkiRJkiQpcYZSkiRJkiRJSpyhlCRJkiRJe/DRRx8RQmDIkCF89NFHDBw4kDp16lClShXatWvHK6+8stuxzz//PF27dqVWrVpUqVKF008/nV/96lds3bq1WL8TTjiBhg0blhh/4oknEkJg9OjRxdr/+Mc/EkLgF7/4xV7rz87OJoRAVlYWixYt4rzzzqNmzZpUr16dHj16sHz58hJjsrKyCCGQnZ3N5MmT6dChA0cffTSZmZnF+i1ZsoT+/ftz/PHHU7lyZTIyMrj66qv55JNP9lrX7j7v6aefpnXr1lStWpV69eoxdOhQPvvssxJjunTpQgiBbdu28ctf/pJTTz2V9PR0hgwZUthn69atjB07ljPOOINq1apRo0YNOnfuzAsvvLDbWpYuXcqAAQNo0KAB6enp1K9fnwsuuKDUMfty/3//+98ZNmwYTZo0oWrVqtSuXZvvfOc7XHPNNaxfv76w37Zt23jooYdo06YNtWrVolq1amRmZtKnTx9ee+21EvN+8MEHDBkyhIyMDNLT0znuuOO47LLL+PDDD0u9v7y8PL7//e9Tq1YtjjrqKM4++2xeffXV3f4+ypNnSkmSJEmSVAb/+Mc/aN++PSeddBI//OEP2bBhA1OmTCkMC7p27Vqs/xVXXMHEiRNp2LAhl1xyCccccwyLFy9mxIgRzJs3j7lz55KWlv+/5d26deN3v/sdH3zwAaeddhqQHx6sXr0agHnz5jFixIjCuV9//XUAunfvXub6lyxZwpgxYzjvvPMYPnw4eXl5TJs2jQULFjBnzhw6d+5cYsy4ceOYO3cuF110EV27dmXz5s2F15566imuuuoq0tPT6d27NxkZGaxcuZInnniCGTNmsHjxYho1alTm+gAeeOAB5syZw4ABA+jZsye5ubk89dRTZGdns2TJEurWrVtiTL9+/Vi2bBm9evWib9++1KtXD8gPd3r06EFOTg6nnXYaw4cP56uvvmL
q1KkMGDCAt956i7vvvrvYXI8//jjXXnstlSpVonfv3jRt2pS1a9eyfPlyJkyYwKWXXrpf9//pp59y5pln8sUXX3DhhRfSr18/vv76a1atWsWzzz7L9ddfz7HHHgvAkCFDeP7552nRogU/+tGPqFq1Kp988gm5ubnMmjWL8847r7CGWbNmcckll7B9+3YuuugimjRpwpo1a5g2bRqvvvoq8+fPp02bNoX9V65cyVlnncX69evp1asXrVq1Ii8vj759+9KrV699+lsdCIZSkiRJkiSVQXZ2NllZWYwcObKw7bLLLqNnz57ce++9xUKpSZMmMXHiRC6++GJ+97vfUbVq1cJrWVlZjBo1ikcffZQbb7wR+N9Qat68eYWh1Lx58wA4//zzycnJ4auvvqJatWqF16pWrcpZZ51V5vpnzZrFww8/zPXXX1/Y9tJLL9G3b1+GDh3Khx9+yBFHFN9Q9frrr7No0SJat25drH3FihVcffXVZGZmkpOTQ4MGDYqNOf/887nxxht58cUXy1wfwMyZM1myZEmxz7v55psZP348t99+O08++WSJMf/4xz945513qFOnTrH2cePGkZOTQ69evXj55ZcLA8CRI0fSvn17xowZw/e+9z3OPvtsAN577z2uu+46atSowcKFC2nevHmx+dasWbPf9z916lQ2bNjA+PHjC//mBb788svC3/vmzZv5/e9/T9u2bVmyZAmVKlUq1rfoiqqNGzcyaNAgqlWrxoIFC2jWrFnhtXfffZcOHTpw5ZVX8sYbbxS2Dx8+nPXr15eoo+B7kDS370mSJEmSVAYnnngiP//5z4u19ejRg0aNGrF06dJi7Q8++CBpaWlMnDixWCAFMGLECI499lh+97vfFbYVrHgqCKIK3terV48bbriBbdu2kZubC+QHE2+//TadOnWicuXKZa6/SZMmXHfddcXa+vTpw7nnnkteXh4LFy4sMWbYsGElAimAxx57jO3bt/Pggw8WC2QgP2Dr3bs3M2bMYMuWLWWuD+CHP/xhic/LysqiZs2aTJ48ucS2R4DRo0eXCKQAJk6cSAiB+++/vzCQAqhXr17hqrMnnnii2D198803jBgxokQgBRTbXrm/97/rdwHgqKOOKmwPIRBjJD09vURACBSupgJ45pln2LRpE6NGjSoWSAE0b96cq666ijfffJP33nsPyA/V5s6dS+PGjYsFk/C/34OkuVJKh775Yyq6goNH1zsqugJJkiTp/6xWrVqVWLkCkJGRwaJFiwp//uqrr3j77bepU6cO48ePL3Wu9PR03n///cKfTzzxRE466STmz5/Pzp07C89XOu+88zj33HNJS0tj3rx5XHDBBcyfP58YI926ddun+jt37lxq0NGlSxdycnJ48803SwQT7du3L3WugvvNyclh2bJlJa6vXbuWHTt2sGLFCtq2bcv06dN56623ivVp1apVidU5pQUjNWvWpFWrVuTk5PD+++/TqlWrvda4ZcsW8vLyaNCgQeHKs6IKfndvvvlmYdvixYsByrSNbV/vv3fv3tx5550MHz6c2bNn06NHD8455xyaNWtGCKFwXI0aNbjooouYMWMGrVq1ol+/fnTu3JkOHToUrpLbtYa3336brKysEjWsWLECgPfff59mzZoV3munTp1K/R4XfA+SZCglSZIkSVIZHHPMMaW2p6WlsXPnzsKfN27cSIyRdevWMWrUqDLP3717dx5//HHeeOMNjjzySNatW0f37t2pXr06Z555ZuEqqoJ/9+U8KYDjjjuu1Pbjjz8eoNh5Ubte21XBNrJ77713j5/5r3/9C4Dp06fz9NNPF7s2ePDgEqHUgaqxoF/9+vVLna+gfdOmTYVtBe93XflUmn29/xNPPJGlS5eSlZXFrFmzmDZtGpAfaN56663ccMMNhWOmTJnCPffcw+TJkwu3ilapUoX+/ftz3333Ff6OCmp4/PHHy1RDwe9kb7/jJLl9T5IkSZKkA6hmzZoAtG7dmhjjHl9
FFazeee211wqDp4K2bt268eabb7JhwwbmzZtHzZo1ix1gXRb//Oc/S20veLJdQd1FFV3FU9o9bt68eY/3V7DyadKkSSWuTZo0qdxqLOhX2lP7IP/g8V3nKwgdP/7441LHlDZ/We8f4PTTT2fKlCmsX7+e5cuXM3bsWHbu3MmNN95Y7KysqlWrkpWVxYoVK1i9ejXPPfccnTp14rnnnqN///4lanj77bf3WMPgwYOL9d/b7zhJhlKSJEmSJB1ARx99NM2bN+fdd99lw4YNZR7XrVs3QgjMmzeP119/nZNOOonGjRsD+auidu7cybPPPsvKlSvp0qVLqVuw9iQ3N7fYiq4C2dnZAKWeHbU7HTt2BCj1HKpvo7TtY5s3b+att96iSpUqnH766WWap3r16px88sl8/PHHrFy5ssT1+fPnAxQL9gruaebMmXud/9vcf1paGm3btuVnP/sZzz//PJC/kqw0GRkZ/OAHP2D27Nk0bdqU3NzcwhVS+1pDwd83NzeXHTt2lLhe8D1IkqGUJEmSJEkH2E9+8hO2bdvG0KFDi20RK7Bx48ZiT0WD/AO4mzdvzn//93+zYMGCYtvzzj77bKpUqcLdd98NsM/nSQGsXLmSCRMmFGt76aWXyMnJoUmTJnTu3LnMc11//fUceeSR3HzzzYVnFxW1bdu2/Qpsnn322WLnPEH+QeebN29m0KBBpKenl3muoUOHEmPktttuKxbCfP7554wePbqwT4Frr72WtLQ0Ro8eXXg4eFFFn763r/e/dOnSUlcoFbQVnBe1bt06lixZUqLfl19+yZYtW0hLSys83P7HP/4xxxxzDKNGjSpx0D7Azp07iwVNDRs25Pzzz2fVqlU88sgjxfoWfA+S5plSkiRJkiQdYEOHDuXPf/4zEyZM4OSTTy58St+GDRtYtWoVCxYs4Mc//jG/+c1vio3r3r0777zzTuH7Aunp6Zxzzjn7fZ4UQM+ePbnllluYOXMmLVu2JC8vj2nTplGlShWefPLJUg9B353TTjuNiRMnMnToUJo3b07Pnj055ZRT2L59O6tXr2bhwoXUrVuXDz74YJ9q7NWrF+eccw6XXnop9evXJzc3l9zcXDIzMxk7duw+zXXrrbcyc+ZMXnrpJVq2bMmFF17IV199xR/+8AfWrl3LT3/6Uzp16lTYv1mzZkyYMIFrrrmG1q1b06dPH5o2bVq43a569eqFK6z29f4nT57Mo48+yrnnnkuTJk2oVasWf/vb35gxYwbp6encdNNNQP7WwY4dO3L66afTpk0bMjIy+OKLL3jllVf47LPPuOGGG6hevTqQ/yS+qVOncvHFF9OxY0e6d+9O8+bNOeKII1i9ejWLFi1i/fr1fP3114X3+Oijj3LWWWdx0003MWfOnMLvwYsvvlh4wHqSDKUkSZIkSSoHjz76KL169eI3v/kNr732Gps2baJ27do0atSI2267jcsvv7zEmO7du/Pggw8SQqBr164lrs2bN4/jjjuO5s2b73M9HTp04Be/+AUjRozgkUceKXyC31133cWZZ565z/NdfvnltGzZknHjxjF//nzmzJnDUUcdxQknnED//v0ZMGDAPs958803c/HFFzN+/HimTJnC0UcfzZAhQ7j77rupV6/ePs1VuXJl5s6dy/3338/kyZN5+OGHSUtLo2XLlowfP55BgwaVGHPVVVfRokUL7rvvPrKzs5k+fTp16tThjDPO4Morr9zv+x80aBBbt27lT3/6E2+88Qb//ve/adCgAQMHDuSWW26hRYsWAGRmZjJq1Ciys7OZP38+n3/+ObVr1+bUU09l7NixDBw4sFgN3bt35y9/+Qv33Xcfs2fPZuHChVSuXJkTTjiBbt260a9fv2L9mzZtyuLFi7n99tt57bXXyM7O5owzzmD69OmsW7cu8VAq7Hqw2uGqXbt2cfny5RVdhsrD/DEVXcHBo+sdFV2BJEmS/o95//33y3yOjw5O2dnZdO3alZEjR5KVlVXR5ZQ
qKyuLUaNGMX/+fLp06VLR5agMyvrfhhDCn2OM7Uq75plSkiRJkiRJSpyhlCRJkiRJkhJnKCVJkiRJkqTEedC5JEmSJEmHsC5dunCwnyedlZV10J53pfLjSilJkiRJkiQlzlBKkiRJkiRJiTOUkiRJkiRJUuIMpSRJkiRJkpQ4QylJkiRJkiQlzlBKkiRJkiRJiTOUkiRJkiRJUuIMpSRJkiRJkpQ4QylJkiRJkiQlzlBKkiRJkqQDLDMzk8zMzDL3/+ijjwghMGTIkHKrSYefSZMmEUJg0qRJFV1KqdIqugBJkiRJ0v9h88dUdAV71vWOiq5A0m64UkqSJEmSJEmJM5SSJEmSJElS4gylJEmSJEkqgxdeeIHvfve71KxZk6pVq/Kd73yHMWPGsHXr1jLPsWXLFn7yk5/QsGFDqlSpwmmnncb999/Pzp0796mW7OxsQghkZWWxaNEizjvvPGrWrEn16tXp0aMHy5cvLzEmKyuLEALZ2dlMnjyZDh06cPTRR5c4+2rJkiX079+f448/nsqVK5ORkcHVV1/NJ598sk81Fv28p59+mtatW1O1alXq1avH0KFD+eyzz0qM6dKlCyEEtm3bxi9/+UtOPfVU0tPTi521tXXrVsaOHcsZZ5xBtWrVqFGjBp07d+aFF17YbS1Lly5lwIABNGjQgPT0dOrXr88FF1xQ6ph9uf+///3vDBs2jCZNmlC1alVq167Nd77zHa655hrWr19f2G/btm089NBDtGnThlq1alGtWjUyMzPp06cPr732Wol5P/jgA4YMGUJGRgbp6ekcd9xxXHbZZXz44Yel3l9eXh7f//73qVWrFkcddRRnn302r7766m5/HwcLz5SSJEmSJGkv7rzzTsaMGUOdOnW47LLLOProo5k5cyZ33nkns2fPZu7cuRx55JF7nGPr1q10796dZcuW0bJlS37wgx+wadMmRo8eTU5Ozn7VtWTJEsaMGcN5553H8OHDycvLY9q0aSxYsIA5c+bQuXPnEmPGjRvH3Llzueiii+jatSubN28uvPbUU09x1VVXkZ6eTu/evcnIyGDlypU88cQTzJgxg8WLF9OoUaN9qvGBBx5gzpw5DBgwgJ49e5Kbm8tTTz1FdnY2S5YsoW7duiXG9OvXj2XLltGrVy/69u1LvXr1gPxwp0ePHuTk5HDaaacxfPhwvvrqK6ZOncqAAQN46623uPvuu4vN9fjjj3PttddSqVIlevfuTdOmTVm7di3Lly9nwoQJXHrppft1/59++ilnnnkmX3zxBRdeeCH9+vXj66+/ZtWqVTz77LNcf/31HHvssQAMGTKE559/nhYtWvCjH/2IqlWr8sknn5Cbm8usWbM477zzCmuYNWsWl1xyCdu3b+eiiy6iSZMmrFmzhmnTpvHqq68yf/582rRpU9h/5cqVnHXWWaxfv55evXrRqlUr8vLy6Nu3L7169dqnv1XSDKUkSZIkSdqDRYsWMWbMGDIyMli6dCnHH388AGPGjOHiiy/mlVde4d577+XOO+/c4zzjxo1j2bJlXHLJJfzhD3/giCPyNy/dfvvttG3bdr9qmzVrFg8//DDXX399YdtLL71E3759GTp0KB9++GHh5xR4/fXXWbRoEa1bty7WvmLFCq6++moyMzPJycmhQYMGxcacf/753Hjjjbz44ov7VOPMmTNZsmRJsc+7+eabGT9+PLfffjtPPvlkiTH/+Mc/eOedd6hTp06x9nHjxpGTk0OvXr14+eWXSUvLjzVGjhxJ+/btGTNmDN/73vc4++yzAXjvvfe47rrrqFGjBgsXLqR58+bF5luzZs1+3//UqVPZsGED48eP58Ybbyw275dffln4e9+8eTO///3vadu2LUuWLKFSpUrF+hZdUbVx40YGDRpEtWrVWLBgAc2aNSu89u6779KhQweuvPJK3njjjcL24cOHs379+hJ1FHwPDmZu35MkSZIkaQ8mTpwIwM9//vPCQAogLS2NcePGccQRR/D
EE0/sdZ6nnnqKI444gl//+tfFgqLGjRtzww037FdtTZo04brrrivW1qdPH84991zy8vJYuHBhiTHDhg0rEUgBPPbYY2zfvp0HH3ywWCAD0K1bN3r37s2MGTPYsmXLPtX4wx/+sMTnZWVlUbNmTSZPnlzq9sfRo0eXCKQg/28RQuD+++8vDKQA6tWrx4gRIwCK/S0ee+wxvvnmG0aMGFEikAJo2LBhsb77c/9Vq1YtMe9RRx1V2B5CIMZIenp6iYAQKFxNBfDMM8+wadMmRo0aVSyQAmjevDlXXXUVb775Ju+99x6QH6rNnTuXxo0bFwsm4X+/BwczV0pJkiRJkrQHBatSunXrVuLaKaecQsOGDVm1ahWbNm3imGOOKXWOLVu2kJeXR0ZGBieffHKJ6126dGHUqFH7XFvnzp1LDTq6dOlCTk4Ob775Zolgon379qXOtWjRIgBycnJYtmxZietr165lx44drFixgrZt2zJ9+nTeeuutYn1atWpVYnVOacFIzZo1adWqFTk5Obz//vu0atVqrzUW/A4bNGjAaaedVuJ6wd/nzTffLGxbvHgxQJm2se3r/ffu3Zs777yT4cOHM3v2bHr06ME555xDs2bNCCEUjqtRowYXXXQRM2bMoFWrVvTr14/OnTvToUMHqlWrVmoNb7/9NllZWSVqWLFiBQDvv/8+zZo1K7zXTp06lViBBf/7PThYlVsoFUKYCHwPWBtjbFGk/T+A64FvgFdjjD9Ntd8BXAHsAG6IMc5OtbcFJgFVgT8CN8YYYwghHXgGaAusBwbEGD9KjRkM/Dz1kb+KMT5dXvcpSZIkSTq0FZy5VL9+/VKv169fn9WrV7N58+bdhlIFcxx33HGlXi+6Amtf7G2+oudF7e2zCraR3XvvvXv8zH/9618ATJ8+naefLv6/24MHDy4RSh2oGsvydwDYtGlTYVvB+11XPpVmX+//xBNPZOnSpWRlZTFr1iymTZsGQEZGBrfeemux1W9TpkzhnnvuYfLkyYwcORKAKlWq0L9/f+67777C31FBDY8//niZaiiv71VSynP73iSgZ9GGEEJXoA9wRoyxOXBfqr0ZMBBonhozIYRQEPE9BgwDmqZeBXNeAWyMMTYBHgDuSc1VGxgJdADaAyNDCLXK5xYlSZIkSYe6mjVrApT6tDjIP/C6aL89zfHPf/6z1Ou7m3tv9jZfaTUVXcVTVEHfzZs3E2Pc7atg5dOkSZNKXJs0aVK51bg/f4eCkPDjjz8udUxp85f1/gFOP/10pkyZwvr161m+fDljx45l586d3HjjjcXOyqpatSpZWVmsWLGC1atX89xzz9GpUyeee+45+vfvX6KGt99+e481DB48uFj/A/29Skq5hVIxxgXAhl2arwXGxhi3pvqsTbX3AX4fY9waY1wF5AHtQwj1gRoxxkUxxkj+yqi+RcYURLJTge4h/1vbA5gbY9wQY9wIzGWXcEySJEmSpLIqOA8pOzu7xLW8vDzWrFlD48aNd7tKCqB69eo0adKEjz/+mL/97W8lrpc2d1nk5uayc+fO3c5X2tlRu9OxY0eAUs+h+jZK2z62efNm3nrrLapUqcLpp59epnmqV6/OySefzMcff8zKlStLXJ8/fz5AsSfTFdzTzJkz9zr/t7n/tLQ02rZty89+9jOef/55IH8lWWkyMjL4wQ9+wOzZs2natCm5ubmFK6T2tYaCv29ubi47duwocX1/v1dJSfqg81OAziGEJSGEnBDCman2BsD/FOm3JtXWIPV+1/ZiY2KM3wCbgWP3MJckSZIkSfts6NChAPzqV79i3bp1he07duzg1ltvZefOnVxxxRV7nefHP/4xO3fu5Gc/+1mxIGnVqlU89NBD+1XbypUrmTBhQrG2l156iZycHJo0aULnzp3LPNf111/PkUceyc0331x4dlFR27Zt26/A5tlnny12zhPkH3S+efNmBg0aRHp6epnnGjp0KDFGbrvttmIhzOeff87o0aML+xS
49tprSUtLY/To0YWHgxdV9Ol7+3r/S5cuLXWFUkFbwXlR69atY8mSJSX6ffnll2zZsoW0tDQqV64M5H9HjjnmGEaNGsXSpUtLjNm5c2exoKlhw4acf/75rFq1ikceeaRY34LvwcEs6YPO04BaQEfgTOCFEMJJQGlrB+Me2tnPMcWEEIaRvzWQRo0a7bFwSZIkSdLh6eyzz+anP/0pv/71r2nRogX9+/fnqKOOYubMmbzzzjt06tSJ2267ba/z3HLLLUyfPp3/+q//ok2bNvTo0YPNmzczZcoUvvvd7/Lyyy/vc209e/bklltuYebMmbRs2ZK8vDymTZtGlSpVePLJJ0s9BH13TjvtNCZOnMjQoUNp3rw5PXv25JRTTmH79u2sXr2ahQsXUrduXT744IN9qrFXr16cc845XHrppdSvX5/c3Fxyc3PJzMxk7Nix+zTXrbfeysyZM3nppZdo2bIlF154IV999RV/+MMfWLt2LT/96U/p1KlTYf9mzZoxYcIErrnmGlq3bk2fPn1o2rRp4Xa76tWrF66w2tf7nzx5Mo8++ijnnnsuTZo0oVatWvztb39jxowZpKenc9NNNwH5Wwc7duzI6aefTps2bcjIyOCLL77glVde4bPPPuOGG26gevXqQP6T+KZOncrFF19Mx44d6d69O82bN+eII45g9erVLFq0iPXr1/P1118X3uOjjz7KWWedxU033cScOXMKvwcvvvhi4QHrB6ukQ6k1wLTUVrylIYSdQJ1Ue0aRfg2BT1LtDUtpp8iYNSGENKAm+dsF1wBddhmTXVoxMcbfAr8FaNeuXanBlSRJkiRJ99xzD61bt+aRRx7hmWeeYfv27Zx88sn86le/4pZbbilc6bIn6enpvPbaa2RlZTFlyhQefPBBMjMz+fnPf87FF1+8X6FUhw4d+MUvfsGIESN45JFHiDHSrVs37rrrLs4888y9T7CLyy+/nJYtWzJu3Djmz5/PnDlzOOqoozjhhBPo378/AwYM2Oc5b775Zi6++GLGjx/PlClTOProoxkyZAh333039erV26e5KleuzNy5c7n//vuZPHkyDz/8MGlpabRs2ZLx48czaNCgEmOuuuoqWrRowX333YwYd84AACAASURBVEd2djbTp0+nTp06nHHGGVx55ZX7ff+DBg1i69at/OlPf+KNN97g3//+Nw0aNGDgwIHccssttGiR/8y3zMxMRo0aRXZ2NvPnz+fzzz+ndu3anHrqqYwdO5aBAwcWq6F79+785S9/4b777mP27NksXLiQypUrc8IJJ9CtWzf69etXrH/Tpk1ZvHgxt99+O6+99hrZ2dmcccYZTJ8+nXXr1h3UoVTIz4fKafIQMoFXCp6+F0K4BjghxviLEMIpwDygEdAMmEz+weQnpNqbxhh3hBCWAf8BLCH/6XsPxxj/GEIYDnwnxnhNCGEgcEmM8dLUQed/Bgo2kb4BtI0x7nq+VTHt2rWLy5cvP5C3r4PF/DEVXcHBo+sdFV2BJEmS/o95//33y3zmj5KTnZ1N165dGTlyJFlZWRVdTqmysrIYNWoU8+fPp0uXLhVdjg6wsv63IYTw5xhju9KuldtKqRDC8+SvWKoTQlhD/hPxJgITQwjvANuAwalVU++GEF4A3gO+AYbHGAs2h15L/pP8qgIzUy+AJ4FnQwh55K+QGggQY9wQQhgNLEv1++XeAilJkiRJkiQlq9xCqRhjyTVz+S7fTf+7gLtKaV8OtCil/Wvg+7uZayL5AZgkSZIkSZIOQkk/fU+SJEmSJElK/KBzSZIkSZL0LXXp0oXyPCP6QMjKyjpoz7vSwcGVUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZL26GA/u0hSsg7UfxMMpSRJkiRJu1WpUiW2b99e0WVIOohs376dSpUqfet5DKUkSZIkSbtVvXp1vvjii4ouQ9JB5IsvvqB69erfeh5DKUmSJEnSbtWuXZuNGzfy+eefs23
bNrfySYepGCPbtm3j888/Z+PGjdSuXftbz5l2AOqSJEmSJB2i0tPTadSoERs2bOCjjz5ix44dFV2SpApSqVIlqlevTqNGjUhPT//W8xlKSZIkSZL2KD09nfr161O/fv2KLkXSIcTte5IkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxKVVdAFSeVv09/UVXcJB46yuFV2BJEmSJEn5XCklSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxJVbKBVCmBhCWBtCeKeUa7eGEGIIoU6RtjtCCHkhhA9DCD2KtLcNIfw1de2hEEJItaeHEKak2peEEDKLjBkcQliZeg0ur3uUJEmSJEnS/inPlVKTgJ67NoYQMoDzgdVF2poBA4HmqTETQgiVUpcfA4YBTVOvgjmvADbGGJsADwD3pOaqDYwEOgDtgZEhhFoH+N4kSZIkSZL0LZRbKBVjXABsKOXSA8BPgVikrQ/w+xjj1hjjKiAPaB9CqA/UiDEuijFG4Bmgb5ExT6feTwW6p1ZR9QDmxhg3xBg3AnMpJRyTJEmSJElSxUn0TKkQQm/g4xjj27tcagD8T5Gf16TaGqTe79pebEyM8RtgM3DsHuaSJEmSJEnSQSItqQ8KIVQD/hO4oLTLpbTFPbTv75hdaxpG/tZAGjVqVFoXSZIkSZIklYMkV0qdDDQG3g4hfAQ0BN4IIRxP/mqmjCJ9GwKfpNobltJO0TEhhDSgJvnbBXc3Vwkxxt/GGNvFGNvVrVv3W92cJEmSJEmSyi6xUCrG+NcYY70YY2aMMZP88KhNjPEz4GVgYOqJeo3JP9B8aYzxU2BLCKFj6ryoHwEvpaZ8GSh4sl5/4PXUuVOzgQtCCLVSB5xfkGqTJEmSJEnSQaLctu+FEJ4HugB1QghrgJExxidL6xtjfDeE8ALwHvANMDzGuCN1+Vryn+RXFZiZegE8CTwbQsgjf4XUwNRcG0IIo4FlqX6/jDGWduC6JEmSJEmSKki5hVIxxkF7uZ65y893AXeV0m850KKU9q+B7+9m7onAxH0oV5IkSZIkSQlK9Ol7kiRJkiRJEhhKSZIkSZIkqQIYSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEm
SJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClxhlKSJEmSJElKnKGUJEmSJEmSEmcoJUmSJEmSpMQZSkmSJEmSJClx5RZKhRAmhhDWhhDeKdJ2bwjhgxDCX0IIL4YQjily7Y4QQl4I4cMQQo8i7W1DCH9NXXsohBBS7ekhhCmp9iUhhMwiYwaHEFamXoPL6x4lSZIkSZK0f8pzpdQkoOcubXOBFjHGM4AVwB0AIYRmwECgeWrMhBBCpdSYx4BhQNPUq2DOK4CNMcYmwAPAPam5agMjgQ5Ae2BkCKFWOdyfJEmSJEmS9lO5hVIxxgXAhl3a5sQYv0n9uBhomHrfB/h9jHFrjHEVkAe0DyHUB2rEGBfFGCPwDNC3yJinU++nAt1Tq6h6AHNjjBtijBvJD8J2DcckSZIkSZJUgSryTKmhwMzU+wbA/xS5tibV1iD1ftf2YmNSQddm4Ng9zCVJkiRJkqSDRIWEUiGE/wS+AX5X0FRKt7iH9v0ds2sdw0IIy0MIy9etW7fnoiVJkiRJknTAJB5KpQ4e/x7wg9SWPMhfzZRRpFtD4JNUe8NS2ouNCSGkATXJ3y64u7lKiDH+NsbYLsbYrm7dut/mtiRJkiRJkrQPEg2lQgg9gZ8BvWOMXxW59DIwMPVEvcbkH2i+NMb4KbAlhNAxdV7Uj4CXiowpeLJef+D1VMg1G7gghFArdcD5Bak2SZIkSZIkHSTSymviEMLzQBegTghhDflPxLsDSAfm5mdMLI4xXhNjfDeE8ALwHvnb+obHGHekprqW/Cf5VSX/DKqCc6ieBJ4NIeSRv0JqIECMcUMIYTSwLNXvlzHGYgeuS5IkSZIkqWKVWygVYxxUSvOTe+h/F3BXKe3LgRaltH8NfH83c00EJpa5WEmSJEmSJCWqIp++J0mSJEmSpMOUoZQkSZIkSZISZyglSZIkSZKkxJXbmVKSDkLzx1R0BQeXrndUdAWSJEmSdNhypZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUqcoZQkSZIkSZISZyglSZIkSZKkxBlKSZIkSZIkKXGGUpIkSZIkSUpcuYVSIYSJIYS1IYR3irTVDiHMDSGsTP1bq8i1O0IIeSGED0MIPYq0tw0h/DV17aEQQki1p4cQpqTal4QQMouMGZz6jJUhhMHldY+SJEmSJEnaP+W5UmoS0HOXttuBeTHGpsC81M+EEJoBA4HmqTETQgiVUmMeA4YBTVOvgjmvADbGGJsADwD3pOaqDYwEOgDtgZFFwy9JkiRJkiRVvHILpWKMC4ANuzT3AZ5OvX8a6Fuk/fcxxq0xxlVAHtA+hFAfqBFjXBRjjMAzu4wpmGsq0D21iqoHMDfGuCHGuBGYS8lwTJIkSZIkSRUo6TOljosxfgq
Q+rdeqr0B8D9F+q1JtTVIvd+1vdiYGOM3wGbg2D3MJUmSJEmSpIPEwXLQeSilLe6hfX/HFP/QEIaFEJaHEJavW7euTIVKkiRJkiTp20s6lPpnakseqX/XptrXABlF+jUEPkm1NyylvdiYEEIaUJP87YK7m6uEGONvY4ztYozt6tat+y1uS5IkSZIkSfsi6VDqZaDgaXiDgZeKtA9MPVGvMfkHmi9NbfHbEkLomDov6ke7jCmYqz/weurcqdnABSGEWqkDzi9ItUmSJEmSJOkgkVZeE4cQnge6AHVCCGvIfyLeWOCFEMIVwGrg+wAxxndDCC8A7wHfAMNjjDtSU11L/pP8qgIzUy+AJ4FnQwh55K+QGpiaa0MIYTSwLNXvlzHGXQ9clyRJkiRJUgUqt1AqxjhoN5e676b/XcBdpbQvB1qU0v41qVCrlGsTgYllLlaSJEmSJEmJOlgOOpckSZIkSdJhZK+hVAihdhKFSJIkSZIk6fBRlu17S0IIbwFPATNTh4lL+j9o0d/XV3QJB5WzulZ0BZIkSZJ0+CrL9r1TgN8CPwTyQgh3hxBOKd+yJEmSJEmSdCjbaygV881NHVx+JTAYWBpCyAkhnFXuFUqSJEmSJOmQs9fteyGEY4HLyV8p9U/gP4CXgVbAH4DG5VmgJEmSJEmSDj1lOVNqEfAs0DfGuKZI+/IQwm/KpyxJkiRJkiQdysoSSp26u8PNY4z3HOB6JEmSJEmSdBgoy0Hnc0IIxxT8EEKoFUKYXY41SZIkSZIk6RBXllCqboxxU8EPMcaNQL3yK0mSJEmSJEmHurKEUjtCCI0KfgghnAiUup1PkiRJkiRJKouynCn1n0BuCCEn9fN3gWHlV5IkSZIkSZIOdXsNpWKMs0IIbYCOQABujjF+Xu6VSZIkSZIk6ZBVlpVSAOnAhlT/ZiEEYowLyq8sSZIkSZIkHcr2GkqFEO4BBgDvAjtTzREwlJIkSZIkSdJ+KctKqb7AqTHGreVdjCRJkiRJkg4PZXn63t+BI8u7EEmSJEmSJB0+yrJS6ivgrRDCPKBwtVSM8YZyq0qSJEmSJEmHtLKEUi+nXpIkSZIkSdIBsddQKsb4dAihKtAoxvhhAjVJkiRJkiTpELfXM6VCCBcBbwGzUj+3CiG4ckqSJEmSJEn7rSwHnWcB7YFNADHGt4DG5ViTJEmSJEmSDnFlCaW+iTFu3qUtlkcxkiRJkiRJOjyU5aDzd0IIlwGVQghNgRuAP5VvWZIkSZIkSTqUlWWl1H8AzYGtwPPAF8BN5VmUJEmSJEmSDm1lefreV8B/pl6SJEmSJEnSt7bXUCqEMJ9SzpCKMXYrl4okSZIkSZJ0yCvLmVK3FnlfBegHfFM+5UiSJEmSJOlwUJbte3/epem/Qwg55VSPJEmSJEmSDgNl2b5Xu8iPRwBtgePLrSJJkiRJkiQd8sqyfe/P5J8pFcjftrcKuKI8i5IkSZIkSdKhrSzb9xonUYgkSZIkSZIOH2XZvnfJnq7HGKcduHIkKUHzx1R0BQeXrndUdAWSJEmSDiNl2b53BXA28Hrq565ANrCZ/G19hlKSJEmSJEnaJ2UJpSLQLMb4KUAIoT7waIzxx+VamSRJkiRJkg5ZR5ShT2ZBIJXyT+CUcqpHkiRJkiRJh4GyrJTKDiHMBp4nf9XUQGB+uVYlSZIkSZKkQ1pZnr53fQjhYuC7qabfxhhfLN+yJEmSJEmSdCgry0opgDeALTHG10II1UII1WOMW8qzMEmSJEmSJB269nqmVAjhKmAq8P9STQ2A6eVZlCRJkiRJkg5tZTnofDhwDvAFQIxxJVCvPIuSJEmSJEnSoa0sodTWGOO2gh9CCGnkH3guSZIkSZIk7ZeyhFI5IYQ7gaohhPOBPwAzyrcsSZIkSZIkHcrKEkrdDqwD/gpcDfwR+Hl5FiVJkiRJkqRD2x6fvhdCqAQ8HWO8HHg8mZIkSZIkSZJ0qNvjSqkY4w6gbgihckL1SJIkSZIk6TC
wx5VSKR8B/x1CeBn4sqAxxnh/eRUlSZIkSZKkQ9tuV0qFEJ5NvR0AvJLqW73IS5IkSZIkSdove1op1TaEcCKwGng4oXokSZIkSZJ0GNhTKPUbYBbQGFhepD0AETipHOuSJEmSJEnSIWy32/dijA/FGE8HnooxnlTk1TjGaCAlSZIkSZKk/bbHp+8BxBivPdAfGkK4OYTwbgjhnRDC8yGEKiGE2iGEuSGElal/axXpf0cIIS+E8GEIoUeR9rYhhL+mrj0UQgip9vQQwpRU+5IQQuaBvgdJkiRJkiTtv72GUgdaCKEBcAPQLsbYAqgEDARuB+bFGJsC81I/E0JolrreHOgJTAghVEpN9xgwDGiaevVMtV8BbIwxNgEeAO5J4NYkSZIkSZJURomHUilpQNUQQhpQDfgE6AM8nbr+NNA39b4P8PsY49YY4yogD2gfQqgP1IgxLooxRuCZXcYUzDUV6F6wikqSJEmSJEkVL/FQKsb4MXAf+U/1+xTYHGOcAxwXY/w01edToF5qSAPgf4pMsSbV1iD1ftf2YmNijN8Am4Fjy+N+JEmSJEmStO8qYvteLfJXMjUGTgCOCiFcvqchpbTFPbTvacyutQwLISwPISxft27dnguXJEmSJEnSAVMR2/fOA1bFGNfFGLcD04CzgX+mtuSR+ndtqv8aIKPI+Ibkb/dbk3q/a3uxMaktgjWBDbsWEmP8bYyxXYyxXd26dQ/Q7UmSJEmSJGlvKiKUWg10DCFUS53z1B14H3gZGJzqMxh4KfX+ZWBg6ol6jck/0HxpaovflhBCx9Q8P9plTMFc/YHXU+dOSZIkSZIk/f/27j9Yt6q8D/j3Ga4/KP5E0RLAQhS0ShMMSLAmxoQQsU0LTTWBtJFaWhJCDGFqE8h0RmOHDESUmEwlvVEDqAlSTCqTaJRBUbSIXhFFQPBWKRAIoBiVtGDAp3+cfePh5NzDxXDXfu+5n8/MO+9+17vX3s+GNXvOfO/a62UBbBh9wu6+sqouSnJVkvuTfCbJxiSPS3JhVR2fpeDqFdP+11bVhUmum/Y/qbsfmA53YpJzk+ya5P3TK0neluQdVbU5SzOkjhlwacAO5oovfXXuEhbKC3907goAAICdyfBQKkm6+7VJXrui+b4szZpabf/Tk5y+SvumJAeu0n5vplALAAAAgMUzx+N7AAAAAOzkhFIAAAAADDfL43tsX2dfcuPcJSyUw+YuAAAAAPg7zJQCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKEUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhhFIAAAAADCeUAgAAAGA4oRQAAAAAwwmlAAAAABhOKAUAAADAcEIpAAAAAIYTSgEAAAAwnFAKAAAAgOGEUgAAAAAMJ5QCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABguA1zF8Aj77CbN85dAgAAAMCazJQCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKEUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhZgmlqupJVXVRVX2hqq6vqhdW1e5VdUlVfXF6f/Ky/U+rqs1VdUNVvXRZ+8FVdc303e9UVU3tj6mqd0/tV1bVvuOvEgAAAICtmWum1JuT/Hl3PyfJ9ye5PsmpSS7t7v2TXDp9TlU9N8kxSZ6X5Mgkb6mqXabjnJPkhCT7T68jp/bjk3ytu5+V5OwkZ464KAAAAAC2zfBQqqqekOTFSd6WJN39re7+qyRHJTlv2u28JEdP20cluaC77+vuLyfZnOTQqtozyRO6+4ru7iTnr+i
z5VgXJTl8yywqAAAAAOY3x0yp701yV5I/qKrPVNVbq2q3JE/v7tuTZHp/2rT/XkluWdb/1qltr2l7ZfuD+nT3/Um+nuQp2+dyAAAAAHi45gilNiT5gSTndPfzk/x1pkf1tmK1GU69RvtafR584KoTqmpTVW2666671q4aAAAAgEfMHKHUrUlu7e4rp88XZSmkumN6JC/T+53L9t9nWf+9k9w2te+9SvuD+lTVhiRPTHL3ykK6e2N3H9Ldh+yxxx6PwKUBAAAAsC2Gh1Ld/ZdJbqmqZ09Nhye5LsnFSY6b2o5L8t5p++Ikx0y/qLdflhY0/+T0iN83q+qwab2oV67os+VYL0/yoWndKQAAAAAWwIaZzvvqJO+qqkcn+VKSV2UpILuwqo5PcnOSVyRJd19bVRdmKbi6P8lJ3f3AdJwTk5ybZNck759eydIi6u+oqs1ZmiF1zIiLAgAAAGDbzBJKdffVSQ5Z5avDt7L/6UlOX6V9U5IDV2m/N1OoBcC2OfuSG+cuYWGccsQBc5cAAADr3hxrSgEAAACwkxNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKEUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhhFIAAAAADCeUAgAAAGA4oRQAAAAAwwmlAAAAABhOKAUAAADAcEIpAAAAAIYTSgEAAAAwnFAKAAAAgOGEUgAAAAAMJ5QCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKEUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhhFIAAAAADCeUAgAAAGC4DXMXAMBiOOzmjXOXsEDOmrsAAABY98yUAgAAAGA4oRQAAAAAw3l8DwBWOPuSG+cuYaGccsQBc5cAAMA6ZKYUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhhFIAAAAADCeUAgAAAGC4DXMXAAAstrMvuXHuEhbKKUccMHcJAADrgplSAAAAAAw320ypqtolyaYkf9HdP1lVuyd5d5J9k9yU5Ke7+2vTvqclOT7JA0l+ubs/MLUfnOTcJLsmeV+Sk7u7q+oxSc5PcnCSryb5me6+adjFAQDrlplj32HWGADw9zHnTKmTk1y/7POpSS7t7v2TXDp9TlU9N8kxSZ6X5Mgkb5kCrSQ5J8kJSfafXkdO7ccn+Vp3PyvJ2UnO3L6XAgAAAMDDMUsoVVV7J/nnSd66rPmoJOdN2+clOXpZ+wXdfV93fznJ5iSHVtWeSZ7Q3Vd0d2dpZtTRqxzroiSHV1VttwsCAAAA4GGZa6bUbyf51STfXtb29O6+PUmm96dN7XsluWXZfrdObXtN2yvbH9Snu+9P8vUkT3lkLwEAAACA79bwUKqqfjLJnd396W3tskpbr9G+Vp+VtZxQVZuqatNdd921jeUAAAAA8Pc1x0ypFyX5l1V1U5ILkvxYVb0zyR3TI3mZ3u+c9r81yT7L+u+d5Lapfe9V2h/Up6o2JHlikrtXFtLdG7v7kO4+ZI899nhkrg4AAACAhzT81/e6+7QkpyVJVb0kyWu6+99W1RuSHJfkjOn9vVOXi5P8YVW9Kcn3ZGlB80929wNV9c2qOizJlUlemeR3l/U5LskVSV6e5EPTulMAADxC/BLhg/k1QgB4eIaHUms4I8mFVXV8kpuTvCJJuvvaqrowyXVJ7k9yUnc/MPU5Mcm5SXZN8v7plSRvS/KOqtqcpRlSx4y6CAAAAAAe2qyhVHdfluSyafurSQ7fyn6nJzl9lfZNSQ5cpf3eTKEWAAAAAItnrl/fAwAAAGAnJpQCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKE
UAAAAAMMJpQAAAAAYbsPcBQDAojns5o1zlwA7hE8844S5SwAAdmBmSgEAZpQQwwAADuxJREFUAAAwnFAKAAAAgOGEUgAAAAAMJ5QCAAAAYDihFAAAAADDCaUAAAAAGG7D3AUAALBjOuzmjXOXsGDOmrsAANihmCkFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhhFIAAAAADCeUAgAAAGA4oRQAAAAAwwmlAAAAABhOKAUAAADAcEIpAAAAAIYTSgEAAAAwnFAKAAAAgOGEUgAAAAAMJ5QCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKEUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACG2zB3AQAAsB6cfcmNc5ewUE454oC5SwBgwZkpBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YaHUlW1T1V9uKqur6prq+rkqX33qrqkqr44vT95WZ/TqmpzVd1QVS9d1n5wVV0zffc7VVVT+2Oq6t1T+5VVte/o6wQAAABg6+aYKXV/kv/U3f84yWFJTqqq5yY5Ncml3b1/kkunz5m+OybJ85IcmeQtVbXLdKxzkpyQZP/pdeTUfnySr3X3s5KcneTMERcGAAAAwLYZHkp19+3dfdW0/c0k1yfZK8lRSc6bdjsvydHT9lFJLuju+7r7y0k2Jzm0qvZM8oTuvqK7O8n5K/psOdZFSQ7fMosKAAAAgPltmPPk02N1z09yZZKnd/ftyVJwVVVPm3bbK8knlnW7dWr7m2l7ZfuWPrdMx7q/qr6e5ClJvrJdLgQAAHiQsy+5ce4SFsYpRxwwdwkAC2m2hc6r6nFJ3pPkV7r7G2vtukpbr9G+Vp+VNZxQVZuqatNdd931UCUDAAAA8AiZJZSqqkdlKZB6V3f/8dR8x/RIXqb3O6f2W5Pss6z73klum9r3XqX9QX2qakOSJya5e2Ud3b2xuw/p7kP22GOPR+LSAAAAANgGc/z6XiV5W5Lru/tNy766OMlx0/ZxSd67rP2Y6Rf19svSguafnB71+2ZVHTYd85Ur+mw51suTfGhadwoAAACABTDHmlIvSvJzSa6pqquntl9PckaSC6vq+CQ3J3lFknT3tVV1YZLrsvTLfSd19wNTvxOTnJtk1yTvn17JUuj1jqranKUZUsds74sCAAC+47CbN85dwgI5a+4CABbS8FCquz+W1dd8SpLDt9Ln9CSnr9K+KcmBq7TfmynUAgAAAGDxzLbQOQAAAAA7L6EUAAAAAMPNsaYUAACsO9ZQAoCHx0wpAAAAAIYzUwoAAACY3dmX3Dh3CQvllCMOmLuE7U4oBQAAwDCCh+/YGUIHWIvH9wAAAAAYTigFAAAAwHBCKQAAAACGs6YUAAAAwxx288a5S1ggZ81dAMxKKAUAALAdWdj7wQ6buwBgYQilAAAAgNmZRbfS+p9JZ00pAAAAAIYzUwoAAABm4NHOB/No587HTCkAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAxnoXMAAIDtyM/cA6zOTCkAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOL++BwAAADPwy4zs7MyUAgAAAGA4oRQAAAAAwwmlAAAAABhOKAUAAADAcEIpAAAAAIYTSgEAAAAwnFAKAAAAgOGEUgAAAAAMJ5QCAAAAYDihFAAAAADDCaUAAAAAGE4oBQAAAMBwQikAAAAAhhNKAQAAADCcUAoAAACA4YRSAAAAAAwnlAIAAABgOKEUAAAAAMMJpQAAAAAYTigFAAAAwHBCKQAAAACGE0oBAAAAMJxQCgAAAIDhhFIAAAAADCeUAgAAAGA4oRQAAAAAwwmlAAAAABhuXYdSVXVkVd1QVZur6tS56wEAAABgyboNpapqlyT/LcnLkjw3ybFV9dx5qwIAAAAgWcehVJJDk2zu7i9197e
SXJDkqJlrAgAAACDrO5TaK8ktyz7fOrUBAAAAMLMNcxewHdUqbf2gHapOSHLC9PGeqrphu1c1xlOTfGXuImANxiiLzhhl0Rmj7AiMUxadMcpi+w9vXC9j9B9t7Yv1HErdmmSfZZ/3TnLb8h26e2OSjSOLGqGqNnX3IXPXAVtjjLLojFEWnTHKjsA4ZdEZoyy6nWGMrufH9z6VZP+q2q+qHp3kmCQXz1wTAAAAAFnHM6W6+/6q+qUkH0iyS5K3d/e1M5cFAAAAQNZxKJUk3f2+JO+bu44ZrLtHEll3jFEWnTHKojNG2REYpyw6Y5RFt+7HaHX3Q+8FAAAAAI+g9bymFAAAAAALSii1zlTVkVV1Q1VtrqpT564HVqqqm6rqmqq6uqo2zV0PVNXbq+rOqvr8srbdq+qSqvri9P7kOWtk57aVMfq6qvqL6V56dVX9szlrZOdWVftU1Yer6vqquraqTp7a3UtZCGuMUfdSFkJVPbaqPllVn53G6G9M7ev+PurxvXWkqnZJcmOSI5LcmqVfIDy2u6+btTBYpqpuSnJId39l7logSarqxUnuSXJ+dx84tf1Wkru7+4wp4H9yd//anHWy89rKGH1dknu6+6w5a4Mkqao9k+zZ3VdV1eOTfDrJ0Un+XdxLWQBrjNGfjnspC6CqKslu3X1PVT0qyceSnJzkp7LO76NmSq0vhybZ3N1f6u5vJbkgyVEz1wSw0Lr7o0nuXtF8VJLzpu3zsvSHK8xiK2MUFkZ3397dV03b30xyfZK94l7KglhjjMJC6CX3TB8fNb06O8F9VCi1vuyV5JZln2+Nmy2Lp5N8sKo+XVUnzF0MbMXTu/v2ZOkP2SRPm7keWM0vVdXnpsf71t10fnZMVbVvkucnuTLupSygFWM0cS9lQVTVLlV1dZI7k1zS3TvFfVQotb7UKm2ez2TRvKi7fyDJy5KcND2WAsDDc06SZyY5KMntSd44bzmQVNXjkrwnya909zfmrgdWWmWMupeyMLr7ge4+KMneSQ6tqgPnrmkEodT6cmuSfZZ93jvJbTPVAqvq7tum9zuT/EmWHjuFRXPHtP7ElnUo7py5HniQ7r5j+uP120l+P+6lzGxaA+U9Sd7V3X88NbuXsjBWG6PupSyi7v6rJJclOTI7wX1UKLW+fCrJ/lW1X1U9OskxSS6euSb4W1W127S4ZKpqtyQ/keTza/eCWVyc5Lhp+7gk752xFvg7tvyBOvlXcS9lRtMCvW9Lcn13v2nZV+6lLIStjVH3UhZFVe1RVU+atndN8uNJvpCd4D7q1/fWmelnTH87yS5J3t7dp89cEvytqvreLM2OSpINSf7QGGVuVfVHSV6S5KlJ7kjy2iT/M8mFSZ6R5OYkr+huC00zi62M0Zdk6XGTTnJTkp/fsuYEjFZVP5Tk8iTXJPn21PzrWVqzx72U2a0xRo+NeykLoKq+L0sLme+SpclDF3b366vqKVnn91GhFAAAAADDeXwPAAAAgOGEUgAAAAAMJ5QCAAAAYDihFAAAAADDCaUAAAAAGE4oBQDwMFTVS6rqT2c475Oq6hdHn3ekqrpn7hoAgHGEUgAAa6iqXeauYfKkJNsllKqqDd/Nd9vrnADAzkEoBQCsS1X1q1X1y9P22VX1oWn78Kp657R9bFVdU1Wfr6ozl/W9p6peX1VXJnlhVR1ZVV+oqo8l+amtnG+XqjprOt7nqurVy873man97VX1mKn9pqp66rR9SFVdNm2/btrvsqr60pZrSHJGkmdW1dVV9YYV5953qu+86dwXVdU/mL47uKo+UlWfrqoPVNWeU/tlVfWbVfWRJCevON7rqmpjVX0wyfnTtb2hqj41Hf/nV/x3vqaqPltVZ0xtB1XVJ6Z9/6SqnrzaOatqv6q6Yjruf314/4cBgB2dUAoAWK8+muSHp+1Dkjyuqh6V5IeSXF5V35PkzCQ/luSgJC+oqqOn/XdL8vnu/sEkm5L8fpJ/MR3vH27lfCck2S/J87v7+5K
8q6oem+TcJD/T3f8kyYYkJ25D7c9J8tIkhyZ57VT3qUn+d3cf1N3/eZU+z06ycTr3N5L84tTvd5O8vLsPTvL2JKcv6/Ok7v6R7n7jKsc7OMlR3f2zSY5P8vXufkGSFyT5j1Og9LIkRyf5we7+/iS/NfU9P8mvTbVck+S1Wznnm5OcMx33L7fhvwsAsI4IpQCA9erTSQ6uqscnuS/JFVkKp344yeVZClcu6+67uvv+JO9K8uKp7wNJ3jNtPyfJl7v7i93dSd65lfP9eJLfm46V7r47S0HRl7v7xmmf85adYy1/1t33dfdXktyZ5Onb0OeW7v74tP3OLIVvz05yYJJLqurqJP8lyd7L+rx7jeNd3N3/b9r+iSSvnI5xZZKnJNk/S9f8B939f5Ola66qJ2YpePrI1HflNS8/54uS/NG0/Y5tuEYAYB3xLD8AsC51999U1U1JXpXkfyX5XJIfTfLMJNcnOWCN7vd29wPLD7cNp6xV9qs19r8/3/kHwseu+O6+ZdsPZNv+Zlt57p7Of213v3Arff56jeMt/66SvLq7P7B8h6o6cpXzPpSV53y4/QGAdcJMKQBgPftoktdM75cn+YUkV08znq5M8iNV9dRpMfNjk3xklWN8Icl+VfXM6fOxWznXB5P8wpYFvKtq96nvvlX1rGmfn1t2jpuy9IhckvzrbbiWbyZ5/BrfP6OqtoRPxyb5WJIbkuyxpb2qHlVVz9uGc630gSQnTo8DpqoOqKrdsnTN/37Z+lW7d/fXk3ytqrY8Orn8mlf6eJJjpu1/813UBQDswIRSAMB6dnmSPZNc0d13JLl3akt3357ktCQfTvLZJFd193tXHqC7783SelF/Ni10/n+2cq63Jrk5yeeq6rNJfnbq+6ok/6Oqrkny7SS/N+3/G0neXFWXZ2k21Jq6+6tJPj4tyv6GVXa5PslxVfW5JLtnaa2mbyV5eZIzp5quTvJPH+pcW7m265JcVVWfT/Lfk2zo7j9PcnGSTdOjfa+Z9j8uyRumWg5K8vqtHPfkJCdV1aeSPPG7qAsA2IHV0j8UAgCwo6qqfZP8aXcfOHMpAADbzEwpAAAAAIYzUwoAAACA4cyUAgAAAGA4oRQAAAAAwwmlAAAAABhOKAUAAADAcEIpAAAAAIYTSgEAAAAw3P8HxGDQYJdZuWQAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ],
   "source": [
    "# Overlay the per-record word-count histograms of the two data frames.\n",
    "# 15 bins over [0, 30]; records with more than 30 words fall outside the plotted range.\n",
    "hist_kwargs = dict(bins=15, range=[0,30], alpha=0.5)\n",
    "plt.figure(figsize=(20,10))\n",
    "for frame, legend_label in [(df_new, 'new pre-processed'), (df_old, 'old pre-processed')]:\n",
    "    plt.hist(frame['words'], label=legend_label, **hist_kwargs)\n",
    "plt.legend(prop={'size':20})\n",
    "plt.xlabel('word count per record')\n",
    "plt.ylabel('frequency')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}