{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Download from: https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import string\n", "import pandas as pd\n", "from tqdm import tqdm\n", "import transformers\n", "import pandas as pd\n", "import json\n", "import seaborn" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
reviewsentiment
0One of the other reviewers has mentioned that ...positive
1A wonderful little production. <br /><br />The...positive
2I thought this was a wonderful way to spend ti...positive
3Basically there's a family where a little boy ...negative
4Petter Mattei's \"Love in the Time of Money\" is...positive
\n", "
" ], "text/plain": [ " review sentiment\n", "0 One of the other reviewers has mentioned that ... positive\n", "1 A wonderful little production.

The... positive\n", "2 I thought this was a wonderful way to spend ti... positive\n", "3 Basically there's a family where a little boy ... negative\n", "4 Petter Mattei's \"Love in the Time of Money\" is... positive" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"./imdb_data.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "source_lengths = []\n", "target_lengths = []\n", "\n", "tokenizer = transformers.AutoTokenizer.from_pretrained(\"t5-large\", cache_dir=\"/workspace/cache\")\n", "\n", "def get_len(text):\n", " return len(tokenizer.encode(text))\n", "\n", "label_set = [\"positive\", \"negative\"]\n", "\n", "def create_pair(s, t, split):\n", " prefix = \"\"\n", " s = s.replace(\"

\", \" \")\n", " line = {\n", " \"translation\": {\n", " \"s\": prefix+s,\n", " \"t\": t\n", " }\n", " }\n", " source_lengths.append(get_len(s))\n", " target_lengths.append(get_len(t))\n", " with open(\"./\"+split+\"lines.json\", 'a+') as outfile:\n", " json.dump(line, outfile)\n", " outfile.write(\"\\n\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "x_list = []\n", "y_list = []\n", "\n", "count = 0\n", "\n", "for index, row in df.iterrows():\n", " s = row[\"review\"].split(\" \")[:256]\n", " s = \" \".join(s).strip()\n", " t = row[\"sentiment\"].strip()\n", " x_list.append(s)\n", " y_list.append(t)\n", " count += 1\n", " if count > 10000:\n", " break" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "x_train, x_valid, y_train, y_valid = train_test_split(x_list, y_list, test_size=0.2, stratify=y_list)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "split = \"train\"\n", "for s, t in zip(x_train, y_train):\n", " create_pair(s, t, split)\n", " \n", "split = \"valid\"\n", "for s, t in zip(x_valid, y_valid):\n", " create_pair(s, t, split)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWEAAAFgCAYAAABqo8hyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXrElEQVR4nO3df4yd1X3n8fe3/Ewg6wFnatljRybCpJTshtApcUq0JHhT2SwbkxWlRFFwWXe9ErSbLBWN02p3VWn/IKIqP1Yrdq04G1OlJE4KsusCqdeQrlZaSAZCAOMAExZqzxg8UOPsxlOC4bt/3GO4OM7MNZ7nnjt33i/p6p7nPOe5833E1YfH5z733MhMJEl1/FLtAiRpLjOEJakiQ1iSKjKEJakiQ1iSKjqxdgHHY+XKlXnffffVLkOS2sWxDJ7VV8IvvfRS7RIk6bjM6hCWpNnOEJakigxhSaqo0RCOiH8XETsj4omIuDMiTo2IsyLioYgYjYhvRsTJZewpZXu07F/aZG2S1AsaC+GIGAL+LTCcmR8ETgCuAr4M3JyZZwP7gbXlkLXA/tJ/cxknSX2t6emIE4F3RcSJwLuBvcAlwLfL/k3A5aW9umxT9q+IiGO61UOSZpvGQjgzx4A/Bf6OVvgeAB4GXsnMQ2XYHmCotIeA3eXYQ2X8/CNfNyLWRcRIRIxMTEw0Vb4kdUWT0xFn0Lq6PQtYBJwGrDze183MDZk5nJnDg4ODx/tyklRVk9MR/wz4P5k5kZmvAXcBFwEDZXoCYDEwVtpjwBKAsn8e8HKD9UlSdU2G8N8ByyPi3WVudwXwJPAAcEUZswbYUtpbyzZl//3pivOS+lyTc8IP0fqA7RHg8fK3NgBfBK6PiFFac74byyEbgfml/3pgfVO1SVKviNl8sTk8PJwjIyO1y5CkdnNnAR9Jmu0MYUmqaFavJyxpauecex7j42O/cP+iRUM8vWtnFyvSkQxhqY+Nj49x2U33/sL9225Y1cVqdDROR0hSRYawJFVkCEtSRYawJFVkCEtSRd4doUZ4a5TUGUNYjfDWKKkzTkdIUkWGsCRVZAhLUkWGsCRVZAhLUkWGsCRVZAhLUkWGsCRVZAhLUkWGsCRVZAhLUkWGsCRV5AI+OmbTrZAGMHlwskvVSLObIaxjNt0KaQCbr724S9VIs5vTEZJUkSEsSRUZwpJUkSEsSRUZwpJUUWMhHBEfiIhH2x4/iYgvRMSZEbE9Ip4pz2eU8RERt0XEaEQ8FhEXNFWbJPWKxkI4M5/KzPMz83zg14CDwN3AemBHZi4DdpRtgFXAsvJYB9zeVG2S1Cu6NR2xAvhxZj4PrAY2lf5NwOWlvRq4I1seBAYiYmGX6pOkKroVwlcBd5b2gszcW9ovAAtKewjY3XbMntL3NhGxLiJGImJkYmKiqXolqSsaD+GIOBn4FPCtI/dlZgJ5LK+XmRsyczgzhwcHB2eoSkmqoxtXwquARzLzxbL94uFphvK8r/SPAUvajltc+iSpb3UjhD/DW1MRAFuBNaW9BtjS1n91uUtiOXCgbdpCkvpSowv4RMRpwCeBf9PWfSOwOSLWAs8DV5b+e4BLgVFad1Jc02RtktQLGg3hzPwpMP+Ivpdp3S1x5NgErmuyHknqNX5jTpIqMoQlqSJDWJIqMoQlqSJDWJIqMoQlqSJDWJIq8teWpVnqnHPPY3x86m/2Tx6c7FI1eqcMYWmWGh8f47Kb7p1yzOZrL+5SNXqnnI6QpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoMYUmqyBCWpIoa/bXliBgAvgJ8EEjgXwFPAd8ElgLPAVdm5v6ICOBW4FLgIPA7mflIk/VJc93k5KucPm9gyjGLFg3x9K6d3SloDmr6J+9vBe7LzCsi4mTg3cAfATsy88aIWA+sB74IrAKWlcdHgNvLs6SG5Buvc9lN9045ZtsNq7pUzdzU2HRERMwD/imwESAzf5aZrwCrgU1l2Cbg8tJeDdyRLQ8CAxGxsKn6JKkXNDknfBYwAfz3iPhBRHwlIk4DFmTm3jLmBWBBaQ8Bu9uO31P6JKlvNRnCJwIXALdn5oeBn9KaenhTZiatueKORcS6iBiJiJGJiYkZK1aSamgyhPcAezLzobL9bVqh/OLhaYbyvK/sHwOWtB2/uPS9TWZuyMzhzBweHBxsrHhJ6obGQjgzXwB2R8QHStcK4ElgK7Cm9K0BtpT2VuDqaFkOHGibtpCkvtT03RG/D3y93BnxLHANreDfHBFrgeeBK8vYe2jdnjZK6xa1axquTZKqazSEM/NRYPgou1YcZWwC1zVZjyT1Gr8xJ0kVNT0doVnmnHPPY3z85z4PfZvJg5Ndqkbqf4aw3mZ8fGzab1BtvvbiLlUj9T+nIySpIkNYkioyhCWpIkNYkioyhCWpIkNYkioyhCWpIkNYkioyhCWpIkNYkioyhCWpIkNYkioyhCWpIkNYkipyKUupB7mu89xhCEs9yHWd5w6nIySpIkNYkipyOkJVTE6+yunzBqYcs2jREE/v2tmdgqRKDGFVkW+8Pu2c57YbVnWpGqkepyMkqSJDWJIqMoQlqSJDWJIqMoQlqSJDWJIqMoQlqSJDWJIqajSEI+K5iHg8Ih6NiJHSd2ZEbI+IZ8rzGaU/IuK2iBiNiMci4oIma5OkXtCNK+FPZOb5mTlcttcDOzJzGbCjbAOsApaVxzrg9i7UJklV1ZiOWA1sKu1NwOVt/Xdky4PAQEQsrFCfJHVN0yGcwN9ExMMRsa70LcjMvaX9ArCgtIeA3W3H7il9bxMR6yJiJCJGJiYmmqpbkrqi6QV8PpaZYxHxy8D2iPhR+87MzIjIY3nBzNwAbAAYHh4+pmMlqdc0eiWcmWPleR9wN3Ah8OLhaYbyvK8MHwOWtB2+uPRJUt9qLIQj4rSIeM/hNvCbwBPAVmBNGbYG2FLaW4Gry10Sy4EDbdMWktSXmpyOWADcHRGH/85fZOZ9EfF9YHNErAWeB64s4+8BLgVGgYPANQ3WJkk9obEQzsxngQ8dpf9lYMVR+hO4rql6JKkX+csa6lmd/ATSa68d4qSTpn4b+zNJ6mWGsHpWJz+BtPnai/n0LdunHOPPJKmXuXaEJFVkCEtSRYawJFVkCEtSRYawJFVkCEtSRYawJFVkCEtSRYawJFVkCEtSRYawJFVkCEtSRYawJFVkCEtSRYawJFXUUQhHxEWd9EmSjk2nV8L/ucM+SdIxmPKXNSLio8BvAIMRcX3brn8EnNBkYZI0F0z380YnA6eXce9p6/8JcEVTRUnSXDFlCGfm3wJ/GxFfy8znu1STJM0Znf7Q5ykRsQFY2n5MZl7SRFGSNFd0GsLfAv4r8BXg9ebKkaS5pdMQPpSZtzdaiSTNQZ3eovZXEXFtRCyMiDMPPxqtTJLmgE6vhNeU5xva+hJ4/8yWI0lzS0chnJlnNV2IJM1FHYVwRFx9tP7MvGNmy5GkuaXT6Yhfb2ufCqwAHgEMYUk6Dp1OR/x++3ZEDADf6OTYiDgBGAHGMvOyiDirHDsfeBj4XGb+LCJOoRXqvwa8DPx2Zj7X4XlI0qz0Tpey/CnQ6Tzx54FdbdtfBm7OzLOB/cDa0r8W2F/6by7jJKmvdbqU5V9FxNby+GvgKeDuDo5bDPxzWl/yICICuAT4dhmyCbi8tFeXbcr+FWW8JPWtTueE/7StfQh4PjP3dHDcLcAf8tbiP/OBVzLzUNneAwyV9hCwGyAzD0XEgTL+pfYXjIh1wDqA973vfR2WL0m9qaMr4bKQz49ohekZwM+mOyYiLgP2ZebDx1Xhz9eyITOHM3N4cHBwJl9akrqu0+mIK4HvAb8FXAk8FBHTLWV5EfCpiHiO1gdxlwC3AgMRcfgKfDEwVtpjwJLy904E5tH6gE6S+lanH8z9MfDrmbkmM68GLgT+/VQHZOaXMnNxZi4FrgLuz8zPAg/w1lrEa4Atpb2Vt76Zd0UZnx2fiSTNQp2G8C9l5r627ZeP4dgjfRG4PiJGac35biz9G4H5pf96YP07fH1JmjU6/WDuvoj4DnBn2f5t4J5O/0hmfhf4bmk/S+tK+sgx/0BrukOS5ozpfmPubGBBZt4QEf8S+FjZ9b+BrzddnCT1u+muhG8BvgSQmXcBdwFExD8u+/5Fg7VJ6gGTk69y+ryBKccsWjTE07t2dqegPjNdCC/IzMeP7MzMxyNiaTMlSeol+cbrXHbTvVOO2XbDqi5V03+m+3BtYIp975rBOiRpTpouhEci4l8f2RkRv0tr8R1J0nGYbjriC8DdEfFZ3grdYeBk4NMN1iXNGOc01cumDOHMfBH4jYj4BPDB0v3XmXl/45VJM8Q5TfWyTtcTfoDWN90kSTPonX7rTZI0AwxhSarIEJakigxhSarIEJakigxhSarIEJakigxhSarIEJakigxhSarIEJakigxhSarIEJakigxhSaqo05+8l/paNxd+P+fc8xgfH5u6noOTx/13NDsYwhLdXfh9fHxs2r+1+dqLZ+Rvqfc5HSFJFRnCklSRISxJFTknPIf4gZDUewzhOcQPhI5PN++g0NxhCEsd6uYdFJo7GpsTjohTI+J7EfHDiNgZEX9S+s+KiIciYjQivhkRJ5f+U8r2aNm/tKnaJKlXNPnB3KvAJZn5IeB8YGVELAe+DNycmWcD+4G1ZfxaYH/pv7mMk6S+1lgIZ8v/K5snlUcClwDfLv2bgMtLe3XZpuxfERHRVH2S1AsavUUtIk6IiEeBfcB24MfAK5l5qAzZAwyV9hCwG6DsPwDMP8prrouIkYgYmZiYaLJ8SWpcoyGcma9n5vnAYuBC4Fdm4DU3ZOZwZg4PDg4e78tJUlVd+bJGZr4CPAB8FBiIiMN3ZSwGDt+4OgYsASj75wEvd6M+SaqlybsjBiNioLTfBXwS2EUrjK8ow9YAW0p7a9mm7L8/M7Op+iSpFzR5n/BCYFNEnEAr7Ddn5raIeBL4RkT8J+AHwMYyfiPw5xExCvw9cFWDtUlST2gshDPzMeDDR+l/ltb88JH9/wD8VlP1SFIvcgEfSarIEJakigxhSarIBXykGdTJSmsuF6p2hrA0gzpZac3lQtXO6QhJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsj1hGeBc849j/HxsSnHLFo0xNO7dnapIkkzxRCeBcbHx6ZdKHzbDau6VI2kmWQISzpunfysk/9aOzpDWNJx6+RnnfzX2tH5wZwkVWQIS1JFhrAkVWQIS1JFfjDXoE7u733ttUOcdNLU/xkmD07OZFmSeogh3KBO7u/dfO3FfPqW7dOOkdSfGpuOiIglEfFARDwZETsj4vOl/8yI2B4Rz5TnM0p/RMRtETEaEY9FxAVN1SZJvaLJK+FDwB9k5iMR8R7g4YjYDvwOsCMzb4yI9cB64IvAKmBZeXwEuL08qwOd3CzvtIbUexoL4czcC+wt7f8bEbuAIWA18PEybBPwXVohvBq4IzMTeDAiBiJiYXkdTaOTm+Wd1pB6T1fujoiIpcCHgYeABW3B+gKwoLSHgN1th+0pfUe+1rqIGImIkYmJieaKlqQuaDyEI+J04C+BL2TmT9r3lavePJbXy8wNmTmcmcODg4MzWKkkdV+jIRwRJ9EK4K9n5l2l+8WIWFj2LwT2lf4xYEnb4YtLnyT1rSbvjghgI7ArM/+sbddWYE1prwG2tPVfXe6SWA4ccD5YUr9r8u6Ii4DPAY9HxKOl74+AG4HNEbEWeB64suy7B7gUGAUOAtc0WJsk9YQm7474X0D8gt0rjjI+geuaqkeSepFrR0hSRYawJFVkCEtSRYawJFXkKmrvUCfLVLpWg6TpGMLvUKfLVErSVJyOkKSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqsgQlqSKDGFJqqixEI6Ir0bEvoh4oq3vzIjYHhHPlOczSn9ExG0RMRoRj0XEBU3VJUm9pMkr4a8BK4/oWw/syMxlwI6yDbAKWFYe64DbG6xLknpGYyGcmf8T+PsjulcDm0p7E3B5W/8d2fIgMBARC5uqTZJ6RbfnhBdk5t7SfgFYUNpDwO62cXtKnyT1tWofzGVmAnmsx0XEuogYiYiRiYmJBiqTpO7pdgi/eHiaoTzvK/1jwJK2cYtL38/JzA2ZOZyZw4ODg40WK0lN63YIbwXWlPYaYEtb/9XlLonlwIG2aQtJ6lsnNvXCEXEn8HHgvRGxB/iPwI3A5ohYCzwPXFmG3wNcCowCB4FrmqpLknpJYyGcmZ/5BbtWHGVsAtc1VYsk9arGQliS2k1Ovsrp8wamHLNo0RBP79rZnYJ6hCEsqSvyjde57KZ7pxyz7YZVXaqmd7h2hCRVZAhLUkWGsCRVZAhLUkWGsCRVZAhLUkWGsCRV5H3CR3HOuecxPn7U9YPeNHlwskvVSOpnhvBRjI+PTXtT+eZrL+5SNZL6mdMRklSRISxJFRnCklSRISxJFRnCklSRd0dI6hlzcc1hQ1hSz5iLaw47HSFJFRnCklSRISxJFRnCklSRISxJFRnCklSRISxJFRnCklSRISxJFRnCklSRX1uWNKv02/oScy6E/f04aXbrt/Ul5lwI+/txUv+bTVfLPRXCEbESuBU4AfhKZt5YuSRJs9BsulrumRCOiBOA/wJ8EtgDfD8itmbmk3Urk9SPeuVquWdCGLgQGM3MZwEi4hvAasAQljTjeuVqOTKz8T/SiYi4AliZmb9btj8HfCQzf++IceuAdWXzA8BTXS20+94LvFS7iC7wPPvLXD7PlzJzZacv0EtXwh3JzA3Ahtp1dEtEjGTmcO06muZ59hfPs3O99GWNMWBJ2/bi0idJfauXQvj7wLKIOCsiTgauArZWrkmSGtUz0xGZeSgifg/4Dq1b1L6amfVv4qtvrky9eJ79xfPsUM98MCdJc1EvTUdI0pxjCEtSRYZwZRHx1YjYFxFPtPWdGRHbI+KZ8nxG6Y+IuC0iRiPisYi4oF7lxyYilkTEAxHxZETsjIjPl/6+OteIODUivhcRPyzn+Sel/6yIeKiczzfLh89ExClle7TsX1r1BI5BRJwQET+IiG1lu+/OESAinouIxyPi0YgYKX0z9r41hOv7GnDkjd3rgR2ZuQzYUbYBVgHLymMdcHuXapwJh4A/yMxfBZYD10XEr9J/5/oqcElmfgg4H1gZEcuBLwM3Z+bZwH5gbRm/Fthf+m8u42aLzwO72rb78RwP+0Rmnt92T/DMvW8z00flB7AUeKJt+ylgYWkvBJ4q7f8GfOZo42bbA9hCa52Qvj1X4N3AI8BHaH2r6sTS/1HgO6X9HeCjpX1iGRe1a+/g3BaX8LkE2AZEv51j27k+B7z3iL4Ze996JdybFmTm3tJ+AVhQ2kPA7rZxe0rfrFL+Ofph4CH68FzLP9MfBfYB24EfA69k5qEypP1c3jzPsv8AML+rBb8ztwB/CLxRtufTf+d4WAJ/ExEPl2UTYAbftz1zn7COLjMzIvrmPsKIOB34S+ALmfmTiHhzX7+ca2a+DpwfEQPA3cCv1K1oZkXEZcC+zHw4Ij5euZxu+FhmjkXELwPbI+JH7TuP933rlXBvejEiFgKU532lf1Z/tTsiTqIVwF/PzLtKd1+eK0BmvgI8QOuf5gMRcfiip/1c3jzPsn8e8HJ3Kz1mFwGfiojngG/QmpK4lf46xzdl5lh53kfrf6oXMoPvW0O4N20F1pT2Glrzp4f7ry6fwC4HDrT9k6inReuSdyOwKzP/rG1XX51rRAyWK2Ai4l205r130QrjK8qwI8/z8PlfAdyfZTKxV2XmlzJzcWYupbW8wP2Z+Vn66BwPi4jTIuI9h9vAbwJPMJPv29qT3nP9AdwJ7AVeozV/tJbWfNkO4BngfwBnlrFBa+H7HwOPA8O16z+G8/wYrbm1x4BHy+PSfjtX4J8APyjn+QTwH0r/+4HvAaPAt4BTSv+pZXu07H9/7XM4xvP9OLCtX8+xnNMPy2Mn8Melf8bet35tWZIqcjpCkioyhCWpIkNYkioyhCWpIkNYkioyhCWpIkNYkir6/7TnxEOAphZiAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "seaborn.displot(source_lengths)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAATcUlEQVR4nO3df6zd9X3f8eerOORH88Mm8RC1jWAL/UGzRaEOoRBVaZjA0G6mE6G0UbAQqyuFZck2dfkxaUhJIyVS1aRsC50FLFBFIYyyQlYKcoEkm1JInIRBgKZYRGA7JDiYkC5Rmzl574/7cXvrXtvHvvee973c50O6ut/z+X7POZ+Pojzzzdffc26qCknS9P1Y9wQkaaUywJLUxABLUhMDLElNDLAkNVnVPYFp27RpU915553d05C0smSuwRV3Bvztb3+7ewqSBKzAAEvSUmGAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCaLFuAk1yd5OslXZ42dkGR7ksfG7zVjPEmuTrIzyYNJzpj1nC3j+MeSbJk1/nNJHhrPuTrJnF92IUlL1WKeAX8c2HTQ2HuAu6vqNODu8RjgAuC08bMVuAZmgg1cBbwBOBO46kC0xzG/Met5B7+XJC1pixbgqvocsO+g4c3ADWP7BuCiWeM31oz7gNVJTgLOB7ZX1b6qehbYDmwa+15eVffVzF8VvXHWa0nSsjDt7wM+saqeGtvfBE4c2+uAXbOO2z3GDje+e47xOSXZysyZNSeffPJRT3rdhpP5xu5dRz5Q0vPeT6zfwJ5dTy7Ia7V9IXtVVZKa0nttA7YBbNy48ajf8xu7d/Gr//XzCz4vScvPp37z7AV7rWnfBfGtcfmA8fvpMb4H2DDruPVj7HDj6+cYl6RlY9oBvh04cCfDFuC2WeOXjbshzgKeG5cq7gLOS7Jm/OPbecBdY993k5w17n64bNZrSdKysGiXIJJ8EngT8Koku5m5m+FDwM1JrgCeAC4Zh98BXAjsBL4PXA5QVfuSfAD44jju/VV14B/23s7MnRYvBv5k/EjSsrFoAa6qXzvErnPnOLaAKw/xOtcD188xvgN4zXzmKEmd/CScJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNWgKc5N8keTjJV5N8MsmLkpya5P4kO5N8Ksnx49gXjsc7x/5TZr3Oe8f415Kc37EWSTpWUw9wknXAvwY2VtVrgOOAS4EPAx+pqlcDzwJXjKdcATw7xj8yjiPJ6eN5PwtsAj6W5LhprkWS5qPrEsQq4MVJVgEvAZ4C3gzcMvbfAFw0tjePx4z95ybJGL+pqv66qr4O7ATOnM70JWn+ph7gqtoD/A7wJDPhfQ74EvCdqto/DtsNrBvb64Bd47n7x/GvnD0+x3P+jiRbk+xIsmPv3r0LuyBJOkYdlyDWMHP2eirwE8CPM3MJYdFU1baq2lhVG9euXbuYbyVJE+u4BPFPga9X1d6q+n/ArcA5wOpxSQJgPbBnbO8BNgCM/a8Anpk9PsdzJGnJ6wjwk8BZSV4yruWeCzwC3AtcPI7ZAtw2tm8fjxn776mqGuOXjrskTgVOA74wpTVI0rytOvIhC6uq7k9yC/BlYD/wFWAb8MfATUl+e4xdN55yHfAHSXYC+5i584GqejjJzczEez9wZVX9cKqLkaR5mHqAAarqKuCqg4YfZ467GKrqr4C3HOJ1Pgh8cMEnKElT4CfhJKmJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElq0hLgJKuT3JLkz5M8muTnk5yQZHuSx8bvNePYJLk6yc4kDyY5Y9brbBnHP5ZkS8daJOlYdZ0B/x5wZ1X9NPBa4FHgPcDdVXUacPd4DHABcNr42QpcA5DkBOAq4A3AmcBVB6ItScvB1AOc5BXALwDXAVTVD6rqO8Bm4IZx2A3ARWN7M3BjzbgPWJ3kJOB8YHtV7auqZ4HtwKapLUSS5qnjDPhUYC/w35J8Jcm1SX4cOLGqnhrHfBM4cWyvA3bNev7uMXao8b8nydYkO5Ls2Lt37wIuRZKOXUeAVwFnANdU1euA7/G3lxsAqKoCaqHesKq2VdXGqtq4du3ahXpZSZqXjgDvBnZX1f3j8S3MBPlb49IC4/fTY/8eYMOs568fY4cal6RlYeoBrqpvAruS/NQYOhd4BLgdOHAnwxbgtrF9O3DZuBviLOC5caniLuC8JGvGP76dN8YkaVlY1fS+7wA+keR44HHgcmb+x+DmJFcATwCXjGPvAC4EdgLfH8dSVfuSfAD44jju/VW1b3pLkKT5aQlwVT0AbJxj17lzHFvAlYd4neuB6xd0cpI0JX4STpKaGGBJamKAJamJAZakJgZYkppMFOAk50wyJkma3KRnwP9pwjFJ0oQOex9wkp8HzgbWJvm3s3a9HDhuMScmSc93R/ogxvHAS8dxL5s1/l3g4sWalCStBIcNcFV9Fvhsko9X1RNTmpMkrQiTfhT5hUm2AafMfk5VvXkxJiVJK8GkAf7vwO8D1wI/XLzpSNLKMWmA91fVNYs6E0laYSa9De3TSd6e5KTx14tPGH8UU5J0jCY9Az7wRem/NWusgH+4sNORpJVjogBX1amLPRFJWmkmCnCSy+Yar6obF3Y6krRyTHoJ4vWztl/EzF+u+DJggCXpGE16CeIdsx8nWQ3ctBgTkqSV4li/jvJ7gNeFJWkeJr0G/Glm7nqAmS/h+Rng5sWalCStBJNeA/6dWdv7gSeqavcizEeSVoyJLkGML+X5c2a+EW0N8IPFnJQkrQST/kWMS4AvAG8BLgHuT+LXUUrSPEx6CeI/AK+vqqcBkqwF/hS4ZbEmJknPd5PeBfFjB+I7PHMUz5UkzWHSM+A7k9wFfHI8/lXgjsWZkiStDEf6m3CvBk6sqt9K8i+AN45dfwZ8YrEnJ0nPZ0c6A/4o8F6AqroVuBUgyT8e+/7ZIs5Nkp7XjnQd98SqeujgwTF2yqLMSJJWiCMFePVh9r14AechSSvOkQK8I8lvHDyY5F8CX1qcKUnSynCka8DvAv5Hkrfyt8HdCBwP/MoizkuSnvcOG+Cq+hZwdpJfBF4zhv+4qu5Z9JlJ0vPcpN8HfC9w7yLPRZJWFD/NJklNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU3aApzkuCRfSfI/x+NTk9yfZGeSTyU5foy/cDzeOfafMus13jvGv5bk/KalSNIx6TwDfifw6KzHHwY+UlWvBp4FrhjjVwDPjvGPjONIcjpwKfCzwCbgY0mOm9LcJWneWgKcZD3wS8C143GANwO3jENuAC4a25vHY8b+c8fxm4Gbquqvq+rrwE7gzKksQJIWQNcZ8EeBfw/8aDx+JfCdqto/Hu8G1o3tdcAugLH/uXH834zP8Zy/I8nWJDuS7Ni7d+8CLkOSjt3UA5zkl4Gnq+pL03rPqtpWVRurauPatWun9baSdFgT/Vn6BXYO8M+TXAi8CHg58HvA6iSrxlnuemDPOH4PsAHYnWQV8ArgmVnjB8x+jiQteVM/A66q91bV+qo6hZl/RLunqt4K3AtcPA7bAtw2tm8fjxn776mqGuOXjrskTgVOA74wpWVI0rx1nAEfyruBm5L8NvAV4Loxfh3wB0l2AvuYiTZV9XCSm4FHgP3AlVX1w+lPW5KOTWuAq+ozwGfG9uPMcRdDVf0V8JZDPP+DwAcXb4aStHj8JJwkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU2mHuAkG5Lcm+SRJA8neecYPyHJ9iSPjd9rxniSXJ1kZ5IHk5wx67W2jOMfS7Jl2muRpPnoOAPeD/y7qjodOAu4MsnpwHuAu6vqNODu8RjgAuC08bMVuAZmgg1cBbwBOBO46kC0JWk5mHqAq+qpqvry2P5L4FFgHbAZuGEcdgNw0djeDNxYM+4DVic5CTgf2F5V+6rqWWA7sGl6K5Gk+Wm9BpzkFOB1wP3AiVX11Nj1TeDEsb0O2DXrabvH2KHGJWlZaAtwkpcCfwi8q6q+O3tfVRVQC/heW5PsSLJj7969C/WykjQvLQFO8gJm4vuJqrp1DH9rXFpg/H56jO8BNsx6+voxdqjxv6eqtlXVxqrauHbt2oVbiCTNQ8ddEAGuAx6tqt+dtet24MCdDFuA22aNXzbuhjgLeG5cqrgLOC/JmvGPb+eNMUlaFlY1vOc5wNuAh5I8MMbeB3wIuDnJFcATwCVj3x3AhcBO4PvA5QBVtS/JB4AvjuPeX1X7prICSVoAUw9wVf1vIIfYfe4cxxdw5SFe63rg+oWbnSRNj5+Ek6QmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCbLPsBJNiX5WpKdSd7TPR9JmtSyDnCS44D/AlwAnA78WpLTe2clSZNZ1gEGzgR2VtXjVfUD4CZgc/OcJGkiq7onME/rgF2zHu8G3nDwQUm2AlvHw/+b5GtH+0af+s2zj2mCR+lVwLen8UZT4nqWNtdzjJIc7VPurKpNBw8u9wBPpKq2Adu653EkSXZU1cbueSwU17O0uZ5+y/0SxB5gw6zH68eYJC15yz3AXwROS3JqkuOBS4Hbm+ckSRNZ1pcgqmp/kn8F3AUcB1xfVQ83T2s+lvxlkqPkepY219MsVdU9B0lakZb7JQhJWrYMsCQ1McBTluT6JE8n+ephjnlTkgeSPJzks9Oc39E60nqSvCLJp5P8n7Gey6c9x0kl2ZDk3iSPjLm+c45jkuTq8dH3B5Oc0THXSUy4nreOdTyU5PNJXtsx10lMsp5Zx74+yf4kF09zjketqvyZ4g/wC8AZwFcPsX818Ahw8nj8D7rnPM/1vA/48NheC+wDju+e9yHmehJwxth+GfAXwOkHHXMh8CdAgLOA+7vnPc/1nA2sGdsXLPf1jH3HAfcAdwAXd8/7cD+eAU9ZVX2OmQgdyq8Dt1bVk+P4p6cysWM0wXoKeFlmPjr00nHs/mnM7WhV1VNV9eWx/ZfAo8x82nK2zcCNNeM+YHWSk6Y81YlMsp6q+nxVPTse3sfMvfRL0oT/+QC8A/hDYEn/dwe8BLEU/SSwJslnknwpyWXdE5qn/wz8DPAN4CHgnVX1o94pHVmSU4DXAfcftGuuj7/PFYEl5TDrme0KZs7ul7xDrSfJOuBXgGsapnXUlvV9wM9Tq4CfA84FXgz8WZL7quoveqd1zM4HHgDeDPwjYHuS/1VV322d1WEkeSkzZ1DvWsrznNQk60nyi8wE+I3TnNuxOMJ6Pgq8u6p+dAzf1zB1Bnjp2Q08U1XfA76X5HPAa5m53rUcXQ58qGYuzu1M8nXgp4Ev9E5rbklewMx/uT9RVbfOcciy+vj7BOshyT8BrgUuqKpnpjm/ozXBejYCN434vgq4MMn+qvqj6c1ycl6CWHpuA96YZFWSlzDz7W6PNs9pPp5k5myeJCcCPwU83jqjQxjXqa8DHq2q3z3EYbcDl427Ic4Cnquqp6Y2yaMwyXqSnAzcCrxtqf+/rEnWU1WnVtUpVXUKcAvw9qUaX/AMeOqSfBJ4E/CqJLuBq4AXAFTV71fVo0nuBB4EfgRcW1WHvGWt25HWA3wA+HiSh5i5c+DdVbVUvwLxHOBtwENJHhhj7wNOhr9Zzx3M3AmxE/g+M2f4S9Uk6/mPwCuBj42zxv21dL9RbJL1LCt+FFmSmngJQpKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqcn/B8BZiWB9A6hZAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "seaborn.displot(target_lengths)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 }