Commit a5d4a032f6cc55c8817b81e7f617c4df0f354f52

Authored by Junghwan Park
1 parent be5a830d6d
Exists in main

Added comments; users with too little data are removed

Showing 3 changed files with 204 additions and 14 deletions Side-by-side Diff

.vscode/settings.json View file @ a5d4a03
  1 +{
  2 + "python.formatting.provider": "autopep8",
  3 + "python.linting.enabled": false,
  4 + "python.analysis.autoImportCompletions": false,
  5 + "python.analysis.autoSearchPaths": false,
  6 + "python.analysis.useLibraryCodeForTypes": false
  7 +}
python-notebook/data_loading.ipynb View file @ a5d4a03
... ... @@ -9,27 +9,52 @@
9 9 },
10 10 {
11 11 "cell_type": "code",
12   - "execution_count": 13,
  12 + "execution_count": 3,
13 13 "metadata": {},
14 14 "outputs": [],
15 15 "source": [
16 16 "import numpy as np\n",
17 17 "import matplotlib.pyplot as plt\n",
  18 + "import seaborn as sns\n",
18 19 "from pandas import read_csv\n",
  20 + "import pandas as pd\n",
19 21 "import os\n",
20   - "from datetime import datetime"
  22 + "from datetime import datetime, date\n",
  23 + "# %load_ext line_profiler"
21 24 ]
22 25 },
23 26 {
24 27 "cell_type": "markdown",
25 28 "metadata": {},
26 29 "source": [
  30 + "# Defining Functions and Adjusting Settings"
  31 + ]
  32 + },
  33 + {
  34 + "cell_type": "code",
  35 + "execution_count": 4,
  36 + "metadata": {},
  37 + "outputs": [],
  38 + "source": [
  39 + "pd.options.mode.chained_assignment = None\n",
  40 + "\n",
  41 + "def get_date(x):\n",
  42 + " return date(x.year, x.month, x.day)\n",
  43 + "\n",
  44 + "def get_minute_index(x):\n",
  45 + " return (x.hour * 60) + x.minute"
  46 + ]
  47 + },
  48 + {
  49 + "cell_type": "markdown",
  50 + "metadata": {},
  51 + "source": [
27 52 "# Loading data files"
28 53 ]
29 54 },
30 55 {
31 56 "cell_type": "code",
32   - "execution_count": 16,
  57 + "execution_count": 5,
33 58 "metadata": {},
34 59 "outputs": [],
35 60 "source": [
36 61  
37 62  
38 63  
39 64  
40 65  
41 66  
42 67  
43 68  
44 69  
... ... @@ -37,37 +62,192 @@
37 62 "\n",
38 63 "daily = read_csv(os.path.join(data_dir, 'daily.csv'))\n",
39 64 "dose = read_csv(os.path.join(data_dir, 'dose.csv'))\n",
40   - "jawbone = read_csv(os.path.join(data_dir, 'jawbone.csv'), low_memory=False)"
  65 + "jawbone = read_csv(os.path.join(data_dir, 'jawbone.csv'), low_memory=False)\n"
41 66 ]
42 67 },
43 68 {
44 69 "cell_type": "markdown",
45 70 "metadata": {},
46 71 "source": [
47   - "# Preprocessing"
  72 + "# Preprocessing\n",
  73 + "## Selecting the variables"
48 74 ]
49 75 },
50 76 {
51 77 "cell_type": "code",
52   - "execution_count": 19,
  78 + "execution_count": 6,
53 79 "metadata": {},
  80 + "outputs": [],
  81 + "source": [
  82 + "# Column names of jawbone data\n",
  83 + "# 'Var1', 'user', 'start_datetime', 'end_datetime', 'timezone', 'userid',\n",
  84 + "# 'steps', 'gmtoff', 'tz', 'start_date', 'end_date', 'start_utime',\n",
  85 + "# 'end_utime', 'start_udate', 'end_udate', 'intake_date', 'intake_utime',\n",
  86 + "# 'intake_tz', 'intake_gmtoff', 'intake_hour', 'intake_min',\n",
  87 + "# 'intake_slot', 'travel_start', 'travel_end', 'exit_date',\n",
  88 + "# 'dropout_date', 'last_date', 'last_utime', 'last_tz', 'last_gmtoff',\n",
  89 + "# 'last_hour', 'last_min', 'start_utime_local', 'end_utime_local'\n",
  90 + "\n",
  91 + "\n",
  92 + "# duplicate jawbone data\n",
  93 + "jawbone2 = jawbone.copy(deep=True)\n",
  94 + "\n",
  95 + "# convert string datetimes to actual datetime objects\n",
  96 + "jawbone2[\"start_utime_local\"] = pd.to_datetime(\n",
  97 + " jawbone2[\"start_utime_local\"], format=\"%Y-%m-%d %H:%M:%S\")\n",
  98 + "jawbone2[\"start_datetime\"] = pd.to_datetime(\n",
  99 + " jawbone2[\"start_datetime\"], format=\"%Y-%m-%d %H:%M:%S\")\n",
  100 + "\n",
  101 + "# calculate the timezone offset\n",
  102 + "jawbone2[\"tz_offset\"] = jawbone2[\"start_datetime\"] - \\\n",
  103 + " jawbone2[\"start_utime_local\"]\n",
  104 + "\n",
  105 + "\n",
  106 + "# selecting only important columns\n",
  107 + "jawbone3 = jawbone2[[\"user\", \"start_utime_local\",\n",
  108 + " \"end_utime_local\", \"tz_offset\", \"steps\"]]\n",
  109 + "\n",
  110 + "# extracting the local calendar date from start_utime_local\n",
  111 + "jawbone3[\"local_date\"] = jawbone3[\"start_utime_local\"].apply(get_date)\n",
  112 + "\n",
  113 + "# computing the local minute-of-day index (hour * 60 + minute) from start_utime_local\n",
  114 + "jawbone3[\"local_minute_index\"] = jawbone3[\"start_utime_local\"].apply(\n",
  115 + " get_minute_index)\n"
  116 + ]
  117 + },
  118 + {
  119 + "cell_type": "markdown",
  120 + "metadata": {},
  121 + "source": [
  122 + "## Making a key info database"
  123 + ]
  124 + },
  125 + {
  126 + "cell_type": "code",
  127 + "execution_count": 7,
  128 + "metadata": {},
  129 + "outputs": [],
  130 + "source": [
  131 + "# collecting the unique (user, local_date) pairs\n",
  132 + "user_date = jawbone3[[\"user\", \"local_date\"]].drop_duplicates()"
  133 + ]
  134 + },
  135 + {
  136 + "cell_type": "markdown",
  137 + "metadata": {},
  138 + "source": [
  139 + "## Removing users with too little data"
  140 + ]
  141 + },
  142 + {
  143 + "cell_type": "code",
  144 + "execution_count": 12,
  145 + "metadata": {},
54 146 "outputs": [
55 147 {
  148 + "name": "stdout",
  149 + "output_type": "stream",
  150 + "text": [
  151 + "Threshold: 10\n",
  152 + "Users to be removed:[12, 36, 38]\n",
  153 + "Shape Change: 258889 -> 258363 (-526, -0.2%)\n"
  154 + ]
  155 + },
  156 + {
56 157 "data": {
  158 + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEWCAYAAABhffzLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAnTUlEQVR4nO3deVxU9d4H8M/gICKbyr4IiBurICDk85SKglqWuOAKKqnMbfG6lfda9z65hluuLbdIrpr6UrNraZikoplZFriQZqCyKJsECciaLL/nDx/ncUJgVGaG4Xzer5evl3POmd/ve84cPhx+c+Y3MiGEABERSYaBrgsgIiLtYvATEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPjbkJdeegkrVqxolbZu3rwJU1NT1NfXAwCGDBmCrVu3tkrbAPDss89ix44drdaeuv75z3/CysoKdnZ2Wu/7Qd988w2cnJx01v/nn3+O7t27w9TUFBcuXGhx+9Z+/Um/yXVdgFS4urqisLAQcrkcHTp0gKenJ6ZPnw6FQgEDg3u/fz/88EO129q6dStCQ0Ob3MbZ2RkVFRWtUvvSpUtx/fp17Nq1S7nsyJEjrdL2o7h58ybWr1+PGzduwMbGRuv9tyWvv/463nvvPYSHh+u6FNJDvOLXoi+//BLl5eW4ceMGFi9ejDVr1mDWrFmt3k9dXV2rt9kW3Lx5E5aWlu0u9B/n9bpx4wa8vLw0UE3bo6vzub3+HAEMfp2wsLDA6NGjsW/fPuzYsQOXL18GAERHR+Of//wnAKC4uBjPP/88unTpgm7duuGZZ55BQ0MDpk2bhps3b+KFF16Aqakp1q5di+zsbMhkMsTHx8PZ2RlDhw5VLnvw5M3IyEBQUBDMzc0RHh6O27dvA3j4sIWrqyuOHz+OxMRExMbGYt++fTA1NYWvry8A1aGDhoYGrFy5Ei4uLrCxscH06dNRVlYGAMo6duzYAWdnZ1hZWeHtt99u8tiUlZVh+vTpsLa2houLC1auXImGhgYcP34cYWFhyM/Ph6mpKaKjoxs99/5+rF+/HjY2NrC3t8e2bduU6/883LF9+3Y8/fTTyscymQwffPABevfuDTMzM/zP//wPMjIy8F//9V8wNzfHxIkTcffuXZU+Y2NjYWVlBVdXV+zevVu5/I8//sDrr78OZ2dn2Nra4qWXXkJ1dbVKnWvWrIGdnR1efPHFRvvS1DH9448/lEN4vr6+6Nmz50OP47Fjx+Du7g4LCwvMmTMHD35APyMjA0OHDoWlpSWsrKwQGRmJ0tJSAMC6deswfvx4lbbmzp2LefPmKY+Zm5sbzMzM0KNHD5V9ftDSpUsRERGBSZMmwczMDP7+/khNTVWuz8/Px/jx42FtbY0ePXpgy5YtjZ4bFRUFc3NzbN++vVH7zb2WQggsWLAANjY2MDc3h4+Pj/Jn7Elfl/aCwa9DQUFBcHJywunTpxutW79+PZycnFBUVITCwkLExsZCJpNh586dcHZ2xpdffomKigr87W9/Uz7n1KlT+PXXX/H1118/tL9PPvkE//73v1FQUAC5XI65c+e2WOPIkSPx5ptvYtKkSaioqFD54b1v+/bt2L59O06ePInMzExUVFRgzpw5Ktt89913SE9PR1JSEpYvX45ff/31of399a9/RVlZGTIzM3Hq1Cl88skn2LZtG0JDQ3HkyBE4ODigoqLioWEAALdu3UJZWRny8vIQHx+PV199FSUlJS3u531ff/01zp07h7Nnz2Lt2rVQKBTYtWsXcnJycPnyZezZs0elr+LiYuTl5WHHjh1QKBRIT08HACxevBhXr17FxYsXcf36deTl5WH58uUqz719+zZu3LiBuLg4tY+pkZGRcggvNTUVGRkZjZ5bXFyMcePGYeXKlSguLkbPnj1x5swZ5XohBN544w3k5+fj119/RU5ODpYuXQoAiIqKQmJiovIXQV1dHfbu3Yvp06ejsrISc+
fOxZEjR1BeXo7vv/8efn5+TR7LgwcPYsKECbh9+zamTp2KMWPGoLa2Fg0NDXjhhRfg6+uLvLw8JCUlYdOmTSrn7cGDBxEREYHS0lJERka2/MI94OjRo/j2229x9epVlJWV4dNPP4WlpSWAJ39d2gsGv445ODgor7wfZGhoiIKCAty4cQOGhoZ45plnIJPJmm1r6dKlMDExgbGx8UPXT5s2Dd7e3jAxMcGKFSvw6aefKt/8fRK7d+/GwoUL4ebmBlNTU6xatQp79+5V+WtjyZIlMDY2hq+vL3x9fR/6C6S+vh579+7FqlWrYGZmBldXV7z22mvYuXOn2rUYGhrirbfegqGhIZ577jmYmpoqw1gdf/vb32Bubg4vLy94e3tj+PDhcHNzg4WFBZ599tlGb6SuWLECRkZGGDx4MEaNGoVPP/0UQgjExcVh48aN6NatG8zMzPDmm29i7969yucZGBhg2bJlMDIyeujrpc4xbcpXX30FLy8vREREwNDQEPPnz1d5M7xXr14ICwuDkZERrK2tsXDhQpw6dQoAYG9vj0GDBmH//v0AgMTERFhZWSEgIEBZ9+XLl1FdXQ17e/tmh5sCAgKUNSxcuBA1NTU4e/YskpOTUVRUhLfeegsdO3aEm5sbYmJiVI7PwIEDMWbMGBgYGDR5PjfF0NAQ5eXlSEtLgxACHh4esLe3b5XXpb1g8OtYXl4eunXr1mj5okWL0KtXL2XwrF69usW2unfvrvZ6FxcX1NbWori4+NGL/pP8/Hy4uLiotF1XV4fCwkLlsgeDp3Pnzg9947m4uBi1tbWN2srLy1O7FktLS8jl/3/PQlN9NcXW1lb5f2Nj40aPH2yra9euMDExUak1Pz8fRUVFqKqqQkBAALp06YIuXbpg5MiRKCoqUm5rbW2NTp06NVmHOse0uec++FrLZDKVx4WFhZg8eTIcHR1hbm6OqKgolfNgxowZyjfyd+3ahWnTpgEATExMsG/fPnz44Yewt7fHqFGjkJaW1mQdD/ZpYGAAJycn5Ofn48aNG8jPz1cemy5duiA2NlZl31o6l5szdOhQzJkzB6+++ipsbGygUChw586dVnld2gsGvw4lJycjLy9PZZz5PjMzM6xfvx6ZmZk4dOgQNmzYgKSkJABo8sq/pb8IcnJylP+/efMmDA0NYWVlBRMTE1RVVSnX1dfXq/wwtNSug4MDbty4odK2XC5XCU11WFlZwdDQsFFbjo6Oj9ROU/68n7du3Xqi9kpKSlBZWal8fPPmTTg4OMDKygrGxsb45ZdfUFpaitLSUpSVlan80tDkMbW3t1d5rYUQKo/ffPNNyGQyXLp0CXfu3MGuXbtU3gMYM2YMfv75Z1y+fBkJCQkqQy0jRozAsWPHUFBQAHd3d8TExDRZx4N9NjQ0IDc3Fw4ODujevTt69OihPDalpaUoLy/HV199pdy+pePT0ms5d+5cnDt3DleuXMHVq1exbt26Vnld2gsGvw7cuXMHCQkJmDx5MqKiouDj49Nom4SEBFy/fh1CCFhYWKBDhw7K2z5tbW2RmZn5yP3u2rULV65cQVVVFd566y1ERESgQ4cO6NOnD2pqanD48GHU1tZi5cqV+OOPP5TPs7W1RXZ2NhoaGh7a7pQpU7Bx40ZkZWWhoqJC+Z7Ag1fe6ujQoQMmTpyIf/zjH8q7nzZs2ICoqKhH3teH8fPzw4EDB1BVVYXr168jPj7+idtcsmQJ7t69i9OnTyMhIQETJkyAgYEBYmJisGDBAvz2228A7v1l19R7Lw/zJMd01KhR+OWXX3DgwAHU1dVhy5YtKsFYXl4OU1NTWFhYIC8vD+vWrVN5fqdOnRAREYGpU6ciKCgIzs7OAO79pXDw4EFUVlbCyMgIpqamynPyYc6dO6esYdOmTTAyMsJTTz2FoKAgmJmZYc2aNaiurkZ9fT0uX76M5ORktY9Pc69lcnIyfvzxR9TW1sLExASdOnWCgYFBq7wu7QWDX4teeOEFmJmZoX
v37nj77bexcOFClbtOHnTt2jWEhobC1NQUAwcOxCuvvIKQkBAAwBtvvIGVK1eiS5cueOedd9Tuf9q0aYiOjoadnR1qamqUd1JYWFjggw8+wOzZs+Ho6AgTExOVu3wmTJgA4N4wir+/f6N2Z86ciWnTpmHQoEHo0aMHOnXqhHfffVftuh707rvvwsTEBG5ubnj66acxdepUzJw587Ha+rMFCxagY8eOsLW1xYwZMx75TcM/s7OzQ9euXeHg4IDIyEh8+OGHcHd3BwCsWbMGvXr1wlNPPQVzc3OEhoY+0nsNT3JMrayssH//fixevBiWlpa4du0a/vu//1u5fsmSJTh//jwsLCwwatQojBs3rlEbM2bMwKVLl5TDPMC9q/YNGzbAwcEB3bp1w6lTp/Cvf/2ryTrCw8Oxb98+dO3aFTt37sSBAwdgaGiIDh06ICEhARcvXkSPHj1gZWWF2bNnK+8EU0dzr+WdO3cQExODrl27wsXFBZaWlli0aBGAJ39d2gsZv4iFiP7s5s2bcHd3x61bt2Bubv7Iz3/Yh/6o7eAVPxGpuH9lP3ny5McKfWr7OGUDESlVVlbC1tYWLi4uSExM1HU5pCEc6iEikhgO9RARSYxeDPXcnwuFiIjUl52d/dAPaepF8Lu6uiIlJUXXZRAR6ZXAwMCHLudQDxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYjQW/DNnzoSNjQ28vb0brVu/fj1kMlmrfAkIERE9Go0Ff3R09EPn+sjJycHRo0eVc3wTEZF2aSz4Bw0a9NCvFFywYAHWrl0rmW+6ISJqa7T6yd2DBw/C0dERvr6+LW4bFxen/Jb7B78GkIhaj19gMAoKCprdxt7eHhdTftRSRaQNWgv+qqoqxMbG4ujRo2ptr1AooFAoADT9sWMiejIFBQUIWbKv2W1OLpukpWpIW7R2V09GRgaysrLg6+sLV1dX5Obmwt/f/4m/8JqIiB6N1q74fXx8lF9wDPz/xGtWVlbaKoGIiKDBK/4pU6Zg4MCBSE9Ph5OTE+Lj4zXVFRERPQKNXfHv2bOn2fXZ2dma6pqIiJrBT+4SEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJDIOfiEhiGPxERBLD4CcikhgGPxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYjQX/zJkzYWNjA29vb+WyRYsWwd3dHf369cPYsWNRWlqqqe6JiKgJGgv+6OhoJCYmqiwLCwvD5cuX8fPPP6NPnz5YtWqVpronIqImaCz4Bw0ahG7duqksGz58OORyOQDgqaeeQm5urqa6JyKiJuhsjP/f//43nn322SbXx8XFITAwEIGBgSgqKtJiZURE7ZtOgv/tt9+GXC5HZGRkk9soFAqkpKQgJSUF1tbWWqyOiKh9k2u7w+3btyMhIQFJSUmQyWTa7p6ISPK0GvyJiYlYu3YtTp06hc6dO2uzayIi+j8aG+qZMmUKBg4ciPT0dDg5OSE+Ph5z5sxBeXk5wsLC4Ofnh5deeklT3RMRURM0dsW/Z8+eRstmzZqlqe6IiEhN/OQuEZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJDIOfiEhiGPxERBLD4CcikhgGPxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMxoJ/5syZsLGxgbe3t3LZ7du3ERYWht69eyMsLAwlJSWa6p6IiJqgseCPjo5GYmKiyrLVq1dj2LBhuHbtGoYNG4bVq1drqnsiImqCxoJ/0KBB6Natm8qygwcPYsaMGQCAGTNm4IsvvtBU90RE1AS5NjsrLCyEvb09AMDOzg6FhYVNbhsXF4e4uDgAQFFRkVbqIyKSAp29uSuTySCTyZpcr1AokJKSgpSUFFhbW2uxMiKi9k
2rwW9ra4uCggIAQEFBAWxsbLTZPRERQcvBP3r0aOzYsQMAsGPHDoSHh2uzeyIiggaDf8qUKRg4cCDS09Ph5OSE+Ph4LF68GMeOHUPv3r1x/PhxLF68WFPdExFREzT25u6ePXseujwpKUlTXRIRkRr4yV0iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMWsF/6dIlTddBRERaolbwv/LKKwgKCsIHH3yAsrIyTddEREQapFbwnz59Grt370ZOTg4CAgIwdepUHDt2TNO1ERGRBqg9xt+7d2+sXLkSa9aswalTpzB37ly4u7vjwIEDmqyPiIhamVrB//PPP2PBggXw8PDAiRMn8OWXX+LXX3/FiRMnsGDBAk3XSERErUit+fj/+te/Yvbs2YiNjYWxsbFyuYODA1auXKmx4oiIqPWpFfyHDx+GsbExOnToAABoaGhATU0NOnfujGnTpmm0QCIial1qDfWEhoaiurpa+biqqgqhoaEaK4qIiDRHreCvqamBqamp8rGpqSmqqqo0VhQREWmOWsFvYmKC8+fPKx+fO3dOZayfiIj0h1pj/Js2bcKECRPg4OAAIQRu3bqFffv2PXanGzduxNatWyGTyeDj44Nt27ahU6dOj90eERGpT63gHzBgANLS0pCeng4A6Nu3LwwNDR+rw7y8PGzZsgVXrlyBsbExJk6ciL179yI6Ovqx2iMiokejVvADQHJyMrKzs1FXV6cc9pk+ffpjdVpXV4fq6moYGhqiqqoKDg4Oj9UOERE9OrWCf9q0acjIyICfn5/ylk6ZTPZYwe/o6IjXX38dzs7OMDY2xvDhwzF8+PBG28XFxSEuLg4AUFRU9Mj9EBHRw6kV/CkpKbhy5QpkMtkTd1hSUoKDBw8iKysLXbp0wYQJE7Br1y5ERUWpbKdQKKBQKAAAgYGBT9wvERHdo9ZdPd7e3rh161ardHj8+HH06NED1tbWMDQ0xLhx4/D999+3SttERNQyta74i4uL4enpiaCgIBgZGSmXHzp06JE7dHZ2xtmzZ1FVVQVjY2MkJSXxip6ISIvUCv6lS5e2WofBwcGIiIiAv78/5HI5+vfvrxzSISIizVMr+AcPHowbN27g2rVrCA0NRVVVFerr6x+702XLlmHZsmWP/XwiInp8ao3xf/zxx4iIiMBf/vIXAPfuxR8zZowm6yIiIg1RK/jff/99nDlzBubm5gDufSnLb7/9ptHCiIhIM9QKfiMjI3Ts2FH5uK6urlVu7SQiIu1TK/gHDx6M2NhYVFdX49ixY5gwYQJeeOEFTddGREQaoFbwr169GtbW1vDx8cFHH32E5557jt+8RUSkp9S6q8fAwAAxMTGIiYnRdD1ERKRhagV/jx49Hjqmn5mZ2eoFERGRZqk9V899NTU12L9/P27fvq2xooiISHPUGuO3tLRU/nN0dMT8+fNx+PBhTddGREQaoNYV/4Nfu9jQ0ICUlBTU1dVprCgiajtKSkpg6+jc7Db29va4mPKjliqiJ6VW8L/22mv//wS5HK6urvj00081VhQRtR0NDQIhS5r/qtWTyyZpqRpqDWoF/8mTJzVdBxERaYlawb9hw4Zm1y9cuLBViiEiIs1T+66e5ORkjB49GgDw5ZdfIigoCL1799ZocURE1PrUCv7c3FycP38eZmZmAO7Nzz9q1Cjs2rVLo8UREVHrU+t2zsLCQpVJ2jp27IjCwkKNFUVERJqj1hX/9OnTERQUhLFjxwIAvvjiC8yYMUOjhRERkWaoFfz/+Mc/8Oyzz+L06dMAgG3btqF///4aLYyIiDRDraEeAKiqqoK5uTnmzZsHJycnZGVlabIuIiLSELWCf9myZVizZg1WrVoFAKitrUVUVJRGCyMiIs1QK/g///xzHDp0CCYmJgAABwcHlJeXa7QwIiLSDLWCv2PHjpDJZMqpmSsrK5+o09LSUkRERMDd3R0eHh744Ycfnqg9Ii
JSn1rBP3HiRPzlL39BaWkpPv74Y4SGhj7Rl7LMmzcPI0eORFpaGlJTU+Hh4fHYbRER0aNp8a4eIQQmTZqEtLQ0mJubIz09HcuXL0dYWNhjdVhWVoZvv/0W27dvB3Dvr4kHPyNARESa1WLwy2QyPPfcc7h06dJjh/2DsrKyYG1tjRdffBGpqakICAjA5s2ble8f3BcXF4e4uDgAQFFR0RP3S0RE96g11OPv74/k5ORW6bCurg7nz5/Hyy+/jAsXLsDExASrV69utJ1CoUBKSgpSUlJgbW3dKn0TEZGawf/jjz/iqaeeQs+ePdGvXz/4+PigX79+j9Whk5MTnJycEBwcDACIiIhQ+aIXIiLSrGaHem7evAlnZ2d8/fXXrdahnZ0dunfvjvT0dPTt2xdJSUnw9PRstfaJiKh5zQb/mDFjcP78ebi4uGD8+PH4z3/+0yqdvvvuu4iMjMTdu3fh5uaGbdu2tUq7RETUsmaDXwih/H9mZmardern54eUlJRWa4+IiNTX7Bj//Q9s/fn/RESkv5q94k9NTYW5uTmEEKiuroa5uTmAe38JyGQy3LlzRytFEhFR62k2+Ovr67VVBxERaYna0zITEVH7wOAnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJDIOfiEhiGPxERBKjs+Cvr69H//798fzzz+uqBCIiSdJZ8G/evBkeHh666p6ISLJ0Evy5ubk4fPgwZs+erYvuiYgkTSfBP3/+fKxduxYGBnyLgYhI2+Ta7jAhIQE2NjYICAjAN9980+R2cXFxiIuLAwAUFRVpqToiehwlJSWwdXRucr29vT0upvyoxYqoOVoP/jNnzuDQoUP46quvUFNTgzt37iAqKgq7du1S2U6hUEChUAAAAgMDtV0mET2ChgaBkCX7mlx/ctkkLVZDLdH6WMuqVauQm5uL7Oxs7N27F0OHDm0U+kREpDkcZCcikhitD/U8aMiQIRgyZIguSyAikhxe8RMRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMTj+5S0R0n19gMAoKCprdhrN8tg4GPxG1CQUFBc3O8Alwls/WwqEeIiKJYfATEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJjNaDPycnByEhIfD09ISXlxc2b96s7RKIiCRN63P1yOVyrF+/Hv7+/igvL0dAQADCwsLg6emp7VKIiCRJ61f89vb28Pf3BwCYmZnBw8MDeXl52i6DiEiydDo7Z3Z2Ni5cuIDg4OBG6+Li4hAXFwcAKCoqeuw+ONUrke6VlJTA1tG5+W1KS7VTTBuiq3zSWfBXVFRg/Pjx2LRpE8zNzRutVygUUCgUAIDAwMDH7odTvRLpXkODaPHn8LO5YVqqpu3QVT7p5K6e2tpajB8/HpGRkRg3bpwuSiAikiytB78QArNmzYKHhwcWLlyo7e6JiCRP68F/5swZ7Ny5EydOnICfnx/8/Pzw1VdfabsMIiLJ0voY/9NPPw0hhLa7JSKi/8NP7hIRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMTqdl1ictTZ+qramdW2sa17ayP9rU0j6XV1TAzNS02TZa49i2pX70bSrklqZ3bo/nrSYw+NXU0vSp2praubWmcW0r+6NNLe3zZ3PDELJO88e2LfWjb1MhtzS9c3s8bzWBQz1ERBLD4CcikhgGPxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYnQR/YmIi+vbti169emH16tW6KIGISLK0Hvz19fV49dVXceTIEVy5cgV79uzBlStXtF0GEZFkaT34f/rpJ/Tq1Qtubm7o2LEjJk+ejIMHD2q7DCIiyZ
IJIYQ2O/zss8+QmJiIrVu3AgB27tyJH3/8Ee+9957KdnFxcYiLiwMApKWlwd3dvcW2i4qKYG1t3fpFa5G+7wPr1z193wfW33qys7NRXFzcaHmbnZZZoVBAoVA80nMCAwORkpKioYq0Q9/3gfXrnr7vA+vXPK0P9Tg6OiInJ0f5ODc3F46Ojtoug4hIsrQe/AMGDMC1a9eQlZWFu3fvYu/evRg9erS2yyAikiytD/XI5XK89957GDFiBOrr6zFz5kx4eXm1StuPOjTUFun7PrB+3dP3fWD9mqf1N3eJiEi3+MldIiKJYfATEUlMuwl+fZsGYubMmbCxsYG3t7dy2e3btxEWFobevXsjLCwMJSUlOqyweTk5OQgJCYGnpye8vLywefNmAPq1DzU1NQgKCoKvry+8vLywZMkSAEBWVhaCg4PRq1cvTJo0CXfv3tVxpc2rr69H//798fzzzwPQr/pdXV3h4+MDPz8/BAYGAtCvcwgASktLERERAXd3d3h4eOCHH35o8/vQLoJfH6eBiI6ORmJiosqy1atXY9iwYbh27RqGDRvWpn+ByeVyrF+/HleuXMHZs2fx/vvv48qVK3q1D0ZGRjhx4gRSU1Nx8eJFJCYm4uzZs/j73/+OBQsW4Pr16+jatSvi4+N1XWqzNm/eDA8PD+Vjfav/5MmTuHjxovLed306hwBg3rx5GDlyJNLS0pCamgoPD4+2vw+iHfj+++/F8OHDlY9jY2NFbGysDitST1ZWlvDy8lI+7tOnj8jPzxdCCJGfny/69Omjq9Ie2ejRo8XRo0f1dh8qKytF//79xdmzZ4WlpaWora0VQjQ+t9qanJwcMXToUJGUlCRGjRolGhoa9Kp+FxcXUVRUpLJMn86h0tJS4erqKhoaGlSWt/V9aBdX/Hl5eejevbvysZOTE/Ly8nRY0eMpLCyEvb09AMDOzg6FhYU6rkg92dnZuHDhAoKDg/VuH+rr6+Hn5wcbGxuEhYWhZ8+e6NKlC+Tye3c6t/Vzaf78+Vi7di0MDO79KP/+++96Vb9MJsPw4cMREBCgnKJFn86hrKwsWFtb48UXX0T//v0xe/ZsVFZWtvl9aBfB3x7JZDLIZDJdl9GiiooKjB8/Hps2bYK5ubnKOn3Yhw4dOuDixYvIzc3FTz/9hLS0NF2XpLaEhATY2NggICBA16U8tu+++w7nz5/HkSNH8P777+Pbb79VWd/Wz6G6ujqcP38eL7/8Mi5cuAATE5NGwzptcR/aRfC3l2kgbG1tUVBQAAAoKCiAjY2NjitqXm1tLcaPH4/IyEiMGzcOgP7tw31dunRBSEgIfvjhB5SWlqKurg5A2z6Xzpw5g0OHDsHV1RWTJ0/GiRMnMG/ePL2pH4CyNhsbG4wdOxY//fSTXp1DTk5OcHJyQnBwMAAgIiIC58+fb/P70C6Cv71MAzF69Gjs2LEDALBjxw6Eh4fruKKmCSEwa9YseHh4YOHChcrl+rQPRUVFKC0tBQBUV1fj2LFj8PDwQEhICD777DMAbXsfVq1ahdzcXGRnZ2Pv3r0YOnQodu/erTf1V1ZWory8XPn/o0ePwtvbW6/OITs7O3Tv3h3p6ekAgKSkJHh6erb9fdD1mwyt5fDhw6J3797Czc1NrFy5UtfltGjy5MnCzs5OyOVy4ejoKLZu3SqKi4vF0KFDRa9evcSwYcPE77//rusym3T69GkBQPj4+AhfX1/h6+srDh8+rFf7kJqaKvz8/ISPj4/w8vISy5YtE0IIkZGRIQYMGCB69uwpIiIiRE1NjY4rbdnJkyfFqFGjhBD6U39GRobo16+f6Nevn/D09FT+3OrTOSSEEBcuXBABAQHCx8dHhIeHi9u3b7f5feCUDUREEtMuhnqIiEh9DH4iIolh8BMRSQyDn4hIYhj8REQSw+AnvSCTyfDaa68pH7/zzjtYunRpq7QdHR2tvO9dk/bv36/8nEBbqIeki8FPesHIyAgHDhxAcXGxrktRcf8TsuqIj4
/Hxx9/jJMnT2qwIqKWMfhJL8jlcigUCmzcuLHRuj9fIZuamgIAvvnmGwwePBjh4eFwc3PD4sWLsXv3bgQFBcHHxwcZGRnK5xw/fhyBgYHo06cPEhISANybwG3RokUYMGAA+vXrh48++kjZ7jPPPIPRo0fD09OzUT179uyBj48PvL298fe//x0AsHz5cnz33XeYNWsWFi1apLK9EAJz5sxB3759ERoait9++025bvny5RgwYAC8vb2hUCgghEBGRgb8/f2V21y7dk35ePHixfD09ES/fv3w+uuvP9pBJunQ7efHiNRjYmIiysrKhIuLiygtLRXr1q0TS5YsEUIIMWPGDLF//36VbYW492lWCwsLkZ+fL2pqaoSDg4N46623hBBCbNq0ScybN0/5/BEjRoj6+npx9epV4ejoKKqrq8VHH30kVqxYIYQQoqamRgQEBIjMzExx8uRJ0blzZ5GZmdmozry8PNG9e3fx22+/idraWhESEiI+//xzIYQQgwcPFsnJyY2e85///EeEhoaKuro6kZeXJywsLJT78+AnPqOiosShQ4eEEEIMGTJEXLhwQQghxBtvvCG2bNkiiouLRZ8+fZRTBJeUlDzGkSYp4BU/6Q1zc3NMnz4dW7ZsUfs5AwYMgL29PYyMjNCzZ08MHz4cAODj44Ps7GzldhMnToSBgQF69+4NNzc3pKWl4ejRo/jkk0/g5+eH4OBg/P7777h27RoAICgoCD169GjUX3JyMoYMGQJra2vI5XJERkY2mnHyz7799ltMmTIFHTp0gIODA4YOHapcd/LkSQQHB8PHxwcnTpzAL7/8AgCYPXs2tm3bhvr6euzbtw9Tp06FhYUFOnXqhFmzZuHAgQPo3Lmz2seJpIXBT3pl/vz5iI+PR2VlpXKZXC5HQ0MDAKChoUHlqwaNjIyU/zcwMFA+NjAwUBmf//O0uTKZDEIIvPvuu7h48SIuXryIrKws5S8OExOT1t+5P6mpqcErr7yCzz77DJcuXUJMTAxqamoAAOPHj8eRI0eQkJCAgIAAWFpaQi6X46effkJERAQSEhIwcuRIjddI+onBT3qlW7dumDhxosrXCbq6uuLcuXMAgEOHDqG2tvaR292/fz8aGhqQkZGBzMxM9O3bFyNGjMC//vUvZXtXr15V+YXzMEFBQTh16hSKi4tRX1+PPXv2YPDgwc0+Z9CgQdi3bx/q6+tRUFCgfPP3fshbWVmhoqJC5X2MTp06YcSIEXj55Zfx4osvArj33QhlZWV47rnnsHHjRqSmpj7ycSBpkOu6AKJH9dprr+G9995TPo6JiUF4eDh8fX0xcuTIx7oad3Z2RlBQEO7cuYMPP/wQnTp1wuzZs5GdnQ1/f38IIWBtbY0vvvii2Xbs7e2xevVqhISEQAiBUaNGtTgl79ixY3HixAl4enrC2dkZAwcOBHDvOwJiYmLg7e0NOzs7DBgwQOV5kZGR+Pzzz5V/hZSXlyM8PBw1NTUQQmDDhg2PfBxIGjg7J5Geeuedd1BWVoYVK1bouhTSM7ziJ9JDY8eORUZGBk6cOKHrUkgP8YqfiEhi+OYuEZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJzP8CkWQEIHluKswAAAAASUVORK5CYII=",
57 159 "text/plain": [
58   - "0 1\n",
59   - "1 1\n",
60   - "2 1\n",
61   - "3 1\n",
62   - "4 1\n",
63   - "Name: user, dtype: int64"
  160 + "<Figure size 432x288 with 1 Axes>"
64 161 ]
65 162 },
66   - "execution_count": 19,
67 163 "metadata": {},
68   - "output_type": "execute_result"
  164 + "output_type": "display_data"
69 165 }
70 166 ],
  167 + "source": [
  168 + "# counting the number of distinct days of data per user, sorted in ascending order\n",
  169 + "stat_user = user_date.groupby(['user'])['local_date'].nunique().sort_values()\n",
  170 + "\n",
  171 + "ax = plt.figure()\n",
  172 + "ax.patch.set_facecolor('white')\n",
  173 + "ax = sns.histplot(stat_user)\n",
  174 + "ax.set_title('Distribution of number of days per user')\n",
  175 + "ax.set_xlabel('Number of days')\n",
  176 + "ax.set_ylabel('Frequency')\n",
  177 + "\n",
  178 + "# minimum number of distinct days of data a user must have to be kept\n",
  179 + "THRESHOLD_OF_DAYS_PER_USER = 10\n",
  180 + "\n",
  181 + "# filter out users that have fewer days of data than THRESHOLD_OF_DAYS_PER_USER\n",
  182 + "users_to_be_removed = stat_user[stat_user < THRESHOLD_OF_DAYS_PER_USER].index\n",
  183 + "\n",
  184 + "print(\"Threshold: {}\".format(THRESHOLD_OF_DAYS_PER_USER))\n",
  185 + "print(\"Users to be removed:{}\".format(list(users_to_be_removed)))\n",
  186 + "\n",
  187 + "jawbone4 = jawbone3[~jawbone3[\"user\"].isin(users_to_be_removed)]\n",
  188 + "\n",
  189 + "\n",
  190 + "# printing the amount of data removed\n",
  191 + "jawbone3_count, _ = jawbone3.shape\n",
  192 + "jawbone4_count, _ = jawbone4.shape\n",
  193 + "\n",
  194 + "print(\"Shape Change: {} -> {} (-{}, -{}%)\".format(\n",
  195 + " jawbone3_count, \n",
  196 + " jawbone4_count, \n",
  197 + " jawbone3_count - jawbone4_count, \n",
  198 + " round((jawbone3_count - jawbone4_count) / jawbone3_count * 100, 2)\n",
  199 + " )\n",
  200 + ")"
  201 + ]
  202 + },
  203 + {
  204 + "cell_type": "code",
  205 + "execution_count": null,
  206 + "metadata": {},
  207 + "outputs": [
  208 + {
  209 + "ename": "NameError",
  210 + "evalue": "name 'users' is not defined",
  211 + "output_type": "error",
  212 + "traceback": [
  213 + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  214 + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
  215 + "\u001b[0;32m/var/folders/m6/l3x11zj94l3dp3wnxy1vnscc0000gn/T/ipykernel_50945/4152346818.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mstandard_minute_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"local_minute_index\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1440\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0ma_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0musers\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0ma_date\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0muser_date2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocal_date\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  216 + "\u001b[0;31mNameError\u001b[0m: name 'users' is not defined"
  217 + ]
  218 + }
  219 + ],
  220 + "source": [
  221 + "standard_minute_index = pd.Series(name=\"local_minute_index\", data=np.arange(0, 1440, 1))\n",
  222 + "\n",
  223 + "a_user = users[0]\n",
  224 + "a_date = user_date2.local_date[0]\n",
  225 + "\n",
  226 + "a_jawbone3 = jawbone3.loc[(jawbone3.user == a_user) & (jawbone3.local_date == a_date), :]\n",
  227 + "\n",
  228 + "vec = a_jawbone3[[\"local_minute_index\", \"steps\"]]\n",
  229 + "\n",
  230 + "steps = [0] * 1440\n",
  231 + "\n",
  232 + "for index, row in vec.iterrows():\n",
  233 + " steps[row.local_minute_index] += row.steps\n",
  234 + "\n",
  235 + "print(steps)\n",
  236 + "steps_series = pd.Series(name=\"steps\", data=steps)\n",
  237 + "steps_series[\"over60\"] = (steps_series > 60) * 1\n",
  238 + "\n",
  239 + "steps_series[\"roll\"] = steps_series.rolling(window=5, min_periods=1).sum()\n",
  240 + "\n",
  241 + "steps_series.roll.plot()\n",
  242 + "\n",
  243 + "\n"
  244 + ]
  245 + },
  246 + {
  247 + "cell_type": "code",
  248 + "execution_count": null,
  249 + "metadata": {},
  250 + "outputs": [],
71 251 "source": []
72 252 }
73 253 ],
requirements.txt View file @ a5d4a03
... ... @@ -29,6 +29,7 @@
29 29 Keras-Preprocessing==1.1.2
30 30 kiwisolver==1.3.2
31 31 libclang==12.0.0
  32 +line-profiler==3.4.0
32 33 Markdown==3.3.6
33 34 matplotlib==3.5.1
34 35 matplotlib-inline==0.1.3
... ... @@ -55,6 +56,8 @@
55 56 requests==2.27.1
56 57 requests-oauthlib==1.3.0
57 58 rsa==4.8
  59 +scipy==1.7.3
  60 +seaborn==0.11.2
58 61 six==1.16.0
59 62 tensorboard==2.7.0
60 63 tensorboard-data-server==0.6.1