Commit a5d4a032f6cc55c8817b81e7f617c4df0f354f52
1 parent
be5a830d6d
Exists in
main
Added comments; users with too little data are removed
Showing 3 changed files with 204 additions and 14 deletions Side-by-side Diff
.vscode/settings.json
View file @
a5d4a03
python-notebook/data_loading.ipynb
View file @
a5d4a03
... | ... | @@ -9,27 +9,52 @@ |
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | - "execution_count": 13, | |
12 | + "execution_count": 3, | |
13 | 13 | "metadata": {}, |
14 | 14 | "outputs": [], |
15 | 15 | "source": [ |
16 | 16 | "import numpy as np\n", |
17 | 17 | "import matplotlib.pyplot as plt\n", |
18 | + "import seaborn as sns\n", | |
18 | 19 | "from pandas import read_csv\n", |
20 | + "import pandas as pd\n", | |
19 | 21 | "import os\n", |
20 | - "from datetime import datetime" | |
22 | + "from datetime import datetime, date\n", | |
23 | + "# %load_ext line_profiler" | |
21 | 24 | ] |
22 | 25 | }, |
23 | 26 | { |
24 | 27 | "cell_type": "markdown", |
25 | 28 | "metadata": {}, |
26 | 29 | "source": [ |
30 | + "# Defining Functions and Adjusting Settings" | |
31 | + ] | |
32 | + }, | |
33 | + { | |
34 | + "cell_type": "code", | |
35 | + "execution_count": 4, | |
36 | + "metadata": {}, | |
37 | + "outputs": [], | |
38 | + "source": [ | |
39 | + "pd.options.mode.chained_assignment = None\n", | |
40 | + "\n", | |
41 | + "def get_date(x):\n", | |
42 | + " return date(x.year, x.month, x.day)\n", | |
43 | + "\n", | |
44 | + "def get_minute_index(x):\n", | |
45 | + " return (x.hour * 60) + x.minute" | |
46 | + ] | |
47 | + }, | |
48 | + { | |
49 | + "cell_type": "markdown", | |
50 | + "metadata": {}, | |
51 | + "source": [ | |
27 | 52 | "# Loading data files" |
28 | 53 | ] |
29 | 54 | }, |
30 | 55 | { |
31 | 56 | "cell_type": "code", |
32 | - "execution_count": 16, | |
57 | + "execution_count": 5, | |
33 | 58 | "metadata": {}, |
34 | 59 | "outputs": [], |
35 | 60 | "source": [ |
36 | 61 | |
37 | 62 | |
38 | 63 | |
39 | 64 | |
40 | 65 | |
41 | 66 | |
42 | 67 | |
43 | 68 | |
44 | 69 | |
... | ... | @@ -37,37 +62,192 @@ |
37 | 62 | "\n", |
38 | 63 | "daily = read_csv(os.path.join(data_dir, 'daily.csv'))\n", |
39 | 64 | "dose = read_csv(os.path.join(data_dir, 'dose.csv'))\n", |
40 | - "jawbone = read_csv(os.path.join(data_dir, 'jawbone.csv'), low_memory=False)" | |
65 | + "jawbone = read_csv(os.path.join(data_dir, 'jawbone.csv'), low_memory=False)\n" | |
41 | 66 | ] |
42 | 67 | }, |
43 | 68 | { |
44 | 69 | "cell_type": "markdown", |
45 | 70 | "metadata": {}, |
46 | 71 | "source": [ |
47 | - "# Preprocessing" | |
72 | + "# Preprocessing\n", | |
73 | + "## Selecting the variables" | |
48 | 74 | ] |
49 | 75 | }, |
50 | 76 | { |
51 | 77 | "cell_type": "code", |
52 | - "execution_count": 19, | |
78 | + "execution_count": 6, | |
53 | 79 | "metadata": {}, |
80 | + "outputs": [], | |
81 | + "source": [ | |
82 | + "# Column names of jawbone data\n", | |
83 | + "# 'Var1', 'user', 'start_datetime', 'end_datetime', 'timezone', 'userid',\n", | |
84 | + "# 'steps', 'gmtoff', 'tz', 'start_date', 'end_date', 'start_utime',\n", | |
85 | + "# 'end_utime', 'start_udate', 'end_udate', 'intake_date', 'intake_utime',\n", | |
86 | + "# 'intake_tz', 'intake_gmtoff', 'intake_hour', 'intake_min',\n", | |
87 | + "# 'intake_slot', 'travel_start', 'travel_end', 'exit_date',\n", | |
88 | + "# 'dropout_date', 'last_date', 'last_utime', 'last_tz', 'last_gmtoff',\n", | |
89 | + "# 'last_hour', 'last_min', 'start_utime_local', 'end_utime_local'\n", | |
90 | + "\n", | |
91 | + "\n", | |
92 | + "# duplicate jawbone data\n", | |
93 | + "jawbone2 = jawbone.copy(deep=True)\n", | |
94 | + "\n", | |
95 | + "# convert string datetimes to actual datetime objects\n", | |
96 | + "jawbone2[\"start_utime_local\"] = pd.to_datetime(\n", | |
97 | + " jawbone2[\"start_utime_local\"], format=\"%Y-%m-%d %H:%M:%S\")\n", | |
98 | + "jawbone2[\"start_datetime\"] = pd.to_datetime(\n", | |
99 | + " jawbone2[\"start_datetime\"], format=\"%Y-%m-%d %H:%M:%S\")\n", | |
100 | + "\n", | |
101 | + "# calculate the timezone offset\n", | |
102 | + "jawbone2[\"tz_offset\"] = jawbone2[\"start_datetime\"] - \\\n", | |
103 | + " jawbone2[\"start_utime_local\"]\n", | |
104 | + "\n", | |
105 | + "\n", | |
106 | + "# selecting only important columns\n", | |
107 | + "jawbone3 = jawbone2[[\"user\", \"start_utime_local\",\n", | |
108 | + " \"end_utime_local\", \"tz_offset\", \"steps\"]]\n", | |
109 | + "\n", | |
110 | + "# extracting the local calendar date from the start timestamp\n", | |
111 | + "jawbone3[\"local_date\"] = jawbone3[\"start_utime_local\"].apply(get_date)\n", | |
112 | + "\n", | |
113 | + "# computing the local minute-of-day index (hour * 60 + minute) from the start timestamp\n", | |
114 | + "jawbone3[\"local_minute_index\"] = jawbone3[\"start_utime_local\"].apply(\n", | |
115 | + " get_minute_index)\n" | |
116 | + ] | |
117 | + }, | |
118 | + { | |
119 | + "cell_type": "markdown", | |
120 | + "metadata": {}, | |
121 | + "source": [ | |
122 | + "## Making a key info database" | |
123 | + ] | |
124 | + }, | |
125 | + { | |
126 | + "cell_type": "code", | |
127 | + "execution_count": 7, | |
128 | + "metadata": {}, | |
129 | + "outputs": [], | |
130 | + "source": [ | |
131 | + "# collecting the unique (user, local_date) pairs\n", | |
132 | + "user_date = jawbone3[[\"user\", \"local_date\"]].drop_duplicates()" | |
133 | + ] | |
134 | + }, | |
135 | + { | |
136 | + "cell_type": "markdown", | |
137 | + "metadata": {}, | |
138 | + "source": [ | |
139 | + "## Removing users with too little data" | |
140 | + ] | |
141 | + }, | |
142 | + { | |
143 | + "cell_type": "code", | |
144 | + "execution_count": 12, | |
145 | + "metadata": {}, | |
54 | 146 | "outputs": [ |
55 | 147 | { |
148 | + "name": "stdout", | |
149 | + "output_type": "stream", | |
150 | + "text": [ | |
151 | + "Threshold: 10\n", | |
152 | + "Users to be removed:[12, 36, 38]\n", | |
153 | + "Shape Change: 258889 -> 258363 (-526, -0.2%)\n" | |
154 | + ] | |
155 | + }, | |
156 | + { | |
56 | 157 | "data": { |
158 | + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEWCAYAAABhffzLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAnTUlEQVR4nO3deVxU9d4H8M/gICKbyr4IiBurICDk85SKglqWuOAKKqnMbfG6lfda9z65hluuLbdIrpr6UrNraZikoplZFriQZqCyKJsECciaLL/nDx/ncUJgVGaG4Xzer5evl3POmd/ve84cPhx+c+Y3MiGEABERSYaBrgsgIiLtYvATEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPjbkJdeegkrVqxolbZu3rwJU1NT1NfXAwCGDBmCrVu3tkrbAPDss89ix44drdaeuv75z3/CysoKdnZ2Wu/7Qd988w2cnJx01v/nn3+O7t27w9TUFBcuXGhx+9Z+/Um/yXVdgFS4urqisLAQcrkcHTp0gKenJ6ZPnw6FQgEDg3u/fz/88EO129q6dStCQ0Ob3MbZ2RkVFRWtUvvSpUtx/fp17Nq1S7nsyJEjrdL2o7h58ybWr1+PGzduwMbGRuv9tyWvv/463nvvPYSHh+u6FNJDvOLXoi+//BLl5eW4ceMGFi9ejDVr1mDWrFmt3k9dXV2rt9kW3Lx5E5aWlu0u9B/n9bpx4wa8vLw0UE3bo6vzub3+HAEMfp2wsLDA6NGjsW/fPuzYsQOXL18GAERHR+Of//wnAKC4uBjPP/88unTpgm7duuGZZ55BQ0MDpk2bhps3b+KFF16Aqakp1q5di+zsbMhkMsTHx8PZ2RlDhw5VLnvw5M3IyEBQUBDMzc0RHh6O27dvA3j4sIWrqyuOHz+OxMRExMbGYt++fTA1NYWvry8A1aGDhoYGrFy5Ei4uLrCxscH06dNRVlYGAMo6duzYAWdnZ1hZWeHtt99u8tiUlZVh+vTpsLa2houLC1auXImGhgYcP34cYWFhyM/Ph6mpKaKjoxs99/5+rF+/HjY2NrC3t8e2bduU6/883LF9+3Y8/fTTyscymQwffPABevfuDTMzM/zP//wPMjIy8F//9V8wNzfHxIkTcffuXZU+Y2NjYWVlBVdXV+zevVu5/I8//sDrr78OZ2dn2Nra4qWXXkJ1dbVKnWvWrIGdnR1efPHFRvvS1DH9448/lEN4vr6+6Nmz50OP47Fjx+Du7g4LCwvMmTMHD35APyMjA0OHDoWlpSWsrKwQGRmJ0tJSAMC6deswfvx4lbbmzp2LefPmKY+Zm5sbzMzM0KNHD5V9ftDSpUsRERGBSZMmwczMDP7+/khNTVWuz8/Px/jx42FtbY0ePXpgy5YtjZ4bFRUFc3NzbN++vVH7zb2WQggsWLAANjY2MDc3h4+Pj/Jn7Elfl/aCwa9DQUFBcHJywunTpxutW79+PZycnFBUVITCwkLExsZCJpNh586dcHZ2xpdffomKigr87W9/Uz7n1KlT+PXXX/H1118/tL9PPvkE//73v1FQUAC5XI65c+e2WOPIkSPx5ptvYtKkSaioqFD54b1v+/bt2L59O06ePInMzExUVFRgzpw5Ktt89913SE9PR1JSEpYvX45ff/31of399a9/RVlZGTIzM3Hq1Cl88skn2LZtG0JDQ3HkyBE4ODigoqLioWEAALdu3UJZWRny8vIQHx+PV199FSUlJS3u531ff/01zp07h7Nnz2Lt2rVQKBTYtWsXcnJycPnyZezZs0elr+LiYuTl5WHHjh1QKBRIT08HACxevBhXr17FxYsXcf36deTl5WH58uUqz719+zZu3LiBuLg4tY+pkZGRcggvNTUVGRkZjZ5bXFyMcePGYeXKlSguLkbPnj1x5swZ5XohBN544w3k5+fj119/RU5ODpYuXQoAiIqKQmJiovIXQV1dHfbu3Yvp06ejsrISc+
fOxZEjR1BeXo7vv/8efn5+TR7LgwcPYsKECbh9+zamTp2KMWPGoLa2Fg0NDXjhhRfg6+uLvLw8JCUlYdOmTSrn7cGDBxEREYHS0lJERka2/MI94OjRo/j2229x9epVlJWV4dNPP4WlpSWAJ39d2gsGv445ODgor7wfZGhoiIKCAty4cQOGhoZ45plnIJPJmm1r6dKlMDExgbGx8UPXT5s2Dd7e3jAxMcGKFSvw6aefKt/8fRK7d+/GwoUL4ebmBlNTU6xatQp79+5V+WtjyZIlMDY2hq+vL3x9fR/6C6S+vh579+7FqlWrYGZmBldXV7z22mvYuXOn2rUYGhrirbfegqGhIZ577jmYmpoqw1gdf/vb32Bubg4vLy94e3tj+PDhcHNzg4WFBZ599tlGb6SuWLECRkZGGDx4MEaNGoVPP/0UQgjExcVh48aN6NatG8zMzPDmm29i7969yucZGBhg2bJlMDIyeujrpc4xbcpXX30FLy8vREREwNDQEPPnz1d5M7xXr14ICwuDkZERrK2tsXDhQpw6dQoAYG9vj0GDBmH//v0AgMTERFhZWSEgIEBZ9+XLl1FdXQ17e/tmh5sCAgKUNSxcuBA1NTU4e/YskpOTUVRUhLfeegsdO3aEm5sbYmJiVI7PwIEDMWbMGBgYGDR5PjfF0NAQ5eXlSEtLgxACHh4esLe3b5XXpb1g8OtYXl4eunXr1mj5okWL0KtXL2XwrF69usW2unfvrvZ6FxcX1NbWori4+NGL/pP8/Hy4uLiotF1XV4fCwkLlsgeDp3Pnzg9947m4uBi1tbWN2srLy1O7FktLS8jl/3/PQlN9NcXW1lb5f2Nj40aPH2yra9euMDExUak1Pz8fRUVFqKqqQkBAALp06YIuXbpg5MiRKCoqUm5rbW2NTp06NVmHOse0uec++FrLZDKVx4WFhZg8eTIcHR1hbm6OqKgolfNgxowZyjfyd+3ahWnTpgEATExMsG/fPnz44Yewt7fHqFGjkJaW1mQdD/ZpYGAAJycn5Ofn48aNG8jPz1cemy5duiA2NlZl31o6l5szdOhQzJkzB6+++ipsbGygUChw586dVnld2gsGvw4lJycjLy9PZZz5PjMzM6xfvx6ZmZk4dOgQNmzYgKSkJABo8sq/pb8IcnJylP+/efMmDA0NYWVlBRMTE1RVVSnX1dfXq/wwtNSug4MDbty4odK2XC5XCU11WFlZwdDQsFFbjo6Oj9ROU/68n7du3Xqi9kpKSlBZWal8fPPmTTg4OMDKygrGxsb45ZdfUFpaitLSUpSVlan80tDkMbW3t1d5rYUQKo/ffPNNyGQyXLp0CXfu3MGuXbtU3gMYM2YMfv75Z1y+fBkJCQkqQy0jRozAsWPHUFBQAHd3d8TExDRZx4N9NjQ0IDc3Fw4ODujevTt69OihPDalpaUoLy/HV199pdy+pePT0ms5d+5cnDt3DleuXMHVq1exbt26Vnld2gsGvw7cuXMHCQkJmDx5MqKiouDj49Nom4SEBFy/fh1CCFhYWKBDhw7K2z5tbW2RmZn5yP3u2rULV65cQVVVFd566y1ERESgQ4cO6NOnD2pqanD48GHU1tZi5cqV+OOPP5TPs7W1RXZ2NhoaGh7a7pQpU7Bx40ZkZWWhoqJC+Z7Ag1fe6ujQoQMmTpyIf/zjH8q7nzZs2ICoqKhH3teH8fPzw4EDB1BVVYXr168jPj7+idtcsmQJ7t69i9OnTyMhIQETJkyAgYEBYmJisGDBAvz2228A7v1l19R7Lw/zJMd01KhR+OWXX3DgwAHU1dVhy5YtKsFYXl4OU1NTWFhYIC8vD+vWrVN5fqdOnRAREYGpU6ciKCgIzs7OAO79pXDw4EFUVlbCyMgIpqamynPyYc6dO6esYdOmTTAyMsJTTz2FoKAgmJmZYc2aNaiurkZ9fT0uX76M5ORktY9Pc69lcnIyfvzxR9TW1sLExASdOnWCgYFBq7wu7QWDX4teeOEFmJmZoX
v37nj77bexcOFClbtOHnTt2jWEhobC1NQUAwcOxCuvvIKQkBAAwBtvvIGVK1eiS5cueOedd9Tuf9q0aYiOjoadnR1qamqUd1JYWFjggw8+wOzZs+Ho6AgTExOVu3wmTJgA4N4wir+/f6N2Z86ciWnTpmHQoEHo0aMHOnXqhHfffVftuh707rvvwsTEBG5ubnj66acxdepUzJw587Ha+rMFCxagY8eOsLW1xYwZMx75TcM/s7OzQ9euXeHg4IDIyEh8+OGHcHd3BwCsWbMGvXr1wlNPPQVzc3OEhoY+0nsNT3JMrayssH//fixevBiWlpa4du0a/vu//1u5fsmSJTh//jwsLCwwatQojBs3rlEbM2bMwKVLl5TDPMC9q/YNGzbAwcEB3bp1w6lTp/Cvf/2ryTrCw8Oxb98+dO3aFTt37sSBAwdgaGiIDh06ICEhARcvXkSPHj1gZWWF2bNnK+8EU0dzr+WdO3cQExODrl27wsXFBZaWlli0aBGAJ39d2gsZv4iFiP7s5s2bcHd3x61bt2Bubv7Iz3/Yh/6o7eAVPxGpuH9lP3ny5McKfWr7OGUDESlVVlbC1tYWLi4uSExM1HU5pCEc6iEikhgO9RARSYxeDPXcnwuFiIjUl52d/dAPaepF8Lu6uiIlJUXXZRAR6ZXAwMCHLudQDxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYjQW/DNnzoSNjQ28vb0brVu/fj1kMlmrfAkIERE9Go0Ff3R09EPn+sjJycHRo0eVc3wTEZF2aSz4Bw0a9NCvFFywYAHWrl0rmW+6ISJqa7T6yd2DBw/C0dERvr6+LW4bFxen/Jb7B78GkIhaj19gMAoKCprdxt7eHhdTftRSRaQNWgv+qqoqxMbG4ujRo2ptr1AooFAoADT9sWMiejIFBQUIWbKv2W1OLpukpWpIW7R2V09GRgaysrLg6+sLV1dX5Obmwt/f/4m/8JqIiB6N1q74fXx8lF9wDPz/xGtWVlbaKoGIiKDBK/4pU6Zg4MCBSE9Ph5OTE+Lj4zXVFRERPQKNXfHv2bOn2fXZ2dma6pqIiJrBT+4SEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJDIOfiEhiGPxERBLD4CcikhgGPxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYjQX/zJkzYWNjA29vb+WyRYsWwd3dHf369cPYsWNRWlqqqe6JiKgJGgv+6OhoJCYmqiwLCwvD5cuX8fPPP6NPnz5YtWqVpronIqImaCz4Bw0ahG7duqksGz58OORyOQDgqaeeQm5urqa6JyKiJuhsjP/f//43nn322SbXx8XFITAwEIGBgSgqKtJiZURE7ZtOgv/tt9+GXC5HZGRkk9soFAqkpKQgJSUF1tbWWqyOiKh9k2u7w+3btyMhIQFJSUmQyWTa7p6ISPK0GvyJiYlYu3YtTp06hc6dO2uzayIi+j8aG+qZMmUKBg4ciPT0dDg5OSE+Ph5z5sxBeXk5wsLC4Ofnh5deeklT3RMRURM0dsW/Z8+eRstmzZqlqe6IiEhN/OQuEZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJDIOfiEhiGPxERBLD4CcikhgGPxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMxoJ/5syZsLGxgbe3t3LZ7du3ERYWht69eyMsLAwlJSWa6p6IiJqgseCPjo5GYmKiyrLVq1dj2LBhuHbtGoYNG4bVq1drqnsiImqCxoJ/0KBB6Natm8qygwcPYsaMGQCAGTNm4IsvvtBU90RE1AS5NjsrLCyEvb09AMDOzg6FhYVNbhsXF4e4uDgAQFFRkVbqIyKSAp29uSuTySCTyZpcr1AokJKSgpSUFFhbW2uxMiKi9k
2rwW9ra4uCggIAQEFBAWxsbLTZPRERQcvBP3r0aOzYsQMAsGPHDoSHh2uzeyIiggaDf8qUKRg4cCDS09Ph5OSE+Ph4LF68GMeOHUPv3r1x/PhxLF68WFPdExFREzT25u6ePXseujwpKUlTXRIRkRr4yV0iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMWsF/6dIlTddBRERaolbwv/LKKwgKCsIHH3yAsrIyTddEREQapFbwnz59Grt370ZOTg4CAgIwdepUHDt2TNO1ERGRBqg9xt+7d2+sXLkSa9aswalTpzB37ly4u7vjwIEDmqyPiIhamVrB//PPP2PBggXw8PDAiRMn8OWXX+LXX3/FiRMnsGDBAk3XSERErUit+fj/+te/Yvbs2YiNjYWxsbFyuYODA1auXKmx4oiIqPWpFfyHDx+GsbExOnToAABoaGhATU0NOnfujGnTpmm0QCIial1qDfWEhoaiurpa+biqqgqhoaEaK4qIiDRHreCvqamBqamp8rGpqSmqqqo0VhQREWmOWsFvYmKC8+fPKx+fO3dOZayfiIj0h1pj/Js2bcKECRPg4OAAIQRu3bqFffv2PXanGzduxNatWyGTyeDj44Nt27ahU6dOj90eERGpT63gHzBgANLS0pCeng4A6Nu3LwwNDR+rw7y8PGzZsgVXrlyBsbExJk6ciL179yI6Ovqx2iMiokejVvADQHJyMrKzs1FXV6cc9pk+ffpjdVpXV4fq6moYGhqiqqoKDg4Oj9UOERE9OrWCf9q0acjIyICfn5/ylk6ZTPZYwe/o6IjXX38dzs7OMDY2xvDhwzF8+PBG28XFxSEuLg4AUFRU9Mj9EBHRw6kV/CkpKbhy5QpkMtkTd1hSUoKDBw8iKysLXbp0wYQJE7Br1y5ERUWpbKdQKKBQKAAAgYGBT9wvERHdo9ZdPd7e3rh161ardHj8+HH06NED1tbWMDQ0xLhx4/D999+3SttERNQyta74i4uL4enpiaCgIBgZGSmXHzp06JE7dHZ2xtmzZ1FVVQVjY2MkJSXxip6ISIvUCv6lS5e2WofBwcGIiIiAv78/5HI5+vfvrxzSISIizVMr+AcPHowbN27g2rVrCA0NRVVVFerr6x+702XLlmHZsmWP/XwiInp8ao3xf/zxx4iIiMBf/vIXAPfuxR8zZowm6yIiIg1RK/jff/99nDlzBubm5gDufSnLb7/9ptHCiIhIM9QKfiMjI3Ts2FH5uK6urlVu7SQiIu1TK/gHDx6M2NhYVFdX49ixY5gwYQJeeOEFTddGREQaoFbwr169GtbW1vDx8cFHH32E5557jt+8RUSkp9S6q8fAwAAxMTGIiYnRdD1ERKRhagV/jx49Hjqmn5mZ2eoFERGRZqk9V899NTU12L9/P27fvq2xooiISHPUGuO3tLRU/nN0dMT8+fNx+PBhTddGREQaoNYV/4Nfu9jQ0ICUlBTU1dVprCgiajtKSkpg6+jc7Db29va4mPKjliqiJ6VW8L/22mv//wS5HK6urvj00081VhQRtR0NDQIhS5r/qtWTyyZpqRpqDWoF/8mTJzVdBxERaYlawb9hw4Zm1y9cuLBViiEiIs1T+66e5ORkjB49GgDw5ZdfIigoCL1799ZocURE1PrUCv7c3FycP38eZmZmAO7Nzz9q1Cjs2rVLo8UREVHrU+t2zsLCQpVJ2jp27IjCwkKNFUVERJqj1hX/9OnTERQUhLFjxwIAvvjiC8yYMUOjhRERkWaoFfz/+Mc/8Oyzz+L06dMAgG3btqF///4aLYyIiDRDraEeAKiqqoK5uTnmzZsHJycnZGVlabIuIiLSELWCf9myZVizZg1WrVoFAKitrUVUVJRGCyMiIs1QK/g///xzHDp0CCYmJgAABwcHlJeXa7QwIiLSDLWCv2PHjpDJZMqpmSsrK5+o09LSUkRERMDd3R0eHh744Ycfnqg9Ii
JSn1rBP3HiRPzlL39BaWkpPv74Y4SGhj7Rl7LMmzcPI0eORFpaGlJTU+Hh4fHYbRER0aNp8a4eIQQmTZqEtLQ0mJubIz09HcuXL0dYWNhjdVhWVoZvv/0W27dvB3Dvr4kHPyNARESa1WLwy2QyPPfcc7h06dJjh/2DsrKyYG1tjRdffBGpqakICAjA5s2ble8f3BcXF4e4uDgAQFFR0RP3S0RE96g11OPv74/k5ORW6bCurg7nz5/Hyy+/jAsXLsDExASrV69utJ1CoUBKSgpSUlJgbW3dKn0TEZGawf/jjz/iqaeeQs+ePdGvXz/4+PigX79+j9Whk5MTnJycEBwcDACIiIhQ+aIXIiLSrGaHem7evAlnZ2d8/fXXrdahnZ0dunfvjvT0dPTt2xdJSUnw9PRstfaJiKh5zQb/mDFjcP78ebi4uGD8+PH4z3/+0yqdvvvuu4iMjMTdu3fh5uaGbdu2tUq7RETUsmaDXwih/H9mZmardern54eUlJRWa4+IiNTX7Bj//Q9s/fn/RESkv5q94k9NTYW5uTmEEKiuroa5uTmAe38JyGQy3LlzRytFEhFR62k2+Ovr67VVBxERaYna0zITEVH7wOAnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJDIOfiEhiGPxERBKjs+Cvr69H//798fzzz+uqBCIiSdJZ8G/evBkeHh666p6ISLJ0Evy5ubk4fPgwZs+erYvuiYgkTSfBP3/+fKxduxYGBnyLgYhI2+Ta7jAhIQE2NjYICAjAN9980+R2cXFxiIuLAwAUFRVpqToiehwlJSWwdXRucr29vT0upvyoxYqoOVoP/jNnzuDQoUP46quvUFNTgzt37iAqKgq7du1S2U6hUEChUAAAAgMDtV0mET2ChgaBkCX7mlx/ctkkLVZDLdH6WMuqVauQm5uL7Oxs7N27F0OHDm0U+kREpDkcZCcikhitD/U8aMiQIRgyZIguSyAikhxe8RMRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMTj+5S0R0n19gMAoKCprdhrN8tg4GPxG1CQUFBc3O8Alwls/WwqEeIiKJYfATEUkMg5+ISGIY/EREEsPgJyKSGAY/EZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJjNaDPycnByEhIfD09ISXlxc2b96s7RKIiCRN63P1yOVyrF+/Hv7+/igvL0dAQADCwsLg6emp7VKIiCRJ61f89vb28Pf3BwCYmZnBw8MDeXl52i6DiEiydDo7Z3Z2Ni5cuIDg4OBG6+Li4hAXFwcAKCoqeuw+ONUrke6VlJTA1tG5+W1KS7VTTBuiq3zSWfBXVFRg/Pjx2LRpE8zNzRutVygUUCgUAIDAwMDH7odTvRLpXkODaPHn8LO5YVqqpu3QVT7p5K6e2tpajB8/HpGRkRg3bpwuSiAikiytB78QArNmzYKHhwcWLlyo7e6JiCRP68F/5swZ7Ny5EydOnICfnx/8/Pzw1VdfabsMIiLJ0voY/9NPPw0hhLa7JSKi/8NP7hIRSQyDn4hIYhj8REQSw+AnIpIYBj8RkcQw+ImIJIbBT0QkMQx+IiKJYfATEUmMTqdl1ictTZ+qramdW2sa17ayP9rU0j6XV1TAzNS02TZa49i2pX70bSrklqZ3bo/nrSYw+NXU0vSp2praubWmcW0r+6NNLe3zZ3PDELJO88e2LfWjb1MhtzS9c3s8bzWBQz1ERBLD4CcikhgGPxGRxDD4iYgkhsFPRCQxDH4iIolh8BMRSQyDn4hIYhj8REQSw+AnIpIYnQR/YmIi+vbti169emH16tW6KIGISLK0Hvz19fV49dVXceTIEVy5cgV79uzBlStXtF0GEZFkaT34f/rpJ/Tq1Qtubm7o2LEjJk+ejIMHD2q7DCIiyZ
IJIYQ2O/zss8+QmJiIrVu3AgB27tyJH3/8Ee+9957KdnFxcYiLiwMApKWlwd3dvcW2i4qKYG1t3fpFa5G+7wPr1z193wfW33qys7NRXFzcaHmbnZZZoVBAoVA80nMCAwORkpKioYq0Q9/3gfXrnr7vA+vXPK0P9Tg6OiInJ0f5ODc3F46Ojtoug4hIsrQe/AMGDMC1a9eQlZWFu3fvYu/evRg9erS2yyAikiytD/XI5XK89957GDFiBOrr6zFz5kx4eXm1StuPOjTUFun7PrB+3dP3fWD9mqf1N3eJiEi3+MldIiKJYfATEUlMuwl+fZsGYubMmbCxsYG3t7dy2e3btxEWFobevXsjLCwMJSUlOqyweTk5OQgJCYGnpye8vLywefNmAPq1DzU1NQgKCoKvry+8vLywZMkSAEBWVhaCg4PRq1cvTJo0CXfv3tVxpc2rr69H//798fzzzwPQr/pdXV3h4+MDPz8/BAYGAtCvcwgASktLERERAXd3d3h4eOCHH35o8/vQLoJfH6eBiI6ORmJiosqy1atXY9iwYbh27RqGDRvWpn+ByeVyrF+/HleuXMHZs2fx/vvv48qVK3q1D0ZGRjhx4gRSU1Nx8eJFJCYm4uzZs/j73/+OBQsW4Pr16+jatSvi4+N1XWqzNm/eDA8PD+Vjfav/5MmTuHjxovLed306hwBg3rx5GDlyJNLS0pCamgoPD4+2vw+iHfj+++/F8OHDlY9jY2NFbGysDitST1ZWlvDy8lI+7tOnj8jPzxdCCJGfny/69Omjq9Ie2ejRo8XRo0f1dh8qKytF//79xdmzZ4WlpaWora0VQjQ+t9qanJwcMXToUJGUlCRGjRolGhoa9Kp+FxcXUVRUpLJMn86h0tJS4erqKhoaGlSWt/V9aBdX/Hl5eejevbvysZOTE/Ly8nRY0eMpLCyEvb09AMDOzg6FhYU6rkg92dnZuHDhAoKDg/VuH+rr6+Hn5wcbGxuEhYWhZ8+e6NKlC+Tye3c6t/Vzaf78+Vi7di0MDO79KP/+++96Vb9MJsPw4cMREBCgnKJFn86hrKwsWFtb48UXX0T//v0xe/ZsVFZWtvl9aBfB3x7JZDLIZDJdl9GiiooKjB8/Hps2bYK5ubnKOn3Yhw4dOuDixYvIzc3FTz/9hLS0NF2XpLaEhATY2NggICBA16U8tu+++w7nz5/HkSNH8P777+Pbb79VWd/Wz6G6ujqcP38eL7/8Mi5cuAATE5NGwzptcR/aRfC3l2kgbG1tUVBQAAAoKCiAjY2NjitqXm1tLcaPH4/IyEiMGzcOgP7tw31dunRBSEgIfvjhB5SWlqKurg5A2z6Xzpw5g0OHDsHV1RWTJ0/GiRMnMG/ePL2pH4CyNhsbG4wdOxY//fSTXp1DTk5OcHJyQnBwMAAgIiIC58+fb/P70C6Cv71MAzF69Gjs2LEDALBjxw6Eh4fruKKmCSEwa9YseHh4YOHChcrl+rQPRUVFKC0tBQBUV1fj2LFj8PDwQEhICD777DMAbXsfVq1ahdzcXGRnZ2Pv3r0YOnQodu/erTf1V1ZWory8XPn/o0ePwtvbW6/OITs7O3Tv3h3p6ekAgKSkJHh6erb9fdD1mwyt5fDhw6J3797Czc1NrFy5UtfltGjy5MnCzs5OyOVy4ejoKLZu3SqKi4vF0KFDRa9evcSwYcPE77//rusym3T69GkBQPj4+AhfX1/h6+srDh8+rFf7kJqaKvz8/ISPj4/w8vISy5YtE0IIkZGRIQYMGCB69uwpIiIiRE1NjY4rbdnJkyfFqFGjhBD6U39GRobo16+f6Nevn/D09FT+3OrTOSSEEBcuXBABAQHCx8dHhIeHi9u3b7f5feCUDUREEtMuhnqIiEh9DH4iIolh8BMRSQyDn4hIYhj8REQSw+AnvSCTyfDaa68pH7/zzjtYunRpq7QdHR2tvO9dk/bv36/8nEBbqIeki8FPesHIyAgHDhxAcXGxrktRcf8TsuqIj4
/Hxx9/jJMnT2qwIqKWMfhJL8jlcigUCmzcuLHRuj9fIZuamgIAvvnmGwwePBjh4eFwc3PD4sWLsXv3bgQFBcHHxwcZGRnK5xw/fhyBgYHo06cPEhISANybwG3RokUYMGAA+vXrh48++kjZ7jPPPIPRo0fD09OzUT179uyBj48PvL298fe//x0AsHz5cnz33XeYNWsWFi1apLK9EAJz5sxB3759ERoait9++025bvny5RgwYAC8vb2hUCgghEBGRgb8/f2V21y7dk35ePHixfD09ES/fv3w+uuvP9pBJunQ7efHiNRjYmIiysrKhIuLiygtLRXr1q0TS5YsEUIIMWPGDLF//36VbYW492lWCwsLkZ+fL2pqaoSDg4N46623hBBCbNq0ScybN0/5/BEjRoj6+npx9epV4ejoKKqrq8VHH30kVqxYIYQQoqamRgQEBIjMzExx8uRJ0blzZ5GZmdmozry8PNG9e3fx22+/idraWhESEiI+//xzIYQQgwcPFsnJyY2e85///EeEhoaKuro6kZeXJywsLJT78+AnPqOiosShQ4eEEEIMGTJEXLhwQQghxBtvvCG2bNkiiouLRZ8+fZRTBJeUlDzGkSYp4BU/6Q1zc3NMnz4dW7ZsUfs5AwYMgL29PYyMjNCzZ08MHz4cAODj44Ps7GzldhMnToSBgQF69+4NNzc3pKWl4ejRo/jkk0/g5+eH4OBg/P7777h27RoAICgoCD169GjUX3JyMoYMGQJra2vI5XJERkY2mnHyz7799ltMmTIFHTp0gIODA4YOHapcd/LkSQQHB8PHxwcnTpzAL7/8AgCYPXs2tm3bhvr6euzbtw9Tp06FhYUFOnXqhFmzZuHAgQPo3Lmz2seJpIXBT3pl/vz5iI+PR2VlpXKZXC5HQ0MDAKChoUHlqwaNjIyU/zcwMFA+NjAwUBmf//O0uTKZDEIIvPvuu7h48SIuXryIrKws5S8OExOT1t+5P6mpqcErr7yCzz77DJcuXUJMTAxqamoAAOPHj8eRI0eQkJCAgIAAWFpaQi6X46effkJERAQSEhIwcuRIjddI+onBT3qlW7dumDhxosrXCbq6uuLcuXMAgEOHDqG2tvaR292/fz8aGhqQkZGBzMxM9O3bFyNGjMC//vUvZXtXr15V+YXzMEFBQTh16hSKi4tRX1+PPXv2YPDgwc0+Z9CgQdi3bx/q6+tRUFCgfPP3fshbWVmhoqJC5X2MTp06YcSIEXj55Zfx4osvArj33QhlZWV47rnnsHHjRqSmpj7ycSBpkOu6AKJH9dprr+G9995TPo6JiUF4eDh8fX0xcuTIx7oad3Z2RlBQEO7cuYMPP/wQnTp1wuzZs5GdnQ1/f38IIWBtbY0vvvii2Xbs7e2xevVqhISEQAiBUaNGtTgl79ixY3HixAl4enrC2dkZAwcOBHDvOwJiYmLg7e0NOzs7DBgwQOV5kZGR+Pzzz5V/hZSXlyM8PBw1NTUQQmDDhg2PfBxIGjg7J5Geeuedd1BWVoYVK1bouhTSM7ziJ9JDY8eORUZGBk6cOKHrUkgP8YqfiEhi+OYuEZHEMPiJiCSGwU9EJDEMfiIiiWHwExFJzP8CkWQEIHluKswAAAAASUVORK5CYII=", | |
57 | 159 | "text/plain": [ |
58 | - "0 1\n", | |
59 | - "1 1\n", | |
60 | - "2 1\n", | |
61 | - "3 1\n", | |
62 | - "4 1\n", | |
63 | - "Name: user, dtype: int64" | |
160 | + "<Figure size 432x288 with 1 Axes>" | |
64 | 161 | ] |
65 | 162 | }, |
66 | - "execution_count": 19, | |
67 | 163 | "metadata": {}, |
68 | - "output_type": "execute_result" | |
164 | + "output_type": "display_data" | |
69 | 165 | } |
70 | 166 | ], |
167 | + "source": [ | |
168 | + "# counting the number of distinct days of data per user\n", | |
169 | + "stat_user = user_date.groupby(['user'])['local_date'].nunique().sort_values()\n", | |
170 | + "\n", | |
171 | + "ax = plt.figure()\n", | |
172 | + "ax.patch.set_facecolor('white')\n", | |
173 | + "ax = sns.histplot(stat_user)\n", | |
174 | + "ax.set_title('Distribution of number of days per user')\n", | |
175 | + "ax.set_xlabel('Number of days')\n", | |
176 | + "ax.set_ylabel('Frequency')\n", | |
177 | + "\n", | |
178 | + "# minimum number of distinct days of data a user must have to be kept\n", | |
179 | + "THRESHOLD_OF_DAYS_PER_USER = 10\n", | |
180 | + "\n", | |
181 | + "# filter out users that have fewer days of data than THRESHOLD_OF_DAYS_PER_USER\n", | |
182 | + "users_to_be_removed = stat_user[stat_user < THRESHOLD_OF_DAYS_PER_USER].index\n", | |
183 | + "\n", | |
184 | + "print(\"Threshold: {}\".format(THRESHOLD_OF_DAYS_PER_USER))\n", | |
185 | + "print(\"Users to be removed:{}\".format(list(users_to_be_removed)))\n", | |
186 | + "\n", | |
187 | + "jawbone4 = jawbone3[~jawbone3[\"user\"].isin(users_to_be_removed)]\n", | |
188 | + "\n", | |
189 | + "\n", | |
190 | + "# printing the amount of data removed\n", | |
191 | + "jawbone3_count, _ = jawbone3.shape\n", | |
192 | + "jawbone4_count, _ = jawbone4.shape\n", | |
193 | + "\n", | |
194 | + "print(\"Shape Change: {} -> {} (-{}, -{}%)\".format(\n", | |
195 | + " jawbone3_count, \n", | |
196 | + " jawbone4_count, \n", | |
197 | + " jawbone3_count - jawbone4_count, \n", | |
198 | + " round((jawbone3_count - jawbone4_count) / jawbone3_count * 100, 2)\n", | |
199 | + " )\n", | |
200 | + ")" | |
201 | + ] | |
202 | + }, | |
203 | + { | |
204 | + "cell_type": "code", | |
205 | + "execution_count": null, | |
206 | + "metadata": {}, | |
207 | + "outputs": [ | |
208 | + { | |
209 | + "ename": "NameError", | |
210 | + "evalue": "name 'users' is not defined", | |
211 | + "output_type": "error", | |
212 | + "traceback": [ | |
213 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
214 | + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
215 | + "\u001b[0;32m/var/folders/m6/l3x11zj94l3dp3wnxy1vnscc0000gn/T/ipykernel_50945/4152346818.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mstandard_minute_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"local_minute_index\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1440\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0ma_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0musers\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0ma_date\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0muser_date2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocal_date\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
216 | + "\u001b[0;31mNameError\u001b[0m: name 'users' is not defined" | |
217 | + ] | |
218 | + } | |
219 | + ], | |
220 | + "source": [ | |
221 | + "standard_minute_index = pd.Series(name=\"local_minute_index\", data=np.arange(0, 1440, 1))\n", | |
222 | + "\n", | |
223 | + "a_user = users[0]\n", | |
224 | + "a_date = user_date2.local_date[0]\n", | |
225 | + "\n", | |
226 | + "a_jawbone3 = jawbone3.loc[(jawbone3.user == a_user) & (jawbone3.local_date == a_date), :]\n", | |
227 | + "\n", | |
228 | + "vec = a_jawbone3[[\"local_minute_index\", \"steps\"]]\n", | |
229 | + "\n", | |
230 | + "steps = [0] * 1440\n", | |
231 | + "\n", | |
232 | + "for index, row in vec.iterrows():\n", | |
233 | + " steps[row.local_minute_index] += row.steps\n", | |
234 | + "\n", | |
235 | + "print(steps)\n", | |
236 | + "steps_series = pd.Series(name=\"steps\", data=steps)\n", | |
237 | + "steps_series[\"over60\"] = (steps_series > 60) * 1\n", | |
238 | + "\n", | |
239 | + "steps_series[\"roll\"] = steps_series.rolling(window=5, min_periods=1).sum()\n", | |
240 | + "\n", | |
241 | + "steps_series.roll.plot()\n", | |
242 | + "\n", | |
243 | + "\n" | |
244 | + ] | |
245 | + }, | |
246 | + { | |
247 | + "cell_type": "code", | |
248 | + "execution_count": null, | |
249 | + "metadata": {}, | |
250 | + "outputs": [], | |
71 | 251 | "source": [] |
72 | 252 | } |
73 | 253 | ], |
requirements.txt
View file @
a5d4a03
... | ... | @@ -29,6 +29,7 @@ |
29 | 29 | Keras-Preprocessing==1.1.2 |
30 | 30 | kiwisolver==1.3.2 |
31 | 31 | libclang==12.0.0 |
32 | +line-profiler==3.4.0 | |
32 | 33 | Markdown==3.3.6 |
33 | 34 | matplotlib==3.5.1 |
34 | 35 | matplotlib-inline==0.1.3 |
... | ... | @@ -55,6 +56,8 @@ |
55 | 56 | requests==2.27.1 |
56 | 57 | requests-oauthlib==1.3.0 |
57 | 58 | rsa==4.8 |
59 | +scipy==1.7.3 | |
60 | +seaborn==0.11.2 | |
58 | 61 | six==1.16.0 |
59 | 62 | tensorboard==2.7.0 |
60 | 63 | tensorboard-data-server==0.6.1 |