Commit 57ff8482acc06369382eab4063a66356e6258929

Authored by Junghwan Park
1 parent 1ba50105eb
Exists in main

Update data_loading.ipynb

Showing 1 changed file with 29 additions and 74 deletions Side-by-side Diff

python-notebook/data_loading.ipynb View file @ 57ff848
... ... @@ -279,68 +279,36 @@
279 279 },
280 280 {
281 281 "cell_type": "code",
282   - "execution_count": 108,
  282 + "execution_count": 110,
283 283 "metadata": {},
284 284 "outputs": [
285 285 {
286 286 "name": "stdout",
287 287 "output_type": "stream",
288 288 "text": [
289   - " index user local_date hour walked\n",
290   - "0 0 1 2015-07-22 8 2\n",
291   - "1 3 1 2015-07-22 18 2\n",
292   - "2 10 1 2015-07-22 19 2\n",
293   - "3 30 1 2015-07-23 8 2\n",
294   - "4 50 1 2015-07-23 9 2\n",
295   - " user local_date hour walked\n",
296   - "0 1 2015-07-22 0 1.0\n",
297   - "1 1 2015-07-22 1 1.0\n",
298   - "2 1 2015-07-22 2 1.0\n",
299   - "3 1 2015-07-22 3 1.0\n",
300   - "4 1 2015-07-22 4 1.0\n",
301   - "5 1 2015-07-22 5 1.0\n",
302   - "6 1 2015-07-22 6 1.0\n",
303   - "7 1 2015-07-22 7 1.0\n",
304   - "8 1 2015-07-22 8 2.0\n",
305   - "9 1 2015-07-22 9 1.0\n",
306   - "10 1 2015-07-22 10 1.0\n",
307   - "11 1 2015-07-22 11 1.0\n",
308   - "12 1 2015-07-22 12 1.0\n",
309   - "13 1 2015-07-22 13 1.0\n",
310   - "14 1 2015-07-22 14 1.0\n",
311   - "15 1 2015-07-22 15 1.0\n",
312   - "16 1 2015-07-22 16 1.0\n",
313   - "17 1 2015-07-22 17 1.0\n",
314   - "18 1 2015-07-22 18 2.0\n",
315   - "19 1 2015-07-22 19 2.0\n",
316   - "20 1 2015-07-22 20 1.0\n",
317   - "21 1 2015-07-22 21 1.0\n",
318   - "22 1 2015-07-22 22 1.0\n",
319   - "23 1 2015-07-22 23 1.0\n",
320   - "24 1 2015-07-23 0 1.0\n",
321   - "25 1 2015-07-23 1 1.0\n",
322   - "26 1 2015-07-23 2 1.0\n",
323   - "27 1 2015-07-23 3 1.0\n",
324   - "28 1 2015-07-23 4 1.0\n",
325   - "29 1 2015-07-23 5 1.0\n",
326   - "30 1 2015-07-23 6 1.0\n",
327   - "31 1 2015-07-23 7 1.0\n",
328   - "32 1 2015-07-23 8 2.0\n",
329   - "33 1 2015-07-23 9 2.0\n",
330   - "34 1 2015-07-23 10 1.0\n",
331   - "35 1 2015-07-23 11 1.0\n",
332   - "36 1 2015-07-23 12 1.0\n",
333   - "37 1 2015-07-23 13 1.0\n",
334   - "38 1 2015-07-23 14 1.0\n",
335   - "39 1 2015-07-23 15 1.0\n",
336   - "40 1 2015-07-23 16 1.0\n",
337   - "41 1 2015-07-23 17 1.0\n",
338   - "42 1 2015-07-23 18 1.0\n",
339   - "43 1 2015-07-23 19 1.0\n",
340   - "44 1 2015-07-23 20 1.0\n",
341   - "45 1 2015-07-23 21 1.0\n",
342   - "46 1 2015-07-23 22 1.0\n",
343   - "47 1 2015-07-23 23 1.0\n"
  289 + " index user local_date threehour walked\n",
  290 + "0 0 1 2015-07-22 2 2\n",
  291 + "1 3 1 2015-07-22 6 2\n",
  292 + "2 30 1 2015-07-23 2 2\n",
  293 + "3 50 1 2015-07-23 3 2\n",
  294 + "4 58 1 2015-07-23 5 2\n",
  295 + " user local_date threehour walked\n",
  296 + "0 1 2015-07-22 0 1.0\n",
  297 + "1 1 2015-07-22 1 1.0\n",
  298 + "2 1 2015-07-22 2 2.0\n",
  299 + "3 1 2015-07-22 3 1.0\n",
  300 + "4 1 2015-07-22 4 1.0\n",
  301 + "5 1 2015-07-22 5 1.0\n",
  302 + "6 1 2015-07-22 6 2.0\n",
  303 + "7 1 2015-07-22 7 1.0\n",
  304 + "8 1 2015-07-23 0 1.0\n",
  305 + "9 1 2015-07-23 1 1.0\n",
  306 + "10 1 2015-07-23 2 2.0\n",
  307 + "11 1 2015-07-23 3 2.0\n",
  308 + "12 1 2015-07-23 4 1.0\n",
  309 + "13 1 2015-07-23 5 2.0\n",
  310 + "14 1 2015-07-23 6 1.0\n",
  311 + "15 1 2015-07-23 7 1.0\n"
344 312 ]
345 313 }
346 314 ],
347 315  
... ... @@ -381,28 +349,15 @@
381 349 "measured_hour = product_df(walk_by_hours[[\"user\", \"local_date\"]], hours[[\"hour\"]])\n",
382 350 "measured_threehour = product_df(walk_by_threehours[[\"user\", \"local_date\"]], threehours[[\"threehour\"]])\n",
383 351 "\n",
384   - "\n",
385   - "\n",
386   - "print(walk_by_threehours)\n",
387   - "\n",
  352 + "# pad the hourly walk data (fill in missing hours with 1s)\n",
388 353 "padded_hours = walk_by_hours.merge(measured_hour, on=[\"user\", \"local_date\", \"hour\"], how=\"right\")\n",
389 354 "padded_hours = padded_hours[[\"user\", \"local_date\", \"hour\", \"walked\"]]\n",
390 355 "padded_hours = padded_hours.fillna(1)\n",
391 356 "\n",
392   - "padded_threehours = walk_by_threehours.merge(measured_threehour, on=[\"user\", \"local_date\", \"hour\"], how=\"right\")\n",
393   - "padded_threehours = padded_threehours[[\"user\", \"local_date\", \"hour\", \"walked\"]]\n",
394   - "padded_threehours = padded_threehours.fillna(1)\n",
395   - "\n",
396   - "print(padded_threehours)\n",
397   - "\n",
398   - "# walk_by_hour = consecutive_minutes.groupby([\"user\", \"local_date\", \"hour\"])[\"add_count\"].sum().reset_index()\n",
399   - "# walk_by_threehour = consecutive_minutes.groupby([\"user\", \"local_date\", \"threehour\"])[\"add_count\"].sum().reset_index()\n",
400   - "\n",
401   - "# walk_by_hour[\"walked\"] = 1\n",
402   - "# walk_by_threehour[\"walked\"] = 1\n",
403   - "\n",
404   - "\n",
405   - "# hours2 = walk_by_hour.merge(hours, left_on=\"hour\", right_on=\"hour\", how=\"right\")\n"
  357 + "# pad the three-hour walk data (fill in missing three-hour slots with 1s)\n",
  358 + "padded_threehours = walk_by_threehours.merge(measured_threehour, on=[\"user\", \"local_date\", \"threehour\"], how=\"right\")\n",
  359 + "padded_threehours = padded_threehours[[\"user\", \"local_date\", \"threehour\", \"walked\"]]\n",
  360 + "padded_threehours = padded_threehours.fillna(1)"
406 361 ]
407 362 },
408 363 {