Commit 57ff8482acc06369382eab4063a66356e6258929
1 parent
1ba50105eb
Exists in
main
Update data_loading.ipynb
Showing 1 changed file with 29 additions and 74 deletions Side-by-side Diff
python-notebook/data_loading.ipynb
View file @
57ff848
... | ... | @@ -279,68 +279,36 @@ |
279 | 279 | }, |
280 | 280 | { |
281 | 281 | "cell_type": "code", |
282 | - "execution_count": 108, | |
282 | + "execution_count": 110, | |
283 | 283 | "metadata": {}, |
284 | 284 | "outputs": [ |
285 | 285 | { |
286 | 286 | "name": "stdout", |
287 | 287 | "output_type": "stream", |
288 | 288 | "text": [ |
289 | - " index user local_date hour walked\n", | |
290 | - "0 0 1 2015-07-22 8 2\n", | |
291 | - "1 3 1 2015-07-22 18 2\n", | |
292 | - "2 10 1 2015-07-22 19 2\n", | |
293 | - "3 30 1 2015-07-23 8 2\n", | |
294 | - "4 50 1 2015-07-23 9 2\n", | |
295 | - " user local_date hour walked\n", | |
296 | - "0 1 2015-07-22 0 1.0\n", | |
297 | - "1 1 2015-07-22 1 1.0\n", | |
298 | - "2 1 2015-07-22 2 1.0\n", | |
299 | - "3 1 2015-07-22 3 1.0\n", | |
300 | - "4 1 2015-07-22 4 1.0\n", | |
301 | - "5 1 2015-07-22 5 1.0\n", | |
302 | - "6 1 2015-07-22 6 1.0\n", | |
303 | - "7 1 2015-07-22 7 1.0\n", | |
304 | - "8 1 2015-07-22 8 2.0\n", | |
305 | - "9 1 2015-07-22 9 1.0\n", | |
306 | - "10 1 2015-07-22 10 1.0\n", | |
307 | - "11 1 2015-07-22 11 1.0\n", | |
308 | - "12 1 2015-07-22 12 1.0\n", | |
309 | - "13 1 2015-07-22 13 1.0\n", | |
310 | - "14 1 2015-07-22 14 1.0\n", | |
311 | - "15 1 2015-07-22 15 1.0\n", | |
312 | - "16 1 2015-07-22 16 1.0\n", | |
313 | - "17 1 2015-07-22 17 1.0\n", | |
314 | - "18 1 2015-07-22 18 2.0\n", | |
315 | - "19 1 2015-07-22 19 2.0\n", | |
316 | - "20 1 2015-07-22 20 1.0\n", | |
317 | - "21 1 2015-07-22 21 1.0\n", | |
318 | - "22 1 2015-07-22 22 1.0\n", | |
319 | - "23 1 2015-07-22 23 1.0\n", | |
320 | - "24 1 2015-07-23 0 1.0\n", | |
321 | - "25 1 2015-07-23 1 1.0\n", | |
322 | - "26 1 2015-07-23 2 1.0\n", | |
323 | - "27 1 2015-07-23 3 1.0\n", | |
324 | - "28 1 2015-07-23 4 1.0\n", | |
325 | - "29 1 2015-07-23 5 1.0\n", | |
326 | - "30 1 2015-07-23 6 1.0\n", | |
327 | - "31 1 2015-07-23 7 1.0\n", | |
328 | - "32 1 2015-07-23 8 2.0\n", | |
329 | - "33 1 2015-07-23 9 2.0\n", | |
330 | - "34 1 2015-07-23 10 1.0\n", | |
331 | - "35 1 2015-07-23 11 1.0\n", | |
332 | - "36 1 2015-07-23 12 1.0\n", | |
333 | - "37 1 2015-07-23 13 1.0\n", | |
334 | - "38 1 2015-07-23 14 1.0\n", | |
335 | - "39 1 2015-07-23 15 1.0\n", | |
336 | - "40 1 2015-07-23 16 1.0\n", | |
337 | - "41 1 2015-07-23 17 1.0\n", | |
338 | - "42 1 2015-07-23 18 1.0\n", | |
339 | - "43 1 2015-07-23 19 1.0\n", | |
340 | - "44 1 2015-07-23 20 1.0\n", | |
341 | - "45 1 2015-07-23 21 1.0\n", | |
342 | - "46 1 2015-07-23 22 1.0\n", | |
343 | - "47 1 2015-07-23 23 1.0\n" | |
289 | + " index user local_date threehour walked\n", | |
290 | + "0 0 1 2015-07-22 2 2\n", | |
291 | + "1 3 1 2015-07-22 6 2\n", | |
292 | + "2 30 1 2015-07-23 2 2\n", | |
293 | + "3 50 1 2015-07-23 3 2\n", | |
294 | + "4 58 1 2015-07-23 5 2\n", | |
295 | + " user local_date threehour walked\n", | |
296 | + "0 1 2015-07-22 0 1.0\n", | |
297 | + "1 1 2015-07-22 1 1.0\n", | |
298 | + "2 1 2015-07-22 2 2.0\n", | |
299 | + "3 1 2015-07-22 3 1.0\n", | |
300 | + "4 1 2015-07-22 4 1.0\n", | |
301 | + "5 1 2015-07-22 5 1.0\n", | |
302 | + "6 1 2015-07-22 6 2.0\n", | |
303 | + "7 1 2015-07-22 7 1.0\n", | |
304 | + "8 1 2015-07-23 0 1.0\n", | |
305 | + "9 1 2015-07-23 1 1.0\n", | |
306 | + "10 1 2015-07-23 2 2.0\n", | |
307 | + "11 1 2015-07-23 3 2.0\n", | |
308 | + "12 1 2015-07-23 4 1.0\n", | |
309 | + "13 1 2015-07-23 5 2.0\n", | |
310 | + "14 1 2015-07-23 6 1.0\n", | |
311 | + "15 1 2015-07-23 7 1.0\n" | |
344 | 312 | ] |
345 | 313 | } |
346 | 314 | ], |
347 | 315 | |
... | ... | @@ -381,28 +349,15 @@ |
381 | 349 | "measured_hour = product_df(walk_by_hours[[\"user\", \"local_date\"]], hours[[\"hour\"]])\n", |
382 | 350 | "measured_threehour = product_df(walk_by_threehours[[\"user\", \"local_date\"]], threehours[[\"threehour\"]])\n", |
383 | 351 | "\n", |
384 | - "\n", | |
385 | - "\n", | |
386 | - "print(walk_by_threehours)\n", | |
387 | - "\n", | |
352 | + "# pad the hourly walk data (fill in missing hours with 1s)\n", | |
388 | 353 | "padded_hours = walk_by_hours.merge(measured_hour, on=[\"user\", \"local_date\", \"hour\"], how=\"right\")\n", |
389 | 354 | "padded_hours = padded_hours[[\"user\", \"local_date\", \"hour\", \"walked\"]]\n", |
390 | 355 | "padded_hours = padded_hours.fillna(1)\n", |
391 | 356 | "\n", |
392 | - "padded_threehours = walk_by_threehours.merge(measured_threehour, on=[\"user\", \"local_date\", \"hour\"], how=\"right\")\n", | |
393 | - "padded_threehours = padded_threehours[[\"user\", \"local_date\", \"hour\", \"walked\"]]\n", | |
394 | - "padded_threehours = padded_threehours.fillna(1)\n", | |
395 | - "\n", | |
396 | - "print(padded_threehours)\n", | |
397 | - "\n", | |
398 | - "# walk_by_hour = consecutive_minutes.groupby([\"user\", \"local_date\", \"hour\"])[\"add_count\"].sum().reset_index()\n", | |
399 | - "# walk_by_threehour = consecutive_minutes.groupby([\"user\", \"local_date\", \"threehour\"])[\"add_count\"].sum().reset_index()\n", | |
400 | - "\n", | |
401 | - "# walk_by_hour[\"walked\"] = 1\n", | |
402 | - "# walk_by_threehour[\"walked\"] = 1\n", | |
403 | - "\n", | |
404 | - "\n", | |
405 | - "# hours2 = walk_by_hour.merge(hours, left_on=\"hour\", right_on=\"hour\", how=\"right\")\n" | |
357 | + "# pad the three-hour walk data (fill in missing three-hour slots with 1s)\n", | |
358 | + "padded_threehours = walk_by_threehours.merge(measured_threehour, on=[\"user\", \"local_date\", \"threehour\"], how=\"right\")\n", | |
359 | + "padded_threehours = padded_threehours[[\"user\", \"local_date\", \"threehour\", \"walked\"]]\n", | |
360 | + "padded_threehours = padded_threehours.fillna(1)" | |
406 | 361 | ] |
407 | 362 | }, |
408 | 363 | { |