From f0669aafef857f6042e554c34f2c59d896c7ace0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radek=20Gol=C3=A1=C5=88=20jr?= Date: Tue, 7 Apr 2026 14:13:05 +0200 Subject: [PATCH] feat: update data loading code --- notebooks/r6data.ipynb | 106 ++++++++++------------------------------- 1 file changed, 26 insertions(+), 80 deletions(-) diff --git a/notebooks/r6data.ipynb b/notebooks/r6data.ipynb index d2d55f3..4a95142 100644 --- a/notebooks/r6data.ipynb +++ b/notebooks/r6data.ipynb @@ -2,39 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "071f6969-c01a-4369-a763-871f5b9e65b3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: redis in /opt/conda/lib/python3.13/site-packages (7.4.0)\n", - "Requirement already satisfied: hiredis in /opt/conda/lib/python3.13/site-packages (3.3.1)\n", - "Requirement already satisfied: fastid in /opt/conda/lib/python3.13/site-packages (0.0.5)\n", - "Requirement already satisfied: tqdm in /opt/conda/lib/python3.13/site-packages (4.67.3)\n", - "Requirement already satisfied: kagglehub in /opt/conda/lib/python3.13/site-packages (1.0.0)\n", - "Requirement already satisfied: kagglesdk<1.0,>=0.1.14 in /opt/conda/lib/python3.13/site-packages (from kagglehub) (0.1.16)\n", - "Requirement already satisfied: packaging in /opt/conda/lib/python3.13/site-packages (from kagglehub) (26.0)\n", - "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.13/site-packages (from kagglehub) (6.0.3)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.13/site-packages (from kagglehub) (2.32.5)\n", - "Requirement already satisfied: protobuf in /opt/conda/lib/python3.13/site-packages (from kagglesdk<1.0,>=0.1.14->kagglehub) (6.33.5)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.4.6)\n", - "Requirement already satisfied: 
idna<4,>=2.5 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.11)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2.6.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2026.2.25)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "%pip install redis hiredis fastid tqdm kagglehub" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "526cb932-59aa-489e-bbba-954ac645b633", "metadata": {}, "outputs": [], @@ -66,18 +46,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "8d0875b5-f004-44fc-9519-89fc73c42b77", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Path to dataset files: /home/jovyan/.cache/kagglehub/datasets/awesomizer/rainbox-six-siege-dataset/versions/1\n" - ] - } - ], + "outputs": [], "source": [ "dataset = Path(kagglehub.dataset_download(\"awesomizer/rainbox-six-siege-dataset\"))\n", "\n", @@ -86,50 +58,22 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "be4c5461-1db2-4226-b71e-cdc7f06615bd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "full_dump = dataset/\"datadump_S5\"/\"datadump_S5.csv\"\n", "objectives = dataset/\"datadump_S5_summary_objectives\"/\"datadump_S5_summary_objectives.csv\"\n", "operator_loadouts = 
dataset/\"dataDump_s5_summary_operator_loadout\"/\"dataDump_s5_summary_operator_loadout.csv\"\n", - "display(full_dump.exists())\n", - "display(objectives.exists())\n", - "display(operator_loadouts.exists())" + "display(f\"Full dump present: {full_dump.exists()}\")\n", + "display(f\"Objective picks present: {objectives.exists()}\")\n", + "display(f\"Operator loadouts present: {operator_loadouts.exists()}\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "2efb5699-cced-4df0-9d76-278e23874436", "metadata": {}, "outputs": [], @@ -148,16 +92,18 @@ "daily_matches = {}\n", "with full_dump.open(errors=\"ignore\") as csvfile:\n", " reader = DictReader(csvfile, delimiter=\";\")\n", - " for i, row in enumerate(tqdm(reader, desc=\"Importing raw data into Redis...\", total=rows, unit=\"row\")):\n", - " if row[\"dateid\"] not in daily_matches: # There's a ton of data so we just select two matches per day\n", - " daily_matches[row[\"dateid\"]] = set((row[\"matchid\"],))\n", - " display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n", - " if row[\"matchid\"] not in daily_matches[row[\"dateid\"]]: # Rows from a given day that aren't related to a specific match are ignored\n", - " if len(daily_matches[row[\"dateid\"]]) >= 2: # Skip only if we already have two matches picked\n", - " continue\n", - " daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n", - " display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n", - " client.hset(f\"raw:{ulid()}\", mapping=row)" + " with client.pipeline() as p:\n", + " for i, row in enumerate(tqdm(reader, desc=\"Importing raw data into Redis...\", total=rows, unit=\"row\")):\n", + " if row[\"dateid\"] not in daily_matches: # There's a ton of data so we just select up to 20 matches per day\n", + " daily_matches[row[\"dateid\"]] = set((row[\"matchid\"],))\n", + " display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n", + " if row[\"matchid\"] not in 
daily_matches[row[\"dateid\"]]: # Rows from a given day that aren't related to a specific match are ignored\n", + " if len(daily_matches[row[\"dateid\"]]) >= 20: # Skip only if we already have 20 matches picked\n", + " continue\n", + " daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n", + " display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n", + " p.hset(f\"raw:{ulid()}\", mapping=row)\n", + " p.execute()" ] }, {