feat: update data loading code
This commit is contained in:
+26
-80
@@ -2,39 +2,19 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"id": "071f6969-c01a-4369-a763-871f5b9e65b3",
|
"id": "071f6969-c01a-4369-a763-871f5b9e65b3",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
"outputs": [
|
"scrolled": true
|
||||||
{
|
},
|
||||||
"name": "stdout",
|
"outputs": [],
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: redis in /opt/conda/lib/python3.13/site-packages (7.4.0)\n",
|
|
||||||
"Requirement already satisfied: hiredis in /opt/conda/lib/python3.13/site-packages (3.3.1)\n",
|
|
||||||
"Requirement already satisfied: fastid in /opt/conda/lib/python3.13/site-packages (0.0.5)\n",
|
|
||||||
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.13/site-packages (4.67.3)\n",
|
|
||||||
"Requirement already satisfied: kagglehub in /opt/conda/lib/python3.13/site-packages (1.0.0)\n",
|
|
||||||
"Requirement already satisfied: kagglesdk<1.0,>=0.1.14 in /opt/conda/lib/python3.13/site-packages (from kagglehub) (0.1.16)\n",
|
|
||||||
"Requirement already satisfied: packaging in /opt/conda/lib/python3.13/site-packages (from kagglehub) (26.0)\n",
|
|
||||||
"Requirement already satisfied: pyyaml in /opt/conda/lib/python3.13/site-packages (from kagglehub) (6.0.3)\n",
|
|
||||||
"Requirement already satisfied: requests in /opt/conda/lib/python3.13/site-packages (from kagglehub) (2.32.5)\n",
|
|
||||||
"Requirement already satisfied: protobuf in /opt/conda/lib/python3.13/site-packages (from kagglesdk<1.0,>=0.1.14->kagglehub) (6.33.5)\n",
|
|
||||||
"Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.4.6)\n",
|
|
||||||
"Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.11)\n",
|
|
||||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2.6.3)\n",
|
|
||||||
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2026.2.25)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"%pip install redis hiredis fastid tqdm kagglehub"
|
"%pip install redis hiredis fastid tqdm kagglehub"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"id": "526cb932-59aa-489e-bbba-954ac645b633",
|
"id": "526cb932-59aa-489e-bbba-954ac645b633",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -66,18 +46,10 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"id": "8d0875b5-f004-44fc-9519-89fc73c42b77",
|
"id": "8d0875b5-f004-44fc-9519-89fc73c42b77",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Path to dataset files: /home/jovyan/.cache/kagglehub/datasets/awesomizer/rainbox-six-siege-dataset/versions/1\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"dataset = Path(kagglehub.dataset_download(\"awesomizer/rainbox-six-siege-dataset\"))\n",
|
"dataset = Path(kagglehub.dataset_download(\"awesomizer/rainbox-six-siege-dataset\"))\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -86,50 +58,22 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"id": "be4c5461-1db2-4226-b71e-cdc7f06615bd",
|
"id": "be4c5461-1db2-4226-b71e-cdc7f06615bd",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"True"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"True"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"True"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"full_dump = dataset/\"datadump_S5\"/\"datadump_S5.csv\"\n",
|
"full_dump = dataset/\"datadump_S5\"/\"datadump_S5.csv\"\n",
|
||||||
"objectives = dataset/\"datadump_S5_summary_objectives\"/\"datadump_S5_summary_objectives.csv\"\n",
|
"objectives = dataset/\"datadump_S5_summary_objectives\"/\"datadump_S5_summary_objectives.csv\"\n",
|
||||||
"operator_loadouts = dataset/\"dataDump_s5_summary_operator_loadout\"/\"dataDump_s5_summary_operator_loadout.csv\"\n",
|
"operator_loadouts = dataset/\"dataDump_s5_summary_operator_loadout\"/\"dataDump_s5_summary_operator_loadout.csv\"\n",
|
||||||
"display(full_dump.exists())\n",
|
"display(f\"Full dump present: {full_dump.exists()}\")\n",
|
||||||
"display(objectives.exists())\n",
|
"display(f\"Objective picks present: {objectives.exists()}\")\n",
|
||||||
"display(operator_loadouts.exists())"
|
"display(f\"Operator loadouts present: {operator_loadouts.exists()}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": null,
|
||||||
"id": "2efb5699-cced-4df0-9d76-278e23874436",
|
"id": "2efb5699-cced-4df0-9d76-278e23874436",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -148,16 +92,18 @@
|
|||||||
"daily_matches = {}\n",
|
"daily_matches = {}\n",
|
||||||
"with full_dump.open(errors=\"ignore\") as csvfile:\n",
|
"with full_dump.open(errors=\"ignore\") as csvfile:\n",
|
||||||
" reader = DictReader(csvfile, delimiter=\";\")\n",
|
" reader = DictReader(csvfile, delimiter=\";\")\n",
|
||||||
" for i, row in enumerate(tqdm(reader, desc=\"Importing raw data into Redis...\", total=rows, unit=\"row\")):\n",
|
" with client.pipeline() as p:\n",
|
||||||
" if row[\"dateid\"] not in daily_matches: # There's a ton of data so we just select two matches per day\n",
|
" for i, row in enumerate(tqdm(reader, desc=\"Importing raw data into Redis...\", total=rows, unit=\"row\")):\n",
|
||||||
" daily_matches[row[\"dateid\"]] = set((row[\"matchid\"],))\n",
|
" if row[\"dateid\"] not in daily_matches: # There's a ton of data so we just select up to twenty matches per day\n",
|
||||||
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
|
" daily_matches[row[\"dateid\"]] = set((row[\"matchid\"],))\n",
|
||||||
" if row[\"matchid\"] not in daily_matches[row[\"dateid\"]]: # Rows from a given day that aren't related to a specific match are ignored\n",
|
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
|
||||||
" if len(daily_matches[row[\"dateid\"]]) >= 2: # Skip only if we already have two matches picked\n",
|
" if row[\"matchid\"] not in daily_matches[row[\"dateid\"]]: # Rows from a given day that aren't related to a specific match are ignored\n",
|
||||||
" continue\n",
|
" if len(daily_matches[row[\"dateid\"]]) >= 20: # Skip only if we already have twenty matches picked\n",
|
||||||
" daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n",
|
" continue\n",
|
||||||
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
|
" daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n",
|
||||||
" client.hset(f\"raw:{ulid()}\", mapping=row)"
|
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
|
||||||
|
" p.hset(f\"raw:{ulid()}\", mapping=row)\n",
|
||||||
|
" p.execute()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user