Feat: data persistence in shared volume

Implemented a template to enable data persistence even when a volume is shared across containers.
This commit is contained in:
2026-04-20 11:39:35 +02:00
parent 11330e6b89
commit a9f9c834be
8 changed files with 113 additions and 18633 deletions
+7 -54
View File
@@ -2,39 +2,17 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "05c0de49-0eb8-4cd2-89b2-c312111dda85",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: redis in /opt/conda/lib/python3.13/site-packages (7.4.0)\n",
"Requirement already satisfied: hiredis in /opt/conda/lib/python3.13/site-packages (3.3.1)\n",
"Requirement already satisfied: fastid in /opt/conda/lib/python3.13/site-packages (0.0.5)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.13/site-packages (4.67.3)\n",
"Requirement already satisfied: kagglehub in /opt/conda/lib/python3.13/site-packages (1.0.0)\n",
"Requirement already satisfied: kagglesdk<1.0,>=0.1.14 in /opt/conda/lib/python3.13/site-packages (from kagglehub) (0.1.16)\n",
"Requirement already satisfied: packaging in /opt/conda/lib/python3.13/site-packages (from kagglehub) (26.0)\n",
"Requirement already satisfied: pyyaml in /opt/conda/lib/python3.13/site-packages (from kagglehub) (6.0.3)\n",
"Requirement already satisfied: requests in /opt/conda/lib/python3.13/site-packages (from kagglehub) (2.32.5)\n",
"Requirement already satisfied: protobuf in /opt/conda/lib/python3.13/site-packages (from kagglesdk<1.0,>=0.1.14->kagglehub) (6.33.5)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.4.6)\n",
"Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.11)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2.6.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2026.2.25)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install redis hiredis fastid tqdm kagglehub"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "90326242-f141-4035-9053-d3aab6cc9224",
"metadata": {},
"outputs": [],
@@ -49,25 +27,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "50b9f473-8924-4d03-acf8-71ecf25e54a8",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "887890574a2048a587b6ed4dd8eef7c6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"from fastid import ulid\n",
"from tqdm.notebook import trange\n",
@@ -80,20 +43,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "23933aa3-ff5d-45f9-b402-59e58c02a2b3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'OK'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"schema = (\n",
" TextField(\"name\"),\n",
File diff suppressed because it is too large Load Diff
+11 -84
View File
@@ -2,39 +2,17 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "071f6969-c01a-4369-a763-871f5b9e65b3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: redis in /opt/conda/lib/python3.13/site-packages (7.4.0)\n",
"Requirement already satisfied: hiredis in /opt/conda/lib/python3.13/site-packages (3.3.1)\n",
"Requirement already satisfied: fastid in /opt/conda/lib/python3.13/site-packages (0.0.5)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.13/site-packages (4.67.3)\n",
"Requirement already satisfied: kagglehub in /opt/conda/lib/python3.13/site-packages (1.0.0)\n",
"Requirement already satisfied: kagglesdk<1.0,>=0.1.14 in /opt/conda/lib/python3.13/site-packages (from kagglehub) (0.1.16)\n",
"Requirement already satisfied: packaging in /opt/conda/lib/python3.13/site-packages (from kagglehub) (26.0)\n",
"Requirement already satisfied: pyyaml in /opt/conda/lib/python3.13/site-packages (from kagglehub) (6.0.3)\n",
"Requirement already satisfied: requests in /opt/conda/lib/python3.13/site-packages (from kagglehub) (2.32.5)\n",
"Requirement already satisfied: protobuf in /opt/conda/lib/python3.13/site-packages (from kagglesdk<1.0,>=0.1.14->kagglehub) (6.33.5)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.4.6)\n",
"Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (3.11)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2.6.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.13/site-packages (from requests->kagglehub) (2026.2.25)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install redis hiredis fastid tqdm kagglehub"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "526cb932-59aa-489e-bbba-954ac645b633",
"metadata": {},
"outputs": [],
@@ -66,20 +44,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "be4c5461-1db2-4226-b71e-cdc7f06615bd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Downsample of full dump present: True'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"dataset = Path(kagglehub.dataset_download(\"awesomizer/rainbox-six-siege-dataset\"))\n",
"downsampled = dataset/\"downsampled_S5.csv\"\n",
@@ -88,25 +56,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "2efb5699-cced-4df0-9d76-278e23874436",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2fa9c64b42394db7a5764896d7d936d0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"with downsampled.open(\"rb\") as f:\n",
" rows = sum(1 for _ in tqdm(f)) - 1"
@@ -114,25 +67,10 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "9177760b-eec4-4ab4-8242-0dd8df5db9dd",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f235da3945864d409e81037d42269756",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Importing data into Redis...: 0%| | 0/102698 [00:00<?, ?row/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"with downsampled.open(errors=\"ignore\") as csvfile:\n",
" reader = DictReader(csvfile)\n",
@@ -144,21 +82,10 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "d8b75e35-8bef-4b2b-ae25-1982076c73fd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'OK'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from redis.commands.search.field import TagField, NumericField\n",
"from redis.commands.search.index_definition import IndexDefinition, IndexType\n",
File diff suppressed because it is too large Load Diff