feat: Initial analysis tasks

This commit is contained in:
2026-04-07 20:35:31 +02:00
parent 782445e0df
commit 8c50a62538
2 changed files with 1120 additions and 6 deletions
File diff suppressed because it is too large Load Diff
+10 -6
View File
@@ -22,7 +22,7 @@
"from redis.retry import Retry\n", "from redis.retry import Retry\n",
"from redis.backoff import ExponentialBackoff\n", "from redis.backoff import ExponentialBackoff\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"import kagglehub\n", "import kagglehub # pyright: ignore[reportMissingTypeStubs]\n",
"from csv import DictReader\n", "from csv import DictReader\n",
"from fastid import ulid\n", "from fastid import ulid\n",
"from tqdm.notebook import tqdm\n", "from tqdm.notebook import tqdm\n",
@@ -77,7 +77,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"with full_dump.open(\"rb\") as f:\n", "with full_dump.open(\"rb\") as f:\n",
" rows = sum(1 for line in f) - 1" " rows = sum(1 for _ in f) - 1"
] ]
}, },
{ {
@@ -87,7 +87,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"daily_matches = {}\n", "daily_matches: dict[str, set[str]] = {}\n",
"with full_dump.open(errors=\"ignore\") as csvfile:\n", "with full_dump.open(errors=\"ignore\") as csvfile:\n",
" reader = DictReader(csvfile, delimiter=\";\")\n", " reader = DictReader(csvfile, delimiter=\";\")\n",
" with client.pipeline() as p:\n", " with client.pipeline() as p:\n",
@@ -100,8 +100,8 @@
" continue\n", " continue\n",
" daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n", " daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n",
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n", " display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
" p.hset(f\"raw:{ulid()}\", mapping=row)\n", " _ = p.hset(f\"raw:{ulid()}\", mapping=row)\n",
" p.execute()" " _ = p.execute()"
] ]
}, },
{ {
@@ -111,7 +111,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from redis.commands.search.field import TagField, NumericField, TextField\n", "from redis.commands.search.field import TagField, NumericField\n",
"from redis.commands.search.index_definition import IndexDefinition, IndexType\n", "from redis.commands.search.index_definition import IndexDefinition, IndexType\n",
"\n", "\n",
"schema = (\n", "schema = (\n",
@@ -148,6 +148,10 @@
" TagField(\"secondarygadget\")\n", " TagField(\"secondarygadget\")\n",
")\n", ")\n",
"\n", "\n",
"try:\n",
" client.ft().dropindex()\n",
"except:\n",
" pass\n",
"client.ft().create_index(schema, definition=IndexDefinition(prefix=(\"raw:\",), index_type=IndexType.HASH))" "client.ft().create_index(schema, definition=IndexDefinition(prefix=(\"raw:\",), index_type=IndexType.HASH))"
] ]
}, },