Compare commits
2 Commits
f0669aafef
...
8c50a62538
| Author | SHA1 | Date | |
|---|---|---|---|
| 8c50a62538 | |||
| 782445e0df |
File diff suppressed because it is too large
Load Diff
+11
-9
@@ -4,9 +4,7 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "071f6969-c01a-4369-a763-871f5b9e65b3",
|
"id": "071f6969-c01a-4369-a763-871f5b9e65b3",
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"scrolled": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%pip install redis hiredis fastid tqdm kagglehub"
|
"%pip install redis hiredis fastid tqdm kagglehub"
|
||||||
@@ -24,7 +22,7 @@
|
|||||||
"from redis.retry import Retry\n",
|
"from redis.retry import Retry\n",
|
||||||
"from redis.backoff import ExponentialBackoff\n",
|
"from redis.backoff import ExponentialBackoff\n",
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"import kagglehub\n",
|
"import kagglehub # pyright: ignore[reportMissingTypeStubs]\n",
|
||||||
"from csv import DictReader\n",
|
"from csv import DictReader\n",
|
||||||
"from fastid import ulid\n",
|
"from fastid import ulid\n",
|
||||||
"from tqdm.notebook import tqdm\n",
|
"from tqdm.notebook import tqdm\n",
|
||||||
@@ -79,7 +77,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with full_dump.open(\"rb\") as f:\n",
|
"with full_dump.open(\"rb\") as f:\n",
|
||||||
" rows = sum(1 for line in f) - 1"
|
" rows = sum(1 for _ in f) - 1"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -89,7 +87,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"daily_matches = {}\n",
|
"daily_matches: dict[str, set[str]] = {}\n",
|
||||||
"with full_dump.open(errors=\"ignore\") as csvfile:\n",
|
"with full_dump.open(errors=\"ignore\") as csvfile:\n",
|
||||||
" reader = DictReader(csvfile, delimiter=\";\")\n",
|
" reader = DictReader(csvfile, delimiter=\";\")\n",
|
||||||
" with client.pipeline() as p:\n",
|
" with client.pipeline() as p:\n",
|
||||||
@@ -102,8 +100,8 @@
|
|||||||
" continue\n",
|
" continue\n",
|
||||||
" daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n",
|
" daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n",
|
||||||
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
|
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
|
||||||
" p.hset(f\"raw:{ulid()}\", mapping=row)\n",
|
" _ = p.hset(f\"raw:{ulid()}\", mapping=row)\n",
|
||||||
" p.execute()"
|
" _ = p.execute()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -113,7 +111,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from redis.commands.search.field import TagField, NumericField, TextField\n",
|
"from redis.commands.search.field import TagField, NumericField\n",
|
||||||
"from redis.commands.search.index_definition import IndexDefinition, IndexType\n",
|
"from redis.commands.search.index_definition import IndexDefinition, IndexType\n",
|
||||||
"\n",
|
"\n",
|
||||||
"schema = (\n",
|
"schema = (\n",
|
||||||
@@ -150,6 +148,10 @@
|
|||||||
" TagField(\"secondarygadget\")\n",
|
" TagField(\"secondarygadget\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" client.ft().dropindex()\n",
|
||||||
|
"except:\n",
|
||||||
|
" pass\n",
|
||||||
"client.ft().create_index(schema, definition=IndexDefinition(prefix=(\"raw:\",), index_type=IndexType.HASH))"
|
"client.ft().create_index(schema, definition=IndexDefinition(prefix=(\"raw:\",), index_type=IndexType.HASH))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user