diff --git a/notebooks/r6_analysis.ipynb b/notebooks/r6_analysis.ipynb
new file mode 100644
index 0000000..38b3477
--- /dev/null
+++ b/notebooks/r6_analysis.ipynb
@@ -0,0 +1,1110 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "intro",
+ "metadata": {},
+ "source": [
+ "# Rainbow Six Siege Data Analysis with Redis\n",
+ "\n",
+ "This notebook performs various data analysis tasks on the R6 match data stored in Redis, leveraging Redis's built-in capabilities for aggregations, searching, and real-time statistics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "setup",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from collections import defaultdict\n",
+ "from sys import maxsize\n",
+ "\n",
+ "import pandas as pd\n",
+ "from redis.backoff import ExponentialBackoff\n",
+ "from redis.cluster import RedisCluster\n",
+ "from redis.commands.search import reducers\n",
+ "from redis.commands.search.aggregation import Asc, Desc, AggregateRequest, AggregateResult, Cursor\n",
+ "from redis.commands.search.field import TagField, NumericField, TextField\n",
+ "from redis.commands.search.query import Query\n",
+ "from redis.retry import Retry\n",
+ "from tqdm.notebook import tqdm\n",
+ "\n",
+ "# Connection settings are read from the environment so credentials do not have to\n",
+ "# be committed with the notebook; the fallbacks preserve the previous hard-coded values.\n",
+ "retry = Retry(ExponentialBackoff(), 8)\n",
+ "client = RedisCluster(\n",
+ "    host=os.environ.get(\"REDIS_HOST\", \"redis\"),\n",
+ "    port=int(os.environ.get(\"REDIS_PORT\", \"6379\")),\n",
+ "    username=os.environ.get(\"REDIS_USERNAME\", \"admin\"),\n",
+ "    password=os.environ.get(\"REDIS_PASSWORD\", \"admin\"),\n",
+ "    retry=retry,\n",
+ "    protocol=3,\n",
+ "    decode_responses=True,\n",
+ "    health_check_interval=3,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task1-header",
+ "metadata": {},
+ "source": [
+ "## Task 1: Player Performance Analysis\n",
+ "\n",
+ "Analyze player performance by operator, including kill counts and win rates."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "task1-data-prep",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GSG9-JAGER | \n",
+ " SWAT-ASH | \n",
+ " SAT-HIBANA | \n",
+ " GSG9-BANDIT | \n",
+ " GIGN-TWITCH | \n",
+ " SWAT-THERMITE | \n",
+ " SPETSNAZ-FUZE | \n",
+ " BOPE-CAVEIRA | \n",
+ " G.E.O.-JACKAL | \n",
+ " NAVYSEAL-VALKYRIE | \n",
+ " ... | \n",
+ " GSG9-IQ | \n",
+ " GIGN-MONTAGNE | \n",
+ " SAT-ECHO | \n",
+ " GSG9-BLITZ | \n",
+ " GSG9-RESERVE | \n",
+ " SPETSNAZ-TACHANKA | \n",
+ " SAS-RESERVE | \n",
+ " SWAT-RESERVE | \n",
+ " GIGN-RESERVE | \n",
+ " SPETSNAZ-RESERVE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | total_kills | \n",
+ " 5522 | \n",
+ " 5446 | \n",
+ " 3825 | \n",
+ " 3802 | \n",
+ " 3577 | \n",
+ " 3441 | \n",
+ " 3336 | \n",
+ " 3293 | \n",
+ " 3135 | \n",
+ " 2926 | \n",
+ " ... | \n",
+ " 1027 | \n",
+ " 777 | \n",
+ " 775 | \n",
+ " 425 | \n",
+ " 386 | \n",
+ " 202 | \n",
+ " 183 | \n",
+ " 112 | \n",
+ " 89 | \n",
+ " 84 | \n",
+ "
\n",
+ " \n",
+ " | total_rounds | \n",
+ " 6999 | \n",
+ " 6519 | \n",
+ " 4960 | \n",
+ " 5257 | \n",
+ " 4894 | \n",
+ " 5001 | \n",
+ " 4222 | \n",
+ " 4586 | \n",
+ " 4091 | \n",
+ " 4319 | \n",
+ " ... | \n",
+ " 1570 | \n",
+ " 1880 | \n",
+ " 1207 | \n",
+ " 782 | \n",
+ " 618 | \n",
+ " 436 | \n",
+ " 375 | \n",
+ " 206 | \n",
+ " 173 | \n",
+ " 145 | \n",
+ "
\n",
+ " \n",
+ " | won_rounds | \n",
+ " 3535 | \n",
+ " 3370 | \n",
+ " 2586 | \n",
+ " 2684 | \n",
+ " 2553 | \n",
+ " 2603 | \n",
+ " 2164 | \n",
+ " 2321 | \n",
+ " 2140 | \n",
+ " 2177 | \n",
+ " ... | \n",
+ " 785 | \n",
+ " 953 | \n",
+ " 634 | \n",
+ " 376 | \n",
+ " 317 | \n",
+ " 196 | \n",
+ " 159 | \n",
+ " 97 | \n",
+ " 78 | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ " | dead_rounds | \n",
+ " 5127 | \n",
+ " 4555 | \n",
+ " 3294 | \n",
+ " 3833 | \n",
+ " 3180 | \n",
+ " 3394 | \n",
+ " 2929 | \n",
+ " 3362 | \n",
+ " 2850 | \n",
+ " 3150 | \n",
+ " ... | \n",
+ " 1128 | \n",
+ " 1284 | \n",
+ " 775 | \n",
+ " 585 | \n",
+ " 468 | \n",
+ " 329 | \n",
+ " 297 | \n",
+ " 151 | \n",
+ " 122 | \n",
+ " 107 | \n",
+ "
\n",
+ " \n",
+ " | survival_rate | \n",
+ " 0.267466780969 | \n",
+ " 0.301273201411 | \n",
+ " 0.335887096774 | \n",
+ " 0.270876926003 | \n",
+ " 0.350224765018 | \n",
+ " 0.321335732853 | \n",
+ " 0.306252960682 | \n",
+ " 0.266899258613 | \n",
+ " 0.303348814471 | \n",
+ " 0.270664505673 | \n",
+ " ... | \n",
+ " 0.28152866242 | \n",
+ " 0.317021276596 | \n",
+ " 0.357912178956 | \n",
+ " 0.251918158568 | \n",
+ " 0.242718446602 | \n",
+ " 0.245412844037 | \n",
+ " 0.208 | \n",
+ " 0.266990291262 | \n",
+ " 0.294797687861 | \n",
+ " 0.262068965517 | \n",
+ "
\n",
+ " \n",
+ " | win_rate | \n",
+ " 0.505072153165 | \n",
+ " 0.516950452523 | \n",
+ " 0.521370967742 | \n",
+ " 0.510557352102 | \n",
+ " 0.521659174499 | \n",
+ " 0.52049590082 | \n",
+ " 0.512553292279 | \n",
+ " 0.506105538596 | \n",
+ " 0.523099486678 | \n",
+ " 0.504051863857 | \n",
+ " ... | \n",
+ " 0.5 | \n",
+ " 0.506914893617 | \n",
+ " 0.525269262635 | \n",
+ " 0.480818414322 | \n",
+ " 0.512944983819 | \n",
+ " 0.449541284404 | \n",
+ " 0.424 | \n",
+ " 0.470873786408 | \n",
+ " 0.450867052023 | \n",
+ " 0.448275862069 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6 rows × 35 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GSG9-JAGER SWAT-ASH SAT-HIBANA GSG9-BANDIT \\\n",
+ "total_kills 5522 5446 3825 3802 \n",
+ "total_rounds 6999 6519 4960 5257 \n",
+ "won_rounds 3535 3370 2586 2684 \n",
+ "dead_rounds 5127 4555 3294 3833 \n",
+ "survival_rate 0.267466780969 0.301273201411 0.335887096774 0.270876926003 \n",
+ "win_rate 0.505072153165 0.516950452523 0.521370967742 0.510557352102 \n",
+ "\n",
+ " GIGN-TWITCH SWAT-THERMITE SPETSNAZ-FUZE BOPE-CAVEIRA \\\n",
+ "total_kills 3577 3441 3336 3293 \n",
+ "total_rounds 4894 5001 4222 4586 \n",
+ "won_rounds 2553 2603 2164 2321 \n",
+ "dead_rounds 3180 3394 2929 3362 \n",
+ "survival_rate 0.350224765018 0.321335732853 0.306252960682 0.266899258613 \n",
+ "win_rate 0.521659174499 0.52049590082 0.512553292279 0.506105538596 \n",
+ "\n",
+ " G.E.O.-JACKAL NAVYSEAL-VALKYRIE ... GSG9-IQ \\\n",
+ "total_kills 3135 2926 ... 1027 \n",
+ "total_rounds 4091 4319 ... 1570 \n",
+ "won_rounds 2140 2177 ... 785 \n",
+ "dead_rounds 2850 3150 ... 1128 \n",
+ "survival_rate 0.303348814471 0.270664505673 ... 0.28152866242 \n",
+ "win_rate 0.523099486678 0.504051863857 ... 0.5 \n",
+ "\n",
+ " GIGN-MONTAGNE SAT-ECHO GSG9-BLITZ GSG9-RESERVE \\\n",
+ "total_kills 777 775 425 386 \n",
+ "total_rounds 1880 1207 782 618 \n",
+ "won_rounds 953 634 376 317 \n",
+ "dead_rounds 1284 775 585 468 \n",
+ "survival_rate 0.317021276596 0.357912178956 0.251918158568 0.242718446602 \n",
+ "win_rate 0.506914893617 0.525269262635 0.480818414322 0.512944983819 \n",
+ "\n",
+ " SPETSNAZ-TACHANKA SAS-RESERVE SWAT-RESERVE GIGN-RESERVE \\\n",
+ "total_kills 202 183 112 89 \n",
+ "total_rounds 436 375 206 173 \n",
+ "won_rounds 196 159 97 78 \n",
+ "dead_rounds 329 297 151 122 \n",
+ "survival_rate 0.245412844037 0.208 0.266990291262 0.294797687861 \n",
+ "win_rate 0.449541284404 0.424 0.470873786408 0.450867052023 \n",
+ "\n",
+ " SPETSNAZ-RESERVE \n",
+ "total_kills 84 \n",
+ "total_rounds 145 \n",
+ "won_rounds 65 \n",
+ "dead_rounds 107 \n",
+ "survival_rate 0.262068965517 \n",
+ "win_rate 0.448275862069 \n",
+ "\n",
+ "[6 rows x 35 columns]"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per-operator totals are computed inside Redis: group by operator, sum kills, wins\n",
+ "# and deaths, count the rows, then derive the survival and win ratios.\n",
+ "request = (\n",
+ "    AggregateRequest(\"*\")  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType, reportAssignmentType]\n",
+ "    .group_by(\n",
+ "        \"@operator\",\n",
+ "        reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "        reducers.count().alias(\"total_rounds\"),\n",
+ "        reducers.sum(\"@haswon\").alias(\"won_rounds\"),\n",
+ "        reducers.sum(\"@isdead\").alias(\"dead_rounds\"),\n",
+ "    )\n",
+ "    .sort_by(Desc(\"@total_kills\"))  # pyright: ignore[reportArgumentType]\n",
+ "    .apply(\n",
+ "        survival_rate=\"1-(@dead_rounds/@total_rounds)\",\n",
+ "        win_rate=\"@won_rounds/@total_rounds\",\n",
+ "    )\n",
+ "    .limit(0, 35565)\n",
+ "    .cursor(10, 1)\n",
+ ")\n",
+ "\n",
+ "# With a cursor the reply is indexed as [page, cursor_id]; keep reading pages\n",
+ "# while the returned cursor id stays positive.\n",
+ "reply = client.ft().aggregate(request)\n",
+ "operator_rows = reply[0][\"results\"]\n",
+ "while reply[1] > 0:\n",
+ "    reply = client.ft().aggregate(Cursor(reply[1]))\n",
+ "    operator_rows.extend(reply[0][\"results\"])\n",
+ "\n",
+ "# One column per operator, one row per metric.\n",
+ "attributes_by_operator = {}\n",
+ "for operator_row in operator_rows:\n",
+ "    attributes = operator_row[\"extra_attributes\"]\n",
+ "    attributes_by_operator[attributes[\"operator\"]] = attributes\n",
+ "\n",
+ "operator_data: pd.DataFrame = pd.DataFrame.from_dict(attributes_by_operator)\n",
+ "operator_data.drop(labels=\"operator\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task1-kills",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n=== TOP 10 OPERATORS BY KILLS ===\")\n",
+ "# operator_data holds one column per operator and one row per metric; the values\n",
+ "# may still be strings as returned by Redis, so convert them before ranking.\n",
+ "top_kills = pd.to_numeric(operator_data.loc[\"total_kills\"]).nlargest(10)\n",
+ "for operator, kills in top_kills.items():\n",
+ "    print(f\"{operator}: {kills} kills\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task1-winrates",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n=== TOP 10 OPERATORS BY WIN RATE (min 100 rounds) ===\")\n",
+ "# Transpose so that each operator becomes a row, and convert the metric values\n",
+ "# (possibly strings as returned by Redis) to numbers before filtering and ranking.\n",
+ "operator_rates = operator_data.loc[[\"total_rounds\", \"win_rate\"]].T.apply(pd.to_numeric)\n",
+ "filtered = operator_rates[operator_rates[\"total_rounds\"] >= 100]\n",
+ "top_wr = filtered.nlargest(10, \"win_rate\")\n",
+ "for operator, row in top_wr.iterrows():\n",
+ "    # win_rate is a 0-1 fraction, so format it as a percentage rather than appending '%'.\n",
+ "    print(f\"{operator}: {row['win_rate']:.2%} win rate ({int(row['total_rounds'])} rounds)\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task2-header",
+ "metadata": {},
+ "source": [
+ "## Task 2: Weapon Analysis\n",
+ "\n",
+ "Analyze weapon usage and effectiveness using Redis aggregation pipelines."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task2-weapons",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_weapon_stats(weapon_field=\"primaryweapon\", max_groups=10_000):\n",
+ "    \"\"\"Aggregate usage, kills and wins per weapon with FT.AGGREGATE.\n",
+ "\n",
+ "    Args:\n",
+ "        weapon_field: Name of the indexed field to group by.\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, most-used weapon first, with the keys ``weapon``,\n",
+ "        ``kills``, ``usage_count``, ``wins``, ``avg_kills_per_use`` and\n",
+ "        ``win_rate`` (a percentage).\n",
+ "    \"\"\"\n",
+ "    # redis-py's aggregate() only accepts an AggregateRequest (or a Cursor), so the\n",
+ "    # pipeline is expressed with group_by/reducers rather than a pipeline of dicts.\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            f\"@{weapon_field}\",\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "            reducers.count().alias(\"times_used\"),\n",
+ "            reducers.sum(\"@haswon\").alias(\"wins\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@times_used\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them before doing arithmetic.\n",
+ "        kills = int(float(attrs[\"total_kills\"]))\n",
+ "        used = int(float(attrs[\"times_used\"]))\n",
+ "        wins = int(float(attrs[\"wins\"]))\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"weapon\": attrs.get(weapon_field),\n",
+ "                \"kills\": kills,\n",
+ "                \"usage_count\": used,\n",
+ "                \"wins\": wins,\n",
+ "                \"avg_kills_per_use\": round(kills / used, 3) if used > 0 else 0,\n",
+ "                \"win_rate\": round(wins / used * 100, 2) if used > 0 else 0,\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"Computing primary weapon statistics...\")\n",
+ "primary_weapon_stats = get_weapon_stats(\"primaryweapon\")\n",
+ "df_primary = pd.DataFrame(primary_weapon_stats)\n",
+ "print(f\"\\nTotal unique primary weapons: {len(df_primary)}\")\n",
+ "display(df_primary.head(20))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task2-secondary",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n=== TOP 10 SECONDARY WEAPONS BY USAGE ===\")\n",
+ "# Same aggregation as for primary weapons, grouped on the secondary weapon field.\n",
+ "secondary_stats = get_weapon_stats(weapon_field=\"secondaryweapon\")\n",
+ "df_secondary = pd.DataFrame(data=secondary_stats)\n",
+ "display(df_secondary.head(n=10))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task3-header",
+ "metadata": {},
+ "source": [
+ "## Task 3: Map and Game Mode Analysis\n",
+ "\n",
+ "Analyze map popularity, round durations, and win conditions by map and game mode."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task3-maps",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_map_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate row counts, average duration/clearance and kills per map.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, most-played map first, with the keys ``map``,\n",
+ "        ``rounds_played`` (number of indexed rows for the map),\n",
+ "        ``avg_duration_sec``, ``avg_clearance_level`` and ``total_kills``.\n",
+ "    \"\"\"\n",
+ "    # NOTE(review): assumes roundduration and clearancelevel are indexed as numeric\n",
+ "    # fields - confirm against the index schema.\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@mapname\",\n",
+ "            reducers.count().alias(\"total_rounds\"),\n",
+ "            reducers.avg(\"@roundduration\").alias(\"avg_duration\"),\n",
+ "            reducers.avg(\"@clearancelevel\").alias(\"avg_clearance\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@total_rounds\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them before rounding.\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"map\": attrs.get(\"mapname\"),\n",
+ "                \"rounds_played\": int(float(attrs[\"total_rounds\"])),\n",
+ "                \"avg_duration_sec\": round(float(attrs[\"avg_duration\"]), 2),\n",
+ "                \"avg_clearance_level\": round(float(attrs[\"avg_clearance\"]), 2),\n",
+ "                \"total_kills\": int(float(attrs[\"total_kills\"])),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"Computing map statistics...\")\n",
+ "map_stats = get_map_stats()\n",
+ "df_maps = pd.DataFrame(map_stats)\n",
+ "display(df_maps.head(15))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task3-gamemode",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_gamemode_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate row counts, average duration, kills and distinct matches per game mode.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, most-played mode first, with the keys ``gamemode``,\n",
+ "        ``rounds_played`` (number of indexed rows), ``avg_duration_sec``,\n",
+ "        ``total_kills`` and ``unique_matches``.\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@gamemode\",\n",
+ "            reducers.count().alias(\"total_rounds\"),\n",
+ "            reducers.avg(\"@roundduration\").alias(\"avg_duration\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "            # Distinct matches are counted server-side instead of shipping every id.\n",
+ "            reducers.count_distinct(\"@matchid\").alias(\"unique_matches\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@total_rounds\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them before rounding.\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"gamemode\": attrs.get(\"gamemode\"),\n",
+ "                \"rounds_played\": int(float(attrs[\"total_rounds\"])),\n",
+ "                \"avg_duration_sec\": round(float(attrs[\"avg_duration\"]), 2),\n",
+ "                \"total_kills\": int(float(attrs[\"total_kills\"])),\n",
+ "                \"unique_matches\": int(float(attrs[\"unique_matches\"])),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"=== GAME MODE BREAKDOWN ===\")\n",
+ "gamemode_stats = get_gamemode_stats()\n",
+ "df_gamemode = pd.DataFrame(gamemode_stats)\n",
+ "display(df_gamemode)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task4-header",
+ "metadata": {},
+ "source": [
+ "## Task 4: Skill Rank Analysis\n",
+ "\n",
+ "Analyze performance distribution across skill ranks using Redis aggregation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task4-ranks",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_rank_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate rounds, kills, wins and deaths per skill rank.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, highest kill total first, with the keys ``rank``,\n",
+ "        ``rounds``, ``kills``, ``wins``, ``deaths``, ``avg_kills``,\n",
+ "        ``win_rate`` (a percentage) and ``kda`` (kills per death).\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@skillrank\",\n",
+ "            reducers.count().alias(\"total_rounds\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "            reducers.sum(\"@haswon\").alias(\"wins\"),\n",
+ "            reducers.sum(\"@isdead\").alias(\"deaths\"),\n",
+ "            reducers.avg(\"@nbkills\").alias(\"avg_kills\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@total_kills\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them before doing arithmetic.\n",
+ "        rounds = int(float(attrs[\"total_rounds\"]))\n",
+ "        kills = int(float(attrs[\"total_kills\"]))\n",
+ "        wins = int(float(attrs[\"wins\"]))\n",
+ "        deaths = int(float(attrs[\"deaths\"]))\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"rank\": attrs.get(\"skillrank\"),\n",
+ "                \"rounds\": rounds,\n",
+ "                \"kills\": kills,\n",
+ "                \"wins\": wins,\n",
+ "                \"deaths\": deaths,\n",
+ "                \"avg_kills\": round(float(attrs[\"avg_kills\"]), 3),\n",
+ "                \"win_rate\": round(wins / rounds * 100, 2) if rounds > 0 else 0,\n",
+ "                # max(..., 1) avoids a division by zero for ranks without deaths.\n",
+ "                \"kda\": round(kills / max(deaths, 1), 2),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"Computing skill rank statistics...\")\n",
+ "rank_stats = get_rank_stats()\n",
+ "df_ranks = pd.DataFrame(rank_stats)\n",
+ "display(df_ranks)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task4-visualization",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n=== KILLS PER ROUND BY RANK ===\")\n",
+ "# Text bar chart: one block character per 0.1 average kills.\n",
+ "for rank_row in df_ranks.itertuples(index=False):\n",
+ "    bar_length = int(rank_row.avg_kills * 10)\n",
+ "    print(f\"{rank_row.rank:15} | {'█' * bar_length} {rank_row.avg_kills:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task5-header",
+ "metadata": {},
+ "source": [
+ "## Task 5: Role Distribution Analysis\n",
+ "\n",
+ "Analyze the distribution of roles (Attack/Defense) and operator usage."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task5-roles",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_role_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate rounds, kills, wins and distinct operators per role.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, most-played role first, with the keys ``role``,\n",
+ "        ``rounds``, ``kills``, ``wins``, ``unique_operators`` and\n",
+ "        ``win_rate`` (a percentage).\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@role\",\n",
+ "            reducers.count().alias(\"total_rounds\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "            reducers.sum(\"@haswon\").alias(\"wins\"),\n",
+ "            # Distinct operators are counted server-side instead of collecting the names.\n",
+ "            reducers.count_distinct(\"@operator\").alias(\"unique_operators\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@total_rounds\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them before doing arithmetic.\n",
+ "        rounds = int(float(attrs[\"total_rounds\"]))\n",
+ "        wins = int(float(attrs[\"wins\"]))\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"role\": attrs.get(\"role\"),\n",
+ "                \"rounds\": rounds,\n",
+ "                \"kills\": int(float(attrs[\"total_kills\"])),\n",
+ "                \"wins\": wins,\n",
+ "                \"unique_operators\": int(float(attrs[\"unique_operators\"])),\n",
+ "                \"win_rate\": round(wins / rounds * 100, 2) if rounds > 0 else 0,\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"=== ROLE DISTRIBUTION ===\")\n",
+ "role_stats = get_role_stats()\n",
+ "df_roles = pd.DataFrame(role_stats)\n",
+ "display(df_roles)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task5-pivot",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_role_operator_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate usage and kills for every (role, operator) pair.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, most-used pair first, with the keys ``role``,\n",
+ "        ``operator``, ``usage`` and ``kills``.\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            [\"@role\", \"@operator\"],\n",
+ "            reducers.count().alias(\"count\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"kills\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@count\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them so nlargest() ranks numerically.\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"role\": attrs.get(\"role\"),\n",
+ "                \"operator\": attrs.get(\"operator\"),\n",
+ "                \"usage\": int(float(attrs[\"count\"])),\n",
+ "                \"kills\": int(float(attrs[\"kills\"])),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "role_op_stats = get_role_operator_stats()\n",
+ "df_role_op = pd.DataFrame(role_op_stats)\n",
+ "\n",
+ "# NOTE(review): the filters below assume the role values are exactly \"Attack\" and\n",
+ "# \"Defense\" - confirm against the values actually stored in the data.\n",
+ "print(\"\\n=== TOP 5 ATTACK OPERATORS ===\")\n",
+ "att_op = df_role_op[df_role_op[\"role\"] == \"Attack\"].nlargest(5, \"usage\")\n",
+ "display(att_op)\n",
+ "\n",
+ "print(\"\\n=== TOP 5 DEFENSE OPERATORS ===\")\n",
+ "def_op = df_role_op[df_role_op[\"role\"] == \"Defense\"].nlargest(5, \"usage\")\n",
+ "display(def_op)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task6-header",
+ "metadata": {},
+ "source": [
+ "## Task 6: Match-Level Analysis\n",
+ "\n",
+ "Analyze match-level statistics including round counts, total kills, and match durations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task6-matches",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_match_stats(top_n=20):\n",
+ "    \"\"\"Return per-match totals for the matches with the most kills.\n",
+ "\n",
+ "    Args:\n",
+ "        top_n: Number of matches to return.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, highest kill total first, with the keys ``match_id``,\n",
+ "        ``players`` (number of indexed rows for the match), ``total_kills``,\n",
+ "        ``rounds_played`` (distinct round numbers), ``map``, ``gamemode`` and\n",
+ "        ``max_round_duration``.\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@matchid\",\n",
+ "            reducers.count().alias(\"total_players\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "            reducers.count_distinct(\"@roundnumber\").alias(\"total_rounds\"),\n",
+ "            reducers.first_value(\"@mapname\").alias(\"map\"),\n",
+ "            reducers.first_value(\"@gamemode\").alias(\"gamemode\"),\n",
+ "            reducers.max(\"@roundduration\").alias(\"max_duration\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@total_kills\"))\n",
+ "        .limit(0, top_n)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse the numeric ones.\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"match_id\": attrs.get(\"matchid\"),\n",
+ "                \"players\": int(float(attrs[\"total_players\"])),\n",
+ "                \"total_kills\": int(float(attrs[\"total_kills\"])),\n",
+ "                \"rounds_played\": int(float(attrs[\"total_rounds\"])),\n",
+ "                \"map\": attrs.get(\"map\"),\n",
+ "                \"gamemode\": attrs.get(\"gamemode\"),\n",
+ "                \"max_round_duration\": float(attrs[\"max_duration\"]),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"Computing match statistics (top 20 most violent matches)...\")\n",
+ "match_stats = get_match_stats()\n",
+ "df_matches = pd.DataFrame(match_stats)\n",
+ "display(df_matches)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task7-header",
+ "metadata": {},
+ "source": [
+ "## Task 7: Weapon Attachment Analysis\n",
+ "\n",
+ "Analyze the most popular weapon attachments (sights, grips, barrels, etc.) using Redis aggregation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task7-attachments",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_attachment_stats(field, top_n=10, max_groups=10_000):\n",
+ "    \"\"\"Count how often each value of an attachment field occurs.\n",
+ "\n",
+ "    Args:\n",
+ "        field: Name of the indexed attachment field to group by.\n",
+ "        top_n: Number of non-empty attachment values to return.\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of at most ``top_n`` dicts, most frequent first, with the keys\n",
+ "        ``_id`` (the attachment value) and ``count``.\n",
+ "    \"\"\"\n",
+ "    # The previous query string was built from TagField.field_name(), which does not\n",
+ "    # exist; every indexed row is aggregated instead.\n",
+ "    # NOTE(review): restrict the query if only rows with a primary weapon type are wanted.\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(f\"@{field}\", reducers.count().alias(\"count\"))\n",
+ "        .sort_by(Desc(\"@count\"))\n",
+ "        # Fetch all groups: empty values are dropped client-side before truncating.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        attachment = attrs.get(field)\n",
+ "        if not attachment:\n",
+ "            # Skip rows where the attachment is missing or an empty string.\n",
+ "            continue\n",
+ "        stats.append({\"_id\": attachment, \"count\": int(float(attrs[\"count\"]))})\n",
+ "        if len(stats) == top_n:\n",
+ "            break\n",
+ "    return stats\n",
+ "\n",
+ "print(\"=== TOP 10 PRIMARY SIGHTS ===\")\n",
+ "sights = get_attachment_stats(\"primarysight\")\n",
+ "df_sights = pd.DataFrame(sights)\n",
+ "display(df_sights)\n",
+ "\n",
+ "print(\"\\n=== TOP 10 PRIMARY BARRELS ===\")\n",
+ "barrels = get_attachment_stats(\"primarybarrel\")\n",
+ "df_barrels = pd.DataFrame(barrels)\n",
+ "display(df_barrels)\n",
+ "\n",
+ "print(\"\\n=== TOP 10 PRIMARY GRIPS ===\")\n",
+ "grips = get_attachment_stats(\"primarygrip\")\n",
+ "df_grips = pd.DataFrame(grips)\n",
+ "display(df_grips)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task8-header",
+ "metadata": {},
+ "source": [
+ "## Task 8: Redis-Side Data Pre-computation and Caching\n",
+ "\n",
+ "Demonstrate pre-computing statistics and storing them in Redis for fast access using sorted sets."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task8-precompute",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def precompute_operator_leaderboard(max_groups=10_000):\n",
+ "    \"\"\"Cache per-operator kill totals in a Redis sorted set.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        The key of the sorted set (member: operator, score: total kills).\n",
+ "    \"\"\"\n",
+ "    key = \"leaderboard:operators:total_kills\"\n",
+ "\n",
+ "    # No SORTBY is needed: the sorted set orders the members by score itself.\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\"@operator\", reducers.sum(\"@nbkills\").alias(\"total_kills\"))\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    mapping = {}\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        operator = attrs.get(\"operator\")\n",
+ "        if operator:\n",
+ "            # Scores may arrive as strings; store them as numbers.\n",
+ "            mapping[operator] = float(attrs[\"total_kills\"])\n",
+ "\n",
+ "    # Replace the cached leaderboard only after the aggregation has succeeded, so a\n",
+ "    # failed query does not leave the key deleted.\n",
+ "    client.delete(key)\n",
+ "    if mapping:\n",
+ "        client.zadd(key, mapping)\n",
+ "\n",
+ "    return key\n",
+ "\n",
+ "print(\"Pre-computing operator leaderboard in Redis sorted set...\")\n",
+ "lb_key = precompute_operator_leaderboard()\n",
+ "\n",
+ "print(\"\\n=== TOP 10 OPERATORS (from Redis sorted set) ===\")\n",
+ "top_operators = client.zrevrange(lb_key, 0, 9, withscores=True)\n",
+ "for i, (op, score) in enumerate(top_operators, 1):\n",
+ "    print(f\"{i}. {op}: {int(score)} kills\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task8-winrate",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def precompute_winrate_leaderboard(min_rounds=100, max_groups=10_000):\n",
+ "    \"\"\"Cache per-operator win rates (in percent) in a Redis sorted set.\n",
+ "\n",
+ "    Args:\n",
+ "        min_rounds: Operators with fewer indexed rows than this are excluded.\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        The key of the sorted set (member: operator, score: win rate in percent).\n",
+ "    \"\"\"\n",
+ "    key = \"leaderboard:operators:winrate\"\n",
+ "\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@operator\",\n",
+ "            reducers.sum(\"@haswon\").alias(\"wins\"),\n",
+ "            reducers.count().alias(\"total\"),\n",
+ "        )\n",
+ "        # Drop rarely played operators server-side, after grouping.\n",
+ "        .filter(f\"@total >= {int(min_rounds)}\")\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    mapping = {}\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        operator = attrs.get(\"operator\")\n",
+ "        # Reducer values may arrive as strings; parse them before dividing.\n",
+ "        total = float(attrs[\"total\"])\n",
+ "        if operator and total > 0:\n",
+ "            mapping[operator] = float(attrs[\"wins\"]) / total * 100\n",
+ "\n",
+ "    # Replace the cached leaderboard only after the aggregation has succeeded, so a\n",
+ "    # failed query does not leave the key deleted.\n",
+ "    client.delete(key)\n",
+ "    if mapping:\n",
+ "        client.zadd(key, mapping)\n",
+ "\n",
+ "    return key\n",
+ "\n",
+ "print(\"Pre-computing operator win rate leaderboard...\")\n",
+ "wr_key = precompute_winrate_leaderboard()\n",
+ "\n",
+ "print(\"\\n=== TOP 10 OPERATORS BY WIN RATE (min 100 rounds) ===\")\n",
+ "top_wr = client.zrevrange(wr_key, 0, 9, withscores=True)\n",
+ "for i, (op, score) in enumerate(top_wr, 1):\n",
+ "    print(f\"{i}. {op}: {score:.2f}% win rate\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task8-rank",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def precompute_map_popularity(max_groups=10_000):\n",
+ "    \"\"\"Cache the number of indexed rows per map in a Redis sorted set.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        The key of the sorted set (member: map name, score: row count).\n",
+ "    \"\"\"\n",
+ "    key = \"leaderboard:maps:popularity\"\n",
+ "\n",
+ "    # No SORTBY is needed: the sorted set orders the members by score itself.\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\"@mapname\", reducers.count().alias(\"rounds\"))\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    mapping = {}\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        map_name = attrs.get(\"mapname\")\n",
+ "        if map_name:\n",
+ "            # Scores may arrive as strings; store them as numbers.\n",
+ "            mapping[map_name] = float(attrs[\"rounds\"])\n",
+ "\n",
+ "    # Replace the cached leaderboard only after the aggregation has succeeded, so a\n",
+ "    # failed query does not leave the key deleted.\n",
+ "    client.delete(key)\n",
+ "    if mapping:\n",
+ "        client.zadd(key, mapping)\n",
+ "\n",
+ "    return key\n",
+ "\n",
+ "print(\"Pre-computing map popularity leaderboard...\")\n",
+ "map_key = precompute_map_popularity()\n",
+ "\n",
+ "print(\"\\n=== MAP POPULARITY RANKING ===\")\n",
+ "maps = client.zrevrange(map_key, 0, 9, withscores=True)\n",
+ "for i, (map_name, score) in enumerate(maps, 1):\n",
+ "    print(f\"{i}. {map_name}: {int(score)} rounds played\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task9-header",
+ "metadata": {},
+ "source": [
+ "## Task 9: Time-Series Analysis\n",
+ "\n",
+ "Analyze data over time (by day) to find trends."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task9-daily",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_daily_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate activity per ``dateid`` value, in ascending date order.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts with the keys ``date``, ``player_actions`` (number of\n",
+ "        indexed rows), ``total_kills``, ``matches`` (distinct match ids) and\n",
+ "        ``avg_round_duration``.\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@dateid\",\n",
+ "            reducers.count().alias(\"total_players\"),\n",
+ "            reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n",
+ "            # Distinct matches are counted server-side instead of shipping every id.\n",
+ "            reducers.count_distinct(\"@matchid\").alias(\"matches\"),\n",
+ "            reducers.avg(\"@roundduration\").alias(\"avg_duration\"),\n",
+ "        )\n",
+ "        .sort_by(Asc(\"@dateid\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them so pandas can average them.\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"date\": attrs.get(\"dateid\"),\n",
+ "                \"player_actions\": int(float(attrs[\"total_players\"])),\n",
+ "                \"total_kills\": int(float(attrs[\"total_kills\"])),\n",
+ "                \"matches\": int(float(attrs[\"matches\"])),\n",
+ "                \"avg_round_duration\": round(float(attrs[\"avg_duration\"]), 2),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"Computing daily statistics...\")\n",
+ "daily_stats = get_daily_stats()\n",
+ "df_daily = pd.DataFrame(daily_stats)\n",
+ "display(df_daily)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task9-summary",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n=== DAILY ACTIVITY SUMMARY ===\")\n",
+ "matches_per_day = df_daily[\"matches\"]\n",
+ "\n",
+ "total_days = len(df_daily)\n",
+ "avg_daily_matches = matches_per_day.mean()\n",
+ "avg_daily_kills = df_daily[\"total_kills\"].mean()\n",
+ "# idxmax/idxmin return the row label of the first day with the extreme value.\n",
+ "busiest_day = df_daily.loc[matches_per_day.idxmax()]\n",
+ "calmest_day = df_daily.loc[matches_per_day.idxmin()]\n",
+ "\n",
+ "summary_lines = [\n",
+ "    f\"Total days in dataset: {total_days}\",\n",
+ "    f\"Average daily matches: {avg_daily_matches:.1f}\",\n",
+ "    f\"Average daily kills: {avg_daily_kills:.1f}\",\n",
+ "    f\"\\nBusiest day: {busiest_day['date']} with {busiest_day['matches']} matches\",\n",
+ "    f\"Calmest day: {calmest_day['date']} with {calmest_day['matches']} matches\",\n",
+ "]\n",
+ "print(\"\\n\".join(summary_lines))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "task10-header",
+ "metadata": {},
+ "source": [
+ "## Task 10: End Round Reason Analysis\n",
+ "\n",
+ "Analyze why rounds end (elimination, objective complete, hostage rescued, etc.)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task10-reasons",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_round_end_stats(max_groups=10_000):\n",
+ "    \"\"\"Aggregate row counts and average round duration per end-of-round reason.\n",
+ "\n",
+ "    Args:\n",
+ "        max_groups: Upper bound on the number of groups requested from Redis.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts, most frequent reason first, with the keys\n",
+ "        ``end_reason``, ``count``, ``percentage`` (always 0 here; the caller\n",
+ "        fills it in) and ``avg_duration``.\n",
+ "    \"\"\"\n",
+ "    request = (\n",
+ "        AggregateRequest(\"*\")\n",
+ "        .group_by(\n",
+ "            \"@endroundreason\",\n",
+ "            reducers.count().alias(\"count\"),\n",
+ "            reducers.avg(\"@roundduration\").alias(\"avg_duration\"),\n",
+ "        )\n",
+ "        .sort_by(Desc(\"@count\"))\n",
+ "        # Ask for every group explicitly instead of relying on the server's default page size.\n",
+ "        .limit(0, max_groups)\n",
+ "    )\n",
+ "    response = client.ft().aggregate(request)\n",
+ "\n",
+ "    stats = []\n",
+ "    for result in response[\"results\"]:\n",
+ "        attrs = result[\"extra_attributes\"]\n",
+ "        # Reducer values may arrive as strings; parse them so pandas can sum them.\n",
+ "        stats.append(\n",
+ "            {\n",
+ "                \"end_reason\": attrs.get(\"endroundreason\"),\n",
+ "                \"count\": int(float(attrs[\"count\"])),\n",
+ "                \"percentage\": 0,\n",
+ "                \"avg_duration\": round(float(attrs[\"avg_duration\"]), 2),\n",
+ "            }\n",
+ "        )\n",
+ "    return stats\n",
+ "\n",
+ "print(\"=== ROUND END REASONS ===\")\n",
+ "round_end_stats = get_round_end_stats()\n",
+ "df_ends = pd.DataFrame(round_end_stats)\n",
+ "total_rounds = df_ends[\"count\"].sum()\n",
+ "# Share of each end reason among all counted rows, in percent.\n",
+ "df_ends[\"percentage\"] = (df_ends[\"count\"] / total_rounds * 100).round(2)\n",
+ "display(df_ends)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "task10-visual",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n=== ROUND END DISTRIBUTION ===\")\n",
+ "# Text bar chart: one block character per two percentage points.\n",
+ "for end_row in df_ends.itertuples(index=False):\n",
+ "    bar_length = int(end_row.percentage / 2)\n",
+ "    print(f\"{end_row.end_reason:25} | {'█' * bar_length} {end_row.percentage:.1f}%\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "conclusion",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "This notebook demonstrated various data analysis tasks using Redis:\n",
+ "\n",
+ "1. **Operator Performance Analysis** - Kill counts and win rates by operator\n",
+ "2. **Weapon Analysis** - Primary and secondary weapon usage and effectiveness\n",
+ "3. **Map and Game Mode Analysis** - Popular maps and game modes\n",
+ "4. **Skill Rank Analysis** - Performance distribution across ranks\n",
+ "5. **Role Distribution Analysis** - Attack vs Defense operator usage\n",
+ "6. **Match-Level Analysis** - Individual match statistics\n",
+ "7. **Weapon Attachment Analysis** - Popular attachments (sights, grips, barrels)\n",
+ "8. **Redis-Side Pre-computation** - Using sorted sets for leaderboards\n",
+ "9. **Time-Series Analysis** - Daily activity trends\n",
+ "10. **Round End Reason Analysis** - Why rounds end\n",
+ "\n",
+ "All aggregations were performed directly in Redis with the `FT.AGGREGATE` command (called through redis-py's `client.ft().aggregate()`), demonstrating how Redis can handle complex analytical queries without moving the raw data out of the database."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/r6data.ipynb b/notebooks/r6data.ipynb
index 3faa890..cb2963f 100644
--- a/notebooks/r6data.ipynb
+++ b/notebooks/r6data.ipynb
@@ -22,7 +22,7 @@
"from redis.retry import Retry\n",
"from redis.backoff import ExponentialBackoff\n",
"from pathlib import Path\n",
- "import kagglehub\n",
+ "import kagglehub # pyright: ignore[reportMissingTypeStubs]\n",
"from csv import DictReader\n",
"from fastid import ulid\n",
"from tqdm.notebook import tqdm\n",
@@ -77,7 +77,7 @@
"outputs": [],
"source": [
"with full_dump.open(\"rb\") as f:\n",
- " rows = sum(1 for line in f) - 1"
+ " rows = sum(1 for _ in f) - 1"
]
},
{
@@ -87,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
- "daily_matches = {}\n",
+ "daily_matches: dict[str, set[str]] = {}\n",
"with full_dump.open(errors=\"ignore\") as csvfile:\n",
" reader = DictReader(csvfile, delimiter=\";\")\n",
" with client.pipeline() as p:\n",
@@ -100,8 +100,8 @@
" continue\n",
" daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n",
" display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n",
- " p.hset(f\"raw:{ulid()}\", mapping=row)\n",
- " p.execute()"
+ " _ = p.hset(f\"raw:{ulid()}\", mapping=row)\n",
+ " _ = p.execute()"
]
},
{
@@ -111,7 +111,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from redis.commands.search.field import TagField, NumericField, TextField\n",
+ "from redis.commands.search.field import TagField, NumericField\n",
"from redis.commands.search.index_definition import IndexDefinition, IndexType\n",
"\n",
"schema = (\n",
@@ -148,6 +148,10 @@
" TagField(\"secondarygadget\")\n",
")\n",
"\n",
+ "try:\n",
+ "    client.ft().dropindex()  # start from a clean index when the cell is re-run\n",
+ "except Exception:  # best effort: ignore the failure (e.g. no index yet) but let KeyboardInterrupt through\n",
+ "    pass\n",
"client.ft().create_index(schema, definition=IndexDefinition(prefix=(\"raw:\",), index_type=IndexType.HASH))"
]
},