diff --git a/notebooks/r6_analysis.ipynb b/notebooks/r6_analysis.ipynb new file mode 100644 index 0000000..38b3477 --- /dev/null +++ b/notebooks/r6_analysis.ipynb @@ -0,0 +1,1110 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "intro", + "metadata": {}, + "source": [ + "# Rainbow Six Siege Data Analysis with Redis\n", + "\n", + "This notebook performs various data analysis tasks on the R6 match data stored in Redis, leveraging Redis's built-in capabilities for aggregations, searching, and real-time statistics." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "setup", + "metadata": {}, + "outputs": [], + "source": [ + "from redis.cluster import RedisCluster\n", + "from redis.retry import Retry\n", + "from redis.backoff import ExponentialBackoff\n", + "from redis.commands.search.field import TagField, NumericField, TextField\n", + "from redis.commands.search.aggregation import Asc, Desc, AggregateRequest, AggregateResult, Cursor\n", + "from redis.commands.search import reducers\n", + "from redis.commands.search.query import Query\n", + "from tqdm.notebook import tqdm\n", + "import pandas as pd\n", + "from collections import defaultdict\n", + "from sys import maxsize\n", + "\n", + "retry = Retry(ExponentialBackoff(), 8)\n", + "client = RedisCluster(\n", + " host=\"redis\",\n", + " port=6379,\n", + " username=\"admin\",\n", + " password=\"admin\",\n", + " retry=retry,\n", + " protocol=3,\n", + " decode_responses=True,\n", + " health_check_interval=3,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "task1-header", + "metadata": {}, + "source": [ + "## Task 1: Player Performance Analysis\n", + "\n", + "Analyze player performance by operator, including kill counts and win rates." + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "task1-data-prep", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GSG9-JAGERSWAT-ASHSAT-HIBANAGSG9-BANDITGIGN-TWITCHSWAT-THERMITESPETSNAZ-FUZEBOPE-CAVEIRAG.E.O.-JACKALNAVYSEAL-VALKYRIE...GSG9-IQGIGN-MONTAGNESAT-ECHOGSG9-BLITZGSG9-RESERVESPETSNAZ-TACHANKASAS-RESERVESWAT-RESERVEGIGN-RESERVESPETSNAZ-RESERVE
total_kills5522544638253802357734413336329331352926...10277777754253862021831128984
total_rounds6999651949605257489450014222458640914319...157018801207782618436375206173145
won_rounds3535337025862684255326032164232121402177...785953634376317196159977865
dead_rounds5127455532943833318033942929336228503150...11281284775585468329297151122107
survival_rate0.2674667809690.3012732014110.3358870967740.2708769260030.3502247650180.3213357328530.3062529606820.2668992586130.3033488144710.270664505673...0.281528662420.3170212765960.3579121789560.2519181585680.2427184466020.2454128440370.2080.2669902912620.2947976878610.262068965517
win_rate0.5050721531650.5169504525230.5213709677420.5105573521020.5216591744990.520495900820.5125532922790.5061055385960.5230994866780.504051863857...0.50.5069148936170.5252692626350.4808184143220.5129449838190.4495412844040.4240.4708737864080.4508670520230.448275862069
\n", + "

6 rows × 35 columns

\n", + "
" + ], + "text/plain": [ + " GSG9-JAGER SWAT-ASH SAT-HIBANA GSG9-BANDIT \\\n", + "total_kills 5522 5446 3825 3802 \n", + "total_rounds 6999 6519 4960 5257 \n", + "won_rounds 3535 3370 2586 2684 \n", + "dead_rounds 5127 4555 3294 3833 \n", + "survival_rate 0.267466780969 0.301273201411 0.335887096774 0.270876926003 \n", + "win_rate 0.505072153165 0.516950452523 0.521370967742 0.510557352102 \n", + "\n", + " GIGN-TWITCH SWAT-THERMITE SPETSNAZ-FUZE BOPE-CAVEIRA \\\n", + "total_kills 3577 3441 3336 3293 \n", + "total_rounds 4894 5001 4222 4586 \n", + "won_rounds 2553 2603 2164 2321 \n", + "dead_rounds 3180 3394 2929 3362 \n", + "survival_rate 0.350224765018 0.321335732853 0.306252960682 0.266899258613 \n", + "win_rate 0.521659174499 0.52049590082 0.512553292279 0.506105538596 \n", + "\n", + " G.E.O.-JACKAL NAVYSEAL-VALKYRIE ... GSG9-IQ \\\n", + "total_kills 3135 2926 ... 1027 \n", + "total_rounds 4091 4319 ... 1570 \n", + "won_rounds 2140 2177 ... 785 \n", + "dead_rounds 2850 3150 ... 1128 \n", + "survival_rate 0.303348814471 0.270664505673 ... 0.28152866242 \n", + "win_rate 0.523099486678 0.504051863857 ... 
0.5 \n", + "\n", + " GIGN-MONTAGNE SAT-ECHO GSG9-BLITZ GSG9-RESERVE \\\n", + "total_kills 777 775 425 386 \n", + "total_rounds 1880 1207 782 618 \n", + "won_rounds 953 634 376 317 \n", + "dead_rounds 1284 775 585 468 \n", + "survival_rate 0.317021276596 0.357912178956 0.251918158568 0.242718446602 \n", + "win_rate 0.506914893617 0.525269262635 0.480818414322 0.512944983819 \n", + "\n", + " SPETSNAZ-TACHANKA SAS-RESERVE SWAT-RESERVE GIGN-RESERVE \\\n", + "total_kills 202 183 112 89 \n", + "total_rounds 436 375 206 173 \n", + "won_rounds 196 159 97 78 \n", + "dead_rounds 329 297 151 122 \n", + "survival_rate 0.245412844037 0.208 0.266990291262 0.294797687861 \n", + "win_rate 0.449541284404 0.424 0.470873786408 0.450867052023 \n", + "\n", + " SPETSNAZ-RESERVE \n", + "total_kills 84 \n", + "total_rounds 145 \n", + "won_rounds 65 \n", + "dead_rounds 107 \n", + "survival_rate 0.262068965517 \n", + "win_rate 0.448275862069 \n", + "\n", + "[6 rows x 35 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (AggregateRequest( # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType, reportAssignmentType]\n", + " \"*\"\n", + " ).group_by(\n", + " \"@operator\",\n", + " reducers.sum(\"@nbkills\").alias(\"total_kills\"),\n", + " reducers.count().alias(\"total_rounds\"),\n", + " reducers.sum(\"@haswon\").alias(\"won_rounds\"),\n", + " reducers.sum(\"@isdead\").alias(\"dead_rounds\")\n", + " ).sort_by(Desc(\"@total_kills\")) # pyright: ignore[reportArgumentType]\n", + " .apply(\n", + " survival_rate=\"1-(@dead_rounds/@total_rounds)\",\n", + " win_rate=\"@won_rounds/@total_rounds\"\n", + " ).limit(0, 35565).cursor(10, 1)\n", + ")\n", + "response = client.ft().aggregate(query)\n", + "rows = response[0][\"results\"]\n", + "cursor = response[1]\n", + "while cursor > 0:\n", + " response= client.ft().aggregate(Cursor(cursor))\n", + " rows.extend(response[0][\"results\"])\n", + " cursor = 
response[1]\n", + "\n", + "operator_data:pd.DataFrame = pd.DataFrame.from_dict({attrs[\"extra_attributes\"][\"operator\"]: attrs[\"extra_attributes\"] for attrs in rows})\n", + "operator_data.drop(labels=\"operator\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task1-kills", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== TOP 10 OPERATORS BY KILLS ===\")\n", + "top_kills = df_operators.nlargest(10, \"kills\")[[\"operator\", \"kills\"]]\n", + "for _, row in top_kills.iterrows():\n", + " print(f\"{row['operator']}: {row['kills']} kills\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task1-winrates", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== TOP 10 OPERATORS BY WIN RATE (min 100 rounds) ===\")\n", + "filtered = df_operators[df_operators[\"rounds\"] >= 100]\n", + "top_wr = filtered.nlargest(10, \"win_rate\")[[\"operator\", \"win_rate\", \"rounds\"]]\n", + "for _, row in top_wr.iterrows():\n", + " print(f\"{row['operator']}: {row['win_rate']}% win rate ({row['rounds']} rounds)\")" + ] + }, + { + "cell_type": "markdown", + "id": "task2-header", + "metadata": {}, + "source": [ + "## Task 2: Weapon Analysis\n", + "\n", + "Analyze weapon usage and effectiveness using Redis aggregation pipelines." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task2-weapons", + "metadata": {}, + "outputs": [], + "source": [ + "def get_weapon_stats(weapon_field=\"primaryweapon\"):\n", + " \"\"\"Get weapon statistics using Redis aggregation.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": f\"${weapon_field}\",\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " \"times_used\": {\"$sum\": 1},\n", + " \"wins\": {\"$sum\": \"$haswon\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"times_used\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"weapon\": r[\"_id\"],\n", + " \"kills\": r[\"total_kills\"],\n", + " \"usage_count\": r[\"times_used\"],\n", + " \"wins\": r[\"wins\"],\n", + " \"avg_kills_per_use\": round(r[\"total_kills\"] / r[\"times_used\"], 3) if r[\"times_used\"] > 0 else 0,\n", + " \"win_rate\": round(r[\"wins\"] / r[\"times_used\"] * 100, 2) if r[\"times_used\"] > 0 else 0,\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"Computing primary weapon statistics...\")\n", + "primary_weapon_stats = get_weapon_stats(\"primaryweapon\")\n", + "df_primary = pd.DataFrame(primary_weapon_stats)\n", + "print(f\"\\nTotal unique primary weapons: {len(df_primary)}\")\n", + "display(df_primary.head(20))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task2-secondary", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== TOP 10 SECONDARY WEAPONS BY USAGE ===\")\n", + "secondary_stats = get_weapon_stats(\"secondaryweapon\")\n", + "df_secondary = pd.DataFrame(secondary_stats)\n", + "display(df_secondary.head(10))" + ] + }, + { + "cell_type": "markdown", + "id": "task3-header", + "metadata": {}, + "source": [ + "## Task 3: Map and Game Mode Analysis\n", + "\n", + "Analyze map popularity, round durations, and win conditions by map and game mode." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task3-maps", + "metadata": {}, + "outputs": [], + "source": [ + "def get_map_stats():\n", + " \"\"\"Get map statistics using Redis aggregation.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$mapname\",\n", + " \"total_rounds\": {\"$sum\": 1},\n", + " \"avg_duration\": {\"$avg\": \"$roundduration\"},\n", + " \"avg_clearance\": {\"$avg\": \"$clearancelevel\"},\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"total_rounds\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"map\": r[\"_id\"],\n", + " \"rounds_played\": r[\"total_rounds\"],\n", + " \"avg_duration_sec\": round(r[\"avg_duration\"], 2),\n", + " \"avg_clearance_level\": round(r[\"avg_clearance\"], 2),\n", + " \"total_kills\": r[\"total_kills\"],\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"Computing map statistics...\")\n", + "map_stats = get_map_stats()\n", + "df_maps = pd.DataFrame(map_stats)\n", + "display(df_maps.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task3-gamemode", + "metadata": {}, + "outputs": [], + "source": [ + "def get_gamemode_stats():\n", + " \"\"\"Get game mode statistics.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$gamemode\",\n", + " \"total_rounds\": {\"$sum\": 1},\n", + " \"avg_duration\": {\"$avg\": \"$roundduration\"},\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " \"unique_matches\": {\"$addToSet\": \"$matchid\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"total_rounds\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"gamemode\": r[\"_id\"],\n", + " \"rounds_played\": r[\"total_rounds\"],\n", + " \"avg_duration_sec\": round(r[\"avg_duration\"], 2),\n", + " \"total_kills\": r[\"total_kills\"],\n", + " 
\"unique_matches\": len(r[\"unique_matches\"]),\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"=== GAME MODE BREAKDOWN ===\")\n", + "gamemode_stats = get_gamemode_stats()\n", + "df_gamemode = pd.DataFrame(gamemode_stats)\n", + "display(df_gamemode)" + ] + }, + { + "cell_type": "markdown", + "id": "task4-header", + "metadata": {}, + "source": [ + "## Task 4: Skill Rank Analysis\n", + "\n", + "Analyze performance distribution across skill ranks using Redis aggregation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task4-ranks", + "metadata": {}, + "outputs": [], + "source": [ + "def get_rank_stats():\n", + " \"\"\"Get skill rank statistics.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$skillrank\",\n", + " \"total_rounds\": {\"$sum\": 1},\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " \"wins\": {\"$sum\": \"$haswon\"},\n", + " \"deaths\": {\"$sum\": \"$isdead\"},\n", + " \"avg_kills\": {\"$avg\": \"$nbkills\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"total_kills\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"rank\": r[\"_id\"],\n", + " \"rounds\": r[\"total_rounds\"],\n", + " \"kills\": r[\"total_kills\"],\n", + " \"wins\": r[\"wins\"],\n", + " \"deaths\": r[\"deaths\"],\n", + " \"avg_kills\": round(r[\"avg_kills\"], 3),\n", + " \"win_rate\": round(r[\"wins\"] / r[\"total_rounds\"] * 100, 2) if r[\"total_rounds\"] > 0 else 0,\n", + " \"kda\": round(r[\"total_kills\"] / max(r[\"deaths\"], 1), 2),\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"Computing skill rank statistics...\")\n", + "rank_stats = get_rank_stats()\n", + "df_ranks = pd.DataFrame(rank_stats)\n", + "display(df_ranks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task4-visualization", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== KILLS PER ROUND BY RANK 
===\")\n", + "for _, row in df_ranks.iterrows():\n", + " bar = \"█\" * int(row['avg_kills'] * 10)\n", + " print(f\"{row['rank']:15} | {bar} {row['avg_kills']:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "task5-header", + "metadata": {}, + "source": [ + "## Task 5: Role Distribution Analysis\n", + "\n", + "Analyze the distribution of roles (Attack/Defense) and operator usage." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task5-roles", + "metadata": {}, + "outputs": [], + "source": [ + "def get_role_stats():\n", + " \"\"\"Get role statistics.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$role\",\n", + " \"total_rounds\": {\"$sum\": 1},\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " \"wins\": {\"$sum\": \"$haswon\"},\n", + " \"operators_used\": {\"$addToSet\": \"$operator\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"total_rounds\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"role\": r[\"_id\"],\n", + " \"rounds\": r[\"total_rounds\"],\n", + " \"kills\": r[\"total_kills\"],\n", + " \"wins\": r[\"wins\"],\n", + " \"unique_operators\": len(r[\"operators_used\"]),\n", + " \"win_rate\": round(r[\"wins\"] / r[\"total_rounds\"] * 100, 2) if r[\"total_rounds\"] > 0 else 0,\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"=== ROLE DISTRIBUTION ===\")\n", + "role_stats = get_role_stats()\n", + "df_roles = pd.DataFrame(role_stats)\n", + "display(df_roles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task5-pivot", + "metadata": {}, + "outputs": [], + "source": [ + "def get_role_operator_stats():\n", + " \"\"\"Get operator usage by role.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": {\"role\": \"$role\", \"operator\": \"$operator\"},\n", + " \"count\": {\"$sum\": 1},\n", + " \"kills\": {\"$sum\": 
\"$nbkills\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"count\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"role\": r[\"_id\"][\"role\"],\n", + " \"operator\": r[\"_id\"][\"operator\"],\n", + " \"usage\": r[\"count\"],\n", + " \"kills\": r[\"kills\"],\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "role_op_stats = get_role_operator_stats()\n", + "df_role_op = pd.DataFrame(role_op_stats)\n", + "\n", + "print(\"\\n=== TOP 5 ATTACK OPERATORS ===\")\n", + "att_op = df_role_op[df_role_op[\"role\"] == \"Attack\"].nlargest(5, \"usage\")\n", + "display(att_op)\n", + "\n", + "print(\"\\n=== TOP 5 DEFENSE OPERATORS ===\")\n", + "def_op = df_role_op[df_role_op[\"role\"] == \"Defense\"].nlargest(5, \"usage\")\n", + "display(def_op)" + ] + }, + { + "cell_type": "markdown", + "id": "task6-header", + "metadata": {}, + "source": [ + "## Task 6: Match-Level Analysis\n", + "\n", + "Analyze match-level statistics including round counts, total kills, and match durations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task6-matches", + "metadata": {}, + "outputs": [], + "source": [ + "def get_match_stats():\n", + " \"\"\"Get match-level statistics using Redis aggregation.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$matchid\",\n", + " \"total_players\": {\"$sum\": 1},\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " \"total_rounds\": {\"$addToSet\": \"$roundnumber\"},\n", + " \"map\": {\"$first\": \"$mapname\"},\n", + " \"gamemode\": {\"$first\": \"$gamemode\"},\n", + " \"max_duration\": {\"$max\": \"$roundduration\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"total_kills\": -1}},\n", + " {\"$limit\": 20},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"match_id\": r[\"_id\"],\n", + " \"players\": r[\"total_players\"],\n", + " \"total_kills\": r[\"total_kills\"],\n", + " \"rounds_played\": len(r[\"total_rounds\"]),\n", + " \"map\": r[\"map\"],\n", + " \"gamemode\": r[\"gamemode\"],\n", + " \"max_round_duration\": r[\"max_duration\"],\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"Computing match statistics (top 20 most violent matches)...\")\n", + "match_stats = get_match_stats()\n", + "df_matches = pd.DataFrame(match_stats)\n", + "display(df_matches)" + ] + }, + { + "cell_type": "markdown", + "id": "task7-header", + "metadata": {}, + "source": [ + "## Task 7: Weapon Attachment Analysis\n", + "\n", + "Analyze the most popular weapon attachments (sights, grips, barrels, etc.) using Redis aggregation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task7-attachments", + "metadata": {}, + "outputs": [], + "source": [ + "def get_attachment_stats(field):\n", + " \"\"\"Get attachment statistics for a specific field.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " f\"@primaryweapontype:{TagField.field_name('*')}\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": f\"${field}\",\n", + " \"count\": {\"$sum\": 1},\n", + " }\n", + " },\n", + " {\"$match\": {\"_id\": {\"$ne\": \"\"}}},\n", + " {\"$sort\": {\"count\": -1}},\n", + " {\"$limit\": 10},\n", + " ]\n", + " )\n", + " return [{k: v for k, v in r.items() if k != \"type\"} for r in aggr[\"results\"]]\n", + "\n", + "print(\"=== TOP 10 PRIMARY SIGHTS ===\")\n", + "sights = get_attachment_stats(\"primarysight\")\n", + "df_sights = pd.DataFrame(sights)\n", + "display(df_sights)\n", + "\n", + "print(\"\\n=== TOP 10 PRIMARY BARRELS ===\")\n", + "barrels = get_attachment_stats(\"primarybarrel\")\n", + "df_barrels = pd.DataFrame(barrels)\n", + "display(df_barrels)\n", + "\n", + "print(\"\\n=== TOP 10 PRIMARY GRIPS ===\")\n", + "grips = get_attachment_stats(\"primarygrip\")\n", + "df_grips = pd.DataFrame(grips)\n", + "display(df_grips)" + ] + }, + { + "cell_type": "markdown", + "id": "task8-header", + "metadata": {}, + "source": [ + "## Task 8: Redis-Side Data Pre-computation and Caching\n", + "\n", + "Demonstrate pre-computing statistics and storing them in Redis for fast access using sorted sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task8-precompute", + "metadata": {}, + "outputs": [], + "source": [ + "def precompute_operator_leaderboard():\n", + " \"\"\"Pre-compute and cache operator kill counts using Redis sorted sets.\"\"\"\n", + " key = \"leaderboard:operators:total_kills\"\n", + " \n", + " client.delete(key)\n", + " \n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$operator\",\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"total_kills\": -1}},\n", + " ]\n", + " )\n", + " \n", + " mapping = {r[\"_id\"]: r[\"total_kills\"] for r in aggr[\"results\"]}\n", + " if mapping:\n", + " client.zadd(key, mapping)\n", + " \n", + " return key\n", + "\n", + "print(\"Pre-computing operator leaderboard in Redis sorted set...\")\n", + "lb_key = precompute_operator_leaderboard()\n", + "\n", + "print(f\"\\n=== TOP 10 OPERATORS (from Redis sorted set) ===\")\n", + "top_operators = client.zrevrange(lb_key, 0, 9, withscores=True)\n", + "for i, (op, score) in enumerate(top_operators, 1):\n", + " print(f\"{i}. 
{op}: {int(score)} kills\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task8-winrate", + "metadata": {}, + "outputs": [], + "source": [ + "def precompute_winrate_leaderboard():\n", + " \"\"\"Pre-compute operator win rates and store in Redis.\"\"\"\n", + " key = \"leaderboard:operators:winrate\"\n", + " client.delete(key)\n", + " \n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$operator\",\n", + " \"wins\": {\"$sum\": \"$haswon\"},\n", + " \"total\": {\"$sum\": 1},\n", + " }\n", + " },\n", + " {\"$match\": {\"total\": {\"$gte\": 100}}},\n", + " ]\n", + " )\n", + " \n", + " mapping = {}\n", + " for r in aggr[\"results\"]:\n", + " winrate = (r[\"wins\"] / r[\"total\"]) * 100\n", + " mapping[r[\"_id\"]] = winrate\n", + " \n", + " if mapping:\n", + " client.zadd(key, mapping)\n", + " \n", + " return key\n", + "\n", + "print(\"Pre-computing operator win rate leaderboard...\")\n", + "wr_key = precompute_winrate_leaderboard()\n", + "\n", + "print(f\"\\n=== TOP 10 OPERATORS BY WIN RATE (min 100 rounds) ===\")\n", + "top_wr = client.zrevrange(wr_key, 0, 9, withscores=True)\n", + "for i, (op, score) in enumerate(top_wr, 1):\n", + " print(f\"{i}. 
{op}: {score:.2f}% win rate\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task8-rank", + "metadata": {}, + "outputs": [], + "source": [ + "def precompute_map_popularity():\n", + " \"\"\"Pre-compute map popularity and store in Redis sorted set.\"\"\"\n", + " key = \"leaderboard:maps:popularity\"\n", + " client.delete(key)\n", + " \n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$mapname\",\n", + " \"rounds\": {\"$sum\": 1},\n", + " }\n", + " },\n", + " {\"$sort\": {\"rounds\": -1}},\n", + " ]\n", + " )\n", + " \n", + " mapping = {r[\"_id\"]: r[\"rounds\"] for r in aggr[\"results\"]}\n", + " if mapping:\n", + " client.zadd(key, mapping)\n", + " \n", + " return key\n", + "\n", + "print(\"Pre-computing map popularity leaderboard...\")\n", + "map_key = precompute_map_popularity()\n", + "\n", + "print(f\"\\n=== MAP POPULARITY RANKING ===\")\n", + "maps = client.zrevrange(map_key, 0, 9, withscores=True)\n", + "for i, (map_name, score) in enumerate(maps, 1):\n", + " print(f\"{i}. {map_name}: {int(score)} rounds played\")" + ] + }, + { + "cell_type": "markdown", + "id": "task9-header", + "metadata": {}, + "source": [ + "## Task 9: Time-Series Analysis\n", + "\n", + "Analyze data over time (by day) to find trends." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task9-daily", + "metadata": {}, + "outputs": [], + "source": [ + "def get_daily_stats():\n", + " \"\"\"Get daily statistics using Redis aggregation.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$dateid\",\n", + " \"total_players\": {\"$sum\": 1},\n", + " \"total_kills\": {\"$sum\": \"$nbkills\"},\n", + " \"matches\": {\"$addToSet\": \"$matchid\"},\n", + " \"avg_duration\": {\"$avg\": \"$roundduration\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"_id\": 1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"date\": r[\"_id\"],\n", + " \"player_actions\": r[\"total_players\"],\n", + " \"total_kills\": r[\"total_kills\"],\n", + " \"matches\": len(r[\"matches\"]),\n", + " \"avg_round_duration\": round(r[\"avg_duration\"], 2),\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"Computing daily statistics...\")\n", + "daily_stats = get_daily_stats()\n", + "df_daily = pd.DataFrame(daily_stats)\n", + "display(df_daily)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task9-summary", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== DAILY ACTIVITY SUMMARY ===\")\n", + "total_days = len(df_daily)\n", + "avg_daily_matches = df_daily[\"matches\"].mean()\n", + "avg_daily_kills = df_daily[\"total_kills\"].mean()\n", + "busiest_day = df_daily.loc[df_daily[\"matches\"].idxmax()]\n", + "calmest_day = df_daily.loc[df_daily[\"matches\"].idxmin()]\n", + "\n", + "print(f\"Total days in dataset: {total_days}\")\n", + "print(f\"Average daily matches: {avg_daily_matches:.1f}\")\n", + "print(f\"Average daily kills: {avg_daily_kills:.1f}\")\n", + "print(f\"\\nBusiest day: {busiest_day['date']} with {busiest_day['matches']} matches\")\n", + "print(f\"Calmest day: {calmest_day['date']} with {calmest_day['matches']} matches\")" + ] + }, + { + "cell_type": "markdown", + "id": 
"task10-header", + "metadata": {}, + "source": [ + "## Task 10: End Round Reason Analysis\n", + "\n", + "Analyze why rounds end (elimination, objective complete, hostage rescued, etc.)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task10-reasons", + "metadata": {}, + "outputs": [], + "source": [ + "def get_round_end_stats():\n", + " \"\"\"Get round end reason statistics.\"\"\"\n", + " aggr = client.ft().aggregate(\n", + " \"*\",\n", + " [\n", + " {\n", + " \"$group\": {\n", + " \"_id\": \"$endroundreason\",\n", + " \"count\": {\"$sum\": 1},\n", + " \"avg_duration\": {\"$avg\": \"$roundduration\"},\n", + " }\n", + " },\n", + " {\"$sort\": {\"count\": -1}},\n", + " ]\n", + " )\n", + " return [\n", + " {\n", + " \"end_reason\": r[\"_id\"],\n", + " \"count\": r[\"count\"],\n", + " \"percentage\": 0,\n", + " \"avg_duration\": round(r[\"avg_duration\"], 2),\n", + " }\n", + " for r in aggr[\"results\"]\n", + " ]\n", + "\n", + "print(\"=== ROUND END REASONS ===\")\n", + "round_end_stats = get_round_end_stats()\n", + "df_ends = pd.DataFrame(round_end_stats)\n", + "total_rounds = df_ends[\"count\"].sum()\n", + "df_ends[\"percentage\"] = (df_ends[\"count\"] / total_rounds * 100).round(2)\n", + "display(df_ends)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "task10-visual", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== ROUND END DISTRIBUTION ===\")\n", + "for _, row in df_ends.iterrows():\n", + " bar = \"█\" * int(row[\"percentage\"] / 2)\n", + " print(f\"{row['end_reason']:25} | {bar} {row['percentage']:.1f}%\")" + ] + }, + { + "cell_type": "markdown", + "id": "conclusion", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This notebook demonstrated various data analysis tasks using Redis:\n", + "\n", + "1. **Operator Performance Analysis** - Kill counts and win rates by operator\n", + "2. **Weapon Analysis** - Primary and secondary weapon usage and effectiveness\n", + "3. 
**Map and Game Mode Analysis** - Popular maps and game modes\n", + "4. **Skill Rank Analysis** - Performance distribution across ranks\n", + "5. **Role Distribution Analysis** - Attack vs Defense operator usage\n", + "6. **Match-Level Analysis** - Individual match statistics\n", + "7. **Weapon Attachment Analysis** - Popular attachments (sights, grips, barrels)\n", + "8. **Redis-Side Pre-computation** - Using sorted sets for leaderboards\n", + "9. **Time-Series Analysis** - Daily activity trends\n", + "10. **Round End Reason Analysis** - Why rounds end\n", + "\n", + "All aggregations were performed directly in Redis using the `FT.AGGREGATE` command (called through redis-py's `client.ft().aggregate()`), demonstrating how Redis can handle complex analytical queries without moving data out of the database." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/r6data.ipynb b/notebooks/r6data.ipynb index 3faa890..cb2963f 100644 --- a/notebooks/r6data.ipynb +++ b/notebooks/r6data.ipynb @@ -22,7 +22,7 @@ "from redis.retry import Retry\n", "from redis.backoff import ExponentialBackoff\n", "from pathlib import Path\n", - "import kagglehub\n", + "import kagglehub # pyright: ignore[reportMissingTypeStubs]\n", "from csv import DictReader\n", "from fastid import ulid\n", "from tqdm.notebook import tqdm\n", @@ -77,7 +77,7 @@ "outputs": [], "source": [ "with full_dump.open(\"rb\") as f:\n", - " rows = sum(1 for line in f) - 1" + " rows = sum(1 for _ in f) - 1" ] }, { @@ -87,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "daily_matches = {}\n", + "daily_matches: dict[str, set[str]] = {}\n", "with 
full_dump.open(errors=\"ignore\") as csvfile:\n", " reader = DictReader(csvfile, delimiter=\";\")\n", " with client.pipeline() as p:\n", @@ -100,8 +100,8 @@ " continue\n", " daily_matches[row[\"dateid\"]].add(row[\"matchid\"])\n", " display(f\"Selected {row[\"matchid\"]} for day {row[\"dateid\"]}\")\n", - " p.hset(f\"raw:{ulid()}\", mapping=row)\n", - " p.execute()" + " _ = p.hset(f\"raw:{ulid()}\", mapping=row)\n", + " _ = p.execute()" ] }, { @@ -111,7 +111,7 @@ "metadata": {}, "outputs": [], "source": [ - "from redis.commands.search.field import TagField, NumericField, TextField\n", + "from redis.commands.search.field import TagField, NumericField\n", "from redis.commands.search.index_definition import IndexDefinition, IndexType\n", "\n", "schema = (\n", @@ -148,6 +148,10 @@ " TagField(\"secondarygadget\")\n", ")\n", "\n", + "try:\n", + " client.ft().dropindex()\n", + "except:\n", + " pass\n", "client.ft().create_index(schema, definition=IndexDefinition(prefix=(\"raw:\",), index_type=IndexType.HASH))" ] },