projet_dd/Main.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8b22a2d1",
   "metadata": {},
   "source": [
    "# Introduction\n",
    "\n",
    "Bonjour et bienvenue dans le notebook du projet sur les bases de données tablebase.\n",
    "\n",
    "Dans ce notebook, nous allons montrer comment fonctionne le stockage d'une tablebase en prenant comme exemple la tablebase \"Syzygy\", limitée aux finales de 3 à 5 pièces pour des raisons de taille.\n",
    "Nous allons expliquer comment les finales (ou positions d'échecs) sont stockées au format clé-valeur, puis nous évaluerons les performances de la base à l'aide de Redis.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78fdf40a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-12-06T09:21:56.560113Z",
     "start_time": "2025-12-06T09:21:56.104871Z"
    }
   },
   "outputs": [],
   "source": [
    "#import du projet\n",
    "import redis\n",
    "import time\n",
    "import chess\n",
    "import chess.polyglot\n",
    "import chess.syzygy\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import hashlib\n",
    "from IPython.display import display\n",
    "from scipy import stats\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d584aba1",
   "metadata": {},
   "source": [
    "## Peuplement de Redis\n",
    "\n",
    "Dans un premier temps, on ajoute les positions de la tablebase à Redis afin de pouvoir les utiliser ensuite pour les tests."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6058974",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-12-06T09:22:49.905572Z",
     "start_time": "2025-12-06T09:22:06.592819Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_250_random_position(tablebases, config, n_positions=250, max_tries=5000):\n",
    "    \"\"\"\n",
    "    Generate random valid boards holding exactly the material of `config`.\n",
    "\n",
    "    The former bounds (10000 positions, 5000 tries) always yielded 5000 boards,\n",
    "    contradicting the function name; the defaults now give 250 boards per table.\n",
    "\n",
    "    tablebases  : opened Syzygy tablebase (unused here, kept for existing callers).\n",
    "    config      : material signature such as 'KQvK' or 'KPvKP'.\n",
    "    n_positions : number of boards to generate.\n",
    "    max_tries   : hard cap on the number of generation attempts.\n",
    "    Returns a list of at most min(n_positions, max_tries) chess.Board objects.\n",
    "    \"\"\"\n",
    "    positions = []\n",
    "    tries = 0\n",
    "\n",
    "    while len(positions) < n_positions and tries < max_tries:\n",
    "        positions.append(generate_board_from_config(config))\n",
    "        tries += 1\n",
    "\n",
    "    return positions\n",
    "\n",
    "def generate_board_from_config(config):\n",
    "    \"\"\"\n",
    "    Build a random board holding exactly the material of a Syzygy config.\n",
    "\n",
    "    config: material signature 'WHITEvBLACK', e.g. 'KQvK' or 'KPvKP'.\n",
    "    Boards are redrawn until board.is_valid() accepts one; the accepted board\n",
    "    has no castling rights, no en-passant square and a random side to move.\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        kinds = {\"K\": chess.KING,\"Q\": chess.QUEEN,\"R\": chess.ROOK,\"B\": chess.BISHOP,\"N\": chess.KNIGHT,\"P\": chess.PAWN}\n",
    "\n",
    "        # the part before 'v' is White's material, the part after it is Black's\n",
    "        white_material, black_material = config.split(\"v\")\n",
    "\n",
    "        candidate = chess.Board(None)  # empty board\n",
    "\n",
    "        # drop every piece on a square that is still free, White first\n",
    "        for colour, material in ((chess.WHITE, white_material), (chess.BLACK, black_material)):\n",
    "            for letter in material:\n",
    "                kind = kinds[letter]\n",
    "                free_squares = [s for s in chess.SQUARES if candidate.piece_at(s) is None]\n",
    "                candidate.set_piece_at(random.choice(free_squares), chess.Piece(kind, colour))\n",
    "\n",
    "        # no castling rights and no en-passant square\n",
    "        candidate.castling_rights = 0\n",
    "        candidate.ep_square = None\n",
    "\n",
    "        # pick at random which side is to move\n",
    "        candidate.turn = random.choice([chess.WHITE, chess.BLACK])\n",
    "\n",
    "        if candidate.is_valid():\n",
    "            return candidate\n",
    "\n",
    "def add_tablebase_to_redis():\n",
    "    \"\"\"\n",
    "    Reset Redis DB 0 and fill it with randomly generated Syzygy positions.\n",
    "\n",
    "    Each position is stored as a Redis hash keyed by its FEN, with the fields\n",
    "    'wdl', 'dtz' and 'pieces' (number of pieces on the board). Only the\n",
    "    configurations whose .rtbw/.rtbz files exist in 'tablebases/' are used.\n",
    "    Positions the tablebases cannot answer are skipped; any other error\n",
    "    (for example a Redis connection failure) is no longer silently swallowed.\n",
    "    \"\"\"\n",
    "    # Connect to the Redis server and start from an empty database\n",
    "    redis_server = redis.Redis(host='localhost', port=6379, db=0)\n",
    "    redis_server.flushdb()\n",
    "\n",
    "    tablebases_path = \"tablebases/\"  # path to the Syzygy tablebases\n",
    "\n",
    "    available_tables = set()\n",
    "    for f in os.listdir(tablebases_path):\n",
    "        if f.endswith(\".rtbw\") or f.endswith(\".rtbz\"):\n",
    "            available_tables.add(f.split(\".\")[0])\n",
    "\n",
    "    print(\"Configs ok:\", available_tables)\n",
    "\n",
    "    example_position = chess.Board(\"4k3/8/3n4/8/7p/1P6/4B3/4K3 b - - 0 1\")  # example position\n",
    "    display(example_position)\n",
    "\n",
    "    # Configurations of all the tablebases used in the project\n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\",\"KBvK\",\"KNvK\"]\n",
    "    config_valide = [c for c in configs if c in available_tables]\n",
    "\n",
    "    count = 0\n",
    "    # The context manager closes the table files even if something fails below\n",
    "    with chess.syzygy.open_tablebase(tablebases_path) as tablebases:\n",
    "        all_positions = []\n",
    "        for config in config_valide:\n",
    "            print(f\"Récupération des positions de la tablebase {config}\")\n",
    "            # add the generated positions to the global list\n",
    "            all_positions.extend(get_250_random_position(tablebases, config))\n",
    "\n",
    "        print(f\"Total positions récupérées: {len(all_positions)}\")\n",
    "\n",
    "        for position in all_positions:\n",
    "            try:\n",
    "                wdl = tablebases.probe_wdl(position)\n",
    "                dtz = tablebases.probe_dtz(position)\n",
    "            except (KeyError, chess.syzygy.MissingTableError):\n",
    "                # the tablebases have no answer for this position: skip it\n",
    "                continue\n",
    "\n",
    "            redis_server.hset(position.fen(), mapping={\n",
    "                \"wdl\": wdl,\n",
    "                \"dtz\": dtz,\n",
    "                \"pieces\": len(position.piece_map())\n",
    "            })\n",
    "            count += 1\n",
    "\n",
    "    print(f\"✓ {count} positions ajoutées dans Redis DB 0\")\n",
    "    print(\"Ajout de toutes les positions des tablebases dans Redis terminé!\")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    add_tablebase_to_redis()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b443c8c",
   "metadata": {},
   "source": [
    "## Accès à Redis\n",
    "\n",
    "Nous avons ajouté 250 positions appartenant à chacune des tablebases présentes dans le dossier. Il y a des configurations à 3, 4 et 5 pièces afin de voir si cela a une incidence sur le temps d'accès à Redis.\n",
    "\n",
    "Pour la suite, nous allons donc pouvoir passer sur Redis pour vérifier cette hypothèse et faire d'autres tests sur notre base.\n",
    "\n",
    "## Benchmark redis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d49b928-7522-4727-b6d8-224e2bd3a1b7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-12-06T09:27:00.752325Z",
     "start_time": "2025-12-06T09:27:00.748915Z"
    }
   },
   "outputs": [],
   "source": [
    "def benchmark_redis(redis_serveur, pos):\n",
    "    \"\"\"\n",
    "    Time one HGETALL per board, using the board's FEN as the Redis key.\n",
    "\n",
    "    redis_serveur : client exposing hgetall(key).\n",
    "    pos           : sized collection of boards exposing fen().\n",
    "    Returns a dict with the request count, latency statistics in seconds\n",
    "    (mean, percentiles, std, min, max), the total wall time, the raw latency\n",
    "    array and the throughput in requests per second.\n",
    "    \"\"\"\n",
    "    samples = []\n",
    "    bench_start = time.perf_counter()\n",
    "\n",
    "    for board in pos:\n",
    "        key = board.fen()  # the key is built outside the timed section\n",
    "        tick = time.perf_counter()\n",
    "        redis_serveur.hgetall(key)\n",
    "        samples.append(time.perf_counter() - tick)\n",
    "\n",
    "    elapsed = time.perf_counter() - bench_start\n",
    "    latency = np.array(samples)\n",
    "\n",
    "    summary = {\"count\": len(pos), \"average_latency\": np.mean(latency)}\n",
    "    for q in (50, 90, 95, 99):\n",
    "        summary[f\"p{q}\"] = np.percentile(latency, q)\n",
    "    summary[\"std\"] = np.std(latency)\n",
    "    summary[\"min\"] = np.min(latency)\n",
    "    summary[\"max\"] = np.max(latency)\n",
    "    summary[\"total_time\"] = elapsed\n",
    "    summary[\"latency\"] = latency\n",
    "    summary[\"rps\"] = len(latency) / elapsed\n",
    "    return summary\n",
    "def benchmark_syzygy(tablebases, pos):\n",
    "    \"\"\"\n",
    "    Time a WDL probe followed by a DTZ probe for every board.\n",
    "\n",
    "    tablebases : object exposing probe_wdl(board) and probe_dtz(board).\n",
    "    pos        : sized collection of boards.\n",
    "    Returns the same dict layout as benchmark_redis. Boards whose table is\n",
    "    missing are skipped: they are included in \"count\" but add no latency sample.\n",
    "\n",
    "    time.perf_counter() is used (as in benchmark_redis) because it is the\n",
    "    monotonic, high-resolution clock intended for measuring short durations,\n",
    "    and the latencies are returned as a NumPy array like the other benchmarks.\n",
    "    \"\"\"\n",
    "    latency = []\n",
    "    start_time_all = time.perf_counter()\n",
    "\n",
    "    for board in pos:\n",
    "        start = time.perf_counter()\n",
    "        try:\n",
    "            tablebases.probe_wdl(board)\n",
    "            tablebases.probe_dtz(board)\n",
    "        except chess.syzygy.MissingTableError:\n",
    "            continue\n",
    "        latency.append(time.perf_counter() - start)\n",
    "\n",
    "    total_time_all = time.perf_counter() - start_time_all\n",
    "    latency = np.array(latency, dtype=np.float64)\n",
    "\n",
    "    return {\n",
    "        \"count\": len(pos),\n",
    "        \"average_latency\": np.mean(latency),\n",
    "        \"p50\": np.percentile(latency, 50),\n",
    "        \"p90\": np.percentile(latency, 90),\n",
    "        \"p95\": np.percentile(latency, 95),\n",
    "        \"p99\": np.percentile(latency, 99),\n",
    "        \"std\": np.std(latency),\n",
    "        \"min\": np.min(latency),\n",
    "        \"max\": np.max(latency),\n",
    "        \"total_time\": total_time_all,\n",
    "        \"latency\": latency,\n",
    "        \"rps\": len(latency) / total_time_all\n",
    "    }\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "37b01080",
   "metadata": {},
   "source": [
    "# Test de l'hypothèse 1\n",
    "\n",
    "Cette partie teste les accès à Redis afin de savoir si le nombre de pièces de la configuration influe sur le temps d'accès :"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a68060e1-0052-4446-a0d3-40bc1756f210",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def benchmark_redis_get_comparison(redis_client, test_positions: dict, iterations=100, key_prefix=\"\"):\n",
    "    \"\"\"\n",
    "    Compare Redis read latency between groups of positions.\n",
    "\n",
    "    redis_client   : already created redis.Redis instance (only hgetall is used).\n",
    "    test_positions : dict {label: [fen, ...]}, e.g. {\"3men\": [...], \"4men\": [...], \"5men\": [...]}.\n",
    "    iterations     : number of timed reads per label.\n",
    "    key_prefix     : optional prefix put in front of every FEN to build the key.\n",
    "\n",
    "    The notebook stores positions with hset under their raw FEN, so they are\n",
    "    read back with hgetall on that same key. The former GET on \"tb:<fen>\"\n",
    "    addressed keys that this notebook never writes and so only timed misses.\n",
    "\n",
    "    Returns {label: {\"mean_ms\", \"median_ms\", \"min_ms\", \"max_ms\", \"std_ms\", \"hits\"}},\n",
    "    where \"hits\" is the number of reads that returned a stored position.\n",
    "    \"\"\"\n",
    "    results = {}\n",
    "\n",
    "    for label, fens in test_positions.items():\n",
    "        times = np.zeros(iterations)\n",
    "        hits = 0\n",
    "\n",
    "        for i in range(iterations):\n",
    "            key = f\"{key_prefix}{random.choice(fens)}\"\n",
    "            start = time.perf_counter()\n",
    "            value = redis_client.hgetall(key)\n",
    "            times[i] = time.perf_counter() - start\n",
    "            if value:  # an empty mapping means the key does not exist\n",
    "                hits += 1\n",
    "\n",
    "        results[label] = {\n",
    "            \"mean_ms\": np.mean(times) * 1000,\n",
    "            \"median_ms\": np.median(times) * 1000,\n",
    "            \"min_ms\": np.min(times) * 1000,\n",
    "            \"max_ms\": np.max(times) * 1000,\n",
    "            \"std_ms\": np.std(times) * 1000,\n",
    "            \"hits\": hits,\n",
    "        }\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "# IMPORTANT: should these hand-written positions be kept as they are?\n",
    "# NOTE(review): several entries do not hold the number of pieces their label announces,\n",
    "# e.g. \"K7/8/8/8/8/8/8/k7\" has two pieces under \"3pieces\" and \"KQ6/8/8/8/8/8/8/k7\" has\n",
    "# three under \"4pieces\" -- confirm or replace them before drawing conclusions.\n",
    "# Maps a group label to the FEN strings sampled for that group.\n",
    "TEST_POSITIONS = {\n",
    "    \"3pieces\": [\n",
    "        \"K7/8/8/8/8/8/8/k7 w - - 0 1\",\n",
    "        \"KB6/8/8/8/8/8/8/k7 w - - 0 1\",\n",
    "        \"KN6/8/8/8/8/8/8/k7 w - - 0 1\",\n",
    "    ],\n",
    "    \"4pieces\": [\n",
    "        \"KQ6/8/8/8/8/8/8/k7 w - - 0 1\",\n",
    "        \"KP6/8/8/8/8/8/8/k7 w - - 0 1\",\n",
    "        \"KQ6/kr6/8/8/8/8/8/8 w - - 0 1\",\n",
    "    ],\n",
    "    \"5pieces\": [\n",
    "        \"KQ6/8/8/Kp6/8/8/8/k7 w - - 0 1\",\n",
    "        \"KR6/1p6/8/8/8/8/8/k7 w - - 0 1\",\n",
    "        \"KRP5/1p6/8/8/8/8/8/k7 w - - 0 1\",\n",
    "    ],\n",
    "}\n",
    "\n",
    "# Run the comparison against Redis DB 0 with 100000 timed reads per label;\n",
    "# the returned dict is the cell's displayed value.\n",
    "redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
    "benchmark_redis_get_comparison(redis_server, TEST_POSITIONS, 100000)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "abd8ddc4",
   "metadata": {},
   "source": [
    "Nous pouvons réaliser un graphique permettant de visualiser l'évolution du temps d'accès aux données avec Matplotlib : "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd521dcc",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Number of timed requests of each run (x axis of every figure)\n",
    "nb_tests = [100, 1000, 5000, 10000, 50000, 100000]\n",
    "\n",
    "# Recorded results, one value per entry of nb_tests\n",
    "# (presumably milliseconds, as reported by the benchmark's *_ms fields -- confirm)\n",
    "\n",
    "# 3 pieces\n",
    "moy3 = [0.1858, 0.1169, 0.1128, 0.1064, 0.1637, 0.1295]\n",
    "min3 = [0.0874, 0.0833, 0.0694, 0.0498, 0.0510, 0.0506]\n",
    "max3 = [2.2310, 2.2054, 2.5722, 4.1666, 3.7527, 3.1546]\n",
    "\n",
    "# 4 pieces\n",
    "moy4 = [0.1224, 0.1038, 0.1037, 0.1037, 0.0969, 0.0110]\n",
    "min4 = [0.0897, 0.0513, 0.0507, 0.0706, 0.0506, 0.0504]\n",
    "max4 = [0.7966, 2.8069, 2.5015, 1.6767, 3.0232, 500.6105]\n",
    "\n",
    "# 5 pieces\n",
    "moy5 = [0.2286, 0.0574, 0.0656, 0.1007, 0.0975, 0.1211]\n",
    "min5 = [0.0854, 0.0511, 0.0562, 0.0706, 0.0662, 0.0503]\n",
    "max5 = [2.7619, 0.1623, 2.5723, 3.9987, 8.1657, 500.7266]\n",
    "\n",
    "# One figure per statistic, each with one curve per piece count\n",
    "for figure_title, curves in (\n",
    "    ('Moyenne du temps de requête', (moy3, moy4, moy5)),\n",
    "    ('Minimum du temps de requête', (min3, min4, min5)),\n",
    "    ('Maximum du temps de requête', (max3, max4, max5)),\n",
    "):\n",
    "    for curve, curve_label in zip(curves, ('3pieces', '4pieces', '5pieces')):\n",
    "        plt.plot(nb_tests, curve, marker='o', label=curve_label)\n",
    "\n",
    "    plt.xlabel('nb_tests')\n",
    "    plt.ylabel('valeurs')\n",
    "    plt.title(figure_title)\n",
    "    plt.legend()\n",
    "    plt.grid(True)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e0c7b1a-9d42-4f6e-8a13-2b7c4d9e1f60",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Redis memory test: memory currently used by the Redis server\n",
    "redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
    "info = redis_server.info(\"memory\")\n",
    "mem_human = info[\"used_memory_human\"]\n",
    "print(\"MEMORY USAGE REDIS\")\n",
    "print(f\"Memory Redis: {mem_human}\")\n",
    "\n",
    "# Disabled Syzygy memory test. It is kept as comments rather than as a\n",
    "# triple-quoted string: a bare string at the end of a cell is the cell's last\n",
    "# expression and would be echoed as its output.\n",
    "# NOTE(review): the RSS read below is that of the whole kernel process -- confirm\n",
    "# it is the figure wanted for Syzygy before re-enabling.\n",
    "#\n",
    "# import psutil\n",
    "#\n",
    "# tablebases = chess.syzygy.open_tablebase(\"tablebases/\")\n",
    "#\n",
    "# # Read the memory figures of the current process\n",
    "# process = psutil.Process(os.getpid())\n",
    "# mem_bytes = process.memory_info().rss\n",
    "# mem_mb = mem_bytes / (1024**2)\n",
    "# print(\"MEMORY USAGE SYZYGY\")\n",
    "# print(f\"Memory Syzygy: {mem_mb}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "48b2de71",
   "metadata": {},
   "source": [
    "# Test de l'hypothèse 2\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d955b0d-dc80-4518-954b-7c2df295adca",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-12-06T10:18:18.501308Z",
     "start_time": "2025-12-06T10:18:18.393765Z"
    }
   },
   "outputs": [],
   "source": [
    "def statistical_tests(bench_redis, bench_syzygy):\n",
    "    \"\"\"\n",
    "    Compare Redis and Syzygy latency samples with two statistical tests.\n",
    "\n",
    "    bench_redis, bench_syzygy : benchmark result dicts; only their \"latency\"\n",
    "    entry (per-request timings) is read. The longer sample is cut down to the\n",
    "    length of the shorter one before testing.\n",
    "    Prints and returns a DataFrame with one row per test\n",
    "    (columns: test, statistic, p-value, interpretation).\n",
    "    \"\"\"\n",
    "    redis_lat, syz_lat = (\n",
    "        np.array(bench[\"latency\"], dtype=np.float64)\n",
    "        for bench in (bench_redis, bench_syzygy)\n",
    "    )\n",
    "\n",
    "    n_common = min(len(redis_lat), len(syz_lat))\n",
    "    redis_lat, syz_lat = redis_lat[:n_common], syz_lat[:n_common]\n",
    "\n",
    "    # Paired t-test: compares the mean latencies\n",
    "    paired_stat, paired_p = stats.ttest_rel(syz_lat, redis_lat)\n",
    "\n",
    "    # Two-sample Kolmogorov-Smirnov test: compares the distributions\n",
    "    dist_stat, dist_p = stats.ks_2samp(syz_lat, redis_lat)\n",
    "\n",
    "    columns = {}\n",
    "    columns[\"test\"] = [\"paired t-test\", \"KS-test\"]\n",
    "    columns[\"statistic\"] = [paired_stat, dist_stat]\n",
    "    columns[\"p-value\"] = [paired_p, dist_p]\n",
    "    columns[\"interpretation\"] = [\n",
    "        \"H0: mean latency equal (Redis vs Syzygy)\",\n",
    "        \"H0: same latency distribution (Redis vs Syzygy)\"\n",
    "    ]\n",
    "    df_stats = pd.DataFrame(columns)\n",
    "\n",
    "    print(df_stats)\n",
    "    return df_stats\n",
    "\n",
    "\n",
    "def run_experiment_test_latency():\n",
    "    \"\"\"\n",
    "    Benchmark direct Syzygy probing against Redis lookups on the same boards.\n",
    "\n",
    "    Generates boards for every configuration, times them with benchmark_syzygy\n",
    "    and with benchmark_redis (Redis DB 0), prints both summaries and returns\n",
    "    {\"bench_syzygy\": ..., \"bench_redis\": ...}.\n",
    "    \"\"\"\n",
    "    redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
    "\n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\",\"KBvK\",\"KNvK\"]\n",
    "\n",
    "    # The context manager releases the table files once probing is finished\n",
    "    with chess.syzygy.open_tablebase(\"tablebases/\") as tablebases:\n",
    "        all_positions = []\n",
    "        for config in configs:\n",
    "            all_positions += get_250_random_position(tablebases, config)\n",
    "\n",
    "        # Benchmark Syzygy\n",
    "        bench_syzygy = benchmark_syzygy(tablebases, all_positions)\n",
    "    print(\"BENCHMARK SYZYGY\")\n",
    "    print(bench_syzygy)\n",
    "\n",
    "    # Benchmark Redis\n",
    "    # NOTE(review): these boards are freshly generated, so presumably few of them\n",
    "    # match keys already stored in Redis -- confirm that timing misses is intended.\n",
    "    bench_redis = benchmark_redis(redis_server, all_positions)\n",
    "    print(\"BENCHMARK REDIS\")\n",
    "    print(bench_redis)\n",
    "\n",
    "    # Length info\n",
    "    print(f\"Redis latency array length: {len(bench_redis['latency'])}\")\n",
    "    print(f\"Syzygy latency array length: {len(bench_syzygy['latency'])}\")\n",
    "\n",
    "    return {\n",
    "        \"bench_syzygy\": bench_syzygy,\n",
    "        \"bench_redis\": bench_redis\n",
    "    }\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    results = run_experiment_test_latency()\n",
    "    statistical_tests(results[\"bench_redis\"], results[\"bench_syzygy\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3d0daed3",
   "metadata": {},
   "source": [
    "# Test de l'hypothèse 3\n",
    "\n",
    "Ici, nous allons tester les différentes structures de données possibles pour stocker les positions.\n",
    "Nous allons dans un premier temps peupler Redis avec la version enrichie des positions (c'est-à-dire avec la FEN, le wdl, le dtz et le nombre de pièces), que nous allons comparer au hachage de Zobrist, la fonction de hachage fournie par python-chess.\n",
    "Ensuite nous allons créer le benchmark puis faire la comparaison.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53baf548",
   "metadata": {},
   "outputs": [],
   "source": [
    "def benchmark_redis_zobrist(redis_serveur, pos):\n",
    "    \"\"\"\n",
    "    Time one HGETALL per board, keyed by str(chess.polyglot.zobrist_hash(board)).\n",
    "\n",
    "    redis_serveur : client exposing hgetall(key).\n",
    "    pos           : sized collection of chess boards.\n",
    "    Returns a dict with the request count, latency statistics in seconds\n",
    "    (mean, percentiles, std, min, max), the total wall time, the raw latency\n",
    "    array and the throughput in requests per second.\n",
    "    \"\"\"\n",
    "    samples = []\n",
    "    bench_start = time.perf_counter()\n",
    "\n",
    "    for board in pos:\n",
    "        zobrist_key = str(chess.polyglot.zobrist_hash(board))  # hashed outside the timed section\n",
    "        tick = time.perf_counter()\n",
    "        redis_serveur.hgetall(zobrist_key)\n",
    "        samples.append(time.perf_counter() - tick)\n",
    "\n",
    "    elapsed = time.perf_counter() - bench_start\n",
    "    latency = np.array(samples)\n",
    "\n",
    "    summary = {\"count\": len(pos), \"average_latency\": np.mean(latency)}\n",
    "    for q in (50, 90, 95, 99):\n",
    "        summary[f\"p{q}\"] = np.percentile(latency, q)\n",
    "    summary[\"std\"] = np.std(latency)\n",
    "    summary[\"min\"] = np.min(latency)\n",
    "    summary[\"max\"] = np.max(latency)\n",
    "    summary[\"total_time\"] = elapsed\n",
    "    summary[\"latency\"] = latency\n",
    "    summary[\"rps\"] = len(latency) / elapsed\n",
    "    return summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "656e2100",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_250_random_position(tablebases, config, n_positions=250, max_tries=5000):\n",
    "    \"\"\"\n",
    "    Generate random valid boards holding exactly the material of `config`.\n",
    "\n",
    "    The former bounds (10000 positions, 5000 tries) always yielded 5000 boards,\n",
    "    contradicting the function name; the defaults now give 250 boards per table.\n",
    "\n",
    "    tablebases  : opened Syzygy tablebase (unused here, kept for existing callers).\n",
    "    config      : material signature such as 'KQvK' or 'KPvKP'.\n",
    "    n_positions : number of boards to generate.\n",
    "    max_tries   : hard cap on the number of generation attempts.\n",
    "    Returns a list of at most min(n_positions, max_tries) chess.Board objects.\n",
    "    \"\"\"\n",
    "    positions = []\n",
    "    tries = 0\n",
    "\n",
    "    while len(positions) < n_positions and tries < max_tries:\n",
    "        positions.append(generate_board_from_config(config))\n",
    "        tries += 1\n",
    "\n",
    "    return positions\n",
    "\n",
    "def generate_board_from_config(config):\n",
    "    \"\"\"\n",
    "    Build a random board holding exactly the material of a Syzygy config.\n",
    "\n",
    "    config: material signature 'WHITEvBLACK', e.g. 'KQvK' or 'KPvKP'.\n",
    "    Boards are redrawn until board.is_valid() accepts one; the accepted board\n",
    "    has no castling rights, no en-passant square and a random side to move.\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        kinds = {\"K\": chess.KING,\"Q\": chess.QUEEN,\"R\": chess.ROOK,\"B\": chess.BISHOP,\"N\": chess.KNIGHT,\"P\": chess.PAWN}\n",
    "\n",
    "        # the part before 'v' is White's material, the part after it is Black's\n",
    "        white_material, black_material = config.split(\"v\")\n",
    "\n",
    "        candidate = chess.Board(None)  # empty board\n",
    "\n",
    "        # drop every piece on a square that is still free, White first\n",
    "        for colour, material in ((chess.WHITE, white_material), (chess.BLACK, black_material)):\n",
    "            for letter in material:\n",
    "                kind = kinds[letter]\n",
    "                free_squares = [s for s in chess.SQUARES if candidate.piece_at(s) is None]\n",
    "                candidate.set_piece_at(random.choice(free_squares), chess.Piece(kind, colour))\n",
    "\n",
    "        # no castling rights and no en-passant square\n",
    "        candidate.castling_rights = 0\n",
    "        candidate.ep_square = None\n",
    "\n",
    "        # pick at random which side is to move\n",
    "        candidate.turn = random.choice([chess.WHITE, chess.BLACK])\n",
    "\n",
    "        if candidate.is_valid():\n",
    "            return candidate\n",
    "\n",
    "def add_tablebase_zobrist_to_redis():\n",
    "    \"\"\"\n",
    "    Fill Redis DB 3 with randomly generated Syzygy positions keyed by Zobrist hash.\n",
    "\n",
    "    Each position is stored as a Redis hash whose key is\n",
    "    str(chess.polyglot.zobrist_hash(position)) and whose fields are 'wdl',\n",
    "    'dtz' and 'pieces'. Only the configurations whose .rtbw/.rtbz files exist\n",
    "    in 'tablebases/' are used. Positions the tablebases cannot answer are\n",
    "    skipped; any other error (for example a Redis connection failure) is no\n",
    "    longer silently swallowed.\n",
    "    \"\"\"\n",
    "    # Connect to the Redis server\n",
    "    redis_server = redis.Redis(host='localhost', port=6379, db=3)\n",
    "\n",
    "    tablebases_path = \"tablebases/\"  # path to the Syzygy tablebases\n",
    "\n",
    "    available_tables = set()\n",
    "    for f in os.listdir(tablebases_path):\n",
    "        if f.endswith(\".rtbw\") or f.endswith(\".rtbz\"):\n",
    "            available_tables.add(f.split(\".\")[0])\n",
    "\n",
    "    print(\"Configs ok:\", available_tables)\n",
    "\n",
    "    # Configurations of all the tablebases used in the project\n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\",\"KBvK\",\"KNvK\"]\n",
    "    config_valide = [c for c in configs if c in available_tables]\n",
    "\n",
    "    # The context manager closes the table files even if something fails below\n",
    "    with chess.syzygy.open_tablebase(tablebases_path) as tablebases:\n",
    "        all_positions = []\n",
    "        for config in config_valide:\n",
    "            print(f\"Récupération des positions de la tablebase {config}\")\n",
    "            # add the generated positions to the global list\n",
    "            all_positions.extend(get_250_random_position(tablebases, config))\n",
    "\n",
    "        print(f\"Total positions récupérées: {len(all_positions)}\")\n",
    "\n",
    "        for position in all_positions:\n",
    "            try:\n",
    "                wdl = tablebases.probe_wdl(position)\n",
    "                dtz = tablebases.probe_dtz(position)\n",
    "            except (KeyError, chess.syzygy.MissingTableError):\n",
    "                # the tablebases have no answer for this position: skip it\n",
    "                continue\n",
    "\n",
    "            zobrist_key = str(chess.polyglot.zobrist_hash(position))\n",
    "            redis_server.hset(zobrist_key, mapping={\n",
    "                \"wdl\": wdl,\n",
    "                \"dtz\": dtz,\n",
    "                \"pieces\": len(position.piece_map())\n",
    "            })\n",
    "    print(\"Ajoute de toutes les positions hashées des tablebases dans Redis terminé!\")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    add_tablebase_zobrist_to_redis()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "973deef0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def statistical_tests(bench_fen, bench_zobrist):\n",
    "    \"\"\"\n",
    "    Compare FEN-keyed and Zobrist-keyed latency samples with two statistical tests.\n",
    "\n",
    "    bench_fen, bench_zobrist : benchmark result dicts; only their \"latency\"\n",
    "    entry (per-request timings) is read. The longer sample is cut down to the\n",
    "    length of the shorter one before testing.\n",
    "    Prints and returns a DataFrame with one row per test\n",
    "    (columns: test, statistic, p-value, interpretation).\n",
    "    Relies on the notebook-level imports of scipy.stats (stats) and pandas (pd).\n",
    "    \"\"\"\n",
    "    fen_lat, zobrist_lat = (\n",
    "        np.array(bench[\"latency\"], dtype=np.float64)\n",
    "        for bench in (bench_fen, bench_zobrist)\n",
    "    )\n",
    "\n",
    "    n_common = min(len(fen_lat), len(zobrist_lat))\n",
    "    fen_lat, zobrist_lat = fen_lat[:n_common], zobrist_lat[:n_common]\n",
    "\n",
    "    # Paired t-test: compares the mean latencies\n",
    "    paired_stat, paired_p = stats.ttest_rel(fen_lat, zobrist_lat)\n",
    "\n",
    "    # Two-sample Kolmogorov-Smirnov test: compares the distributions\n",
    "    dist_stat, dist_p = stats.ks_2samp(fen_lat, zobrist_lat)\n",
    "\n",
    "    columns = {}\n",
    "    columns[\"test\"] = [\"paired t-test\", \"KS-test\"]\n",
    "    columns[\"statistic\"] = [paired_stat, dist_stat]\n",
    "    columns[\"p-value\"] = [paired_p, dist_p]\n",
    "    columns[\"interpretation\"] = [\n",
    "        \"H0: mean latency equal (FEN vs Zobrist)\",\n",
    "        \"H0: same latency distribution (FEN vs Zobrist)\"\n",
    "    ]\n",
    "    df_stats = pd.DataFrame(columns)\n",
    "\n",
    "    print(df_stats)\n",
    "    return df_stats\n",
    "\n",
    "def _average_key_memory(client, sample_size=50):\n",
    "    \"\"\"Mean MEMORY USAGE in bytes over up to `sample_size` random keys; NaN when nothing could be sampled.\"\"\"\n",
    "    sizes = []\n",
    "    for _ in range(sample_size):\n",
    "        key = client.randomkey()\n",
    "        if not key:\n",
    "            continue  # no key returned (empty database)\n",
    "        size = client.memory_usage(key)\n",
    "        if size is not None:  # None would make np.mean raise a TypeError\n",
    "            sizes.append(size)\n",
    "    return float(np.mean(sizes)) if sizes else float(\"nan\")\n",
    "\n",
    "def run_experiment():\n",
    "    \"\"\"\n",
    "    Compare FEN keys (Redis DB 0) with Zobrist-hash keys (Redis DB 3).\n",
    "\n",
    "    Generates boards for every configuration, keeps those the tablebases can\n",
    "    probe, times the lookups with benchmark_redis and benchmark_redis_zobrist,\n",
    "    then prints an estimate of the memory used per key in each database.\n",
    "    Returns {\"bench_redis\": ..., \"bench_zobrist\": ...}.\n",
    "    \"\"\"\n",
    "    redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
    "    redis_zobrist = redis.Redis(host=\"localhost\", port=6379, db=3)\n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\"]\n",
    "\n",
    "    # The context manager releases the table files once probing is finished\n",
    "    with chess.syzygy.open_tablebase(\"tablebases/\") as tablebases:\n",
    "        all_positions = []\n",
    "        for config in configs:\n",
    "            all_positions += get_250_random_position(tablebases, config)\n",
    "\n",
    "        # keep only the boards the tablebases can actually probe\n",
    "        valid_positions = []\n",
    "        for board in all_positions:\n",
    "            try:\n",
    "                tablebases.probe_wdl(board)\n",
    "                valid_positions.append(board)\n",
    "            except chess.syzygy.MissingTableError:\n",
    "                continue\n",
    "\n",
    "    print(\"BENCHMARK REDIS (FEN)\")\n",
    "    bench_redis = benchmark_redis(redis_server, valid_positions)\n",
    "    print(bench_redis)\n",
    "    print(\"BENCHMARK REDIS (Zobrist)\")\n",
    "    bench_zobrist = benchmark_redis_zobrist(redis_zobrist, valid_positions)\n",
    "    print(bench_zobrist)\n",
    "    print(f\"Redis FEN latency array length: {len(bench_redis['latency'])}\")\n",
    "    print(f\"Redis Hash latency array length: {len(bench_zobrist['latency'])}\")\n",
    "    print(\"\\n\" + \"=\" * 60)\n",
    "    print(\"MEMORY USAGE BY KEY (ESTIMATION)\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # Sample 50 keys from each database\n",
    "    avg_fen = _average_key_memory(redis_server, 50)\n",
    "    avg_zobrist = _average_key_memory(redis_zobrist, 50)\n",
    "\n",
    "    # Both estimates use the key count of DB 0 so the two layouts are compared at equal size\n",
    "    nb_keys = redis_server.dbsize()\n",
    "    estimated_fen = avg_fen * nb_keys\n",
    "    estimated_zobrist = avg_zobrist * nb_keys\n",
    "\n",
    "    print(f\"Taille moyenne clé FEN:  {avg_fen:.2f} bytes\")\n",
    "    print(f\"Taille moyenne clé Zobrist: {avg_zobrist:.2f} bytes\")\n",
    "    print(f\"\\nEstimation pour {nb_keys} clés:\")\n",
    "    print(f\"  FEN:  {estimated_fen / (1024*1024):.2f} MB\")\n",
    "    print(f\"  Zobrist: {estimated_zobrist / (1024*1024):.2f} MB\")\n",
    "    print(f\"  Gain: {(1 - avg_zobrist/avg_fen) * 100:.1f}%\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    return {\n",
    "        \"bench_redis\": bench_redis,\n",
    "        \"bench_zobrist\": bench_zobrist\n",
    "    }\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    results = run_experiment()\n",
    "    statistical_tests(results[\"bench_redis\"], results[\"bench_zobrist\"])\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "feb0d3b9",
   "metadata": {},
   "source": [
    "# Test de l'hypothèse 4\n",
    "\n",
    "Les cellules suivantes nous permettent de faire le test concernant les fonctions de hachage.\n",
    "On doit donc créer le benchmark ainsi que peupler Redis avec les tables hashées."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6d03e08",
   "metadata": {},
   "outputs": [],
   "source": [
    "def benchmark_hash(redis_serveur, pos):\n",
    "    \"\"\"\n",
    "    Time one HGETALL per board, keyed by the first 16 hex digits of the MD5 of its FEN.\n",
    "\n",
    "    redis_serveur : client exposing hgetall(key).\n",
    "    pos           : sized collection of boards exposing fen().\n",
    "    Returns a dict with the request count, latency statistics in seconds\n",
    "    (mean, percentiles, std, min, max), the total wall time, the raw latency\n",
    "    array and the throughput in requests per second.\n",
    "    \"\"\"\n",
    "    samples = []\n",
    "    bench_start = time.perf_counter()\n",
    "\n",
    "    for board in pos:\n",
    "        # the key is derived outside the timed section\n",
    "        digest = hashlib.md5(board.fen().encode()).hexdigest()\n",
    "        hashed_key = digest[:16]\n",
    "\n",
    "        tick = time.perf_counter()\n",
    "        redis_serveur.hgetall(hashed_key)\n",
    "        samples.append(time.perf_counter() - tick)\n",
    "\n",
    "    elapsed = time.perf_counter() - bench_start\n",
    "    latency = np.array(samples)\n",
    "\n",
    "    summary = {\"count\": len(pos), \"average_latency\": latency.mean()}\n",
    "    for q in (50, 90, 95, 99):\n",
    "        summary[f\"p{q}\"] = np.percentile(latency, q)\n",
    "    summary[\"std\"] = latency.std()\n",
    "    summary[\"min\"] = latency.min()\n",
    "    summary[\"max\"] = latency.max()\n",
    "    summary[\"total_time\"] = elapsed\n",
    "    summary[\"latency\"] = latency\n",
    "    summary[\"rps\"] = len(latency) / elapsed\n",
    "    return summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46157e5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_250_random_position(tablebases, config):\n",
    "    \"\"\"\n",
    "    Collect up to 250 random boards of `config` that the tablebases can probe.\n",
    "\n",
    "    A board is kept only when tablebases.probe_wdl accepts it; boards raising\n",
    "    MissingTableError are discarded. Generation stops after 5000 attempts even\n",
    "    if fewer than 250 boards were found.\n",
    "    \"\"\"\n",
    "    kept = []\n",
    "\n",
    "    for _ in range(5000):\n",
    "        if len(kept) >= 250:\n",
    "            break\n",
    "        candidate = generate_board_from_config(config)\n",
    "        try:\n",
    "            tablebases.probe_wdl(candidate)\n",
    "        except chess.syzygy.MissingTableError:\n",
    "            continue\n",
    "        kept.append(candidate)\n",
    "\n",
    "    return kept\n",
    "\n",
    "def add_tablebase_to_redis():\n",
    "    \"\"\"\n",
    "    Fill Redis DB 1 with randomly generated Syzygy positions keyed by a hash of the FEN.\n",
    "\n",
    "    Each position is stored as a Redis hash whose key is the first 16 hex\n",
    "    digits of the MD5 of its FEN and whose fields are 'wdl' and 'dtz'. Only the\n",
    "    configurations whose .rtbw/.rtbz files exist in 'tablebases/' are used.\n",
    "\n",
    "    The unused hashlib.new('sha512_256') object was removed: it played no part\n",
    "    in the keys and raises ValueError where that digest is unavailable.\n",
    "    \"\"\"\n",
    "    # Connect to the Redis server (DB 1 is reserved for hypothesis 4)\n",
    "    redis_server = redis.Redis(host='localhost', port=6379, db=1)\n",
    "\n",
    "    tablebases_path = \"tablebases/\"  # path to the Syzygy tablebases\n",
    "\n",
    "    available_tables = set()\n",
    "    for f in os.listdir(tablebases_path):\n",
    "        if f.endswith(\".rtbw\") or f.endswith(\".rtbz\"):\n",
    "            available_tables.add(f.split(\".\")[0])\n",
    "\n",
    "    print(\"Configs ok:\", available_tables)\n",
    "\n",
    "    # Configurations of all the tablebases used in the project\n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\"]\n",
    "    config_valide = [c for c in configs if c in available_tables]\n",
    "\n",
    "    # The context manager closes the table files even if something fails below\n",
    "    with chess.syzygy.open_tablebase(tablebases_path) as tablebases:\n",
    "        all_positions = []\n",
    "        for config in config_valide:\n",
    "            print(f\"Récupération des positions de la tablebase {config}\")\n",
    "            # add the generated positions to the global list\n",
    "            all_positions.extend(get_250_random_position(tablebases, config))\n",
    "\n",
    "        print(f\"Total positions récupérées: {len(all_positions)}\")\n",
    "        for position in all_positions:\n",
    "            try:\n",
    "                fen = position.fen()\n",
    "                key = hashlib.md5(fen.encode()).hexdigest()[:16]  # keep 16 hex characters\n",
    "                wdl = tablebases.probe_wdl(position)\n",
    "                dtz = tablebases.probe_dtz(position)\n",
    "                redis_server.hset(key, mapping={\"wdl\": wdl, \"dtz\": dtz})\n",
    "            except (KeyError, chess.syzygy.MissingTableError):\n",
    "                # skip positions that do not belong to the project's tables\n",
    "                continue\n",
    "\n",
    "    print(\"Ajoute de toutes les positions  hashés des tablebases dans Redis terminé!\")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    add_tablebase_to_redis()\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "099ce824",
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_experiment():\n",
    "    redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
    "    redis_hash = redis.Redis(host=\"localhost\", port=6379, db=1)\n",
    "    tablebases = chess.syzygy.open_tablebase(\"tablebases/\")\n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\"]\n",
    "    all_positions = []\n",
    "    for config in configs:\n",
    "        all_positions += get_250_random_position(tablebases, config)\n",
    "    valid_positions = []\n",
    "    for board in all_positions:\n",
    "        try:\n",
    "            tablebases.probe_wdl(board)\n",
    "            valid_positions.append(board)\n",
    "        except chess.syzygy.MissingTableError:\n",
    "            continue\n",
    "    print(\"BENCHMARK REDIS (FEN)\")\n",
    "    bench_redis = benchmark_redis(redis_server, valid_positions)\n",
    "    print(bench_redis)\n",
    "    print(\"BENCHMARK REDIS (HASH)\")\n",
    "    bench_hash = benchmark_hash(redis_hash, valid_positions)\n",
    "    print(bench_hash)\n",
    "    print(f\"Redis FEN latency array length: {len(bench_redis['latency'])}\")\n",
    "    print(f\"Redis Hash latency array length: {len(bench_hash['latency'])}\")\n",
    "    print(\"\\n\" + \"=\" * 60)\n",
    "    print(\"MEMORY USAGE BY KEY (ESTIMATION)\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # Échantillonner 50 clés de chaque base\n",
    "    sample_fen = [redis_server.randomkey() for _ in range(50)]\n",
    "    mem_per_key_fen = [redis_server.memory_usage(k) for k in sample_fen if k]\n",
    "    avg_fen = np.mean(mem_per_key_fen)\n",
    "\n",
    "    sample_hash = [redis_hash.randomkey() for _ in range(50)]\n",
    "    mem_per_key_hash = [redis_hash.memory_usage(k) for k in sample_hash if k]\n",
    "    avg_hash = np.mean(mem_per_key_hash)\n",
    "\n",
    "    nb_keys = redis_server.dbsize()\n",
    "    estimated_fen = avg_fen * nb_keys\n",
    "    estimated_hash = avg_hash * nb_keys\n",
    "\n",
    "    print(f\"Taille moyenne clé FEN:  {avg_fen:.2f} bytes\")\n",
    "    print(f\"Taille moyenne clé Hash: {avg_hash:.2f} bytes\")\n",
    "    print(f\"\\nEstimation pour {nb_keys} clés:\")\n",
    "    print(f\"  FEN:  {estimated_fen / (1024*1024):.2f} MB\")\n",
    "    print(f\"  Hash: {estimated_hash / (1024*1024):.2f} MB\")\n",
    "    print(f\"  Gain: {(1 - avg_hash/avg_fen) * 100:.1f}%\")\n",
    "    print(\"=\" * 60)\n",
    "    \n",
    "    return {\n",
    "        \"bench_redis\": bench_redis,\n",
    "        \"bench_hash\": bench_hash\n",
    "    }\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    results = run_experiment()\n",
    "    statistical_tests(results[\"bench_redis\"], results[\"bench_hash\"])\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}