Fix : ajout du test mémoire Hash

2025-12-06 16:29:46 +01:00
parent 72140b4bd7
commit 82cad63790
1 changed files with 168 additions and 114 deletions
--- a/Main.ipynb
+++ b/Main.ipynb
@@ -65,7 +65,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Configs ok: {'KRBvKP', 'KRvKB', 'KRvK', 'KBNvK', 'KBvK', 'KRPvKP', 'KPvK', 'KNvK', 'KQvK', 'KQvKP', 'KPvKP'}\n"
+      "Configs ok: {'KRvK', 'KQvKP', 'KRvKB', 'KNvK', 'KRBvKP', 'KBvK', 'KBNvK', 'KPvK', 'KPvKP', 'KRPvKP', 'KQvK'}\n"
     ]
    },
    {
@@ -205,7 +205,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "id": "8d49b928-7522-4727-b6d8-224e2bd3a1b7",
   "metadata": {
    "ExecuteTime": {
@@ -276,7 +276,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "id": "0d955b0d-dc80-4518-954b-7c2df295adca",
   "metadata": {
    "ExecuteTime": {
@@ -292,10 +292,10 @@
      "BENCHMARK SYZYGY\n",
      "BENCHMARK REDIS\n",
      "Redis latency array length: 55000\n",
-      "Syzygy latency array length: 50127\n",
+      "Syzygy latency array length: 50215\n",
      "            test  statistic  p-value  \\\n",
-      "0  paired t-test  85.042506      0.0   \n",
-      "1        KS-test   0.546193      0.0   \n",
+      "0  paired t-test  93.454797      0.0   \n",
+      "1        KS-test   0.690232      0.0   \n",
      "\n",
      "                                    interpretation  \n",
      "0         H0: mean latency equal (Redis vs Syzygy)  \n",
@@ -376,7 +376,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "id": "a68060e1-0052-4446-a0d3-40bc1756f210",
   "metadata": {},
   "outputs": [
@@ -395,24 +395,24 @@
    {
     "data": {
      "text/plain": [
-       "{'3pieces': {'mean_ms': np.float64(0.4589625600419822),\n",
-       "  'median_ms': np.float64(0.40479199969922774),\n",
-       "  'min_ms': np.float64(0.20995799968659412),\n",
-       "  'max_ms': np.float64(4.594801999701303),\n",
-       "  'std_ms': np.float64(0.42983928490153817)},\n",
-       " '4pieces': {'mean_ms': np.float64(0.36221424002178537),\n",
-       "  'median_ms': np.float64(0.33665850014585885),\n",
-       "  'min_ms': np.float64(0.2021390000663814),\n",
-       "  'max_ms': np.float64(1.0131979997822782),\n",
-       "  'std_ms': np.float64(0.12193858759433863)},\n",
-       " '5pieces': {'mean_ms': np.float64(0.3863061399442813),\n",
-       "  'median_ms': np.float64(0.3765404999285238),\n",
-       "  'min_ms': np.float64(0.20748299994011177),\n",
-       "  'max_ms': np.float64(0.6716950001646182),\n",
-       "  'std_ms': np.float64(0.09189286718199571)}}"
+       "{'3pieces': {'mean_ms': np.float64(0.4743400499864947),\n",
+       "  'median_ms': np.float64(0.4443719999471796),\n",
+       "  'min_ms': np.float64(0.30335500014189165),\n",
+       "  'max_ms': np.float64(3.0856469993523206),\n",
+       "  'std_ms': np.float64(0.2824736354026385)},\n",
+       " '4pieces': {'mean_ms': np.float64(0.45132315999580896),\n",
+       "  'median_ms': np.float64(0.4520549996414047),\n",
+       "  'min_ms': np.float64(0.2539389997764374),\n",
+       "  'max_ms': np.float64(0.838474999909522),\n",
+       "  'std_ms': np.float64(0.13465422472708688)},\n",
+       " '5pieces': {'mean_ms': np.float64(0.5181703400103288),\n",
+       "  'median_ms': np.float64(0.4918450003970065),\n",
+       "  'min_ms': np.float64(0.3080989999943995),\n",
+       "  'max_ms': np.float64(1.0084479999932228),\n",
+       "  'std_ms': np.float64(0.11421552341082152)}}"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -475,10 +475,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "id": "a68060e1-0052-4446-a0d3-40bc1756f210",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MEMORY USAGE REDIS\n",
+      "Memory Redis: 9.43M\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'\\n#Test Mémoire Syzygy\\nimport psutil\\n\\ntablebases = chess.syzygy.open_tablebase(\"tablebases/\")\\n\\n#Récuperer les données de proccesus de Syzygy\\nprocess = psutil.Process(os.getpid())\\nmem_bytes = process.memory_info().rss\\nmem_mb = mem_bytes / (1024**2)\\nprint(\"MEMORY USAGE SYZYGY\")\\nprint(f\"Memory Syzygy: {mem_mb}\")\\n'"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "#Test Mémoire Redis ( Stockage du Redis )\n",
    "redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
@@ -504,16 +523,73 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "id": "46157e5c",
+   "execution_count": 10,
+   "id": "d6d03e08",
   "metadata": {},
   "outputs": [],
+   "source": [
+    "def benchmark_hash(redis_serveur, pos):\n",
+    "    latency = []\n",
+    "    start_time_all = time.perf_counter()\n",
+    "    \n",
+    "    for board in pos:\n",
+    "        fen = board.fen()\n",
+    "        key = hashlib.md5(fen.encode()).hexdigest()[:16]\n",
+    "        \n",
+    "        start_individual = time.perf_counter()\n",
+    "        results = redis_serveur.hgetall(key)\n",
+    "        latency.append(time.perf_counter() - start_individual)\n",
+    "    \n",
+    "    total_time_all = time.perf_counter() - start_time_all\n",
+    "    latency = np.array(latency)\n",
+    "    \n",
+    "    return {\n",
+    "        \"count\": len(pos),\n",
+    "        \"average_latency\": latency.mean(),\n",
+    "        \"p50\": np.percentile(latency, 50),\n",
+    "        \"p90\": np.percentile(latency, 90),\n",
+    "        \"p95\": np.percentile(latency, 95),\n",
+    "        \"p99\": np.percentile(latency, 99),\n",
+    "        \"std\": latency.std(),\n",
+    "        \"min\": latency.min(),\n",
+    "        \"max\": latency.max(),\n",
+    "        \"total_time\": total_time_all,\n",
+    "        \"latency\": latency,\n",
+    "        \"rps\": len(latency) / total_time_all,\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "46157e5c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Configs ok: {'KRvK', 'KQvKP', 'KRvKB', 'KNvK', 'KRBvKP', 'KBvK', 'KBNvK', 'KPvK', 'KPvKP', 'KRPvKP', 'KQvK'}\n",
+      "Récupération des positions de la tablebase KBNvK\n",
+      "Récupération des positions de la tablebase KPvK\n",
+      "Récupération des positions de la tablebase KPvKP\n",
+      "Récupération des positions de la tablebase KQvK\n",
+      "Récupération des positions de la tablebase KQvKP\n",
+      "Récupération des positions de la tablebase KRBvKP\n",
+      "Récupération des positions de la tablebase KRPvKP\n",
+      "Récupération des positions de la tablebase KRvK\n",
+      "Récupération des positions de la tablebase KRvKB\n",
+      "Total positions récupérées: 2250\n",
+      "Ajoute de toutes les positions  hashés des tablebases dans Redis terminé!\n"
+     ]
+    }
+   ],
   "source": [
    "def get_250_random_position(tablebases, config):\n",
    "    positions = []\n",
    "    tries = 0\n",
    "\n",
-    "    while len(positions) < 250 and tries < 5000:\n",
+    "    while len(positions) < 250 and tries < 500:\n",
    "        board = generate_board_from_config(config)\n",
    "        try:\n",
    "            tablebases.probe_wdl(board)  # thử probe WDL\n",
@@ -568,85 +644,53 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
   "id": "099ce824",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BENCHMARK REDIS (FEN)\n",
+      "{'count': 2250, 'average_latency': np.float64(0.0007091928058200412), 'p50': np.float64(0.0006681680679321289), 'p90': np.float64(0.0009292125701904298), 'p95': np.float64(0.00102473497390747), 'p99': np.float64(0.0013124537467956525), 'std': np.float64(0.00022609616592649846), 'min': np.float64(0.0004031658172607422), 'max': np.float64(0.004224300384521484), 'total_time': 2.1053922176361084, 'latency': array([0.00342536, 0.00068593, 0.00071573, ..., 0.00083399, 0.00082922,\n",
+      "       0.00101638], shape=(2250,)), 'rps': 1068.6844860318968}\n",
+      "BENCHMARK REDIS (HASH)\n",
+      "{'count': 2250, 'average_latency': np.float64(0.0004489188648941409), 'p50': np.float64(0.0004307850003897329), 'p90': np.float64(0.0005560255000091274), 'p95': np.float64(0.0006055185504919784), 'p99': np.float64(0.000822205319836939), 'std': np.float64(0.00018291225647658574), 'min': np.float64(0.00024306299928866792), 'max': np.float64(0.003079133999563055), 'total_time': 1.5033779259993025, 'latency': array([0.00307913, 0.00059742, 0.00061489, ..., 0.00038755, 0.00035628,\n",
+      "       0.00050254], shape=(2250,)), 'rps': 1496.6296638314775}\n",
+      "Redis FEN latency array length: 2250\n",
+      "Redis Hash latency array length: 2250\n",
+      "\n",
+      "============================================================\n",
+      "MEMORY USAGE BY KEY (ESTIMATION)\n",
+      "============================================================\n",
+      "Taille moyenne clé FEN:  136.46 bytes\n",
+      "Taille moyenne clé Hash: 97.84 bytes\n",
+      "\n",
+      "Estimation pour 66009 clés:\n",
+      "  FEN:  8.59 MB\n",
+      "  Hash: 6.16 MB\n",
+      "  Gain: 28.3%\n",
+      "============================================================\n",
+      "            test  statistic        p-value  \\\n",
+      "0  paired t-test -44.041712  4.787276e-306   \n",
+      "1        KS-test   0.691556   0.000000e+00   \n",
+      "\n",
+      "                                    interpretation  \n",
+      "0         H0: mean latency equal (Redis vs Syzygy)  \n",
+      "1  H0: same latency distribution (Redis vs Syzygy)  \n"
+     ]
+    }
+   ],
   "source": [
-    "def benchmark_hash(redis_serveur, pos):\n",
-    "    latency = []\n",
-    "    start_time_all = time.time()\n",
-    "\n",
-    "    for board in pos:\n",
-    "        fen = board.fen()\n",
-    "        key = hashlib.md5(fen.encode()).hexdigest()[:16] #Limite à 16 caractères\n",
-    "        start_individual = time.time()\n",
-    "        results = redis_serveur.hgetall(key)\n",
-    "        latency.append(time.time() - start_individual)\n",
-    "\n",
-    "    total_time_all=time.time()-start_time_all\n",
-    "    latency= np.array(latency)\n",
-    "\n",
-    "    return{\n",
-    "        \"count\": len(pos),\n",
-    "        \"average_latency\": latency.mean(),\n",
-    "        \"p50\": np.percentile(latency,50),\n",
-    "        \"p90\": np.percentile(latency,90),\n",
-    "        \"p95\": np.percentile(latency,95),\n",
-    "        \"p99\": np.percentile(latency,99),\n",
-    "        \"std\": latency.std(),\n",
-    "        \"min\": latency.min(),\n",
-    "        \"max\": latency.max(),\n",
-    "        \"total_time\": total_time_all,\n",
-    "        \"latency\": latency,\n",
-    "        \"rps\": len(latency)/total_time_all,\n",
-    "    }"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "646babb4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def statistical_tests(bench_redis, bench_hash):\n",
-    "    redis_lat = np.array(bench_redis[\"latency\"], dtype=np.float64)\n",
-    "    hash_lat = np.array(bench_hash[\"latency\"], dtype=np.float64)\n",
-    "    min_len = min(len(redis_lat), len(hash_lat))\n",
-    "    redis_lat = redis_lat[:min_len]\n",
-    "    hash_lat = hash_lat[:min_len]\n",
-    "    \n",
-    "    # PAIRED T-Test (Comparaison median)\n",
-    "    t_stat, t_p = stats.ttest_rel(hash_lat, redis_lat)\n",
-    "    \n",
-    "    # KS Test (Comparaison distribution)\n",
-    "    ks_stat, ks_p = stats.ks_2samp(hash_lat, redis_lat)\n",
-    "    \n",
-    "    df_stats = pd.DataFrame({\n",
-    "        \"test\": [\"paired t-test\", \"KS-test\"],\n",
-    "        \"statistic\": [t_stat, ks_stat],\n",
-    "        \"p-value\": [t_p, ks_p],\n",
-    "        \"interpretation\": [\n",
-    "            \"H0: mean latency equal (Redis vs Hash)\",\n",
-    "            \"H0: same latency distribution (Redis vs Hash)\"\n",
-    "        ]\n",
-    "    })\n",
-    "    print(df_stats)\n",
-    "    return df_stats\n",
-    "\n",
    "def run_experiment():\n",
    "    redis_server = redis.Redis(host=\"localhost\", port=6379, db=0)\n",
    "    redis_hash = redis.Redis(host=\"localhost\", port=6379, db=1)\n",
    "    tablebases = chess.syzygy.open_tablebase(\"tablebases/\")\n",
-    "    \n",
    "    configs = [\"KBNvK\",\"KPvK\",\"KPvKP\",\"KQvK\",\"KQvKP\",\"KRBvKP\",\"KRPvKP\",\"KRvK\",\"KRvKB\"]\n",
-    "    \n",
    "    all_positions = []\n",
    "    for config in configs:\n",
    "        all_positions += get_250_random_position(tablebases, config)\n",
-    "    \n",
-    "    # Filter to have 2 paired test have the same N\n",
    "    valid_positions = []\n",
    "    for board in all_positions:\n",
    "        try:\n",
@@ -654,37 +698,47 @@
    "            valid_positions.append(board)\n",
    "        except chess.syzygy.MissingTableError:\n",
    "            continue\n",
-    "    \n",
    "    print(\"BENCHMARK REDIS (FEN)\")\n",
-    "    bench_redis = benchmark_redis(redis_server, valid_positions)  # ← db=0 (FEN)\n",
+    "    bench_redis = benchmark_redis(redis_server, valid_positions)\n",
    "    print(bench_redis)\n",
-    "    \n",
    "    print(\"BENCHMARK REDIS (HASH)\")\n",
-    "    bench_hash = benchmark_hash(redis_hash, valid_positions)  # ← db=1 (Hash), fonction différente\n",
+    "    bench_hash = benchmark_hash(redis_hash, valid_positions)\n",
    "    print(bench_hash)\n",
-    "    \n",
-    "    # Afficher les longueurs APRÈS avoir créé les benchmarks\n",
    "    print(f\"Redis FEN latency array length: {len(bench_redis['latency'])}\")\n",
    "    print(f\"Redis Hash latency array length: {len(bench_hash['latency'])}\")\n",
-    "    \n",
-    "    print(\"MEMORY USAGE REDIS (FEN - db=0)\")\n",
-    "    mem_fen_bytes, mem_fen_human = get_redis_memory(redis_server)\n",
-    "    print(f\"Memory: {mem_fen_human} ({mem_fen_bytes} bytes)\")\n",
-    "    \n",
-    "    print(\"MEMORY USAGE REDIS (HASH - db=1)\")\n",
-    "    mem_hash_bytes, mem_hash_human = get_redis_memory(redis_hash)\n",
-    "    print(f\"Memory: {mem_hash_human} ({mem_hash_bytes} bytes)\")\n",
+    "    print(\"\\n\" + \"=\" * 60)\n",
+    "    print(\"MEMORY USAGE BY KEY (ESTIMATION)\")\n",
+    "    print(\"=\" * 60)\n",
+    "\n",
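+    "    # Sample up to 50 random keys per database: RANDOMKEY may return the same key twice, and falsy results (e.g. None on an empty db) are filtered out below\n",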
+    "    sample_fen = [redis_server.randomkey() for _ in range(50)]\n",
+    "    mem_per_key_fen = [redis_server.memory_usage(k) for k in sample_fen if k]\n",
+    "    avg_fen = np.mean(mem_per_key_fen)\n",
+    "\n",
+    "    sample_hash = [redis_hash.randomkey() for _ in range(50)]\n",
+    "    mem_per_key_hash = [redis_hash.memory_usage(k) for k in sample_hash if k]\n",
+    "    avg_hash = np.mean(mem_per_key_hash)\n",
+    "\n",
+    "    nb_keys = redis_server.dbsize()\n",
+    "    estimated_fen = avg_fen * nb_keys\n",
+    "    estimated_hash = avg_hash * nb_keys\n",
+    "\n",
+    "    print(f\"Taille moyenne clé FEN:  {avg_fen:.2f} bytes\")\n",
+    "    print(f\"Taille moyenne clé Hash: {avg_hash:.2f} bytes\")\n",
+    "    print(f\"\\nEstimation pour {nb_keys} clés:\")\n",
+    "    print(f\"  FEN:  {estimated_fen / (1024*1024):.2f} MB\")\n",
+    "    print(f\"  Hash: {estimated_hash / (1024*1024):.2f} MB\")\n",
+    "    print(f\"  Gain: {(1 - avg_hash/avg_fen) * 100:.1f}%\")\n",
+    "    print(\"=\" * 60)\n",
    "    \n",
    "    return {\n",
    "        \"bench_redis\": bench_redis,\n",
-    "        \"bench_hash\": bench_hash,\n",
-    "        \"memory_redis\": mem_fen_bytes,\n",
-    "        \"memory_hash\": mem_hash_bytes\n",
+    "        \"bench_hash\": bench_hash\n",
    "    }\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    results = run_experiment()\n",
-    "    statistical_tests(results[\"bench_redis\"], results[\"bench_hash\"])"
+    "    statistical_tests(results[\"bench_redis\"], results[\"bench_hash\"])\n"
   ]
  }
 ],