From d1b738ed1345ee63c8fedb42ec48542ded5bb197 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 13:31:32 +0000 Subject: [PATCH 01/41] Add QRS-Tune PUCT hyperparameter tuning and match statistics Introduce tune-params subcommand for sequential optimization of KataGo PUCT parameters (cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior) using QRS-Tune, a quadratic response surface optimizer with logistic regression and confidence-based pruning. Add match statistics output with Bradley-Terry Elo ratings, Wilson confidence intervals, and pairwise win/loss/draw summaries. New files: - cpp/qrstune/QRSOptimizer.h: header-only QRS-Tune optimizer library - cpp/command/tuneparams.cpp: tune-params subcommand implementation Modified files: - cpp/CMakeLists.txt: add tuneparams.cpp to build - cpp/main.h, cpp/main.cpp: register tune-params subcommand - cpp/command/match.cpp: add Elo/CI/p-value statistics after matches https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- cpp/CMakeLists.txt | 1 + cpp/command/match.cpp | 189 +++++++++++++++++++- cpp/command/tuneparams.cpp | 353 +++++++++++++++++++++++++++++++++++++ cpp/main.cpp | 3 + cpp/main.h | 1 + cpp/qrstune/QRSOptimizer.h | 346 ++++++++++++++++++++++++++++++++++++ 6 files changed, 892 insertions(+), 1 deletion(-) create mode 100644 cpp/command/tuneparams.cpp create mode 100644 cpp/qrstune/QRSOptimizer.h diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8db79ca73..b71adc444 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -318,6 +318,7 @@ add_executable(katago command/gputest.cpp command/gtp.cpp command/match.cpp + command/tuneparams.cpp command/misc.cpp command/runtests.cpp command/sandbox.cpp diff --git a/cpp/command/match.cpp b/cpp/command/match.cpp index 721c3ef20..bdd7aa818 100644 --- a/cpp/command/match.cpp +++ b/cpp/command/match.cpp @@ -11,6 +11,8 @@ #include "../command/commandline.h" #include "../main.h" +#include +#include #include using namespace std; @@ -26,6 +28,124 @@ 
static void signalHandler(int signal) } } +// ===== Match statistics helpers ===== + +// Wilson score 95% two-tailed confidence interval (draws counted as 0.5 wins) +static void wilsonCI95(double wins, double n, double& lo, double& hi) { + const double z = 1.96; + double p = wins / n; + double denom = 1.0 + z*z/n; + double center = (p + z*z/(2*n)) / denom; + double margin = z * sqrt(p*(1-p)/n + z*z/(4*n*n)) / denom; + lo = center - margin; + hi = center + margin; +} + +// One-tailed p-value: P(experiment winrate <= 0.5 | data), using normal approximation +static double oneTailedPValue(double wins, double n) { + if(n <= 0) return 0.5; + double z = (wins - 0.5*n) / (0.5*sqrt(n)); + return 0.5 * erfc(z / sqrt(2.0)); +} + +// Bradley-Terry MLE Elo (global, all-bot ranking) +// pairStats: {nameA,nameB} -> {winsA, winsB, draws} nameA < nameB lexicographically +static void computeBradleyTerryElo( + const vector& botNames, + const map, array>& pairStats, + vector& outElo, + vector& outStderr +) { + int N = (int)botNames.size(); + const double ELO_PER_STRENGTH = 400.0 * log10(exp(1.0)); // ~173.7 + + map nameIdx; + for(int i = 0; i < N; i++) nameIdx[botNames[i]] = i; + + // w[i][j] = effective wins of i vs j (draws count 0.5) + vector> w(N, vector(N, 0.0)); + for(auto& kv : pairStats) { + auto itA = nameIdx.find(kv.first.first); + auto itB = nameIdx.find(kv.first.second); + if(itA == nameIdx.end() || itB == nameIdx.end()) continue; + int a = itA->second, b = itB->second; + w[a][b] += kv.second[0] + 0.5 * kv.second[2]; + w[b][a] += kv.second[1] + 0.5 * kv.second[2]; + } + + // theta[0] = 0 (reference, first bot), optimize theta[1..N-1] + vector theta(N, 0.0); + int M = N - 1; + + if(M > 0) { + for(int iter = 0; iter < 200; iter++) { + vector grad(M, 0.0); + vector> H(M, vector(M, 0.0)); + for(int i = 0; i < N; i++) { + for(int j = i+1; j < N; j++) { + double nij = w[i][j] + w[j][i]; + if(nij <= 0.0) continue; + double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); + double 
fish = nij * sigma * (1.0 - sigma); + double gij = w[i][j] - nij * sigma; + if(i > 0) { grad[i-1] += gij; H[i-1][i-1] -= fish; } + if(j > 0) { grad[j-1] -= gij; H[j-1][j-1] -= fish; } + if(i > 0 && j > 0) { H[i-1][j-1] += fish; H[j-1][i-1] += fish; } + } + } + // Solve H*delta = -grad via Gaussian elimination + vector> aug(M, vector(M+1, 0.0)); + for(int r = 0; r < M; r++) { + for(int c = 0; c < M; c++) aug[r][c] = H[r][c]; + aug[r][M] = -grad[r]; + } + for(int col = 0; col < M; col++) { + int piv = col; + for(int r = col+1; r < M; r++) + if(fabs(aug[r][col]) > fabs(aug[piv][col])) piv = r; + swap(aug[col], aug[piv]); + if(fabs(aug[col][col]) < 1e-12) continue; + double inv = 1.0 / aug[col][col]; + for(int r = col+1; r < M; r++) { + double f = aug[r][col] * inv; + for(int c = col; c <= M; c++) aug[r][c] -= f * aug[col][c]; + } + } + vector delta(M, 0.0); + for(int r = M-1; r >= 0; r--) { + double s = aug[r][M]; + for(int c = r+1; c < M; c++) s -= aug[r][c] * delta[c]; + if(fabs(aug[r][r]) > 1e-12) delta[r] = s / aug[r][r]; + } + double maxDelta = 0.0; + for(int r = 0; r < M; r++) { + theta[r+1] += delta[r]; + maxDelta = max(maxDelta, fabs(delta[r])); + } + if(maxDelta < 1e-6) break; + } + } + + // Convert log-strength to Elo relative to bot 0 + outElo.resize(N); + outStderr.resize(N, 0.0); + for(int i = 0; i < N; i++) + outElo[i] = (theta[i] - theta[0]) * ELO_PER_STRENGTH; + + // Fisher information diagonal -> stderr + for(int i = 1; i < N; i++) { + double fish = 0.0; + for(int j = 0; j < N; j++) { + if(j == i) continue; + double nij = w[i][j] + w[j][i]; + if(nij <= 0.0) continue; + double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); + fish += nij * sigma * (1.0 - sigma); + } + if(fish > 0.0) outStderr[i] = ELO_PER_STRENGTH / sqrt(fish); + } +} + int MainCmds::match(const vector& args) { Board::initHash(); ScoreValue::initTables(); @@ -250,10 +370,13 @@ int MainCmds::match(const vector& args) { int64_t gameCount = 0; std::map timeUsedByBotMap; std::map 
movesByBotMap; + map, array> pairStats; + // key: {nameA, nameB} with nameA < nameB lexicographically + // value: {winsA, winsB, draws} auto runMatchLoop = [ &gameRunner,&matchPairer,&sgfOutputDir,&logger,&gameSeedBase,&patternBonusTables, - &statsMutex, &gameCount, &timeUsedByBotMap, &movesByBotMap + &statsMutex, &gameCount, &timeUsedByBotMap, &movesByBotMap, &pairStats ]( uint64_t threadHash ) { @@ -303,6 +426,20 @@ int MainCmds::match(const vector& args) { movesByBotMap[gameData->bName] += (double)gameData->bMoveCount; movesByBotMap[gameData->wName] += (double)gameData->wMoveCount; + // Update pairwise W/L/D stats + { + const string& bName = gameData->bName; + const string& wName = gameData->wName; + Player winner = gameData->endHist.winner; + bool aIsBlack = (bName < wName); + const string& nameA = aIsBlack ? bName : wName; + const string& nameB = aIsBlack ? wName : bName; + auto& ps = pairStats[{nameA, nameB}]; + if(winner == P_BLACK) { if(aIsBlack) ps[0]++; else ps[1]++; } + else if(winner == P_WHITE) { if(aIsBlack) ps[1]++; else ps[0]++; } + else { ps[2]++; } + } + int64_t x = gameCount; while(x % 2 == 0 && x > 1) x /= 2; if(x == 1 || x == 3 || x == 5) { @@ -344,6 +481,56 @@ int MainCmds::match(const vector& args) { for(int i = 0; i activeBots; + { + set seen; + for(auto& kv : pairStats) { + seen.insert(kv.first.first); + seen.insert(kv.first.second); + } + activeBots.assign(seen.begin(), seen.end()); + } + + vector elo, eloStderr; + computeBradleyTerryElo(activeBots, pairStats, elo, eloStderr); + + logger.write(""); + logger.write("=== match Results ==="); + logger.write("Global Elo (Bradley-Terry MLE, reference=" + activeBots[0] + "):"); + for(int i = 0; i < (int)activeBots.size(); i++) { + string sign = (elo[i] >= 0) ? 
"+" : ""; + string line = " " + activeBots[i] + " : " + + sign + Global::strprintf("%.1f", elo[i]) + " +/- " + Global::strprintf("%.1f", eloStderr[i]); + if(i == 0) line += " (reference)"; + logger.write(line); + } + logger.write(""); + logger.write("Pairwise summary:"); + for(auto& kv : pairStats) { + int64_t wA = kv.second[0], wB = kv.second[1], d = kv.second[2]; + int64_t total = wA + wB + d; + if(total == 0) continue; + double wins = wA + 0.5 * d; + double lo, hi; + wilsonCI95(wins, (double)total, lo, hi); + double pval = oneTailedPValue(wins, (double)total); + string sig = (pval < 0.05) ? " *" : ""; + logger.write( + " " + kv.first.first + " vs " + kv.first.second + + " : Games=" + Global::int64ToString(total) + + " W=" + Global::int64ToString(wA) + + " L=" + Global::int64ToString(wB) + + " D=" + Global::int64ToString(d) + + " | " + kv.first.first + " winrate=" + Global::strprintf("%.3f", wins/total) + + " [95% CI: " + Global::strprintf("%.3f", lo) + ", " + Global::strprintf("%.3f", hi) + "]" + + " | p=" + Global::strprintf("%.4f", pval) + sig + ); + } + logger.write(""); + } + delete matchPairer; delete gameRunner; diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp new file mode 100644 index 000000000..8d97470c5 --- /dev/null +++ b/cpp/command/tuneparams.cpp @@ -0,0 +1,353 @@ +// command/tuneparams.cpp +// KataGo hyperparameter tuning via QRS-Tune sequential optimization. +// Runs two bots (base reference vs experiment) for numTrials games, +// adapting experiment bot's PUCT parameters toward higher win rates. 
+ +#include "../core/global.h" +#include "../core/config_parser.h" +#include "../core/logger.h" +#include "../core/rand.h" +#include "../search/searchparams.h" +#include "../program/setup.h" +#include "../program/play.h" +#include "../program/playsettings.h" +#include "../command/commandline.h" +#include "../main.h" + +#include "../qrstune/QRSOptimizer.h" + +#include +#include + +using namespace std; + +// Number of dimensions = number of PUCT params being tuned +static const int NDIMS = 3; + +static const char* PARAM_NAMES[NDIMS] = { + "cpuctExploration", + "cpuctExplorationLog", + "cpuctUtilityStdevPrior" +}; + +// Default search ranges (used when config keys are absent) +static const double QRS_DEFAULT_MINS[NDIMS] = {0.5, 0.05, 0.1}; +static const double QRS_DEFAULT_MAXS[NDIMS] = {2.0, 1.0, 0.8}; + +// Config keys for per-dimension search ranges +static const char* RANGE_MIN_KEYS[NDIMS] = { + "cpuctExplorationMin", "cpuctExplorationLogMin", "cpuctUtilityStdevPriorMin" +}; +static const char* RANGE_MAX_KEYS[NDIMS] = { + "cpuctExplorationMax", "cpuctExplorationLogMax", "cpuctUtilityStdevPriorMax" +}; + +// Map QRS-Tune normalized coordinate x in [-1,+1] to real PUCT value. +static double qrsDimToReal(int dim, double x, const double* mins, const double* maxs) { + double center = (mins[dim] + maxs[dim]) * 0.5; + double radius = (maxs[dim] - mins[dim]) * 0.5; + return center + x * radius; +} + +static void qrsToPUCT( + const vector& x, + double& cpuctExploration, + double& cpuctExplorationLog, + double& cpuctUtilityStdevPrior, + const double* mins, const double* maxs +) { + cpuctExploration = qrsDimToReal(0, x[0], mins, maxs); + cpuctExplorationLog = qrsDimToReal(1, x[1], mins, maxs); + cpuctUtilityStdevPrior = qrsDimToReal(2, x[2], mins, maxs); +} + +// Print ASCII-art regression curve for each PUCT dimension. +// For dimension d: fix all other dims at vBest, sweep d from -1 to +1. 
+static void printRegressionCurves(const QRSTune::QRSTuner& tuner, + const vector& vBest, + const double* mins, const double* maxs, + Logger& logger) { + const int PLOT_W = 60; + const int PLOT_H = 20; + + for(int dim = 0; dim < NDIMS; dim++) { + vector canvas(PLOT_H, string(PLOT_W, ' ')); + + int bestCol = (int)((vBest[dim] + 1.0) / 2.0 * (PLOT_W - 1) + 0.5); + bestCol = max(0, min(PLOT_W - 1, bestCol)); + + vector xSlice(vBest); + for(int col = 0; col < PLOT_W; col++) { + double t = -1.0 + 2.0 * col / (PLOT_W - 1); + xSlice[dim] = t; + double winRate = tuner.model().predict(xSlice.data()); + + int row = (int)((1.0 - winRate) * (PLOT_H - 1) + 0.5); + row = max(0, min(PLOT_H - 1, row)); + canvas[row][col] = (col == bestCol) ? '*' : 'o'; + } + + double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); + double bestWinRate = tuner.model().predict(vBest.data()); + logger.write(""); + logger.write( + "[Dim " + Global::intToString(dim) + "] " + PARAM_NAMES[dim] + + " (best QRS=" + Global::strprintf("%.3f", vBest[dim]) + + " -> real=" + Global::strprintf("%.3f", bestReal) + + ", est.winrate=" + Global::strprintf("%.3f", bestWinRate) + ")" + ); + + for(int row = 0; row < PLOT_H; row++) { + string label; + if(row == 0) label = "1.0 |"; + else if(row == PLOT_H / 2) label = "0.5 |"; + else if(row == PLOT_H - 1) label = "0.0 |"; + else label = " |"; + logger.write(label + canvas[row]); + } + logger.write(" +" + string(PLOT_W, '-')); + + { + string line(PLOT_W + 5, ' '); + const int OFF = 5; + auto place = [&](int col, const string& lbl) { + int pos = OFF + col - (int)lbl.size() / 2; + if(pos < 0) pos = 0; + for(int i = 0; i < (int)lbl.size() && pos + i < (int)line.size(); i++) + line[pos + i] = lbl[i]; + }; + place(0, Global::strprintf("%.3f", qrsDimToReal(dim, -1.0, mins, maxs))); + place(PLOT_W / 2, Global::strprintf("%.3f", qrsDimToReal(dim, 0.0, mins, maxs))); + place(PLOT_W - 1, Global::strprintf("%.3f", qrsDimToReal(dim, +1.0, mins, maxs))); + size_t last = 
line.find_last_not_of(' '); + logger.write(line.substr(0, last + 1)); + } + } + logger.write(""); +} + +int MainCmds::tuneparams(const vector& args) { + Board::initHash(); + ScoreValue::initTables(); + Rand seedRand; + + ConfigParser cfg; + string logFile; + try { + KataGoCommandLine cmd( + "Tune KataGo hyperparameters using sequential optimization (QRS-Tune).\n" + "Runs numTrials games between a fixed reference bot (bot0) and an\n" + "experiment bot (bot1) whose PUCT parameters are adapted each trial." + ); + cmd.addConfigFileArg("", "tune_params.cfg"); + + TCLAP::ValueArg logFileArg("", "log-file", "Log file to output to", false, string(), "FILE"); + cmd.add(logFileArg); + cmd.setShortUsageArgLimit(); + cmd.addOverrideConfigArg(); + + cmd.parseArgs(args); + logFile = logFileArg.getValue(); + cmd.getConfig(cfg); + } + catch(TCLAP::ArgException& e) { + cerr << "Error: " << e.error() << " for argument " << e.argId() << endl; + return 1; + } + + Logger logger(&cfg); + logger.addFile(logFile); + logger.write("tune-params starting..."); + logger.write(string("Git revision: ") + Version::getGitRevision()); + + // --- Read tuning-specific config --- + int numTrials = cfg.getInt("numTrials", 1, 100000); + + // --- Search ranges (configurable; defaults preserve prior behaviour) --- + double qrsMins[NDIMS], qrsMaxs[NDIMS]; + for(int d = 0; d < NDIMS; d++) { + qrsMins[d] = cfg.contains(RANGE_MIN_KEYS[d]) + ? cfg.getDouble(RANGE_MIN_KEYS[d], -1e9, 1e9) + : QRS_DEFAULT_MINS[d]; + qrsMaxs[d] = cfg.contains(RANGE_MAX_KEYS[d]) + ? 
cfg.getDouble(RANGE_MAX_KEYS[d], -1e9, 1e9) + : QRS_DEFAULT_MAXS[d]; + if(qrsMins[d] >= qrsMaxs[d]) + throw StringError( + string("tune-params: ") + RANGE_MIN_KEYS[d] + " must be < " + RANGE_MAX_KEYS[d]); + } + logger.write( + "QRS ranges: cpuctExploration=[" + + Global::strprintf("%.4f", qrsMins[0]) + "," + Global::strprintf("%.4f", qrsMaxs[0]) + + "] cpuctExplorationLog=[" + + Global::strprintf("%.4f", qrsMins[1]) + "," + Global::strprintf("%.4f", qrsMaxs[1]) + + "] cpuctUtilityStdevPrior=[" + + Global::strprintf("%.4f", qrsMins[2]) + "," + Global::strprintf("%.4f", qrsMaxs[2]) + "]" + ); + + // --- Load search params for both bots --- + vector paramss = Setup::loadParams(cfg, Setup::SETUP_FOR_MATCH); + if((int)paramss.size() < 2) + throw StringError("tune-params: config must define numBots = 2 (bot0 = reference, bot1 = experiment)"); + + // --- Model files --- + string nnModelFile0 = cfg.getString("nnModelFile0"); + string nnModelFile1 = cfg.getString("nnModelFile1"); + vector nnModelFiles = {nnModelFile0, nnModelFile1}; + + // --- Game runner setup --- + PlaySettings playSettings = PlaySettings::loadForMatch(cfg); + GameRunner* gameRunner = new GameRunner(cfg, playSettings, logger); + int maxBoardX = gameRunner->getGameInitializer()->getMaxBoardXSize(); + int maxBoardY = gameRunner->getGameInitializer()->getMaxBoardYSize(); + + // --- Initialize neural net inference --- + Setup::initializeSession(cfg); + const int expectedConcurrentEvals = max(paramss[0].numThreads, paramss[1].numThreads); + vector expectedSha256s; + vector nnEvals = Setup::initializeNNEvaluators( + nnModelFiles, nnModelFiles, expectedSha256s, + cfg, logger, seedRand, + expectedConcurrentEvals, + maxBoardX, maxBoardY, + /*defaultMaxBatchSize=*/-1, + /*defaultRequireExactNNLen=*/(maxBoardX == gameRunner->getGameInitializer()->getMinBoardXSize() && + maxBoardY == gameRunner->getGameInitializer()->getMinBoardYSize()), + /*disableFP16=*/false, + Setup::SETUP_FOR_MATCH + ); + logger.write("Loaded 
neural nets"); + + // --- QRS-Tune setup --- + uint64_t qrsSeed = seedRand.nextUInt64(); + QRSTune::QRSTuner tuner(NDIMS, qrsSeed, numTrials); + + const string gameSeedBase = Global::uint64ToHexString(seedRand.nextUInt64()); + + int wins = 0, losses = 0, draws = 0; + + logger.write("Starting " + Global::intToString(numTrials) + " tuning trials"); + + for(int trial = 0; trial < numTrials; trial++) { + // Step 1: Get next sample from QRS-Tune + vector sample = tuner.nextSample(); + + // Step 2: Map normalized coordinates to PUCT parameter values + double cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior; + qrsToPUCT(sample, cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior, qrsMins, qrsMaxs); + + // Step 3: Build experiment bot params with updated PUCT values + SearchParams expParams = paramss[1]; + expParams.cpuctExploration = cpuctExploration; + expParams.cpuctExplorationLog = cpuctExplorationLog; + expParams.cpuctUtilityStdevPrior = cpuctUtilityStdevPrior; + + // Step 4: Alternate colors to remove first-move advantage bias + // Even trials: experiment bot plays Black; odd trials: experiment bot plays White + bool expIsBlack = (trial % 2 == 0); + MatchPairer::BotSpec botSpecB, botSpecW; + if(expIsBlack) { + botSpecB.botIdx = 1; + botSpecB.botName = "experiment"; + botSpecB.nnEval = nnEvals[1]; + botSpecB.baseParams = expParams; + botSpecW.botIdx = 0; + botSpecW.botName = "base"; + botSpecW.nnEval = nnEvals[0]; + botSpecW.baseParams = paramss[0]; + } else { + botSpecB.botIdx = 0; + botSpecB.botName = "base"; + botSpecB.nnEval = nnEvals[0]; + botSpecB.baseParams = paramss[0]; + botSpecW.botIdx = 1; + botSpecW.botName = "experiment"; + botSpecW.nnEval = nnEvals[1]; + botSpecW.baseParams = expParams; + } + + // Step 5: Run one game + string seed = gameSeedBase + ":" + Global::intToString(trial); + auto shouldStopFunc = []() noexcept { return false; }; + + FinishedGameData* gameData = gameRunner->runGame( + seed, botSpecB, botSpecW, + 
/*forkData=*/nullptr, + /*startPosSample=*/nullptr, + logger, + shouldStopFunc, + /*shouldPause=*/nullptr, + /*checkForNewNNEval=*/nullptr, + /*afterInitialization=*/nullptr, + /*onEachMove=*/nullptr + ); + + // Step 6: Determine outcome for experiment bot + double outcome = 0.5; // draw default + if(gameData != nullptr) { + Player winner = gameData->endHist.winner; + if(expIsBlack) { + if(winner == P_BLACK) { outcome = 1.0; wins++; } + else if(winner == P_WHITE) { outcome = 0.0; losses++; } + else { outcome = 0.5; draws++; } + } else { + if(winner == P_WHITE) { outcome = 1.0; wins++; } + else if(winner == P_BLACK) { outcome = 0.0; losses++; } + else { outcome = 0.5; draws++; } + } + delete gameData; + } else { + draws++; + logger.write("Warning: trial " + Global::intToString(trial) + " returned null game data"); + } + + // Step 7: Feed result to QRS-Tune (triggers periodic refit and pruning) + tuner.addResult(sample, outcome); + + // Progress report every 100 trials + if((trial + 1) % 100 == 0) { + vector vBest = tuner.bestCoords(); + double bE, bLog, bStdev; + qrsToPUCT(vBest, bE, bLog, bStdev, qrsMins, qrsMaxs); + logger.write( + "Trial " + Global::intToString(trial + 1) + "/" + Global::intToString(numTrials) + + " | W=" + Global::intToString(wins) + " L=" + Global::intToString(losses) + " D=" + Global::intToString(draws) + + " | best: cpuctExploration=" + Global::doubleToString(bE) + + " cpuctExplorationLog=" + Global::doubleToString(bLog) + + " cpuctUtilityStdevPrior=" + Global::doubleToString(bStdev) + ); + } + } + + // --- Final result --- + vector vBest = tuner.bestCoords(); + double bestE, bestLog, bestStdev; + qrsToPUCT(vBest, bestE, bestLog, bestStdev, qrsMins, qrsMaxs); + + logger.write(""); + logger.write("=== tune-params Results ==="); + logger.write( + "Trials: " + Global::intToString(numTrials) + + " Wins: " + Global::intToString(wins) + + " Losses: " + Global::intToString(losses) + + " Draws: " + Global::intToString(draws) + ); + logger.write("Best 
cpuctExploration = " + Global::doubleToString(bestE)); + logger.write("Best cpuctExplorationLog = " + Global::doubleToString(bestLog)); + logger.write("Best cpuctUtilityStdevPrior = " + Global::doubleToString(bestStdev)); + logger.write( + "QRS raw coordinates: [" + Global::doubleToString(vBest[0]) + ", " + + Global::doubleToString(vBest[1]) + ", " + Global::doubleToString(vBest[2]) + "]" + ); + + // --- ASCII-art regression curves (one per PUCT dimension) --- + printRegressionCurves(tuner, vBest, qrsMins, qrsMaxs, logger); + + // --- Cleanup --- + delete gameRunner; + for(NNEvaluator* eval : nnEvals) + delete eval; + + ScoreValue::freeTables(); + return 0; +} diff --git a/cpp/main.cpp b/cpp/main.cpp index 0fcc36dea..f1f7c1553 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -33,6 +33,7 @@ genconfig : User-friendly interface to generate a config with rules and automati contribute : Connect to online distributed KataGo training and run perpetually contributing selfplay games. match : Run self-play match games based on a config, more efficient than gtp due to batching. +tune-params : Tune KataGo PUCT hyperparameters via sequential optimization (QRS-Tune). version : Print version and exit. analysis : Runs an engine designed to analyze entire games in parallel. 
@@ -87,6 +88,8 @@ static int handleSubcommand(const string& subcommand, const vector& args return MainCmds::tuner(subArgs); else if(subcommand == "match") return MainCmds::match(subArgs); + else if(subcommand == "tune-params") + return MainCmds::tuneparams(subArgs); else if(subcommand == "selfplay") return MainCmds::selfplay(subArgs); else if(subcommand == "testgpuerror") diff --git a/cpp/main.h b/cpp/main.h index 3f8ad78d4..6fcf7499f 100644 --- a/cpp/main.h +++ b/cpp/main.h @@ -10,6 +10,7 @@ namespace MainCmds { int gtp(const std::vector& args); int tuner(const std::vector& args); int match(const std::vector& args); + int tuneparams(const std::vector& args); int selfplay(const std::vector& args); int testgpuerror(const std::vector& args); diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h new file mode 100644 index 000000000..5e8415eba --- /dev/null +++ b/cpp/qrstune/QRSOptimizer.h @@ -0,0 +1,346 @@ +// qrstune/QRSOptimizer.h + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace QRSTune { + +// ============================================================ +// Feature map phi(x): +// [1, x_0..x_{D-1}, x_0^2..x_{D-1}^2, x_i*x_j for i 40.0) return 1.0; + if(z < -40.0) return 0.0; + return 1.0 / (1.0 + std::exp(-z)); +} + +// Solve Ax = b in-place (A is F x F, b is length F) via partial-pivot +// Gaussian elimination. Returns false if singular. Overwrites A and b. 
+static bool gaussianSolve(int F, std::vector>& A, std::vector& b) { + for(int col = 0; col < F; col++) { + int piv = col; + for(int r = col + 1; r < F; r++) + if(std::fabs(A[r][col]) > std::fabs(A[piv][col])) piv = r; + std::swap(A[col], A[piv]); + std::swap(b[col], b[piv]); + if(std::fabs(A[col][col]) < 1e-12) return false; + double inv = 1.0 / A[col][col]; + for(int r = col + 1; r < F; r++) { + double f = A[r][col] * inv; + for(int c = col; c < F; c++) A[r][c] -= f * A[col][c]; + b[r] -= f * b[col]; + } + } + for(int r = F - 1; r >= 0; r--) { + for(int c = r + 1; c < F; c++) b[r] -= A[r][c] * b[c]; + b[r] /= A[r][r]; + } + return true; +} + +// ============================================================ +// QRSModel: quadratic logistic regression with L2 regularization. +// Provides MAP estimation and win-probability prediction. +// ============================================================ +class QRSModel { + int D_, F_; + std::vector beta_; // F coefficients (intercept, linear, quad, cross) + double l2_; // L2 regularization strength + + public: + QRSModel() : D_(0), F_(0), l2_(0.1) {} + QRSModel(int D, double l2_reg = 0.1) + : D_(D), F_(numFeatures(D)), beta_(numFeatures(D), 0.0), l2_(l2_reg) {} + + // Newton-Raphson MAP estimation. 
+ // xs: sample coordinates; ys: outcomes in {0.0, 0.5, 1.0} + void fit(const std::vector>& xs, + const std::vector& ys, + int max_iter = 30) { + int N = (int)xs.size(); + if(N < F_) return; // underdetermined; keep prior beta = 0 + + std::vector phi(F_); + + for(int iter = 0; iter < max_iter; iter++) { + // Gradient and (negative) Hessian from L2 prior + std::vector grad(F_, 0.0); + std::vector> negH(F_, std::vector(F_, 0.0)); + for(int f = 0; f < F_; f++) { + grad[f] = -l2_ * beta_[f]; + negH[f][f] = l2_; + } + + // Data contribution + for(int n = 0; n < N; n++) { + computeFeatures(D_, xs[n].data(), phi.data()); + double z = 0.0; + for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; + double p = sigmoid(z); + double w = p * (1.0 - p); + double resid = ys[n] - p; + for(int f = 0; f < F_; f++) { + grad[f] += resid * phi[f]; + for(int g = f; g < F_; g++) + negH[f][g] += w * phi[f] * phi[g]; + } + } + // Symmetrize negH + for(int f = 0; f < F_; f++) + for(int g = f + 1; g < F_; g++) + negH[g][f] = negH[f][g]; + + // Solve negH * delta = grad => beta += delta + if(!gaussianSolve(F_, negH, grad)) break; + double maxd = 0.0; + for(int f = 0; f < F_; f++) { + beta_[f] += grad[f]; + maxd = std::max(maxd, std::fabs(grad[f])); + } + if(maxd < 1e-7) break; + } + } + + // Win probability at x[0..D-1] + double predict(const double* x) const { + std::vector phi(F_); + computeFeatures(D_, x, phi.data()); + double z = 0.0; + for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; + return sigmoid(z); + } + + // Linear score phi(x)^T beta (used for MAP maximization) + double score(const double* x) const { + std::vector phi(F_); + computeFeatures(D_, x, phi.data()); + double z = 0.0; + for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; + return z; + } + + // Find x in [-1,+1]^D that maximizes score(x) = phi(x)^T beta. 
+ // For a quadratic, the unconstrained stationary point satisfies: + // M x = -b_lin + // where M[i][i] = 2*beta_quad[i], M[i][j]=M[j][i] = beta_cross[i,j], + // b_lin[i] = beta_linear[i]. + // The solution is clamped to [-1,+1]^D. + void mapOptimum(double* out_x) const { + // Beta layout: [intercept, linear[0..D-1], quad[0..D-1], cross by (i> M(D_, std::vector(D_, 0.0)); + std::vector rhs(D_); + + for(int k = 0; k < D_; k++) { + M[k][k] = 2.0 * b_quad[k]; + rhs[k] = -b_lin[k]; + } + int idx = 0; + for(int i = 0; i < D_; i++) + for(int j = i + 1; j < D_; j++) { + M[i][j] += b_cross[idx]; + M[j][i] += b_cross[idx]; + idx++; + } + + if(!gaussianSolve(D_, M, rhs)) { + for(int i = 0; i < D_; i++) out_x[i] = 0.0; + return; + } + for(int i = 0; i < D_; i++) + out_x[i] = std::max(-1.0, std::min(1.0, rhs[i])); + } + + int dims() const { return D_; } + int features() const { return F_; } +}; + +// ============================================================ +// QRSBuffer: sample storage with confidence-based pruning. +// Samples whose predicted win rate is far below the current +// MAP estimate are dropped to keep the model locally focused. +// ============================================================ +class QRSBuffer { + std::vector> xs_; + std::vector ys_; + int min_keep_; // never prune below this count + double prune_margin_; // drop samples where p_pred < p_best - margin + + public: + QRSBuffer(int min_keep = 30, double prune_margin = 0.25) + : min_keep_(min_keep), prune_margin_(prune_margin) {} + + void add(const std::vector& x, double y) { + xs_.push_back(x); + ys_.push_back(y); + } + + // Remove samples significantly below the current MAP win estimate. 
+ void prune(const QRSModel& model) { + int N = (int)xs_.size(); + if(N <= min_keep_ * 2) return; + + // Best predicted win rate across all stored samples + double p_best = 0.0; + for(int i = 0; i < N; i++) { + double p = model.predict(xs_[i].data()); + if(p > p_best) p_best = p; + } + double threshold = p_best - prune_margin_; + + std::vector> nx; + std::vector ny; + for(int i = 0; i < N; i++) { + double p = model.predict(xs_[i].data()); + if(p >= threshold || (int)nx.size() < min_keep_) { + nx.push_back(xs_[i]); + ny.push_back(ys_[i]); + } + } + xs_ = std::move(nx); + ys_ = std::move(ny); + } + + const std::vector>& xs() const { return xs_; } + const std::vector& ys() const { return ys_; } + int size() const { return (int)xs_.size(); } +}; + +// ============================================================ +// QRSTuner: top-level interface +// +// Usage: +// QRSTuner tuner(D, seed, numTrials); +// for each trial: +// auto x = tuner.nextSample(); +// ... run game, get win=1.0 / loss=0.0 / draw=0.5 ... 
+// tuner.addResult(x, outcome); +// auto best = tuner.bestCoords(); +// ============================================================ +class QRSTuner { + int D_; + QRSModel model_; + QRSBuffer buffer_; + std::mt19937_64 rng_; + int trial_count_; + int total_trials_; + int refit_every_; // refit model after every N trials + int prune_every_; // prune once per this many refits + + // Exploration noise std dev: decays linearly from initial to final + double sigma_initial_; + double sigma_final_; + + public: + // D : number of dimensions + // seed : RNG seed for reproducibility + // total_trials : expected total number of trials (for scheduling) + // l2_reg : L2 regularization for QRSModel (default 0.1) + // refit_every : how often to refit model (default 10 trials) + // prune_every : prune every N-th refit (default 5) + QRSTuner(int D, uint64_t seed, int total_trials, + double l2_reg = 0.1, + int refit_every = 10, + int prune_every = 5, + double sigma_init = 0.40, + double sigma_fin = 0.05) + : D_(D), + model_(D, l2_reg), + buffer_(/*min_keep=*/std::max(20, total_trials / 50), + /*prune_margin=*/0.25), + rng_(seed), + trial_count_(0), + total_trials_(total_trials), + refit_every_(refit_every), + prune_every_(prune_every), + sigma_initial_(sigma_init), + sigma_final_(sigma_fin) {} + + // Propose next point to evaluate. + // During early exploration (< F samples) returns a random point. + // Afterwards: MAP optimum + decaying Gaussian noise clamped to [-1,+1]^D. 
+ std::vector nextSample() { + std::vector x(D_); + int F = model_.features(); + + if(buffer_.size() < F + 1) { + // Insufficient data for reliable fit — explore uniformly + std::uniform_real_distribution uni(-1.0, 1.0); + for(int i = 0; i < D_; i++) x[i] = uni(rng_); + return x; + } + + // Base: MAP optimum + model_.mapOptimum(x.data()); + + // Decaying exploration noise + double progress = (double)trial_count_ / std::max(1, total_trials_ - 1); + double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_); + std::normal_distribution noise(0.0, sigma); + for(int i = 0; i < D_; i++) + x[i] = std::max(-1.0, std::min(1.0, x[i] + noise(rng_))); + + return x; + } + + // Record the outcome of a trial. + // y: 1.0 = win, 0.0 = loss, 0.5 = draw + void addResult(const std::vector& x, double y) { + buffer_.add(x, y); + trial_count_++; + + if(trial_count_ % refit_every_ == 0 && buffer_.size() >= model_.features() + 1) { + model_.fit(buffer_.xs(), buffer_.ys()); + int refit_count = trial_count_ / refit_every_; + if(refit_count % prune_every_ == 0) + buffer_.prune(model_); + } + } + + // Return current MAP optimum in [-1,+1]^D + std::vector bestCoords() const { + std::vector best(D_); + model_.mapOptimum(best.data()); + return best; + } + + // Estimated win probability at the MAP optimum + double bestWinProb() const { + auto best = bestCoords(); + return model_.predict(best.data()); + } + + int trialCount() const { return trial_count_; } + int dims() const { return D_; } + const QRSModel& model() const { return model_; } +}; + +} // namespace QRSTune From 80c8ff7dfdf498575870aea7821fb198cae2f892 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 13:32:20 +0000 Subject: [PATCH 02/41] Add build/ to .gitignore The root-level build/ directory is used for out-of-source CMake builds. 
https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2e933d553..06343f2c3 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ cpp/tests/results/matchsgfs2/games.sgfs cpp/data/ versions/ +build/ cpp/build cpp/out From ef10813e57c3696219523324f01857a752dd628e Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Tue, 31 Mar 2026 22:22:48 +0800 Subject: [PATCH 03/41] Fix missing NeuralNet::globalCleanup() and minor cleanup in tuneparams - Add missing NeuralNet::globalCleanup() call before ScoreValue::freeTables() to properly clean up neural net backend state on exit - Hoist bestWinRate computation out of per-dimension loop in printRegressionCurves() (value is invariant across dimensions) - Remove unnecessary step-number comments that restated the code Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 8d97470c5..b3a18a654 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -69,6 +69,7 @@ static void printRegressionCurves(const QRSTune::QRSTuner& tuner, Logger& logger) { const int PLOT_W = 60; const int PLOT_H = 20; + double bestWinRate = tuner.model().predict(vBest.data()); for(int dim = 0; dim < NDIMS; dim++) { vector canvas(PLOT_H, string(PLOT_W, ' ')); @@ -87,8 +88,7 @@ static void printRegressionCurves(const QRSTune::QRSTuner& tuner, canvas[row][col] = (col == bestCol) ? 
'*' : 'o'; } - double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); - double bestWinRate = tuner.model().predict(vBest.data()); + double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); logger.write(""); logger.write( "[Dim " + Global::intToString(dim) + "] " + PARAM_NAMES[dim] + @@ -229,21 +229,17 @@ int MainCmds::tuneparams(const vector& args) { logger.write("Starting " + Global::intToString(numTrials) + " tuning trials"); for(int trial = 0; trial < numTrials; trial++) { - // Step 1: Get next sample from QRS-Tune vector sample = tuner.nextSample(); - // Step 2: Map normalized coordinates to PUCT parameter values double cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior; qrsToPUCT(sample, cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior, qrsMins, qrsMaxs); - // Step 3: Build experiment bot params with updated PUCT values SearchParams expParams = paramss[1]; expParams.cpuctExploration = cpuctExploration; expParams.cpuctExplorationLog = cpuctExplorationLog; expParams.cpuctUtilityStdevPrior = cpuctUtilityStdevPrior; - // Step 4: Alternate colors to remove first-move advantage bias - // Even trials: experiment bot plays Black; odd trials: experiment bot plays White + // Alternate colors to remove first-move advantage bias bool expIsBlack = (trial % 2 == 0); MatchPairer::BotSpec botSpecB, botSpecW; if(expIsBlack) { @@ -266,7 +262,6 @@ int MainCmds::tuneparams(const vector& args) { botSpecW.baseParams = expParams; } - // Step 5: Run one game string seed = gameSeedBase + ":" + Global::intToString(trial); auto shouldStopFunc = []() noexcept { return false; }; @@ -282,8 +277,7 @@ int MainCmds::tuneparams(const vector& args) { /*onEachMove=*/nullptr ); - // Step 6: Determine outcome for experiment bot - double outcome = 0.5; // draw default + double outcome = 0.5; if(gameData != nullptr) { Player winner = gameData->endHist.winner; if(expIsBlack) { @@ -301,7 +295,6 @@ int MainCmds::tuneparams(const vector& args) { logger.write("Warning: 
trial " + Global::intToString(trial) + " returned null game data"); } - // Step 7: Feed result to QRS-Tune (triggers periodic refit and pruning) tuner.addResult(sample, outcome); // Progress report every 100 trials @@ -348,6 +341,7 @@ int MainCmds::tuneparams(const vector& args) { for(NNEvaluator* eval : nnEvals) delete eval; + NeuralNet::globalCleanup(); ScoreValue::freeTables(); return 0; } From d22c0f2ebffdf6fb8fb70e6a0cebf56cc51c7f53 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 15:04:25 +0000 Subject: [PATCH 04/41] Fix naming conventions and comment style to match KataGo codebase - Rename ALL_CAPS constants to camelCase (nDims, paramNames, plotW, plotH, qrsDefaultMins/Maxs, rangeMinKeys/MaxKeys, eloPerStrength) - Change nullptr to NULL to match KataGo's dominant convention - Change "// Comment" to "//Comment" (no space after //) - Change "// --- Section ---" separators to "//Section" style - Leave QRSOptimizer.h unchanged (standalone library, own namespace) https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- cpp/command/match.cpp | 34 +++++----- cpp/command/tuneparams.cpp | 130 ++++++++++++++++++------------------- 2 files changed, 82 insertions(+), 82 deletions(-) diff --git a/cpp/command/match.cpp b/cpp/command/match.cpp index bdd7aa818..0390a9901 100644 --- a/cpp/command/match.cpp +++ b/cpp/command/match.cpp @@ -28,9 +28,9 @@ static void signalHandler(int signal) } } -// ===== Match statistics helpers ===== +//Match statistics helpers -// Wilson score 95% two-tailed confidence interval (draws counted as 0.5 wins) +//Wilson score 95% two-tailed confidence interval (draws counted as 0.5 wins) static void wilsonCI95(double wins, double n, double& lo, double& hi) { const double z = 1.96; double p = wins / n; @@ -41,15 +41,15 @@ static void wilsonCI95(double wins, double n, double& lo, double& hi) { hi = center + margin; } -// One-tailed p-value: P(experiment winrate <= 0.5 | data), using normal approximation +//One-tailed p-value: 
P(experiment winrate <= 0.5 | data), using normal approximation static double oneTailedPValue(double wins, double n) { if(n <= 0) return 0.5; double z = (wins - 0.5*n) / (0.5*sqrt(n)); return 0.5 * erfc(z / sqrt(2.0)); } -// Bradley-Terry MLE Elo (global, all-bot ranking) -// pairStats: {nameA,nameB} -> {winsA, winsB, draws} nameA < nameB lexicographically +//Bradley-Terry MLE Elo (global, all-bot ranking) +//pairStats: {nameA,nameB} -> {winsA, winsB, draws} nameA < nameB lexicographically static void computeBradleyTerryElo( const vector& botNames, const map, array>& pairStats, @@ -57,12 +57,12 @@ static void computeBradleyTerryElo( vector& outStderr ) { int N = (int)botNames.size(); - const double ELO_PER_STRENGTH = 400.0 * log10(exp(1.0)); // ~173.7 + const double eloPerStrength = 400.0 * log10(exp(1.0)); //~173.7 map nameIdx; for(int i = 0; i < N; i++) nameIdx[botNames[i]] = i; - // w[i][j] = effective wins of i vs j (draws count 0.5) + //w[i][j] = effective wins of i vs j (draws count 0.5) vector> w(N, vector(N, 0.0)); for(auto& kv : pairStats) { auto itA = nameIdx.find(kv.first.first); @@ -73,7 +73,7 @@ static void computeBradleyTerryElo( w[b][a] += kv.second[1] + 0.5 * kv.second[2]; } - // theta[0] = 0 (reference, first bot), optimize theta[1..N-1] + //theta[0] = 0 (reference, first bot), optimize theta[1..N-1] vector theta(N, 0.0); int M = N - 1; @@ -93,7 +93,7 @@ static void computeBradleyTerryElo( if(i > 0 && j > 0) { H[i-1][j-1] += fish; H[j-1][i-1] += fish; } } } - // Solve H*delta = -grad via Gaussian elimination + //Solve H*delta = -grad via Gaussian elimination vector> aug(M, vector(M+1, 0.0)); for(int r = 0; r < M; r++) { for(int c = 0; c < M; c++) aug[r][c] = H[r][c]; @@ -126,13 +126,13 @@ static void computeBradleyTerryElo( } } - // Convert log-strength to Elo relative to bot 0 + //Convert log-strength to Elo relative to bot 0 outElo.resize(N); outStderr.resize(N, 0.0); for(int i = 0; i < N; i++) - outElo[i] = (theta[i] - theta[0]) * 
ELO_PER_STRENGTH; + outElo[i] = (theta[i] - theta[0]) * eloPerStrength; - // Fisher information diagonal -> stderr + //Fisher information diagonal -> stderr for(int i = 1; i < N; i++) { double fish = 0.0; for(int j = 0; j < N; j++) { @@ -142,7 +142,7 @@ static void computeBradleyTerryElo( double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); fish += nij * sigma * (1.0 - sigma); } - if(fish > 0.0) outStderr[i] = ELO_PER_STRENGTH / sqrt(fish); + if(fish > 0.0) outStderr[i] = eloPerStrength / sqrt(fish); } } @@ -371,8 +371,8 @@ int MainCmds::match(const vector& args) { std::map timeUsedByBotMap; std::map movesByBotMap; map, array> pairStats; - // key: {nameA, nameB} with nameA < nameB lexicographically - // value: {winsA, winsB, draws} + //key: {nameA, nameB} with nameA < nameB lexicographically + //value: {winsA, winsB, draws} auto runMatchLoop = [ &gameRunner,&matchPairer,&sgfOutputDir,&logger,&gameSeedBase,&patternBonusTables, @@ -426,7 +426,7 @@ int MainCmds::match(const vector& args) { movesByBotMap[gameData->bName] += (double)gameData->bMoveCount; movesByBotMap[gameData->wName] += (double)gameData->wMoveCount; - // Update pairwise W/L/D stats + //Update pairwise W/L/D stats { const string& bName = gameData->bName; const string& wName = gameData->wName; @@ -481,7 +481,7 @@ int MainCmds::match(const vector& args) { for(int i = 0; i activeBots; { diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index b3a18a654..67348b554 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -1,7 +1,7 @@ -// command/tuneparams.cpp -// KataGo hyperparameter tuning via QRS-Tune sequential optimization. -// Runs two bots (base reference vs experiment) for numTrials games, -// adapting experiment bot's PUCT parameters toward higher win rates. +//command/tuneparams.cpp +//KataGo hyperparameter tuning via QRS-Tune sequential optimization. 
+//Runs two bots (base reference vs experiment) for numTrials games, +//adapting experiment bot's PUCT parameters toward higher win rates. #include "../core/global.h" #include "../core/config_parser.h" @@ -21,28 +21,28 @@ using namespace std; -// Number of dimensions = number of PUCT params being tuned -static const int NDIMS = 3; +//Number of dimensions = number of PUCT params being tuned +static const int nDims = 3; -static const char* PARAM_NAMES[NDIMS] = { +static const char* paramNames[nDims] = { "cpuctExploration", "cpuctExplorationLog", "cpuctUtilityStdevPrior" }; -// Default search ranges (used when config keys are absent) -static const double QRS_DEFAULT_MINS[NDIMS] = {0.5, 0.05, 0.1}; -static const double QRS_DEFAULT_MAXS[NDIMS] = {2.0, 1.0, 0.8}; +//Default search ranges (used when config keys are absent) +static const double qrsDefaultMins[nDims] = {0.5, 0.05, 0.1}; +static const double qrsDefaultMaxs[nDims] = {2.0, 1.0, 0.8}; -// Config keys for per-dimension search ranges -static const char* RANGE_MIN_KEYS[NDIMS] = { +//Config keys for per-dimension search ranges +static const char* rangeMinKeys[nDims] = { "cpuctExplorationMin", "cpuctExplorationLogMin", "cpuctUtilityStdevPriorMin" }; -static const char* RANGE_MAX_KEYS[NDIMS] = { +static const char* rangeMaxKeys[nDims] = { "cpuctExplorationMax", "cpuctExplorationLogMax", "cpuctUtilityStdevPriorMax" }; -// Map QRS-Tune normalized coordinate x in [-1,+1] to real PUCT value. +//Map QRS-Tune normalized coordinate x in [-1,+1] to real PUCT value. static double qrsDimToReal(int dim, double x, const double* mins, const double* maxs) { double center = (mins[dim] + maxs[dim]) * 0.5; double radius = (maxs[dim] - mins[dim]) * 0.5; @@ -61,64 +61,64 @@ static void qrsToPUCT( cpuctUtilityStdevPrior = qrsDimToReal(2, x[2], mins, maxs); } -// Print ASCII-art regression curve for each PUCT dimension. -// For dimension d: fix all other dims at vBest, sweep d from -1 to +1. 
+//Print ASCII-art regression curve for each PUCT dimension. +//For dimension d: fix all other dims at vBest, sweep d from -1 to +1. static void printRegressionCurves(const QRSTune::QRSTuner& tuner, const vector& vBest, const double* mins, const double* maxs, Logger& logger) { - const int PLOT_W = 60; - const int PLOT_H = 20; + const int plotW = 60; + const int plotH = 20; double bestWinRate = tuner.model().predict(vBest.data()); - for(int dim = 0; dim < NDIMS; dim++) { - vector canvas(PLOT_H, string(PLOT_W, ' ')); + for(int dim = 0; dim < nDims; dim++) { + vector canvas(plotH, string(plotW, ' ')); - int bestCol = (int)((vBest[dim] + 1.0) / 2.0 * (PLOT_W - 1) + 0.5); - bestCol = max(0, min(PLOT_W - 1, bestCol)); + int bestCol = (int)((vBest[dim] + 1.0) / 2.0 * (plotW - 1) + 0.5); + bestCol = max(0, min(plotW - 1, bestCol)); vector xSlice(vBest); - for(int col = 0; col < PLOT_W; col++) { - double t = -1.0 + 2.0 * col / (PLOT_W - 1); + for(int col = 0; col < plotW; col++) { + double t = -1.0 + 2.0 * col / (plotW - 1); xSlice[dim] = t; double winRate = tuner.model().predict(xSlice.data()); - int row = (int)((1.0 - winRate) * (PLOT_H - 1) + 0.5); - row = max(0, min(PLOT_H - 1, row)); + int row = (int)((1.0 - winRate) * (plotH - 1) + 0.5); + row = max(0, min(plotH - 1, row)); canvas[row][col] = (col == bestCol) ? 
'*' : 'o'; } double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); logger.write(""); logger.write( - "[Dim " + Global::intToString(dim) + "] " + PARAM_NAMES[dim] + + "[Dim " + Global::intToString(dim) + "] " + paramNames[dim] + " (best QRS=" + Global::strprintf("%.3f", vBest[dim]) + " -> real=" + Global::strprintf("%.3f", bestReal) + ", est.winrate=" + Global::strprintf("%.3f", bestWinRate) + ")" ); - for(int row = 0; row < PLOT_H; row++) { + for(int row = 0; row < plotH; row++) { string label; if(row == 0) label = "1.0 |"; - else if(row == PLOT_H / 2) label = "0.5 |"; - else if(row == PLOT_H - 1) label = "0.0 |"; + else if(row == plotH / 2) label = "0.5 |"; + else if(row == plotH - 1) label = "0.0 |"; else label = " |"; logger.write(label + canvas[row]); } - logger.write(" +" + string(PLOT_W, '-')); + logger.write(" +" + string(plotW, '-')); { - string line(PLOT_W + 5, ' '); - const int OFF = 5; + string line(plotW + 5, ' '); + const int off = 5; auto place = [&](int col, const string& lbl) { - int pos = OFF + col - (int)lbl.size() / 2; + int pos = off + col - (int)lbl.size() / 2; if(pos < 0) pos = 0; for(int i = 0; i < (int)lbl.size() && pos + i < (int)line.size(); i++) line[pos + i] = lbl[i]; }; place(0, Global::strprintf("%.3f", qrsDimToReal(dim, -1.0, mins, maxs))); - place(PLOT_W / 2, Global::strprintf("%.3f", qrsDimToReal(dim, 0.0, mins, maxs))); - place(PLOT_W - 1, Global::strprintf("%.3f", qrsDimToReal(dim, +1.0, mins, maxs))); + place(plotW / 2, Global::strprintf("%.3f", qrsDimToReal(dim, 0.0, mins, maxs))); + place(plotW - 1, Global::strprintf("%.3f", qrsDimToReal(dim, +1.0, mins, maxs))); size_t last = line.find_last_not_of(' '); logger.write(line.substr(0, last + 1)); } @@ -160,21 +160,21 @@ int MainCmds::tuneparams(const vector& args) { logger.write("tune-params starting..."); logger.write(string("Git revision: ") + Version::getGitRevision()); - // --- Read tuning-specific config --- + //Read tuning-specific config int numTrials = 
cfg.getInt("numTrials", 1, 100000); - // --- Search ranges (configurable; defaults preserve prior behaviour) --- - double qrsMins[NDIMS], qrsMaxs[NDIMS]; - for(int d = 0; d < NDIMS; d++) { - qrsMins[d] = cfg.contains(RANGE_MIN_KEYS[d]) - ? cfg.getDouble(RANGE_MIN_KEYS[d], -1e9, 1e9) - : QRS_DEFAULT_MINS[d]; - qrsMaxs[d] = cfg.contains(RANGE_MAX_KEYS[d]) - ? cfg.getDouble(RANGE_MAX_KEYS[d], -1e9, 1e9) - : QRS_DEFAULT_MAXS[d]; + //Search ranges (configurable; defaults preserve prior behaviour) + double qrsMins[nDims], qrsMaxs[nDims]; + for(int d = 0; d < nDims; d++) { + qrsMins[d] = cfg.contains(rangeMinKeys[d]) + ? cfg.getDouble(rangeMinKeys[d], -1e9, 1e9) + : qrsDefaultMins[d]; + qrsMaxs[d] = cfg.contains(rangeMaxKeys[d]) + ? cfg.getDouble(rangeMaxKeys[d], -1e9, 1e9) + : qrsDefaultMaxs[d]; if(qrsMins[d] >= qrsMaxs[d]) throw StringError( - string("tune-params: ") + RANGE_MIN_KEYS[d] + " must be < " + RANGE_MAX_KEYS[d]); + string("tune-params: ") + rangeMinKeys[d] + " must be < " + rangeMaxKeys[d]); } logger.write( "QRS ranges: cpuctExploration=[" + @@ -185,23 +185,23 @@ int MainCmds::tuneparams(const vector& args) { Global::strprintf("%.4f", qrsMins[2]) + "," + Global::strprintf("%.4f", qrsMaxs[2]) + "]" ); - // --- Load search params for both bots --- + //Load search params for both bots vector paramss = Setup::loadParams(cfg, Setup::SETUP_FOR_MATCH); if((int)paramss.size() < 2) throw StringError("tune-params: config must define numBots = 2 (bot0 = reference, bot1 = experiment)"); - // --- Model files --- + //Model files string nnModelFile0 = cfg.getString("nnModelFile0"); string nnModelFile1 = cfg.getString("nnModelFile1"); vector nnModelFiles = {nnModelFile0, nnModelFile1}; - // --- Game runner setup --- + //Game runner setup PlaySettings playSettings = PlaySettings::loadForMatch(cfg); GameRunner* gameRunner = new GameRunner(cfg, playSettings, logger); int maxBoardX = gameRunner->getGameInitializer()->getMaxBoardXSize(); int maxBoardY = 
gameRunner->getGameInitializer()->getMaxBoardYSize(); - // --- Initialize neural net inference --- + //Initialize neural net inference Setup::initializeSession(cfg); const int expectedConcurrentEvals = max(paramss[0].numThreads, paramss[1].numThreads); vector expectedSha256s; @@ -218,9 +218,9 @@ int MainCmds::tuneparams(const vector& args) { ); logger.write("Loaded neural nets"); - // --- QRS-Tune setup --- + //QRS-Tune setup uint64_t qrsSeed = seedRand.nextUInt64(); - QRSTune::QRSTuner tuner(NDIMS, qrsSeed, numTrials); + QRSTune::QRSTuner tuner(nDims, qrsSeed, numTrials); const string gameSeedBase = Global::uint64ToHexString(seedRand.nextUInt64()); @@ -239,7 +239,7 @@ int MainCmds::tuneparams(const vector& args) { expParams.cpuctExplorationLog = cpuctExplorationLog; expParams.cpuctUtilityStdevPrior = cpuctUtilityStdevPrior; - // Alternate colors to remove first-move advantage bias + //Alternate colors to remove first-move advantage bias bool expIsBlack = (trial % 2 == 0); MatchPairer::BotSpec botSpecB, botSpecW; if(expIsBlack) { @@ -267,18 +267,18 @@ int MainCmds::tuneparams(const vector& args) { FinishedGameData* gameData = gameRunner->runGame( seed, botSpecB, botSpecW, - /*forkData=*/nullptr, - /*startPosSample=*/nullptr, + /*forkData=*/NULL, + /*startPosSample=*/NULL, logger, shouldStopFunc, - /*shouldPause=*/nullptr, - /*checkForNewNNEval=*/nullptr, - /*afterInitialization=*/nullptr, - /*onEachMove=*/nullptr + /*shouldPause=*/NULL, + /*checkForNewNNEval=*/NULL, + /*afterInitialization=*/NULL, + /*onEachMove=*/NULL ); double outcome = 0.5; - if(gameData != nullptr) { + if(gameData != NULL) { Player winner = gameData->endHist.winner; if(expIsBlack) { if(winner == P_BLACK) { outcome = 1.0; wins++; } @@ -297,7 +297,7 @@ int MainCmds::tuneparams(const vector& args) { tuner.addResult(sample, outcome); - // Progress report every 100 trials + //Progress report every 100 trials if((trial + 1) % 100 == 0) { vector vBest = tuner.bestCoords(); double bE, bLog, bStdev; @@ 
-312,7 +312,7 @@ int MainCmds::tuneparams(const vector& args) { } } - // --- Final result --- + //Final result vector vBest = tuner.bestCoords(); double bestE, bestLog, bestStdev; qrsToPUCT(vBest, bestE, bestLog, bestStdev, qrsMins, qrsMaxs); @@ -333,10 +333,10 @@ int MainCmds::tuneparams(const vector& args) { Global::doubleToString(vBest[1]) + ", " + Global::doubleToString(vBest[2]) + "]" ); - // --- ASCII-art regression curves (one per PUCT dimension) --- + //ASCII-art regression curves (one per PUCT dimension) printRegressionCurves(tuner, vBest, qrsMins, qrsMaxs, logger); - // --- Cleanup --- + //Cleanup delete gameRunner; for(NNEvaluator* eval : nnEvals) delete eval; From efb3338909ed2a25dded06b7b9fbfc1235efae2d Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 06:51:28 +0800 Subject: [PATCH 05/41] Fix tuneparams signal handling, pruning bias, and header inline consistency Add graceful SIGINT/SIGTERM shutdown to tuneparams matching the pattern used by match.cpp and other long-running commands. Fix QRSBuffer::prune to retain highest-quality samples rather than oldest insertion-order ones when applying min_keep. Add missing inline on gaussianSolve in header. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 22 +++++++++++++++++++++- cpp/qrstune/QRSOptimizer.h | 29 +++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 67348b554..d33a56dc5 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -18,9 +18,20 @@ #include #include +#include using namespace std; +static std::atomic sigReceived(false); +static std::atomic shouldStop(false); +static void signalHandler(int signal) +{ + if(signal == SIGINT || signal == SIGTERM) { + sigReceived.store(true); + shouldStop.store(true); + } +} + //Number of dimensions = number of PUCT params being tuned static const int nDims = 3; @@ -218,6 +229,12 @@ int MainCmds::tuneparams(const vector& args) { ); logger.write("Loaded neural nets"); + //Signal handling for graceful shutdown + if(!std::atomic_is_lock_free(&shouldStop)) + throw StringError("shouldStop is not lock free, signal-quitting mechanism for terminating will NOT work!"); + std::signal(SIGINT, signalHandler); + std::signal(SIGTERM, signalHandler); + //QRS-Tune setup uint64_t qrsSeed = seedRand.nextUInt64(); QRSTune::QRSTuner tuner(nDims, qrsSeed, numTrials); @@ -263,7 +280,7 @@ int MainCmds::tuneparams(const vector& args) { } string seed = gameSeedBase + ":" + Global::intToString(trial); - auto shouldStopFunc = []() noexcept { return false; }; + auto shouldStopFunc = []() noexcept { return shouldStop.load(); }; FinishedGameData* gameData = gameRunner->runGame( seed, botSpecB, botSpecW, @@ -297,6 +314,9 @@ int MainCmds::tuneparams(const vector& args) { tuner.addResult(sample, outcome); + if(shouldStop.load()) + break; + //Progress report every 100 trials if((trial + 1) % 100 == 0) { vector vBest = tuner.bestCoords(); diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 5e8415eba..702cd440e 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ 
b/cpp/qrstune/QRSOptimizer.h @@ -41,7 +41,7 @@ static inline double sigmoid(double z) { // Solve Ax = b in-place (A is F x F, b is length F) via partial-pivot // Gaussian elimination. Returns false if singular. Overwrites A and b. -static bool gaussianSolve(int F, std::vector>& A, std::vector& b) { +static inline bool gaussianSolve(int F, std::vector>& A, std::vector& b) { for(int col = 0; col < F; col++) { int piv = col; for(int r = col + 1; r < F; r++) @@ -204,23 +204,44 @@ class QRSBuffer { } // Remove samples significantly below the current MAP win estimate. + // Samples are ranked by predicted quality so that min_keep_ retains the + // best samples rather than the oldest (which are typically from early + // uniform random exploration). void prune(const QRSModel& model) { int N = (int)xs_.size(); if(N <= min_keep_ * 2) return; - // Best predicted win rate across all stored samples + // Score all samples and find best predicted win rate + std::vector> scored(N); double p_best = 0.0; for(int i = 0; i < N; i++) { double p = model.predict(xs_[i].data()); + scored[i] = {p, i}; if(p > p_best) p_best = p; } double threshold = p_best - prune_margin_; + // Sort by descending predicted quality + std::sort(scored.begin(), scored.end(), + [](const std::pair& a, const std::pair& b) { + return a.first > b.first; + }); + + // Keep samples above threshold, plus top-quality samples up to min_keep_ + std::vector keep(N, false); + int kept = 0; + for(auto& kv : scored) { + if(kv.first >= threshold || kept < min_keep_) { + keep[kv.second] = true; + kept++; + } + } + + // Rebuild in original order to preserve temporal structure std::vector> nx; std::vector ny; for(int i = 0; i < N; i++) { - double p = model.predict(xs_[i].data()); - if(p >= threshold || (int)nx.size() < min_keep_) { + if(keep[i]) { nx.push_back(xs_[i]); ny.push_back(ys_[i]); } From b52825b18bc0e83282ac0707f161af9011986673 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang 
<2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 07:51:04 +0800 Subject: [PATCH 06/41] Add Bradley-Terry convergence warning and fix header inline linkage Add convergence detection to computeBradleyTerryElo in match.cpp so that a warning is logged when the Newton-Raphson solver hits the 200 iteration limit without converging. Change QRSOptimizer.h free functions from static inline to inline for correct weak external linkage in the namespaced header-only library. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/match.cpp | 10 +++++++--- cpp/qrstune/QRSOptimizer.h | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/command/match.cpp b/cpp/command/match.cpp index 0390a9901..e8fdb0cc1 100644 --- a/cpp/command/match.cpp +++ b/cpp/command/match.cpp @@ -50,7 +50,7 @@ static double oneTailedPValue(double wins, double n) { //Bradley-Terry MLE Elo (global, all-bot ranking) //pairStats: {nameA,nameB} -> {winsA, winsB, draws} nameA < nameB lexicographically -static void computeBradleyTerryElo( +static bool computeBradleyTerryElo( const vector& botNames, const map, array>& pairStats, vector& outElo, @@ -77,6 +77,7 @@ static void computeBradleyTerryElo( vector theta(N, 0.0); int M = N - 1; + bool converged = (M == 0); if(M > 0) { for(int iter = 0; iter < 200; iter++) { vector grad(M, 0.0); @@ -122,7 +123,7 @@ static void computeBradleyTerryElo( theta[r+1] += delta[r]; maxDelta = max(maxDelta, fabs(delta[r])); } - if(maxDelta < 1e-6) break; + if(maxDelta < 1e-6) { converged = true; break; } } } @@ -144,6 +145,7 @@ static void computeBradleyTerryElo( } if(fish > 0.0) outStderr[i] = eloPerStrength / sqrt(fish); } + return converged; } int MainCmds::match(const vector& args) { @@ -494,9 +496,11 @@ int MainCmds::match(const vector& args) { } vector elo, eloStderr; - computeBradleyTerryElo(activeBots, pairStats, elo, eloStderr); + bool eloConverged = computeBradleyTerryElo(activeBots, pairStats, elo, eloStderr); 
logger.write(""); + if(!eloConverged) + logger.write("Warning: Bradley-Terry Elo estimation did not fully converge"); logger.write("=== match Results ==="); logger.write("Global Elo (Bradley-Terry MLE, reference=" + activeBots[0] + "):"); for(int i = 0; i < (int)activeBots.size(); i++) { diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 702cd440e..55f4d4b2e 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -18,12 +18,12 @@ namespace QRSTune { // Total features F = 1 + D + D*(D+1)/2 // ============================================================ -static inline int numFeatures(int D) { +inline int numFeatures(int D) { return 1 + D + D * (D + 1) / 2; } // Fill phi[0..F-1] given x[0..D-1]. -static inline void computeFeatures(int D, const double* x, double* phi) { +inline void computeFeatures(int D, const double* x, double* phi) { int k = 0; phi[k++] = 1.0; for(int i = 0; i < D; i++) phi[k++] = x[i]; @@ -33,7 +33,7 @@ static inline void computeFeatures(int D, const double* x, double* phi) { phi[k++] = x[i] * x[j]; } -static inline double sigmoid(double z) { +inline double sigmoid(double z) { if(z > 40.0) return 1.0; if(z < -40.0) return 0.0; return 1.0 / (1.0 + std::exp(-z)); @@ -41,7 +41,7 @@ static inline double sigmoid(double z) { // Solve Ax = b in-place (A is F x F, b is length F) via partial-pivot // Gaussian elimination. Returns false if singular. Overwrites A and b. 
-static inline bool gaussianSolve(int F, std::vector>& A, std::vector& b) { +inline bool gaussianSolve(int F, std::vector>& A, std::vector& b) { for(int col = 0; col < F; col++) { int piv = col; for(int r = col + 1; r < F; r++) From 7528c87d90d255beb3103e380a336121a016b5b8 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:02:52 +0800 Subject: [PATCH 07/41] Add tune-params and QRS-Tune documentation to README files Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 4 ++++ cpp/README.md | 2 ++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index ce7e87b97..e751cf556 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,10 @@ Run a high-performance match engine that will play a pool of bots against each o * `./katago match -config .cfg -log-file match.log -sgf-output-dir ` +Tune PUCT search hyperparameters via QRS-Tune sequential optimization: + + * `./katago tune-params -config .cfg` + Force OpenCL tuner to re-tune: * `./katago tuner -config .cfg` diff --git a/cpp/README.md b/cpp/README.md index 1f5d8d21f..04e6f252d 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -35,6 +35,7 @@ Summary of source folders, in approximate dependency order, from lowest level to * `distributed` - Code for talking to https webserver for volunteers to contribute distributed self-play games for training. * `tests` - A variety of tests. * `models` - A directory with a small number of small-sized (and not very strong) models for running tests. +* `qrstune` - Header-only QRS-Tune (Quadratic Regression Sequential) optimizer for hyperparameter tuning. * `command` - Top-level subcommands callable by users. GTP, analysis commands, benchmarking, selfplay data generation, etc. * `commandline.{cpp,h}` - Common command line logic shared by all subcommands. * `gtp.cpp` - Main GTP engine. 
@@ -44,6 +45,7 @@ Summary of source folders, in approximate dependency order, from lowest level to * `selfplay.cpp` - Selfplay data generation engine. * `gatekeeper.cpp` - Gating engine to filter neural nets for selfplay data generation. * `match.cpp` - Match engine for testing different parameters that can use huge batch sizes to efficiently play games in parallel. + * `tuneparams.cpp` - Tune PUCT search hyperparameters via QRS-Tune sequential optimization. Other folders: From b6cb552aeb5185797d4c48b5381d918e0984f362 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:16:33 +0800 Subject: [PATCH 08/41] Add example config for tune-params subcommand Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/configs/tune_params_example.cfg | 109 ++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 cpp/configs/tune_params_example.cfg diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg new file mode 100644 index 000000000..38e74e69b --- /dev/null +++ b/cpp/configs/tune_params_example.cfg @@ -0,0 +1,109 @@ +# Example config for tune-params (QRS-Tune PUCT hyperparameter tuning) +# This is an example template config for the "tune-params" subcommand of KataGo. e.g: +# ./katago tune-params -config configs/tune_params_example.cfg -log-file tune.log +# +# This command runs sequential head-to-head matches between a fixed reference bot (bot0) +# and an experiment bot (bot1) whose PUCT parameters are adapted each trial using +# QRS-Tune (Quadratic Regression Sequential optimization). +# +# After all trials, it reports the best-found values for cpuctExploration, +# cpuctExplorationLog, and cpuctUtilityStdevPrior, along with ASCII regression curves +# showing each parameter's estimated effect on win rate. +# +# See gtp config and match config for descriptions of most search and GPU params. 
+ +# Tuning------------------------------------------------------------------------------------ + +# Total number of tuning trials (games). More trials = better estimates but slower. +# A few hundred trials is a reasonable starting point; 1000+ for higher confidence. +numTrials = 500 + +# Search ranges for PUCT parameters being tuned. +# The optimizer explores within [Min, Max] for each parameter. +# If omitted, defaults are used: cpuctExploration [0.5, 2.0], cpuctExplorationLog [0.05, 1.0], +# cpuctUtilityStdevPrior [0.1, 0.8]. +# cpuctExplorationMin = 0.5 +# cpuctExplorationMax = 2.0 +# cpuctExplorationLogMin = 0.05 +# cpuctExplorationLogMax = 1.0 +# cpuctUtilityStdevPriorMin = 0.1 +# cpuctUtilityStdevPriorMax = 0.8 + +# Logs------------------------------------------------------------------------------------ + +logSearchInfo = false +logMoves = false +logGamesEvery = 100 +logToStdout = true + +# Bots------------------------------------------------------------------------------------- +# Exactly 2 bots are required: bot0 = reference (fixed params), bot1 = experiment (tuned params). +# Both bots can use the same model or different models. + +numBots = 2 +botName0 = base +botName1 = experiment + +# Neural net model files - can be the same model for both bots +nnModelFile0 = PATH_TO_MODEL +nnModelFile1 = PATH_TO_MODEL + +# Match----------------------------------------------------------------------------------- + +numGameThreads = 8 +maxMovesPerGame = 1200 + +allowResignation = true +resignThreshold = -0.95 +resignConsecTurns = 6 + +# Rules------------------------------------------------------------------------------------ +# Use a single fixed ruleset for consistent tuning results. 
+ +koRules = SIMPLE +scoringRules = AREA +taxRules = NONE +multiStoneSuicideLegals = false +hasButtons = false + +bSizes = 19 +bSizeRelProbs = 1 +komiAuto = True +handicapProb = 0.0 +handicapCompensateKomiProb = 1.0 + +# Search limits----------------------------------------------------------------------------------- +# Use fixed visits (not time) for reproducible results across trials. + +maxVisits = 500 +# maxVisits0 = 500 +# maxVisits1 = 500 + +numSearchThreads = 1 + +# GPU Settings------------------------------------------------------------------------------- + +nnMaxBatchSize = 32 +nnCacheSizePowerOfTwo = 21 +nnMutexPoolSizePowerOfTwo = 17 +nnRandomize = true +numNNServerThreadsPerModel = 1 + +# Root move selection and biases------------------------------------------------------------------------------ + +chosenMoveTemperatureEarly = 0.60 +chosenMoveTemperature = 0.20 + +# Internal params------------------------------------------------------------------------------ +# These are the FIXED params for bot0 (reference). Bot1's cpuctExploration, +# cpuctExplorationLog, and cpuctUtilityStdevPrior will be overridden by the optimizer. 
+ +# cpuctExploration = 0.9 +# cpuctExplorationLog = 0.4 +# cpuctUtilityStdevPrior = 0.40 +# fpuReductionMax = 0.2 +# rootFpuReductionMax = 0.1 +# valueWeightExponent = 0.25 +# subtreeValueBiasFactor = 0.45 +# subtreeValueBiasWeightExponent = 0.85 +# useGraphSearch = true From 57d44039cfc8ada3d40d5bfcbdd85473c6751829 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:18:21 +0800 Subject: [PATCH 09/41] Revert .gitignore change that removed build/ entry Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 06343f2c3..2e933d553 100644 --- a/.gitignore +++ b/.gitignore @@ -32,7 +32,6 @@ cpp/tests/results/matchsgfs2/games.sgfs cpp/data/ versions/ -build/ cpp/build cpp/out From 0746c0a094128ab8b263bcad01e1b63b2bcf44e2 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 09:19:00 +0800 Subject: [PATCH 10/41] Split QRSOptimizer from header-only into header + source file The header-only design violated KataGo's convention of separating declarations (.h) from implementations (.cpp). Move all non-trivial function bodies to QRSOptimizer.cpp, replace #pragma once with #ifndef guard, trim header includes, and have predict() delegate to score() to eliminate duplicated logic. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/CMakeLists.txt | 1 + cpp/qrstune/QRSOptimizer.cpp | 282 +++++++++++++++++++++++++++++++++++ cpp/qrstune/QRSOptimizer.h | 259 +++----------------------------- 3 files changed, 306 insertions(+), 236 deletions(-) create mode 100644 cpp/qrstune/QRSOptimizer.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b71adc444..e909a76b6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -307,6 +307,7 @@ add_executable(katago tests/tinymodel.cpp tests/tinymodeldata.cpp distributed/client.cpp + qrstune/QRSOptimizer.cpp command/commandline.cpp command/analysis.cpp command/benchmark.cpp diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp new file mode 100644 index 000000000..ccfd0a15e --- /dev/null +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -0,0 +1,282 @@ +// qrstune/QRSOptimizer.cpp + +#include "../qrstune/QRSOptimizer.h" + +#include +#include + +using namespace std; + +// ============================================================ +// Free functions +// ============================================================ + +int QRSTune::numFeatures(int D) { + return 1 + D + D * (D + 1) / 2; +} + +void QRSTune::computeFeatures(int D, const double* x, double* phi) { + int k = 0; + phi[k++] = 1.0; + for(int i = 0; i < D; i++) phi[k++] = x[i]; + for(int i = 0; i < D; i++) phi[k++] = x[i] * x[i]; + for(int i = 0; i < D; i++) + for(int j = i + 1; j < D; j++) + phi[k++] = x[i] * x[j]; +} + +double QRSTune::sigmoid(double z) { + if(z > 40.0) return 1.0; + if(z < -40.0) return 0.0; + return 1.0 / (1.0 + exp(-z)); +} + +bool QRSTune::gaussianSolve(int F, vector>& A, vector& b) { + for(int col = 0; col < F; col++) { + int piv = col; + for(int r = col + 1; r < F; r++) + if(fabs(A[r][col]) > fabs(A[piv][col])) piv = r; + swap(A[col], A[piv]); + swap(b[col], b[piv]); + if(fabs(A[col][col]) < 1e-12) return false; + double inv = 1.0 / A[col][col]; + for(int r = col + 1; r < F; r++) { + double f = A[r][col] * inv; 
+ for(int c = col; c < F; c++) A[r][c] -= f * A[col][c]; + b[r] -= f * b[col]; + } + } + for(int r = F - 1; r >= 0; r--) { + for(int c = r + 1; c < F; c++) b[r] -= A[r][c] * b[c]; + b[r] /= A[r][r]; + } + return true; +} + +// ============================================================ +// QRSModel +// ============================================================ + +QRSTune::QRSModel::QRSModel() + :D_(0), + F_(0), + l2_(0.1) +{} + +QRSTune::QRSModel::QRSModel(int D, double l2_reg) + :D_(D), + F_(numFeatures(D)), + beta_(numFeatures(D), 0.0), + l2_(l2_reg) +{} + +void QRSTune::QRSModel::fit(const vector<vector<double>>& xs, + const vector<double>& ys, + int max_iter) { + int N = (int)xs.size(); + if(N < F_) return; // underdetermined; keep prior beta = 0 + + vector<double> phi(F_); + + for(int iter = 0; iter < max_iter; iter++) { + // Gradient and (negative) Hessian from L2 prior + vector<double> grad(F_, 0.0); + vector<vector<double>> negH(F_, vector<double>(F_, 0.0)); + for(int f = 0; f < F_; f++) { + grad[f] = -l2_ * beta_[f]; + negH[f][f] = l2_; + } + + // Data contribution + for(int n = 0; n < N; n++) { + computeFeatures(D_, xs[n].data(), phi.data()); + double z = 0.0; + for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; + double p = sigmoid(z); + double w = p * (1.0 - p); + double resid = ys[n] - p; + for(int f = 0; f < F_; f++) { + grad[f] += resid * phi[f]; + for(int g = f; g < F_; g++) + negH[f][g] += w * phi[f] * phi[g]; + } + } + // Symmetrize negH + for(int f = 0; f < F_; f++) + for(int g = f + 1; g < F_; g++) + negH[g][f] = negH[f][g]; + + // Solve negH * delta = grad => beta += delta + if(!gaussianSolve(F_, negH, grad)) break; + double maxd = 0.0; + for(int f = 0; f < F_; f++) { + beta_[f] += grad[f]; + maxd = max(maxd, fabs(grad[f])); + } + if(maxd < 1e-7) break; + } +} + +double QRSTune::QRSModel::predict(const double* x) const { + return sigmoid(score(x)); +} + +double QRSTune::QRSModel::score(const double* x) const { + vector<double> phi(F_); + computeFeatures(D_, x, phi.data()); + double z = 0.0; + for(int f = 0; f 
< F_; f++) z += beta_[f] * phi[f]; + return z; +} + +void QRSTune::QRSModel::mapOptimum(double* out_x) const { + // Beta layout: [intercept, linear[0..D-1], quad[0..D-1], cross by (i> M(D_, vector(D_, 0.0)); + vector rhs(D_); + + for(int k = 0; k < D_; k++) { + M[k][k] = 2.0 * b_quad[k]; + rhs[k] = -b_lin[k]; + } + int idx = 0; + for(int i = 0; i < D_; i++) + for(int j = i + 1; j < D_; j++) { + M[i][j] += b_cross[idx]; + M[j][i] += b_cross[idx]; + idx++; + } + + if(!gaussianSolve(D_, M, rhs)) { + for(int i = 0; i < D_; i++) out_x[i] = 0.0; + return; + } + for(int i = 0; i < D_; i++) + out_x[i] = max(-1.0, min(1.0, rhs[i])); +} + +// ============================================================ +// QRSBuffer +// ============================================================ + +QRSTune::QRSBuffer::QRSBuffer(int min_keep, double prune_margin) + :min_keep_(min_keep), + prune_margin_(prune_margin) +{} + +void QRSTune::QRSBuffer::add(const vector& x, double y) { + xs_.push_back(x); + ys_.push_back(y); +} + +void QRSTune::QRSBuffer::prune(const QRSModel& model) { + int N = (int)xs_.size(); + if(N <= min_keep_ * 2) return; + + // Score all samples and find best predicted win rate + vector> scored(N); + double p_best = 0.0; + for(int i = 0; i < N; i++) { + double p = model.predict(xs_[i].data()); + scored[i] = {p, i}; + if(p > p_best) p_best = p; + } + double threshold = p_best - prune_margin_; + + // Sort by descending predicted quality + sort(scored.begin(), scored.end(), + [](const pair& a, const pair& b) { + return a.first > b.first; + }); + + // Keep samples above threshold, plus top-quality samples up to min_keep_ + vector keep(N, false); + int kept = 0; + for(auto& kv : scored) { + if(kv.first >= threshold || kept < min_keep_) { + keep[kv.second] = true; + kept++; + } + } + + // Rebuild in original order to preserve temporal structure + vector> nx; + vector ny; + for(int i = 0; i < N; i++) { + if(keep[i]) { + nx.push_back(xs_[i]); + ny.push_back(ys_[i]); + } + } + xs_ = 
std::move(nx); + ys_ = std::move(ny); +} + +// ============================================================ +// QRSTuner +// ============================================================ + +QRSTune::QRSTuner::QRSTuner(int D, uint64_t seed, int total_trials, + double l2_reg, int refit_every, int prune_every, + double sigma_init, double sigma_fin) + :D_(D), + model_(D, l2_reg), + buffer_(max(20, total_trials / 50), 0.25), + rng_(seed), + trial_count_(0), + total_trials_(total_trials), + refit_every_(refit_every), + prune_every_(prune_every), + sigma_initial_(sigma_init), + sigma_final_(sigma_fin) +{} + +vector QRSTune::QRSTuner::nextSample() { + vector x(D_); + int F = model_.features(); + + if(buffer_.size() < F + 1) { + // Insufficient data for reliable fit — explore uniformly + uniform_real_distribution uni(-1.0, 1.0); + for(int i = 0; i < D_; i++) x[i] = uni(rng_); + return x; + } + + // Base: MAP optimum + model_.mapOptimum(x.data()); + + // Decaying exploration noise + double progress = (double)trial_count_ / max(1, total_trials_ - 1); + double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_); + normal_distribution noise(0.0, sigma); + for(int i = 0; i < D_; i++) + x[i] = max(-1.0, min(1.0, x[i] + noise(rng_))); + + return x; +} + +void QRSTune::QRSTuner::addResult(const vector& x, double y) { + buffer_.add(x, y); + trial_count_++; + + if(trial_count_ % refit_every_ == 0 && buffer_.size() >= model_.features() + 1) { + model_.fit(buffer_.xs(), buffer_.ys()); + int refit_count = trial_count_ / refit_every_; + if(refit_count % prune_every_ == 0) + buffer_.prune(model_); + } +} + +vector QRSTune::QRSTuner::bestCoords() const { + vector best(D_); + model_.mapOptimum(best.data()); + return best; +} + +double QRSTune::QRSTuner::bestWinProb() const { + auto best = bestCoords(); + return model_.predict(best.data()); +} diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 55f4d4b2e..d3ba3fa6f 100644 --- a/cpp/qrstune/QRSOptimizer.h 
+++ b/cpp/qrstune/QRSOptimizer.h @@ -1,14 +1,11 @@ // qrstune/QRSOptimizer.h -#pragma once +#ifndef QRSTUNE_QRSOPTIMIZER_H_ +#define QRSTUNE_QRSOPTIMIZER_H_ -#include -#include -#include -#include #include #include -#include +#include namespace QRSTune { @@ -18,50 +15,16 @@ namespace QRSTune { // Total features F = 1 + D + D*(D+1)/2 // ============================================================ -inline int numFeatures(int D) { - return 1 + D + D * (D + 1) / 2; -} +int numFeatures(int D); // Fill phi[0..F-1] given x[0..D-1]. -inline void computeFeatures(int D, const double* x, double* phi) { - int k = 0; - phi[k++] = 1.0; - for(int i = 0; i < D; i++) phi[k++] = x[i]; - for(int i = 0; i < D; i++) phi[k++] = x[i] * x[i]; - for(int i = 0; i < D; i++) - for(int j = i + 1; j < D; j++) - phi[k++] = x[i] * x[j]; -} +void computeFeatures(int D, const double* x, double* phi); -inline double sigmoid(double z) { - if(z > 40.0) return 1.0; - if(z < -40.0) return 0.0; - return 1.0 / (1.0 + std::exp(-z)); -} +double sigmoid(double z); // Solve Ax = b in-place (A is F x F, b is length F) via partial-pivot // Gaussian elimination. Returns false if singular. Overwrites A and b. 
-inline bool gaussianSolve(int F, std::vector<std::vector<double>>& A, std::vector<double>& b) { - for(int col = 0; col < F; col++) { - int piv = col; - for(int r = col + 1; r < F; r++) - if(std::fabs(A[r][col]) > std::fabs(A[piv][col])) piv = r; - std::swap(A[col], A[piv]); - std::swap(b[col], b[piv]); - if(std::fabs(A[col][col]) < 1e-12) return false; - double inv = 1.0 / A[col][col]; - for(int r = col + 1; r < F; r++) { - double f = A[r][col] * inv; - for(int c = col; c < F; c++) A[r][c] -= f * A[col][c]; - b[r] -= f * b[col]; - } - } - for(int r = F - 1; r >= 0; r--) { - for(int c = r + 1; c < F; c++) b[r] -= A[r][c] * b[c]; - b[r] /= A[r][r]; - } - return true; -} +bool gaussianSolve(int F, std::vector<std::vector<double>>& A, std::vector<double>& b); // ============================================================ // QRSModel: quadratic logistic regression with L2 regularization. @@ -73,76 +36,20 @@ class QRSModel { double l2_; // L2 regularization strength public: - QRSModel() : D_(0), F_(0), l2_(0.1) {} - QRSModel(int D, double l2_reg = 0.1) - : D_(D), F_(numFeatures(D)), beta_(numFeatures(D), 0.0), l2_(l2_reg) {} + QRSModel(); + QRSModel(int D, double l2_reg = 0.1); // Newton-Raphson MAP estimation. 
// xs: sample coordinates; ys: outcomes in {0.0, 0.5, 1.0} void fit(const std::vector>& xs, const std::vector& ys, - int max_iter = 30) { - int N = (int)xs.size(); - if(N < F_) return; // underdetermined; keep prior beta = 0 - - std::vector phi(F_); - - for(int iter = 0; iter < max_iter; iter++) { - // Gradient and (negative) Hessian from L2 prior - std::vector grad(F_, 0.0); - std::vector> negH(F_, std::vector(F_, 0.0)); - for(int f = 0; f < F_; f++) { - grad[f] = -l2_ * beta_[f]; - negH[f][f] = l2_; - } - - // Data contribution - for(int n = 0; n < N; n++) { - computeFeatures(D_, xs[n].data(), phi.data()); - double z = 0.0; - for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; - double p = sigmoid(z); - double w = p * (1.0 - p); - double resid = ys[n] - p; - for(int f = 0; f < F_; f++) { - grad[f] += resid * phi[f]; - for(int g = f; g < F_; g++) - negH[f][g] += w * phi[f] * phi[g]; - } - } - // Symmetrize negH - for(int f = 0; f < F_; f++) - for(int g = f + 1; g < F_; g++) - negH[g][f] = negH[f][g]; - - // Solve negH * delta = grad => beta += delta - if(!gaussianSolve(F_, negH, grad)) break; - double maxd = 0.0; - for(int f = 0; f < F_; f++) { - beta_[f] += grad[f]; - maxd = std::max(maxd, std::fabs(grad[f])); - } - if(maxd < 1e-7) break; - } - } + int max_iter = 30); // Win probability at x[0..D-1] - double predict(const double* x) const { - std::vector phi(F_); - computeFeatures(D_, x, phi.data()); - double z = 0.0; - for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; - return sigmoid(z); - } + double predict(const double* x) const; // Linear score phi(x)^T beta (used for MAP maximization) - double score(const double* x) const { - std::vector phi(F_); - computeFeatures(D_, x, phi.data()); - double z = 0.0; - for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; - return z; - } + double score(const double* x) const; // Find x in [-1,+1]^D that maximizes score(x) = phi(x)^T beta. 
// For a quadratic, the unconstrained stationary point satisfies: @@ -150,34 +57,7 @@ class QRSModel { // where M[i][i] = 2*beta_quad[i], M[i][j]=M[j][i] = beta_cross[i,j], // b_lin[i] = beta_linear[i]. // The solution is clamped to [-1,+1]^D. - void mapOptimum(double* out_x) const { - // Beta layout: [intercept, linear[0..D-1], quad[0..D-1], cross by (i> M(D_, std::vector(D_, 0.0)); - std::vector rhs(D_); - - for(int k = 0; k < D_; k++) { - M[k][k] = 2.0 * b_quad[k]; - rhs[k] = -b_lin[k]; - } - int idx = 0; - for(int i = 0; i < D_; i++) - for(int j = i + 1; j < D_; j++) { - M[i][j] += b_cross[idx]; - M[j][i] += b_cross[idx]; - idx++; - } - - if(!gaussianSolve(D_, M, rhs)) { - for(int i = 0; i < D_; i++) out_x[i] = 0.0; - return; - } - for(int i = 0; i < D_; i++) - out_x[i] = std::max(-1.0, std::min(1.0, rhs[i])); - } + void mapOptimum(double* out_x) const; int dims() const { return D_; } int features() const { return F_; } @@ -195,60 +75,15 @@ class QRSBuffer { double prune_margin_; // drop samples where p_pred < p_best - margin public: - QRSBuffer(int min_keep = 30, double prune_margin = 0.25) - : min_keep_(min_keep), prune_margin_(prune_margin) {} + QRSBuffer(int min_keep = 30, double prune_margin = 0.25); - void add(const std::vector& x, double y) { - xs_.push_back(x); - ys_.push_back(y); - } + void add(const std::vector& x, double y); // Remove samples significantly below the current MAP win estimate. // Samples are ranked by predicted quality so that min_keep_ retains the // best samples rather than the oldest (which are typically from early // uniform random exploration). 
- void prune(const QRSModel& model) { - int N = (int)xs_.size(); - if(N <= min_keep_ * 2) return; - - // Score all samples and find best predicted win rate - std::vector> scored(N); - double p_best = 0.0; - for(int i = 0; i < N; i++) { - double p = model.predict(xs_[i].data()); - scored[i] = {p, i}; - if(p > p_best) p_best = p; - } - double threshold = p_best - prune_margin_; - - // Sort by descending predicted quality - std::sort(scored.begin(), scored.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - - // Keep samples above threshold, plus top-quality samples up to min_keep_ - std::vector keep(N, false); - int kept = 0; - for(auto& kv : scored) { - if(kv.first >= threshold || kept < min_keep_) { - keep[kv.second] = true; - kept++; - } - } - - // Rebuild in original order to preserve temporal structure - std::vector> nx; - std::vector ny; - for(int i = 0; i < N; i++) { - if(keep[i]) { - nx.push_back(xs_[i]); - ny.push_back(ys_[i]); - } - } - xs_ = std::move(nx); - ys_ = std::move(ny); - } + void prune(const QRSModel& model); const std::vector>& xs() const { return xs_; } const std::vector& ys() const { return ys_; } @@ -292,72 +127,22 @@ class QRSTuner { int refit_every = 10, int prune_every = 5, double sigma_init = 0.40, - double sigma_fin = 0.05) - : D_(D), - model_(D, l2_reg), - buffer_(/*min_keep=*/std::max(20, total_trials / 50), - /*prune_margin=*/0.25), - rng_(seed), - trial_count_(0), - total_trials_(total_trials), - refit_every_(refit_every), - prune_every_(prune_every), - sigma_initial_(sigma_init), - sigma_final_(sigma_fin) {} + double sigma_fin = 0.05); // Propose next point to evaluate. // During early exploration (< F samples) returns a random point. // Afterwards: MAP optimum + decaying Gaussian noise clamped to [-1,+1]^D. 
- std::vector nextSample() { - std::vector x(D_); - int F = model_.features(); - - if(buffer_.size() < F + 1) { - // Insufficient data for reliable fit — explore uniformly - std::uniform_real_distribution uni(-1.0, 1.0); - for(int i = 0; i < D_; i++) x[i] = uni(rng_); - return x; - } - - // Base: MAP optimum - model_.mapOptimum(x.data()); - - // Decaying exploration noise - double progress = (double)trial_count_ / std::max(1, total_trials_ - 1); - double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_); - std::normal_distribution noise(0.0, sigma); - for(int i = 0; i < D_; i++) - x[i] = std::max(-1.0, std::min(1.0, x[i] + noise(rng_))); - - return x; - } + std::vector nextSample(); // Record the outcome of a trial. // y: 1.0 = win, 0.0 = loss, 0.5 = draw - void addResult(const std::vector& x, double y) { - buffer_.add(x, y); - trial_count_++; - - if(trial_count_ % refit_every_ == 0 && buffer_.size() >= model_.features() + 1) { - model_.fit(buffer_.xs(), buffer_.ys()); - int refit_count = trial_count_ / refit_every_; - if(refit_count % prune_every_ == 0) - buffer_.prune(model_); - } - } + void addResult(const std::vector& x, double y); // Return current MAP optimum in [-1,+1]^D - std::vector bestCoords() const { - std::vector best(D_); - model_.mapOptimum(best.data()); - return best; - } + std::vector bestCoords() const; // Estimated win probability at the MAP optimum - double bestWinProb() const { - auto best = bestCoords(); - return model_.predict(best.data()); - } + double bestWinProb() const; int trialCount() const { return trial_count_; } int dims() const { return D_; } @@ -365,3 +150,5 @@ class QRSTuner { }; } // namespace QRSTune + +#endif // QRSTUNE_QRSOPTIMIZER_H_ From 460b71ee80a6d59e3dd9d4330ac314f55cc666f6 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 02:21:57 +0000 Subject: [PATCH 11/41] Add build/ to .gitignore for root-level build directory https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- .gitignore | 1 + 1 
file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2e933d553..71c41cd24 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *~ __pycache__ *.h5 +build/ *.log *.cbp From 3fa57491b56f1a1413241edea4ccc41813f0a7d1 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 02:25:49 +0000 Subject: [PATCH 12/41] Revert adding build/ to .gitignore Build directory moved under cpp/build which is already gitignored. https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 71c41cd24..2e933d553 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ *~ __pycache__ *.h5 -build/ *.log *.cbp From 4a88e32938ecf51aa1b1f36015b4f82755dc5542 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 02:49:21 +0000 Subject: [PATCH 13/41] Move match statistics functions to core namespaces Move wilsonCI95 and oneTailedPValue from static functions in match.cpp to FancyMath namespace in core/fancymath.h/.cpp, following KataGo's pattern of placing reusable math utilities in core namespaces. Move computeBradleyTerryElo from static function in match.cpp to ComputeElos namespace in core/elo.h/.cpp, alongside the existing Elo computation utilities. match.cpp now calls FancyMath::wilsonCI95(), FancyMath::oneTailedPValue(), and ComputeElos::computeBradleyTerryElo() instead of file-local statics. tuneparams.cpp static functions (qrsDimToReal, qrsToPUCT, printRegressionCurves) are kept as file-local statics since they are command-specific helpers, matching KataGo's pattern for command files. 
https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- cpp/command/match.cpp | 129 ++--------------------------------------- cpp/core/elo.cpp | 100 ++++++++++++++++++++++++++++++++ cpp/core/elo.h | 13 +++++ cpp/core/fancymath.cpp | 16 +++++ cpp/core/fancymath.h | 7 +++ 5 files changed, 141 insertions(+), 124 deletions(-) diff --git a/cpp/command/match.cpp b/cpp/command/match.cpp index e8fdb0cc1..9db0d1050 100644 --- a/cpp/command/match.cpp +++ b/cpp/command/match.cpp @@ -1,4 +1,6 @@ #include "../core/global.h" +#include "../core/elo.h" +#include "../core/fancymath.h" #include "../core/fileutils.h" #include "../core/makedir.h" #include "../core/config_parser.h" @@ -12,7 +14,6 @@ #include "../main.h" #include -#include #include using namespace std; @@ -28,126 +29,6 @@ static void signalHandler(int signal) } } -//Match statistics helpers - -//Wilson score 95% two-tailed confidence interval (draws counted as 0.5 wins) -static void wilsonCI95(double wins, double n, double& lo, double& hi) { - const double z = 1.96; - double p = wins / n; - double denom = 1.0 + z*z/n; - double center = (p + z*z/(2*n)) / denom; - double margin = z * sqrt(p*(1-p)/n + z*z/(4*n*n)) / denom; - lo = center - margin; - hi = center + margin; -} - -//One-tailed p-value: P(experiment winrate <= 0.5 | data), using normal approximation -static double oneTailedPValue(double wins, double n) { - if(n <= 0) return 0.5; - double z = (wins - 0.5*n) / (0.5*sqrt(n)); - return 0.5 * erfc(z / sqrt(2.0)); -} - -//Bradley-Terry MLE Elo (global, all-bot ranking) -//pairStats: {nameA,nameB} -> {winsA, winsB, draws} nameA < nameB lexicographically -static bool computeBradleyTerryElo( - const vector& botNames, - const map, array>& pairStats, - vector& outElo, - vector& outStderr -) { - int N = (int)botNames.size(); - const double eloPerStrength = 400.0 * log10(exp(1.0)); //~173.7 - - map nameIdx; - for(int i = 0; i < N; i++) nameIdx[botNames[i]] = i; - - //w[i][j] = effective wins of i vs j (draws count 0.5) - 
vector> w(N, vector(N, 0.0)); - for(auto& kv : pairStats) { - auto itA = nameIdx.find(kv.first.first); - auto itB = nameIdx.find(kv.first.second); - if(itA == nameIdx.end() || itB == nameIdx.end()) continue; - int a = itA->second, b = itB->second; - w[a][b] += kv.second[0] + 0.5 * kv.second[2]; - w[b][a] += kv.second[1] + 0.5 * kv.second[2]; - } - - //theta[0] = 0 (reference, first bot), optimize theta[1..N-1] - vector theta(N, 0.0); - int M = N - 1; - - bool converged = (M == 0); - if(M > 0) { - for(int iter = 0; iter < 200; iter++) { - vector grad(M, 0.0); - vector> H(M, vector(M, 0.0)); - for(int i = 0; i < N; i++) { - for(int j = i+1; j < N; j++) { - double nij = w[i][j] + w[j][i]; - if(nij <= 0.0) continue; - double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); - double fish = nij * sigma * (1.0 - sigma); - double gij = w[i][j] - nij * sigma; - if(i > 0) { grad[i-1] += gij; H[i-1][i-1] -= fish; } - if(j > 0) { grad[j-1] -= gij; H[j-1][j-1] -= fish; } - if(i > 0 && j > 0) { H[i-1][j-1] += fish; H[j-1][i-1] += fish; } - } - } - //Solve H*delta = -grad via Gaussian elimination - vector> aug(M, vector(M+1, 0.0)); - for(int r = 0; r < M; r++) { - for(int c = 0; c < M; c++) aug[r][c] = H[r][c]; - aug[r][M] = -grad[r]; - } - for(int col = 0; col < M; col++) { - int piv = col; - for(int r = col+1; r < M; r++) - if(fabs(aug[r][col]) > fabs(aug[piv][col])) piv = r; - swap(aug[col], aug[piv]); - if(fabs(aug[col][col]) < 1e-12) continue; - double inv = 1.0 / aug[col][col]; - for(int r = col+1; r < M; r++) { - double f = aug[r][col] * inv; - for(int c = col; c <= M; c++) aug[r][c] -= f * aug[col][c]; - } - } - vector delta(M, 0.0); - for(int r = M-1; r >= 0; r--) { - double s = aug[r][M]; - for(int c = r+1; c < M; c++) s -= aug[r][c] * delta[c]; - if(fabs(aug[r][r]) > 1e-12) delta[r] = s / aug[r][r]; - } - double maxDelta = 0.0; - for(int r = 0; r < M; r++) { - theta[r+1] += delta[r]; - maxDelta = max(maxDelta, fabs(delta[r])); - } - if(maxDelta < 1e-6) { converged = 
true; break; } - } - } - - //Convert log-strength to Elo relative to bot 0 - outElo.resize(N); - outStderr.resize(N, 0.0); - for(int i = 0; i < N; i++) - outElo[i] = (theta[i] - theta[0]) * eloPerStrength; - - //Fisher information diagonal -> stderr - for(int i = 1; i < N; i++) { - double fish = 0.0; - for(int j = 0; j < N; j++) { - if(j == i) continue; - double nij = w[i][j] + w[j][i]; - if(nij <= 0.0) continue; - double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); - fish += nij * sigma * (1.0 - sigma); - } - if(fish > 0.0) outStderr[i] = eloPerStrength / sqrt(fish); - } - return converged; -} - int MainCmds::match(const vector& args) { Board::initHash(); ScoreValue::initTables(); @@ -496,7 +377,7 @@ int MainCmds::match(const vector& args) { } vector elo, eloStderr; - bool eloConverged = computeBradleyTerryElo(activeBots, pairStats, elo, eloStderr); + bool eloConverged = ComputeElos::computeBradleyTerryElo(activeBots, pairStats, elo, eloStderr); logger.write(""); if(!eloConverged) @@ -518,8 +399,8 @@ int MainCmds::match(const vector& args) { if(total == 0) continue; double wins = wA + 0.5 * d; double lo, hi; - wilsonCI95(wins, (double)total, lo, hi); - double pval = oneTailedPValue(wins, (double)total); + FancyMath::wilsonCI95(wins, (double)total, lo, hi); + double pval = FancyMath::oneTailedPValue(wins, (double)total); string sig = (pval < 0.05) ? 
" *" : ""; logger.write( " " + kv.first.first + " vs " + kv.first.second + diff --git a/cpp/core/elo.cpp b/cpp/core/elo.cpp index b624ed22f..86da194f1 100644 --- a/cpp/core/elo.cpp +++ b/cpp/core/elo.cpp @@ -1,6 +1,8 @@ #include "../core/elo.h" +#include #include +#include #include "../core/test.h" #include "../core/os.h" @@ -268,6 +270,104 @@ vector ComputeElos::computeElos( return elos; } +bool ComputeElos::computeBradleyTerryElo( + const vector& botNames, + const map, array>& pairStats, + vector& outElo, + vector& outStderr +) { + int N = (int)botNames.size(); + const double eloPerStrength = 400.0 * log10(exp(1.0)); //~173.7 + + map nameIdx; + for(int i = 0; i < N; i++) nameIdx[botNames[i]] = i; + + //w[i][j] = effective wins of i vs j (draws count 0.5) + vector> w(N, vector(N, 0.0)); + for(auto& kv : pairStats) { + auto itA = nameIdx.find(kv.first.first); + auto itB = nameIdx.find(kv.first.second); + if(itA == nameIdx.end() || itB == nameIdx.end()) continue; + int a = itA->second, b = itB->second; + w[a][b] += kv.second[0] + 0.5 * kv.second[2]; + w[b][a] += kv.second[1] + 0.5 * kv.second[2]; + } + + //theta[0] = 0 (reference, first bot), optimize theta[1..N-1] + vector theta(N, 0.0); + int M = N - 1; + + bool converged = (M == 0); + if(M > 0) { + for(int iter = 0; iter < 200; iter++) { + vector grad(M, 0.0); + vector> H(M, vector(M, 0.0)); + for(int i = 0; i < N; i++) { + for(int j = i+1; j < N; j++) { + double nij = w[i][j] + w[j][i]; + if(nij <= 0.0) continue; + double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); + double fish = nij * sigma * (1.0 - sigma); + double gij = w[i][j] - nij * sigma; + if(i > 0) { grad[i-1] += gij; H[i-1][i-1] -= fish; } + if(j > 0) { grad[j-1] -= gij; H[j-1][j-1] -= fish; } + if(i > 0 && j > 0) { H[i-1][j-1] += fish; H[j-1][i-1] += fish; } + } + } + //Solve H*delta = -grad via Gaussian elimination + vector> aug(M, vector(M+1, 0.0)); + for(int r = 0; r < M; r++) { + for(int c = 0; c < M; c++) aug[r][c] = H[r][c]; + aug[r][M] = 
-grad[r]; + } + for(int col = 0; col < M; col++) { + int piv = col; + for(int r = col+1; r < M; r++) + if(fabs(aug[r][col]) > fabs(aug[piv][col])) piv = r; + swap(aug[col], aug[piv]); + if(fabs(aug[col][col]) < 1e-12) continue; + double inv = 1.0 / aug[col][col]; + for(int r = col+1; r < M; r++) { + double f = aug[r][col] * inv; + for(int c = col; c <= M; c++) aug[r][c] -= f * aug[col][c]; + } + } + vector delta(M, 0.0); + for(int r = M-1; r >= 0; r--) { + double s = aug[r][M]; + for(int c = r+1; c < M; c++) s -= aug[r][c] * delta[c]; + if(fabs(aug[r][r]) > 1e-12) delta[r] = s / aug[r][r]; + } + double maxDelta = 0.0; + for(int r = 0; r < M; r++) { + theta[r+1] += delta[r]; + maxDelta = max(maxDelta, fabs(delta[r])); + } + if(maxDelta < 1e-6) { converged = true; break; } + } + } + + //Convert log-strength to Elo relative to bot 0 + outElo.resize(N); + outStderr.resize(N, 0.0); + for(int i = 0; i < N; i++) + outElo[i] = (theta[i] - theta[0]) * eloPerStrength; + + //Fisher information diagonal -> stderr + for(int i = 1; i < N; i++) { + double fish = 0.0; + for(int j = 0; j < N; j++) { + if(j == i) continue; + double nij = w[i][j] + w[j][i]; + if(nij <= 0.0) continue; + double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); + fish += nij * sigma * (1.0 - sigma); + } + if(fish > 0.0) outStderr[i] = eloPerStrength / sqrt(fish); + } + return converged; +} + static bool approxEqual(double x, double y, double tolerance) { return std::fabs(x - y) < tolerance; } diff --git a/cpp/core/elo.h b/cpp/core/elo.h index ff4ad4f8d..883e020b9 100644 --- a/cpp/core/elo.h +++ b/cpp/core/elo.h @@ -3,6 +3,9 @@ #include "../core/global.h" +#include +#include + namespace ComputeElos { STRUCT_NAMED_PAIR(double,firstWins,double,secondWins,WLRecord); @@ -30,6 +33,16 @@ namespace ComputeElos { //What's the probability of winning correspnding to this elo difference? double probWin(double eloDiff); + //Bradley-Terry MLE Elo via Newton-Raphson, for symmetric pairwise W/L/D data. 
+ //pairStats: {nameA,nameB} -> {winsA, winsB, draws}, nameA < nameB lexicographically. + //Draws count as 0.5 wins for each side. Returns true if converged. + bool computeBradleyTerryElo( + const std::vector& botNames, + const std::map, std::array>& pairStats, + std::vector& outElo, + std::vector& outStderr + ); + void runTests(); } diff --git a/cpp/core/fancymath.cpp b/cpp/core/fancymath.cpp index 4481ae463..70920eda1 100644 --- a/cpp/core/fancymath.cpp +++ b/cpp/core/fancymath.cpp @@ -148,6 +148,22 @@ double FancyMath::binaryCrossEntropy(double predProb, double targetProb, double return targetProb * (-log(predProb)) + (1.0-targetProb) * (-log(reverseProb)); } +void FancyMath::wilsonCI95(double wins, double n, double& lo, double& hi) { + const double z = 1.96; + double p = wins / n; + double denom = 1.0 + z*z/n; + double center = (p + z*z/(2*n)) / denom; + double margin = z * sqrt(p*(1-p)/n + z*z/(4*n*n)) / denom; + lo = center - margin; + hi = center + margin; +} + +double FancyMath::oneTailedPValue(double wins, double n) { + if(n <= 0) return 0.5; + double z = (wins - 0.5*n) / (0.5*sqrt(n)); + return 0.5 * erfc(z / sqrt(2.0)); +} + #define APPROX_EQ(x,y,tolerance) testApproxEq((x),(y),(tolerance), #x, #y, __FILE__, __LINE__) static void testApproxEq(double x, double y, double tolerance, const char* msgX, const char* msgY, const char *file, int line) { diff --git a/cpp/core/fancymath.h b/cpp/core/fancymath.h index 5155450ac..363403eb3 100644 --- a/cpp/core/fancymath.h +++ b/cpp/core/fancymath.h @@ -26,6 +26,13 @@ namespace FancyMath { //predProb is scaled into the range [epsilon,1.0-epsilon]. double binaryCrossEntropy(double predProb, double targetProb, double epsilon); + //Wilson score 95% two-tailed confidence interval for binomial proportion. + //Draws should be counted as 0.5 wins before calling. + void wilsonCI95(double wins, double n, double& lo, double& hi); + + //One-tailed p-value: P(observed winrate <= 0.5 | data), using normal approximation. 
+ double oneTailedPValue(double wins, double n); + void runTests(); } From 6d7ac6e8f17cdbfba4ee59ace44955a70022e9e5 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 02:59:03 +0000 Subject: [PATCH 14/41] Simplify: fix review findings and update cpp/README.md - Use existing ELO_PER_LOG_GAMMA constant instead of recomputing - Hoist Newton-loop allocations in computeBradleyTerryElo (grad, H, aug, delta) - Hoist Newton-loop allocations in QRSModel::fit (grad, negH) - Remove dead sigReceived state in tuneparams.cpp - Add n <= 0 guard to FancyMath::wilsonCI95 to prevent division by zero - Use std::move in QRSBuffer::prune for kept sample vectors - Fix cpp/README.md: qrstune is no longer header-only, fix algorithm name https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- cpp/README.md | 2 +- cpp/command/tuneparams.cpp | 2 -- cpp/core/elo.cpp | 16 +++++++++------- cpp/core/fancymath.cpp | 1 + cpp/qrstune/QRSOptimizer.cpp | 8 +++++--- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cpp/README.md b/cpp/README.md index 04e6f252d..90e328144 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -35,7 +35,7 @@ Summary of source folders, in approximate dependency order, from lowest level to * `distributed` - Code for talking to https webserver for volunteers to contribute distributed self-play games for training. * `tests` - A variety of tests. * `models` - A directory with a small number of small-sized (and not very strong) models for running tests. -* `qrstune` - Header-only QRS-Tune (Quadratic Regression Sequential) optimizer for hyperparameter tuning. +* `qrstune` - QRS-Tune (Quadratic Response Surface) optimizer for hyperparameter tuning. * `command` - Top-level subcommands callable by users. GTP, analysis commands, benchmarking, selfplay data generation, etc. * `commandline.{cpp,h}` - Common command line logic shared by all subcommands. * `gtp.cpp` - Main GTP engine. 
diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index d33a56dc5..058f4f861 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -22,12 +22,10 @@ using namespace std; -static std::atomic sigReceived(false); static std::atomic shouldStop(false); static void signalHandler(int signal) { if(signal == SIGINT || signal == SIGTERM) { - sigReceived.store(true); shouldStop.store(true); } } diff --git a/cpp/core/elo.cpp b/cpp/core/elo.cpp index 86da194f1..4b66ba23e 100644 --- a/cpp/core/elo.cpp +++ b/cpp/core/elo.cpp @@ -277,7 +277,6 @@ bool ComputeElos::computeBradleyTerryElo( vector& outStderr ) { int N = (int)botNames.size(); - const double eloPerStrength = 400.0 * log10(exp(1.0)); //~173.7 map nameIdx; for(int i = 0; i < N; i++) nameIdx[botNames[i]] = i; @@ -299,9 +298,13 @@ bool ComputeElos::computeBradleyTerryElo( bool converged = (M == 0); if(M > 0) { + vector grad(M); + vector> H(M, vector(M)); + vector> aug(M, vector(M+1)); + vector delta(M); for(int iter = 0; iter < 200; iter++) { - vector grad(M, 0.0); - vector> H(M, vector(M, 0.0)); + fill(grad.begin(), grad.end(), 0.0); + for(int r = 0; r < M; r++) fill(H[r].begin(), H[r].end(), 0.0); for(int i = 0; i < N; i++) { for(int j = i+1; j < N; j++) { double nij = w[i][j] + w[j][i]; @@ -315,7 +318,6 @@ bool ComputeElos::computeBradleyTerryElo( } } //Solve H*delta = -grad via Gaussian elimination - vector> aug(M, vector(M+1, 0.0)); for(int r = 0; r < M; r++) { for(int c = 0; c < M; c++) aug[r][c] = H[r][c]; aug[r][M] = -grad[r]; @@ -332,7 +334,7 @@ bool ComputeElos::computeBradleyTerryElo( for(int c = col; c <= M; c++) aug[r][c] -= f * aug[col][c]; } } - vector delta(M, 0.0); + fill(delta.begin(), delta.end(), 0.0); for(int r = M-1; r >= 0; r--) { double s = aug[r][M]; for(int c = r+1; c < M; c++) s -= aug[r][c] * delta[c]; @@ -351,7 +353,7 @@ bool ComputeElos::computeBradleyTerryElo( outElo.resize(N); outStderr.resize(N, 0.0); for(int i = 0; i < N; i++) - outElo[i] = (theta[i] 
- theta[0]) * eloPerStrength; + outElo[i] = (theta[i] - theta[0]) * ELO_PER_LOG_GAMMA; //Fisher information diagonal -> stderr for(int i = 1; i < N; i++) { @@ -363,7 +365,7 @@ bool ComputeElos::computeBradleyTerryElo( double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i])); fish += nij * sigma * (1.0 - sigma); } - if(fish > 0.0) outStderr[i] = eloPerStrength / sqrt(fish); + if(fish > 0.0) outStderr[i] = ELO_PER_LOG_GAMMA / sqrt(fish); } return converged; } diff --git a/cpp/core/fancymath.cpp b/cpp/core/fancymath.cpp index 70920eda1..13bab76f4 100644 --- a/cpp/core/fancymath.cpp +++ b/cpp/core/fancymath.cpp @@ -149,6 +149,7 @@ double FancyMath::binaryCrossEntropy(double predProb, double targetProb, double } void FancyMath::wilsonCI95(double wins, double n, double& lo, double& hi) { + if(n <= 0) { lo = 0; hi = 1; return; } const double z = 1.96; double p = wins / n; double denom = 1.0 + z*z/n; diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index ccfd0a15e..395c1f6cd 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -77,11 +77,13 @@ void QRSTune::QRSModel::fit(const vector>& xs, if(N < F_) return; // underdetermined; keep prior beta = 0 vector phi(F_); + vector grad(F_); + vector> negH(F_, vector(F_)); for(int iter = 0; iter < max_iter; iter++) { // Gradient and (negative) Hessian from L2 prior - vector grad(F_, 0.0); - vector> negH(F_, vector(F_, 0.0)); + fill(grad.begin(), grad.end(), 0.0); + for(int f = 0; f < F_; f++) fill(negH[f].begin(), negH[f].end(), 0.0); for(int f = 0; f < F_; f++) { grad[f] = -l2_ * beta_[f]; negH[f][f] = l2_; @@ -207,7 +209,7 @@ void QRSTune::QRSBuffer::prune(const QRSModel& model) { vector ny; for(int i = 0; i < N; i++) { if(keep[i]) { - nx.push_back(xs_[i]); + nx.push_back(std::move(xs_[i])); ny.push_back(ys_[i]); } } From e622289b9718b215bb0ae95802c6bd22d17f5860 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 06:22:50 +0000 Subject: [PATCH 15/41] Improve QRSOptimizer 
readability and add unit tests Readability: - Add file-level comment explaining the QRS-Tune algorithm - Document feature layout with example (D=2: [1, a, b, a^2, b^2, a*b]) - Name magic numbers: SINGULAR_THRESHOLD, CONVERGENCE_THRESHOLD, SIGMOID_CLAMP - Rename shadow variable 'f' to 'mult' in gaussianSolve - Rename terse variables: z->logit, w->hessianWeight, resid->residual, maxd->maxStep, b_lin->linearCoeffs, b_quad->quadCoeffs, b_cross->crossCoeffs, p_best->bestPrediction, kv->entry, nx/ny->newXs/newYs - Add phase comments in fit() documenting Newton-Raphson steps - Add algorithm-level comment above fit() explaining the objective function Tests (8 test cases): - numFeatures: verify D=0,1,2,3 - computeFeatures: verify feature vector for D=2 - sigmoid: boundary, midpoint, and clamp behavior - gaussianSolve: 2x2 system, 3x3 identity, singular detection - QRSModel fit+predict: 1D and 2D separable data - QRSModel mapOptimum: optimum better than anti-optimum - QRSTuner end-to-end: 100 trials with deterministic seed - QRSBuffer prune: verify buffer size reduction https://claude.ai/code/session_01396bbJUdHCsiWRVPM58895 --- cpp/command/runtests.cpp | 2 + cpp/qrstune/QRSOptimizer.cpp | 332 +++++++++++++++++++++++++++++------ cpp/qrstune/QRSOptimizer.h | 2 + 3 files changed, 287 insertions(+), 49 deletions(-) diff --git a/cpp/command/runtests.cpp b/cpp/command/runtests.cpp index 75bb9760c..520d9bc0d 100644 --- a/cpp/command/runtests.cpp +++ b/cpp/command/runtests.cpp @@ -5,6 +5,7 @@ #include "../core/rand.h" #include "../core/elo.h" #include "../core/fancymath.h" +#include "../qrstune/QRSOptimizer.h" #include "../core/config_parser.h" #include "../core/datetime.h" #include "../core/fileutils.h" @@ -35,6 +36,7 @@ int MainCmds::runtests(const vector& args) { DateTime::runTests(); FancyMath::runTests(); ComputeElos::runTests(); + QRSTune::runTests(); Base64::runTests(); ThreadTest::runTests(); diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index 
395c1f6cd..d4f2bdf78 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -1,12 +1,32 @@ // qrstune/QRSOptimizer.cpp +// +// QRS-Tune: Quadratic Response Surface optimizer for binary-outcome tuning. +// +// Models win probability as sigmoid(phi(x)^T * beta) where phi(x) is a +// quadratic feature map: [1, x_i, x_i^2, x_i*x_j]. The model is fit via +// Newton-Raphson MAP estimation with L2 regularization. The MAP optimum +// of the fitted quadratic surface is used as the next evaluation point, +// with decaying Gaussian noise for exploration. #include "../qrstune/QRSOptimizer.h" #include #include +#include + +#include "../core/test.h" using namespace std; +// Pivot values smaller than this are treated as singular +static const double SINGULAR_THRESHOLD = 1e-12; + +// Newton-Raphson stops when the largest coefficient change is below this +static const double CONVERGENCE_THRESHOLD = 1e-7; + +// Sigmoid is clamped to 0 or 1 beyond this magnitude to avoid overflow +static const double SIGMOID_CLAMP = 40.0; + // ============================================================ // Free functions // ============================================================ @@ -15,6 +35,9 @@ int QRSTune::numFeatures(int D) { return 1 + D + D * (D + 1) / 2; } +// Feature layout for D dimensions: +// [intercept(1), linear(D), quadratic(D), cross-terms(D*(D-1)/2)] +// Example for D=2, x=[a,b]: phi = [1, a, b, a^2, b^2, a*b] void QRSTune::computeFeatures(int D, const double* x, double* phi) { int k = 0; phi[k++] = 1.0; @@ -26,26 +49,31 @@ void QRSTune::computeFeatures(int D, const double* x, double* phi) { } double QRSTune::sigmoid(double z) { - if(z > 40.0) return 1.0; - if(z < -40.0) return 0.0; + if(z > SIGMOID_CLAMP) return 1.0; + if(z < -SIGMOID_CLAMP) return 0.0; return 1.0 / (1.0 + exp(-z)); } +// Partial-pivot Gaussian elimination: solves Ax = b in-place. +// On return, b contains the solution x. A is destroyed. 
+// Returns false if A is singular (pivot below SINGULAR_THRESHOLD). bool QRSTune::gaussianSolve(int F, vector>& A, vector& b) { + // Forward elimination with partial pivoting for(int col = 0; col < F; col++) { int piv = col; for(int r = col + 1; r < F; r++) if(fabs(A[r][col]) > fabs(A[piv][col])) piv = r; swap(A[col], A[piv]); swap(b[col], b[piv]); - if(fabs(A[col][col]) < 1e-12) return false; + if(fabs(A[col][col]) < SINGULAR_THRESHOLD) return false; double inv = 1.0 / A[col][col]; for(int r = col + 1; r < F; r++) { - double f = A[r][col] * inv; - for(int c = col; c < F; c++) A[r][c] -= f * A[col][c]; - b[r] -= f * b[col]; + double mult = A[r][col] * inv; + for(int c = col; c < F; c++) A[r][c] -= mult * A[col][c]; + b[r] -= mult * b[col]; } } + // Back substitution for(int r = F - 1; r >= 0; r--) { for(int c = r + 1; c < F; c++) b[r] -= A[r][c] * b[c]; b[r] /= A[r][r]; @@ -70,6 +98,16 @@ QRSTune::QRSModel::QRSModel(int D, double l2_reg) l2_(l2_reg) {} +// Newton-Raphson MAP estimation for L2-regularized quadratic logistic regression. +// +// Maximizes: sum_n [ y_n * log(p_n) + (1-y_n) * log(1-p_n) ] - (l2/2) * ||beta||^2 +// where p_n = sigmoid(phi(x_n)^T * beta). +// +// Each iteration: +// 1. Compute gradient and negative Hessian (including L2 prior) +// 2. Solve the Newton system: negH * delta = grad +// 3. Update: beta += delta +// 4. 
Stop when max |delta_f| < CONVERGENCE_THRESHOLD void QRSTune::QRSModel::fit(const vector>& xs, const vector& ys, int max_iter) { @@ -81,7 +119,7 @@ void QRSTune::QRSModel::fit(const vector>& xs, vector> negH(F_, vector(F_)); for(int iter = 0; iter < max_iter; iter++) { - // Gradient and (negative) Hessian from L2 prior + // Initialize with L2 prior contribution: grad = -l2*beta, negH = l2*I fill(grad.begin(), grad.end(), 0.0); for(int f = 0; f < F_; f++) fill(negH[f].begin(), negH[f].end(), 0.0); for(int f = 0; f < F_; f++) { @@ -89,33 +127,35 @@ void QRSTune::QRSModel::fit(const vector>& xs, negH[f][f] = l2_; } - // Data contribution + // Accumulate data likelihood: grad += (y-p)*phi, negH += p*(1-p)*phi*phi^T for(int n = 0; n < N; n++) { computeFeatures(D_, xs[n].data(), phi.data()); - double z = 0.0; - for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; - double p = sigmoid(z); - double w = p * (1.0 - p); - double resid = ys[n] - p; + double logit = 0.0; + for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f]; + double p = sigmoid(logit); + double hessianWeight = p * (1.0 - p); + double residual = ys[n] - p; for(int f = 0; f < F_; f++) { - grad[f] += resid * phi[f]; + grad[f] += residual * phi[f]; for(int g = f; g < F_; g++) - negH[f][g] += w * phi[f] * phi[g]; + negH[f][g] += hessianWeight * phi[f] * phi[g]; } } - // Symmetrize negH + // Symmetrize: negH is only filled for g >= f above for(int f = 0; f < F_; f++) for(int g = f + 1; g < F_; g++) negH[g][f] = negH[f][g]; - // Solve negH * delta = grad => beta += delta + // Solve Newton step: negH * delta = grad (grad is overwritten with delta) if(!gaussianSolve(F_, negH, grad)) break; - double maxd = 0.0; + + // Apply step and check convergence + double maxStep = 0.0; for(int f = 0; f < F_; f++) { beta_[f] += grad[f]; - maxd = max(maxd, fabs(grad[f])); + maxStep = max(maxStep, fabs(grad[f])); } - if(maxd < 1e-7) break; + if(maxStep < CONVERGENCE_THRESHOLD) break; } } @@ -126,29 +166,34 @@ double 
QRSTune::QRSModel::predict(const double* x) const { double QRSTune::QRSModel::score(const double* x) const { vector phi(F_); computeFeatures(D_, x, phi.data()); - double z = 0.0; - for(int f = 0; f < F_; f++) z += beta_[f] * phi[f]; - return z; + double logit = 0.0; + for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f]; + return logit; } +// Find the unconstrained optimum of the quadratic score surface, then clamp to [-1,+1]^D. +// +// Beta layout: [intercept, linear[0..D-1], quadratic[0..D-1], cross[i> M(D_, vector(D_, 0.0)); vector rhs(D_); for(int k = 0; k < D_; k++) { - M[k][k] = 2.0 * b_quad[k]; - rhs[k] = -b_lin[k]; + M[k][k] = 2.0 * quadCoeffs[k]; + rhs[k] = -linearCoeffs[k]; } int idx = 0; for(int i = 0; i < D_; i++) for(int j = i + 1; j < D_; j++) { - M[i][j] += b_cross[idx]; - M[j][i] += b_cross[idx]; + M[i][j] += crossCoeffs[idx]; + M[j][i] += crossCoeffs[idx]; idx++; } @@ -156,6 +201,7 @@ void QRSTune::QRSModel::mapOptimum(double* out_x) const { for(int i = 0; i < D_; i++) out_x[i] = 0.0; return; } + // Clamp to the normalized coordinate range [-1, +1] for(int i = 0; i < D_; i++) out_x[i] = max(-1.0, min(1.0, rhs[i])); } @@ -174,47 +220,51 @@ void QRSTune::QRSBuffer::add(const vector& x, double y) { ys_.push_back(y); } +// Confidence-based pruning: drop samples whose predicted win rate +// is more than prune_margin_ below the best predicted win rate. +// Samples are ranked so that min_keep_ retains the highest-quality +// samples (not just the oldest). 
void QRSTune::QRSBuffer::prune(const QRSModel& model) { int N = (int)xs_.size(); if(N <= min_keep_ * 2) return; // Score all samples and find best predicted win rate - vector> scored(N); - double p_best = 0.0; + vector> scored(N); // (predicted winrate, original index) + double bestPrediction = 0.0; for(int i = 0; i < N; i++) { double p = model.predict(xs_[i].data()); scored[i] = {p, i}; - if(p > p_best) p_best = p; + if(p > bestPrediction) bestPrediction = p; } - double threshold = p_best - prune_margin_; + double threshold = bestPrediction - prune_margin_; - // Sort by descending predicted quality + // Sort by descending predicted quality so min_keep_ retains the best sort(scored.begin(), scored.end(), [](const pair& a, const pair& b) { return a.first > b.first; }); - // Keep samples above threshold, plus top-quality samples up to min_keep_ + // Mark samples to keep: above threshold, or among top min_keep_ vector keep(N, false); int kept = 0; - for(auto& kv : scored) { - if(kv.first >= threshold || kept < min_keep_) { - keep[kv.second] = true; + for(auto& entry : scored) { + if(entry.first >= threshold || kept < min_keep_) { + keep[entry.second] = true; kept++; } } // Rebuild in original order to preserve temporal structure - vector> nx; - vector ny; + vector> newXs; + vector newYs; for(int i = 0; i < N; i++) { if(keep[i]) { - nx.push_back(std::move(xs_[i])); - ny.push_back(ys_[i]); + newXs.push_back(std::move(xs_[i])); + newYs.push_back(ys_[i]); } } - xs_ = std::move(nx); - ys_ = std::move(ny); + xs_ = std::move(newXs); + ys_ = std::move(newYs); } // ============================================================ @@ -247,10 +297,8 @@ vector QRSTune::QRSTuner::nextSample() { return x; } - // Base: MAP optimum + // Start from MAP optimum, add decaying Gaussian noise for exploration model_.mapOptimum(x.data()); - - // Decaying exploration noise double progress = (double)trial_count_ / max(1, total_trials_ - 1); double sigma = sigma_initial_ + progress * (sigma_final_ 
- sigma_initial_); normal_distribution noise(0.0, sigma); @@ -282,3 +330,189 @@ double QRSTune::QRSTuner::bestWinProb() const { auto best = bestCoords(); return model_.predict(best.data()); } + +// ============================================================ +// Tests +// ============================================================ + +static bool approxEqual(double x, double y, double tolerance) { + return fabs(x - y) < tolerance; +} + +void QRSTune::runTests() { + cout << "Running QRSTune tests" << endl; + + // Test numFeatures: F = 1 + D + D*(D+1)/2 + // D=0: 1, D=1: 3, D=2: 6, D=3: 10 + { + testAssert(numFeatures(0) == 1); + testAssert(numFeatures(1) == 3); + testAssert(numFeatures(2) == 6); + testAssert(numFeatures(3) == 10); + } + + // Test computeFeatures: D=2, x=[0.5, -0.3] + // Expected: [1.0, 0.5, -0.3, 0.25, 0.09, -0.15] + { + double x[2] = {0.5, -0.3}; + double phi[6]; + computeFeatures(2, x, phi); + testAssert(approxEqual(phi[0], 1.0, 1e-15)); + testAssert(approxEqual(phi[1], 0.5, 1e-15)); + testAssert(approxEqual(phi[2], -0.3, 1e-15)); + testAssert(approxEqual(phi[3], 0.25, 1e-15)); + testAssert(approxEqual(phi[4], 0.09, 1e-15)); + testAssert(approxEqual(phi[5], -0.15, 1e-15)); + } + + // Test sigmoid + { + testAssert(approxEqual(sigmoid(0.0), 0.5, 1e-15)); + testAssert(sigmoid(50.0) == 1.0); + testAssert(sigmoid(-50.0) == 0.0); + testAssert(approxEqual(sigmoid(1.0), 1.0 / (1.0 + exp(-1.0)), 1e-12)); + // Beyond clamp threshold: exactly 0 or 1 + testAssert(sigmoid(SIGMOID_CLAMP + 1.0) == 1.0); + testAssert(sigmoid(-SIGMOID_CLAMP - 1.0) == 0.0); + // Moderate values: still fractional + testAssert(sigmoid(5.0) < 1.0); + testAssert(sigmoid(-5.0) > 0.0); + } + + // Test gaussianSolve: 2x2 system [[2,1],[1,3]] * x = [5,7] => x = [8/5, 9/5] + { + vector> A = {{2.0, 1.0}, {1.0, 3.0}}; + vector b = {5.0, 7.0}; + bool ok = gaussianSolve(2, A, b); + testAssert(ok); + testAssert(approxEqual(b[0], 8.0 / 5.0, 1e-12)); + testAssert(approxEqual(b[1], 9.0 / 5.0, 
1e-12)); + } + + // Test gaussianSolve: 3x3 identity system + { + vector> A = {{1,0,0},{0,1,0},{0,0,1}}; + vector b = {3.0, -1.0, 7.0}; + bool ok = gaussianSolve(3, A, b); + testAssert(ok); + testAssert(approxEqual(b[0], 3.0, 1e-15)); + testAssert(approxEqual(b[1], -1.0, 1e-15)); + testAssert(approxEqual(b[2], 7.0, 1e-15)); + } + + // Test gaussianSolve: singular matrix returns false + { + vector> A = {{1.0, 2.0}, {2.0, 4.0}}; + vector b = {3.0, 6.0}; + bool ok = gaussianSolve(2, A, b); + testAssert(!ok); + } + + // Test QRSModel fit + predict: 1D separable data + // All samples at x=+0.8 win, all at x=-0.8 lose. + // After fitting, predict(+0.8) should be high and predict(-0.8) should be low. + { + QRSModel model(1, 0.1); + vector> xs; + vector ys; + for(int i = 0; i < 20; i++) { + xs.push_back({0.8}); + ys.push_back(1.0); + xs.push_back({-0.8}); + ys.push_back(0.0); + } + model.fit(xs, ys); + double xWin[] = {0.8}; + double xLose[] = {-0.8}; + double xMid[] = {0.0}; + double pWin = model.predict(xWin); + double pLose = model.predict(xLose); + testAssert(pWin > 0.7); + testAssert(pLose < 0.3); + // Midpoint should be near 0.5 + double pMid = model.predict(xMid); + testAssert(approxEqual(pMid, 0.5, 0.15)); + } + + // Test QRSModel mapOptimum: after fitting 1D win-at-positive data, + // the MAP optimum should be in the positive region (clamped to [−1,+1]). 
+ { + QRSModel model(1, 0.1); + vector> xs; + vector ys; + for(int i = 0; i < 20; i++) { + xs.push_back({0.8}); + ys.push_back(1.0); + xs.push_back({-0.8}); + ys.push_back(0.0); + } + model.fit(xs, ys); + double bestX; + model.mapOptimum(&bestX); + // The optimum should have a higher predicted win rate than the anti-optimum + double negOne = -1.0; + testAssert(model.predict(&bestX) > model.predict(&negOne) + 0.1); + } + + // Test QRSModel 2D: wins cluster at (+0.5, +0.5), losses at (-0.5, -0.5) + { + QRSModel model(2, 0.1); + vector> xs; + vector ys; + for(int i = 0; i < 20; i++) { + xs.push_back({0.5, 0.5}); + ys.push_back(1.0); + xs.push_back({-0.5, -0.5}); + ys.push_back(0.0); + } + model.fit(xs, ys); + double xWin[] = {0.5, 0.5}; + double xLose[] = {-0.5, -0.5}; + double pWin = model.predict(xWin); + double pLose = model.predict(xLose); + testAssert(pWin > 0.7); + testAssert(pLose < 0.3); + } + + // Test QRSTuner end-to-end: 1D, deterministic seed, outcome strongly correlated + // with x > 0. After enough trials the best predicted win rate should exceed 0.5. + { + const int numTrials = 100; + QRSTuner tuner(1, /*seed=*/42, numTrials, + /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5); + for(int trial = 0; trial < numTrials; trial++) { + vector sample = tuner.nextSample(); + // Strong signal: win when x > 0, lose when x < 0 + double outcome = (sample[0] > 0.0) ? 
1.0 : 0.0; + tuner.addResult(sample, outcome); + } + testAssert(tuner.trialCount() == numTrials); + // The fitted model should recognize that positive x is better + testAssert(tuner.bestWinProb() > 0.5); + } + + // Test QRSBuffer prune: verify pruning reduces buffer size + { + QRSModel model(1, 0.1); + vector> xs; + vector ys; + // Build data with a clear win region + for(int i = 0; i < 30; i++) { + xs.push_back({0.8}); + ys.push_back(1.0); + xs.push_back({-0.8}); + ys.push_back(0.0); + } + model.fit(xs, ys); + + QRSBuffer buffer(5, 0.10); // tight margin, keep at least 5 + for(int i = 0; i < 60; i++) { + buffer.add(xs[i], ys[i]); + } + testAssert(buffer.size() == 60); + buffer.prune(model); + // Should have pruned some low-quality samples + testAssert(buffer.size() < 60); + testAssert(buffer.size() >= 5); // min_keep + } +} diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index d3ba3fa6f..4dadcb079 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -149,6 +149,8 @@ class QRSTuner { const QRSModel& model() const { return model_; } }; +void runTests(); + } // namespace QRSTune #endif // QRSTUNE_QRSOPTIMIZER_H_ From cee03a991a54a7b143065fefe86c434569086cd9 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 20:54:26 +0800 Subject: [PATCH 16/41] Add 10% progress reporting with ETA to tune-params Replace the hardcoded every-100-trials progress log with reporting at every 10% of total trials. Each progress line now includes percentage, win/loss/draw counts, best parameters, and estimated time of arrival. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 058f4f861..a13029918 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -19,6 +19,7 @@ #include #include #include +#include "../core/timer.h" using namespace std; @@ -240,6 +241,8 @@ int MainCmds::tuneparams(const vector& args) { const string gameSeedBase = Global::uint64ToHexString(seedRand.nextUInt64()); int wins = 0, losses = 0, draws = 0; + int reportInterval = std::max(1, numTrials / 10); + ClockTimer timer; logger.write("Starting " + Global::intToString(numTrials) + " tuning trials"); @@ -315,18 +318,28 @@ int MainCmds::tuneparams(const vector& args) { if(shouldStop.load()) break; - //Progress report every 100 trials - if((trial + 1) % 100 == 0) { + //Progress report every 10% of trials + if((trial + 1) % reportInterval == 0) { vector vBest = tuner.bestCoords(); double bE, bLog, bStdev; qrsToPUCT(vBest, bE, bLog, bStdev, qrsMins, qrsMaxs); - logger.write( - "Trial " + Global::intToString(trial + 1) + "/" + Global::intToString(numTrials) + - " | W=" + Global::intToString(wins) + " L=" + Global::intToString(losses) + " D=" + Global::intToString(draws) + - " | best: cpuctExploration=" + Global::doubleToString(bE) + - " cpuctExplorationLog=" + Global::doubleToString(bLog) + - " cpuctUtilityStdevPrior=" + Global::doubleToString(bStdev) - ); + + int completed = trial + 1; + int pct = completed * 100 / numTrials; + double elapsed = timer.getSeconds(); + int etaSec = (int)(elapsed / completed * (numTrials - completed)); + string eta; + if(etaSec < 60) + eta = Global::intToString(etaSec) + "s"; + else if(etaSec < 3600) + eta = Global::intToString(etaSec / 60) + "m" + Global::intToString(etaSec % 60) + "s"; + else + eta = Global::intToString(etaSec / 3600) + "h" + Global::intToString((etaSec % 3600) / 60) + "m"; 
+ + logger.write(Global::strprintf( + "[%d%%] %d/%d | W=%d L=%d D=%d | best: E=%.4f Log=%.4f Stdev=%.4f | ETA %s", + pct, completed, numTrials, wins, losses, draws, bE, bLog, bStdev, eta.c_str() + )); } } From 4cdf5bc2c571988edc3b3325b6916f77bde46834 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:34:33 +0800 Subject: [PATCH 17/41] Add 95% CIs via delta method, remove misleading ASCII charts The ASCII regression charts and est.winrate displayed saturated sigmoid values (always 1.0 or 0.0) due to aggressive buffer pruning inflating model coefficients, confusing users who saw ~50% actual win rates. Replace charts with 95% confidence intervals computed via the delta method (computeOptimumSE), shown in both progress reports and final results. Extract shared helpers (buildNegHessian, buildQuadHessian, computeParamCIs) to eliminate duplicated matrix construction logic. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 117 ++++++++---------- cpp/qrstune/QRSOptimizer.cpp | 230 +++++++++++++++++++++++++++++++---- cpp/qrstune/QRSOptimizer.h | 20 +++ 3 files changed, 274 insertions(+), 93 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index a13029918..9d694f6d7 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -71,69 +71,26 @@ static void qrsToPUCT( cpuctUtilityStdevPrior = qrsDimToReal(2, x[2], mins, maxs); } -//Print ASCII-art regression curve for each PUCT dimension. -//For dimension d: fix all other dims at vBest, sweep d from -1 to +1. 
-static void printRegressionCurves(const QRSTune::QRSTuner& tuner, - const vector& vBest, - const double* mins, const double* maxs, - Logger& logger) { - const int plotW = 60; - const int plotH = 20; - double bestWinRate = tuner.model().predict(vBest.data()); - - for(int dim = 0; dim < nDims; dim++) { - vector canvas(plotH, string(plotW, ' ')); - - int bestCol = (int)((vBest[dim] + 1.0) / 2.0 * (plotW - 1) + 0.5); - bestCol = max(0, min(plotW - 1, bestCol)); - - vector xSlice(vBest); - for(int col = 0; col < plotW; col++) { - double t = -1.0 + 2.0 * col / (plotW - 1); - xSlice[dim] = t; - double winRate = tuner.model().predict(xSlice.data()); - - int row = (int)((1.0 - winRate) * (plotH - 1) + 0.5); - row = max(0, min(plotH - 1, row)); - canvas[row][col] = (col == bestCol) ? '*' : 'o'; - } - - double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); - logger.write(""); - logger.write( - "[Dim " + Global::intToString(dim) + "] " + paramNames[dim] + - " (best QRS=" + Global::strprintf("%.3f", vBest[dim]) + - " -> real=" + Global::strprintf("%.3f", bestReal) + - ", est.winrate=" + Global::strprintf("%.3f", bestWinRate) + ")" - ); - - for(int row = 0; row < plotH; row++) { - string label; - if(row == 0) label = "1.0 |"; - else if(row == plotH / 2) label = "0.5 |"; - else if(row == plotH - 1) label = "0.0 |"; - else label = " |"; - logger.write(label + canvas[row]); - } - logger.write(" +" + string(plotW, '-')); - - { - string line(plotW + 5, ' '); - const int off = 5; - auto place = [&](int col, const string& lbl) { - int pos = off + col - (int)lbl.size() / 2; - if(pos < 0) pos = 0; - for(int i = 0; i < (int)lbl.size() && pos + i < (int)line.size(); i++) - line[pos + i] = lbl[i]; - }; - place(0, Global::strprintf("%.3f", qrsDimToReal(dim, -1.0, mins, maxs))); - place(plotW / 2, Global::strprintf("%.3f", qrsDimToReal(dim, 0.0, mins, maxs))); - place(plotW - 1, Global::strprintf("%.3f", qrsDimToReal(dim, +1.0, mins, maxs))); - size_t last = line.find_last_not_of(' 
'); - logger.write(line.substr(0, last + 1)); - } +static const double Z_95 = 1.96; + +// Compute 95% CI bounds for each parameter in real (non-normalized) coordinates. +// Returns false if CIs are unavailable. Fills ciLo/ciHi/clamped arrays of size nDims. +static bool computeParamCIs(const QRSTune::QRSTuner& tuner, + const vector& vBest, + const double* mins, const double* maxs, + double* ciLo, double* ciHi, bool* clamped) { + double se[nDims]; + bool hasCIs = tuner.model().computeOptimumSE( + tuner.buffer().xs(), tuner.buffer().ys(), se, clamped); + if(!hasCIs) return false; + for(int d = 0; d < nDims; d++) { + double radius = (maxs[d] - mins[d]) * 0.5; + double seReal = se[d] * radius; + double bestReal = qrsDimToReal(d, vBest[d], mins, maxs); + ciLo[d] = bestReal - Z_95 * seReal; + ciHi[d] = bestReal + Z_95 * seReal; } - logger.write(""); + return true; } int MainCmds::tuneparams(const vector& args) { @@ -340,6 +297,19 @@ int MainCmds::tuneparams(const vector& args) { "[%d%%] %d/%d | W=%d L=%d D=%d | best: E=%.4f Log=%.4f Stdev=%.4f | ETA %s", pct, completed, numTrials, wins, losses, draws, bE, bLog, bStdev, eta.c_str() )); + + { + double ciLo[nDims], ciHi[nDims]; + bool clampedDims[nDims]; + if(computeParamCIs(tuner, vBest, qrsMins, qrsMaxs, ciLo, ciHi, clampedDims)) { + string ciLine = " 95% CIs:"; + for(int d = 0; d < nDims; d++) { + ciLine += Global::strprintf(" %s=[%.4f, %.4f]", paramNames[d], ciLo[d], ciHi[d]); + if(clampedDims[d]) ciLine += "*"; + } + logger.write(ciLine); + } + } } } @@ -356,17 +326,28 @@ int MainCmds::tuneparams(const vector& args) { " Losses: " + Global::intToString(losses) + " Draws: " + Global::intToString(draws) ); - logger.write("Best cpuctExploration = " + Global::doubleToString(bestE)); - logger.write("Best cpuctExplorationLog = " + Global::doubleToString(bestLog)); - logger.write("Best cpuctUtilityStdevPrior = " + Global::doubleToString(bestStdev)); + { + double ciLo[nDims], ciHi[nDims]; + bool clampedDims[nDims]; + bool hasCIs 
= computeParamCIs(tuner, vBest, qrsMins, qrsMaxs, ciLo, ciHi, clampedDims); + + for(int d = 0; d < nDims; d++) { + double bestReal = qrsDimToReal(d, vBest[d], qrsMins, qrsMaxs); + if(hasCIs) { + string warn = clampedDims[d] ? " [boundary - CI may be unreliable]" : ""; + logger.write(Global::strprintf("Best %-25s = %.4f 95%%CI [%.4f, %.4f]%s", + paramNames[d], bestReal, ciLo[d], ciHi[d], warn.c_str())); + } else { + logger.write(Global::strprintf("Best %-25s = %.4f (CI unavailable)", + paramNames[d], bestReal)); + } + } + } logger.write( "QRS raw coordinates: [" + Global::doubleToString(vBest[0]) + ", " + Global::doubleToString(vBest[1]) + ", " + Global::doubleToString(vBest[2]) + "]" ); - //ASCII-art regression curves (one per PUCT dimension) - printRegressionCurves(tuner, vBest, qrsMins, qrsMaxs, logger); - //Cleanup delete gameRunner; for(NNEvaluator* eval : nnEvals) diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index d4f2bdf78..c412fa069 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -98,6 +98,53 @@ QRSTune::QRSModel::QRSModel(int D, double l2_reg) l2_(l2_reg) {} +// Build the negative Hessian (Fisher info + L2 prior) at current beta. +// negH must be pre-sized to F_ x F_; contents are overwritten. 
+void QRSTune::QRSModel::buildNegHessian(const vector>& xs, + vector>& negH) const { + int N = (int)xs.size(); + for(int f = 0; f < F_; f++) fill(negH[f].begin(), negH[f].end(), 0.0); + for(int f = 0; f < F_; f++) + negH[f][f] = l2_; + + vector phi(F_); + for(int n = 0; n < N; n++) { + computeFeatures(D_, xs[n].data(), phi.data()); + double logit = 0.0; + for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f]; + double p = sigmoid(logit); + double w = p * (1.0 - p); + for(int f = 0; f < F_; f++) + for(int g = f; g < F_; g++) + negH[f][g] += w * phi[f] * phi[g]; + } + for(int f = 0; f < F_; f++) + for(int g = f + 1; g < F_; g++) + negH[g][f] = negH[f][g]; +} + +// Build the D x D quadratic Hessian M and rhs for M*x = -linearCoeffs. +// M and rhs must be pre-sized; contents are overwritten. +void QRSTune::QRSModel::buildQuadHessian(vector>& M, + vector& rhs) const { + const double* linearCoeffs = beta_.data() + 1; + const double* quadCoeffs = beta_.data() + 1 + D_; + const double* crossCoeffs = beta_.data() + 1 + 2 * D_; + + for(int k = 0; k < D_; k++) { + fill(M[k].begin(), M[k].end(), 0.0); + M[k][k] = 2.0 * quadCoeffs[k]; + rhs[k] = -linearCoeffs[k]; + } + int idx = 0; + for(int i = 0; i < D_; i++) + for(int j = i + 1; j < D_; j++) { + M[i][j] += crossCoeffs[idx]; + M[j][i] += crossCoeffs[idx]; + idx++; + } +} + // Newton-Raphson MAP estimation for L2-regularized quadratic logistic regression. 
// // Maximizes: sum_n [ y_n * log(p_n) + (1-y_n) * log(1-p_n) ] - (l2/2) * ||beta||^2 @@ -119,7 +166,7 @@ void QRSTune::QRSModel::fit(const vector>& xs, vector> negH(F_, vector(F_)); for(int iter = 0; iter < max_iter; iter++) { - // Initialize with L2 prior contribution: grad = -l2*beta, negH = l2*I + // Build negH and compute gradient simultaneously fill(grad.begin(), grad.end(), 0.0); for(int f = 0; f < F_; f++) fill(negH[f].begin(), negH[f].end(), 0.0); for(int f = 0; f < F_; f++) { @@ -127,21 +174,19 @@ void QRSTune::QRSModel::fit(const vector>& xs, negH[f][f] = l2_; } - // Accumulate data likelihood: grad += (y-p)*phi, negH += p*(1-p)*phi*phi^T for(int n = 0; n < N; n++) { computeFeatures(D_, xs[n].data(), phi.data()); double logit = 0.0; for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f]; double p = sigmoid(logit); - double hessianWeight = p * (1.0 - p); + double w = p * (1.0 - p); double residual = ys[n] - p; for(int f = 0; f < F_; f++) { grad[f] += residual * phi[f]; for(int g = f; g < F_; g++) - negH[f][g] += hessianWeight * phi[f] * phi[g]; + negH[f][g] += w * phi[f] * phi[g]; } } - // Symmetrize: negH is only filled for g >= f above for(int f = 0; f < F_; f++) for(int g = f + 1; g < F_; g++) negH[g][f] = negH[f][g]; @@ -172,38 +217,114 @@ double QRSTune::QRSModel::score(const double* x) const { } // Find the unconstrained optimum of the quadratic score surface, then clamp to [-1,+1]^D. -// -// Beta layout: [intercept, linear[0..D-1], quadratic[0..D-1], cross[i> M(D_, vector(D_, 0.0)); vector rhs(D_); + buildQuadHessian(M, rhs); - for(int k = 0; k < D_; k++) { - M[k][k] = 2.0 * quadCoeffs[k]; - rhs[k] = -linearCoeffs[k]; + if(!gaussianSolve(D_, M, rhs)) { + for(int i = 0; i < D_; i++) out_x[i] = 0.0; + return; + } + for(int i = 0; i < D_; i++) + out_x[i] = max(-1.0, min(1.0, rhs[i])); +} + +// Compute standard errors of the MAP optimum via the delta method. +// +// 1. Rebuild negH (Fisher info + L2 prior) at current beta. +// 2. 
Invert negH -> Cov(beta). +// 3. Compute unconstrained optimum x* and M^{-1}. +// 4. Build Jacobian J = dx*/dbeta via implicit differentiation. +// 5. Cov(x*) = J * Cov(beta) * J^T. +// 6. SE[d] = sqrt(Cov(x*)[d][d]). +bool QRSTune::QRSModel::computeOptimumSE(const vector>& xs, + const vector& ys, + double* se, + bool* clamped) const { + int N = (int)xs.size(); + if(N < F_) return false; + + // --- Step 1: Build negH (Fisher info + L2 prior) at current beta --- + vector> negH(F_, vector(F_, 0.0)); + buildNegHessian(xs, negH); + + // --- Step 2: Invert negH -> Cov(beta), column by column --- + vector> covBeta(F_, vector(F_, 0.0)); + for(int g = 0; g < F_; g++) { + auto negH_copy = negH; + vector e(F_, 0.0); + e[g] = 1.0; + if(!gaussianSolve(F_, negH_copy, e)) return false; + for(int f = 0; f < F_; f++) + covBeta[f][g] = e[f]; + } + + // --- Step 3: Compute unconstrained optimum x* and M^{-1} --- + vector> M(D_, vector(D_, 0.0)); + vector rhs(D_); + buildQuadHessian(M, rhs); + + // Save M for Jacobian computation before solve destroys it + auto M_saved = M; + if(!gaussianSolve(D_, M, rhs)) return false; + vector xStar(rhs); + + for(int d = 0; d < D_; d++) + clamped[d] = (xStar[d] < -1.0 || xStar[d] > 1.0); + + // Compute M^{-1} column by column + vector> Minv(D_, vector(D_, 0.0)); + for(int g = 0; g < D_; g++) { + auto M_copy = M_saved; + vector e(D_, 0.0); + e[g] = 1.0; + if(!gaussianSolve(D_, M_copy, e)) return false; + for(int d = 0; d < D_; d++) + Minv[d][g] = e[d]; } + + // --- Step 4: Build Jacobian J (D x F) via implicit differentiation --- + vector> J(D_, vector(F_, 0.0)); + + // Linear coefficients: J[:, 1+i] = -Minv[:, i] + for(int i = 0; i < D_; i++) + for(int d = 0; d < D_; d++) + J[d][1 + i] = -Minv[d][i]; + + // Quadratic diagonal coefficients: J[:, 1+D+i] = -2 x*_i Minv[:, i] + for(int i = 0; i < D_; i++) + for(int d = 0; d < D_; d++) + J[d][1 + D_ + i] = -2.0 * xStar[i] * Minv[d][i]; + + // Cross-term coefficients: J[:, f] = -(x*_j Minv[:, i] + x*_i 
Minv[:, j]) int idx = 0; for(int i = 0; i < D_; i++) for(int j = i + 1; j < D_; j++) { - M[i][j] += crossCoeffs[idx]; - M[j][i] += crossCoeffs[idx]; + for(int d = 0; d < D_; d++) + J[d][1 + 2 * D_ + idx] = -(xStar[j] * Minv[d][i] + xStar[i] * Minv[d][j]); idx++; } - if(!gaussianSolve(D_, M, rhs)) { - for(int i = 0; i < D_; i++) out_x[i] = 0.0; - return; + // --- Step 5: Cov(x*) = J Cov(beta) J^T --- + vector> temp(D_, vector(F_, 0.0)); + for(int d = 0; d < D_; d++) + for(int g = 0; g < F_; g++) + for(int f = 0; f < F_; f++) + temp[d][g] += J[d][f] * covBeta[f][g]; + + vector> covX(D_, vector(D_, 0.0)); + for(int d1 = 0; d1 < D_; d1++) + for(int d2 = 0; d2 < D_; d2++) + for(int f = 0; f < F_; f++) + covX[d1][d2] += temp[d1][f] * J[d2][f]; + + // --- Step 6: Extract SEs --- + for(int d = 0; d < D_; d++) { + se[d] = covX[d][d] > 0.0 ? sqrt(covX[d][d]) : 0.0; } - // Clamp to the normalized coordinate range [-1, +1] - for(int i = 0; i < D_; i++) - out_x[i] = max(-1.0, min(1.0, rhs[i])); + + return true; } // ============================================================ @@ -491,6 +612,65 @@ void QRSTune::runTests() { testAssert(tuner.bestWinProb() > 0.5); } + // Test computeOptimumSE: 1D with a concave peak (wins near center, losses at edges). + // This gives a negative quadratic coefficient, making M invertible. 
+ { + QRSModel model(1, 0.01); + vector> xs; + vector ys; + for(int i = 0; i < 40; i++) { + xs.push_back({0.0}); ys.push_back(1.0); // center: wins + xs.push_back({0.8}); ys.push_back(0.0); // right edge: losses + xs.push_back({-0.8}); ys.push_back(0.0); // left edge: losses + } + model.fit(xs, ys); + double se[1]; + bool clamped[1]; + bool ok = model.computeOptimumSE(xs, ys, se, clamped); + testAssert(ok); + testAssert(se[0] > 0.0); + testAssert(se[0] < 2.0); + } + + // Test computeOptimumSE: more data gives smaller SE + { + auto buildData = [](int reps, vector>& xs, vector& ys) { + for(int i = 0; i < reps; i++) { + xs.push_back({0.0}); ys.push_back(1.0); + xs.push_back({0.8}); ys.push_back(0.0); + xs.push_back({-0.8}); ys.push_back(0.0); + } + }; + + QRSModel modelSmall(1, 0.01); + QRSModel modelLarge(1, 0.01); + vector> xsSmall, xsLarge; + vector ysSmall, ysLarge; + buildData(15, xsSmall, ysSmall); + buildData(150, xsLarge, ysLarge); + modelSmall.fit(xsSmall, ysSmall); + modelLarge.fit(xsLarge, ysLarge); + double seSmall[1], seLarge[1]; + bool clampedSmall[1], clampedLarge[1]; + bool okSmall = modelSmall.computeOptimumSE(xsSmall, ysSmall, seSmall, clampedSmall); + bool okLarge = modelLarge.computeOptimumSE(xsLarge, ysLarge, seLarge, clampedLarge); + testAssert(okSmall && okLarge); + testAssert(seLarge[0] < seSmall[0]); + } + + // Test computeOptimumSE: insufficient data returns false + { + QRSModel model(1, 0.1); + vector> xs = {{0.5}, {-0.5}}; + vector ys = {1.0, 0.0}; + // N=2 < F=3, so fit() does nothing and beta stays zero + model.fit(xs, ys); + double se[1]; + bool clamped[1]; + bool ok = model.computeOptimumSE(xs, ys, se, clamped); + testAssert(!ok); + } + // Test QRSBuffer prune: verify pruning reduces buffer size { QRSModel model(1, 0.1); diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 4dadcb079..1163e3e7e 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -35,6 +35,14 @@ class QRSModel { std::vector 
beta_; // F coefficients (intercept, linear, quad, cross) double l2_; // L2 regularization strength + // Build the negative Hessian (Fisher info + L2 prior) at current beta. + void buildNegHessian(const std::vector>& xs, + std::vector>& negH) const; + + // Build the D x D quadratic Hessian M and rhs for the system M*x = -linearCoeffs. + void buildQuadHessian(std::vector>& M, + std::vector& rhs) const; + public: QRSModel(); QRSModel(int D, double l2_reg = 0.1); @@ -61,6 +69,17 @@ class QRSModel { int dims() const { return D_; } int features() const { return F_; } + + // Compute standard errors of the MAP optimum x* via the delta method. + // Rebuilds the Fisher information matrix from (xs, ys), inverts it to get + // Cov(beta), then propagates through the beta -> x* mapping. + // On success, fills se[0..D-1] with SEs in normalized [-1,+1] coords and + // sets clamped[0..D-1] to true for dims where x* was clamped to boundary. + // Returns false if the Hessian is singular (CIs unavailable). 
+ bool computeOptimumSE(const std::vector>& xs, + const std::vector& ys, + double* se, + bool* clamped) const; }; // ============================================================ @@ -147,6 +166,7 @@ class QRSTuner { int trialCount() const { return trial_count_; } int dims() const { return D_; } const QRSModel& model() const { return model_; } + const QRSBuffer& buffer() const { return buffer_; } }; void runTests(); From a940abbf423136f057d10b3452a86f2455b92ef2 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:37:38 +0800 Subject: [PATCH 18/41] Use short param names in tune-params progress CIs for consistency Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 9d694f6d7..daadaceea 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -40,6 +40,12 @@ static const char* paramNames[nDims] = { "cpuctUtilityStdevPrior" }; +static const char* paramShortNames[nDims] = { + "E", + "Log", + "Stdev" +}; + //Default search ranges (used when config keys are absent) static const double qrsDefaultMins[nDims] = {0.5, 0.05, 0.1}; static const double qrsDefaultMaxs[nDims] = {2.0, 1.0, 0.8}; @@ -304,7 +310,7 @@ int MainCmds::tuneparams(const vector& args) { if(computeParamCIs(tuner, vBest, qrsMins, qrsMaxs, ciLo, ciHi, clampedDims)) { string ciLine = " 95% CIs:"; for(int d = 0; d < nDims; d++) { - ciLine += Global::strprintf(" %s=[%.4f, %.4f]", paramNames[d], ciLo[d], ciHi[d]); + ciLine += Global::strprintf(" %s=[%.4f, %.4f]", paramShortNames[d], ciLo[d], ciHi[d]); if(clampedDims[d]) ciLine += "*"; } logger.write(ciLine); From 51a003088ca7012ff0728454cac3432068c813ae Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:45:50 +0800 Subject: [PATCH 19/41] Merge progress and 
CI lines into single-line output in tune-params Use paramShortNames consistently in both CI and fallback branches, eliminating hardcoded parameter names and unused qrsToPUCT call. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index daadaceea..5e0716d0a 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -284,8 +284,6 @@ int MainCmds::tuneparams(const vector& args) { //Progress report every 10% of trials if((trial + 1) % reportInterval == 0) { vector vBest = tuner.bestCoords(); - double bE, bLog, bStdev; - qrsToPUCT(vBest, bE, bLog, bStdev, qrsMins, qrsMaxs); int completed = trial + 1; int pct = completed * 100 / numTrials; @@ -299,22 +297,23 @@ int MainCmds::tuneparams(const vector& args) { else eta = Global::intToString(etaSec / 3600) + "h" + Global::intToString((etaSec % 3600) / 60) + "m"; - logger.write(Global::strprintf( - "[%d%%] %d/%d | W=%d L=%d D=%d | best: E=%.4f Log=%.4f Stdev=%.4f | ETA %s", - pct, completed, numTrials, wins, losses, draws, bE, bLog, bStdev, eta.c_str() - )); - { + string paramStr; double ciLo[nDims], ciHi[nDims]; bool clampedDims[nDims]; if(computeParamCIs(tuner, vBest, qrsMins, qrsMaxs, ciLo, ciHi, clampedDims)) { - string ciLine = " 95% CIs:"; for(int d = 0; d < nDims; d++) { - ciLine += Global::strprintf(" %s=[%.4f, %.4f]", paramShortNames[d], ciLo[d], ciHi[d]); - if(clampedDims[d]) ciLine += "*"; + paramStr += Global::strprintf(" %s=[%.4f, %.4f]", paramShortNames[d], ciLo[d], ciHi[d]); + if(clampedDims[d]) paramStr += "*"; } - logger.write(ciLine); + } else { + for(int d = 0; d < nDims; d++) + paramStr += Global::strprintf(" %s=%.4f", paramShortNames[d], qrsDimToReal(d, vBest[d], qrsMins, qrsMaxs)); } + logger.write(Global::strprintf( + "[%d%%] %d/%d | W=%d L=%d D=%d |%s | ETA %s", + pct, completed, numTrials, wins, losses, draws, 
paramStr.c_str(), eta.c_str() + )); } } } From 66da59b945779aa9efb83827455a4ebdb8fe6954 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 2 Apr 2026 06:59:14 +0800 Subject: [PATCH 20/41] Comment out unused numGameThreads in tune_params_example.cfg tune-params runs one game per trial sequentially because QRS-Tune needs each outcome before choosing the next sample point, so numGameThreads is never read and was misleading. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/configs/tune_params_example.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 38e74e69b..40995f2c3 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -50,7 +50,9 @@ nnModelFile1 = PATH_TO_MODEL # Match----------------------------------------------------------------------------------- -numGameThreads = 8 +# tune-params runs one game per trial sequentially (QRS-Tune is inherently serial), +# so numGameThreads is not used. Keeping it commented out to avoid confusion. +#numGameThreads = 1 maxMovesPerGame = 1200 allowResignation = true From 33aaaf753bc4c8a93ac5fceef08cbad74102c52b Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 2 Apr 2026 07:12:46 +0800 Subject: [PATCH 21/41] Remove unused ys parameter from computeOptimumSE The Fisher information matrix only depends on xs and current beta, not on the observed outcomes ys. Remove the misleading parameter and fix the doc comment. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 2 +- cpp/qrstune/QRSOptimizer.cpp | 9 ++++----- cpp/qrstune/QRSOptimizer.h | 5 ++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 5e0716d0a..e0c2d6cd3 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -87,7 +87,7 @@ static bool computeParamCIs(const QRSTune::QRSTuner& tuner, double* ciLo, double* ciHi, bool* clamped) { double se[nDims]; bool hasCIs = tuner.model().computeOptimumSE( - tuner.buffer().xs(), tuner.buffer().ys(), se, clamped); + tuner.buffer().xs(), se, clamped); if(!hasCIs) return false; for(int d = 0; d < nDims; d++) { double radius = (maxs[d] - mins[d]) * 0.5; diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index c412fa069..eb3e617cf 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -239,7 +239,6 @@ void QRSTune::QRSModel::mapOptimum(double* out_x) const { // 5. Cov(x*) = J * Cov(beta) * J^T. // 6. SE[d] = sqrt(Cov(x*)[d][d]). 
bool QRSTune::QRSModel::computeOptimumSE(const vector>& xs, - const vector& ys, double* se, bool* clamped) const { int N = (int)xs.size(); @@ -626,7 +625,7 @@ void QRSTune::runTests() { model.fit(xs, ys); double se[1]; bool clamped[1]; - bool ok = model.computeOptimumSE(xs, ys, se, clamped); + bool ok = model.computeOptimumSE(xs, se, clamped); testAssert(ok); testAssert(se[0] > 0.0); testAssert(se[0] < 2.0); @@ -652,8 +651,8 @@ void QRSTune::runTests() { modelLarge.fit(xsLarge, ysLarge); double seSmall[1], seLarge[1]; bool clampedSmall[1], clampedLarge[1]; - bool okSmall = modelSmall.computeOptimumSE(xsSmall, ysSmall, seSmall, clampedSmall); - bool okLarge = modelLarge.computeOptimumSE(xsLarge, ysLarge, seLarge, clampedLarge); + bool okSmall = modelSmall.computeOptimumSE(xsSmall, seSmall, clampedSmall); + bool okLarge = modelLarge.computeOptimumSE(xsLarge, seLarge, clampedLarge); testAssert(okSmall && okLarge); testAssert(seLarge[0] < seSmall[0]); } @@ -667,7 +666,7 @@ void QRSTune::runTests() { model.fit(xs, ys); double se[1]; bool clamped[1]; - bool ok = model.computeOptimumSE(xs, ys, se, clamped); + bool ok = model.computeOptimumSE(xs, se, clamped); testAssert(!ok); } diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 1163e3e7e..d513cbbe8 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -71,13 +71,12 @@ class QRSModel { int features() const { return F_; } // Compute standard errors of the MAP optimum x* via the delta method. - // Rebuilds the Fisher information matrix from (xs, ys), inverts it to get - // Cov(beta), then propagates through the beta -> x* mapping. + // Rebuilds the Fisher information matrix from xs at current beta, inverts + // it to get Cov(beta), then propagates through the beta -> x* mapping. // On success, fills se[0..D-1] with SEs in normalized [-1,+1] coords and // sets clamped[0..D-1] to true for dims where x* was clamped to boundary. 
// Returns false if the Hessian is singular (CIs unavailable). bool computeOptimumSE(const std::vector>& xs, - const std::vector& ys, double* se, bool* clamped) const; }; From 2202b1de2a8a6225343e443e92662f7eac85330e Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 2 Apr 2026 07:55:09 +0800 Subject: [PATCH 22/41] Print suggested match command after tune-params results After printing optimal PUCT parameters, tune-params now outputs a ready-to-use match command so users can quickly verify the tuned parameters against defaults. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index e0c2d6cd3..09dc424a2 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -106,6 +106,7 @@ int MainCmds::tuneparams(const vector& args) { ConfigParser cfg; string logFile; + string configFilePath; //Original config file path for suggested match command try { KataGoCommandLine cmd( "Tune KataGo hyperparameters using sequential optimization (QRS-Tune).\n" @@ -122,6 +123,10 @@ int MainCmds::tuneparams(const vector& args) { cmd.parseArgs(args); logFile = logFileArg.getValue(); cmd.getConfig(cfg); + configFilePath = cfg.getFileName(); + string suffix = " and/or command-line and query overrides"; + if(Global::isSuffix(configFilePath, suffix)) + configFilePath = Global::chopSuffix(configFilePath, suffix); } catch(TCLAP::ArgException& e) { cerr << "Error: " << e.error() << " for argument " << e.argId() << endl; @@ -353,6 +358,24 @@ int MainCmds::tuneparams(const vector& args) { Global::doubleToString(vBest[1]) + ", " + Global::doubleToString(vBest[2]) + "]" ); + //Suggested match command for verification + { + string overrides = Global::strprintf( + "botName0=tuned,botName1=default," + 
"cpuctExploration0=%.4f,cpuctExplorationLog0=%.4f,cpuctUtilityStdevPrior0=%.4f," + "numGameThreads=8,numGamesTotal=200", + bestE, bestLog, bestStdev + ); + overrides += ",nnModelFile0=" + nnModelFile0 + ",nnModelFile1=" + nnModelFile1; + logger.write(""); + logger.write("To verify, run a match of tuned (bot0) vs default (bot1):"); + logger.write( + "./katago match -config " + configFilePath + + " -override-config \"" + overrides + "\"" + + " -log-file match.log -sgf-output-dir match_sgfs/" + ); + } + //Cleanup delete gameRunner; for(NNEvaluator* eval : nnEvals) From 75d82c785a1d4d96bcb921f32f2765ce2cba0c6c Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 2 Apr 2026 08:17:29 +0800 Subject: [PATCH 23/41] Add shifted-optimum convergence test for QRSTuner Verifies QRS-Tune can locate a shifted peak in a stochastic 1D quadratic landscape (true optimum at x*=0.35), complementing the existing step-function end-to-end test which only checks x>0 separation. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/qrstune/QRSOptimizer.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index eb3e617cf..aea850091 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -694,4 +694,27 @@ void QRSTune::runTests() { testAssert(buffer.size() < 60); testAssert(buffer.size() >= 5); // min_keep } + + // Test QRSTuner convergence with stochastic outcomes in a 1D quadratic + // landscape centered at x* = 0.35. 
+ { + const double trueOpt = 0.35; + const int numTrials = 500; + mt19937_64 outcomeRng(99); + uniform_real_distribution uni01(0.0, 1.0); + + QRSTuner tuner(1, /*seed=*/42, numTrials, + /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5, + /*sigma_init=*/0.60, /*sigma_fin=*/0.20); + for(int trial = 0; trial < numTrials; trial++) { + vector sample = tuner.nextSample(); + double dx = sample[0] - trueOpt; + double winProb = sigmoid(2.0 - 4.0 * dx * dx); + double outcome = (uni01(outcomeRng) < winProb) ? 1.0 : 0.0; + tuner.addResult(sample, outcome); + } + vector best = tuner.bestCoords(); + testAssert(fabs(best[0] - trueOpt) < 0.15); + testAssert(tuner.bestWinProb() > 0.7); + } } From eff8ecf965fcab18ba647bfdb6fd2f6e70f98c5b Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 2 Apr 2026 18:22:22 +0800 Subject: [PATCH 24/41] Fix pruning bias that drives optimizer to boundary values Revert QRSBuffer::prune to retain oldest min_keep_ samples (insertion order) instead of best-scoring ones. The sort-based pruning removed spatially diverse early exploration samples, enabling a feedback loop where the model drifts to boundary values on flat landscapes. Also tighten the clamped detection in computeOptimumSE to catch near-boundary optima (within 1e-9) and cache predictions in prune to avoid redundant predict() calls. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/qrstune/QRSOptimizer.cpp | 33 +++++++-------------------------- cpp/qrstune/QRSOptimizer.h | 5 ++--- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index aea850091..d099b5074 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -270,7 +270,7 @@ bool QRSTune::QRSModel::computeOptimumSE(const vector>& xs, vector xStar(rhs); for(int d = 0; d < D_; d++) - clamped[d] = (xStar[d] < -1.0 || xStar[d] > 1.0); + clamped[d] = (fabs(xStar[d]) >= 1.0 - 1e-9); // Compute M^{-1} column by column vector> Minv(D_, vector(D_, 0.0)); @@ -342,43 +342,24 @@ void QRSTune::QRSBuffer::add(const vector& x, double y) { // Confidence-based pruning: drop samples whose predicted win rate // is more than prune_margin_ below the best predicted win rate. -// Samples are ranked so that min_keep_ retains the highest-quality -// samples (not just the oldest). +// The min_keep_ guard retains the oldest samples (in insertion order), +// preserving spatial diversity from early uniform exploration. 
void QRSTune::QRSBuffer::prune(const QRSModel& model) { int N = (int)xs_.size(); if(N <= min_keep_ * 2) return; - // Score all samples and find best predicted win rate - vector> scored(N); // (predicted winrate, original index) double bestPrediction = 0.0; + vector preds(N); for(int i = 0; i < N; i++) { - double p = model.predict(xs_[i].data()); - scored[i] = {p, i}; - if(p > bestPrediction) bestPrediction = p; + preds[i] = model.predict(xs_[i].data()); + if(preds[i] > bestPrediction) bestPrediction = preds[i]; } double threshold = bestPrediction - prune_margin_; - // Sort by descending predicted quality so min_keep_ retains the best - sort(scored.begin(), scored.end(), - [](const pair& a, const pair& b) { - return a.first > b.first; - }); - - // Mark samples to keep: above threshold, or among top min_keep_ - vector keep(N, false); - int kept = 0; - for(auto& entry : scored) { - if(entry.first >= threshold || kept < min_keep_) { - keep[entry.second] = true; - kept++; - } - } - - // Rebuild in original order to preserve temporal structure vector> newXs; vector newYs; for(int i = 0; i < N; i++) { - if(keep[i]) { + if(preds[i] >= threshold || (int)newXs.size() < min_keep_) { newXs.push_back(std::move(xs_[i])); newYs.push_back(ys_[i]); } diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index d513cbbe8..f8bd2ca02 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -98,9 +98,8 @@ class QRSBuffer { void add(const std::vector& x, double y); // Remove samples significantly below the current MAP win estimate. - // Samples are ranked by predicted quality so that min_keep_ retains the - // best samples rather than the oldest (which are typically from early - // uniform random exploration). + // The min_keep_ guard retains the oldest samples (in insertion order), + // preserving spatial diversity from early uniform exploration. 
void prune(const QRSModel& model); const std::vector>& xs() const { return xs_; } From 0dfde0cb55592696898e4d5817f5c7b73db5ed0d Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 2 Apr 2026 18:58:41 +0800 Subject: [PATCH 25/41] Restore ASCII-art regression curves in tune-params output The charts were removed in 4cdf5bc2 because saturated sigmoid values made them misleading. Bring them back so users can visually inspect the fitted win-rate surface for each PUCT dimension after tuning. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 09dc424a2..b6cab9697 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -99,6 +99,71 @@ static bool computeParamCIs(const QRSTune::QRSTuner& tuner, return true; } +//Print ASCII-art regression curve for each PUCT dimension. +//For dimension d: fix all other dims at vBest, sweep d from -1 to +1. +static void printRegressionCurves(const QRSTune::QRSTuner& tuner, + const vector& vBest, + const double* mins, const double* maxs, + Logger& logger) { + const int plotW = 60; + const int plotH = 20; + double bestWinRate = tuner.model().predict(vBest.data()); + + for(int dim = 0; dim < nDims; dim++) { + vector canvas(plotH, string(plotW, ' ')); + + int bestCol = (int)((vBest[dim] + 1.0) / 2.0 * (plotW - 1) + 0.5); + bestCol = max(0, min(plotW - 1, bestCol)); + + vector xSlice(vBest); + for(int col = 0; col < plotW; col++) { + double t = -1.0 + 2.0 * col / (plotW - 1); + xSlice[dim] = t; + double winRate = tuner.model().predict(xSlice.data()); + + int row = (int)((1.0 - winRate) * (plotH - 1) + 0.5); + row = max(0, min(plotH - 1, row)); + canvas[row][col] = (col == bestCol) ? 
'*' : 'o'; + } + + double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); + logger.write(""); + logger.write( + "[Dim " + Global::intToString(dim) + "] " + paramNames[dim] + + " (best QRS=" + Global::strprintf("%.3f", vBest[dim]) + + " -> real=" + Global::strprintf("%.3f", bestReal) + + ", est.winrate=" + Global::strprintf("%.3f", bestWinRate) + ")" + ); + + for(int row = 0; row < plotH; row++) { + string label; + if(row == 0) label = "1.0 |"; + else if(row == plotH / 2) label = "0.5 |"; + else if(row == plotH - 1) label = "0.0 |"; + else label = " |"; + logger.write(label + canvas[row]); + } + logger.write(" +" + string(plotW, '-')); + + { + string line(plotW + 5, ' '); + const int off = 5; + auto place = [&](int col, const string& lbl) { + int pos = off + col - (int)lbl.size() / 2; + if(pos < 0) pos = 0; + for(int i = 0; i < (int)lbl.size() && pos + i < (int)line.size(); i++) + line[pos + i] = lbl[i]; + }; + place(0, Global::strprintf("%.3f", qrsDimToReal(dim, -1.0, mins, maxs))); + place(plotW / 2, Global::strprintf("%.3f", qrsDimToReal(dim, 0.0, mins, maxs))); + place(plotW - 1, Global::strprintf("%.3f", qrsDimToReal(dim, +1.0, mins, maxs))); + size_t last = line.find_last_not_of(' '); + logger.write(line.substr(0, last + 1)); + } + } + logger.write(""); +} + int MainCmds::tuneparams(const vector& args) { Board::initHash(); ScoreValue::initTables(); @@ -358,6 +423,9 @@ int MainCmds::tuneparams(const vector& args) { Global::doubleToString(vBest[1]) + ", " + Global::doubleToString(vBest[2]) + "]" ); + //ASCII-art regression curves (one per PUCT dimension) + printRegressionCurves(tuner, vBest, qrsMins, qrsMaxs, logger); + //Suggested match command for verification { string overrides = Global::strprintf( From cf2457e0dc2921650efbf0f0b77bbb5d85cb04b0 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Fri, 3 Apr 2026 07:11:45 +0800 Subject: [PATCH 26/41] Fix convex-dim handling and add verbose diagnostic 
logging Fix computeOptimumSE producing wild CIs for boundary-clamped dims by returning early when convex and zeroing Jacobian rows for clamped dims. Skip pruning when model has convex dimensions to prevent discarding good samples based on unreliable predictions. Add optional verbose logging (config key 'verbose') that logs refit diagnostics, pruning events, and per-trial sample coordinates. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 4 + cpp/configs/tune_params_example.cfg | 5 +- cpp/qrstune/QRSOptimizer.cpp | 168 ++++++++++++++++++++++++++-- cpp/qrstune/QRSOptimizer.h | 12 ++ 4 files changed, 175 insertions(+), 14 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index b6cab9697..bbe6258d9 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -271,6 +271,10 @@ int MainCmds::tuneparams(const vector& args) { uint64_t qrsSeed = seedRand.nextUInt64(); QRSTune::QRSTuner tuner(nDims, qrsSeed, numTrials); + bool verbose = cfg.contains("verbose") ? cfg.getBool("verbose") : false; + if(verbose) + tuner.setLogger(&logger); + const string gameSeedBase = Global::uint64ToHexString(seedRand.nextUInt64()); int wins = 0, losses = 0, draws = 0; diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 40995f2c3..19d6f0a54 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -23,11 +23,11 @@ numTrials = 500 # If omitted, defaults are used: cpuctExploration [0.5, 2.0], cpuctExplorationLog [0.05, 1.0], # cpuctUtilityStdevPrior [0.1, 0.8]. 
# cpuctExplorationMin = 0.5 -# cpuctExplorationMax = 2.0 +cpuctExplorationMax = 4.0 # cpuctExplorationLogMin = 0.05 # cpuctExplorationLogMax = 1.0 # cpuctUtilityStdevPriorMin = 0.1 -# cpuctUtilityStdevPriorMax = 0.8 +cpuctUtilityStdevPriorMax = 2.0 # Logs------------------------------------------------------------------------------------ @@ -35,6 +35,7 @@ logSearchInfo = false logMoves = false logGamesEvery = 100 logToStdout = true +# verbose = false # Log QRS optimizer internals (refits, pruning, sample coords) # Bots------------------------------------------------------------------------------------- # Exactly 2 bots are required: bot0 = reference (fixed params), bot1 = experiment (tuned params). diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index d099b5074..a546a7c27 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -14,6 +14,8 @@ #include #include +#include "../core/global.h" +#include "../core/logger.h" #include "../core/test.h" using namespace std; @@ -218,6 +220,13 @@ double QRSTune::QRSModel::score(const double* x) const { // Find the unconstrained optimum of the quadratic score surface, then clamp to [-1,+1]^D. void QRSTune::QRSModel::mapOptimum(double* out_x) const { + // If any dimension is convex, the fit is dominated by noise in that + // dimension. Return the origin — a conservative, prior-centered choice. + if(hasConvexDim()) { + for(int i = 0; i < D_; i++) out_x[i] = 0.0; + return; + } + vector> M(D_, vector(D_, 0.0)); vector rhs(D_); buildQuadHessian(M, rhs); @@ -230,6 +239,13 @@ void QRSTune::QRSModel::mapOptimum(double* out_x) const { out_x[i] = max(-1.0, min(1.0, rhs[i])); } +bool QRSTune::QRSModel::hasConvexDim() const { + const double* quadCoeffs = beta_.data() + 1 + D_; + for(int d = 0; d < D_; d++) + if(quadCoeffs[d] >= 0.0) return true; + return false; +} + // Compute standard errors of the MAP optimum via the delta method. // // 1. 
Rebuild negH (Fisher info + L2 prior) at current beta. @@ -244,6 +260,10 @@ bool QRSTune::QRSModel::computeOptimumSE(const vector>& xs, int N = (int)xs.size(); if(N < F_) return false; + // If any dim is convex, mapOptimum returns origin — CIs for the + // unconstrained critical point would be misleading. + if(hasConvexDim()) return false; + // --- Step 1: Build negH (Fisher info + L2 prior) at current beta --- vector> negH(F_, vector(F_, 0.0)); buildNegHessian(xs, negH); @@ -305,6 +325,11 @@ bool QRSTune::QRSModel::computeOptimumSE(const vector>& xs, idx++; } + // Clamped dims: x*[d] is at boundary, so dx*_d/dbeta = 0 + for(int d = 0; d < D_; d++) + if(clamped[d]) + fill(J[d].begin(), J[d].end(), 0.0); + // --- Step 5: Cov(x*) = J Cov(beta) J^T --- vector> temp(D_, vector(F_, 0.0)); for(int d = 0; d < D_; d++) @@ -384,27 +409,50 @@ QRSTune::QRSTuner::QRSTuner(int D, uint64_t seed, int total_trials, refit_every_(refit_every), prune_every_(prune_every), sigma_initial_(sigma_init), - sigma_final_(sigma_fin) + sigma_final_(sigma_fin), + logger_(nullptr) {} +void QRSTune::QRSTuner::setLogger(Logger* logger) { + logger_ = logger; +} + vector QRSTune::QRSTuner::nextSample() { vector x(D_); int F = model_.features(); + string sigmaStr; if(buffer_.size() < F + 1) { // Insufficient data for reliable fit — explore uniformly uniform_real_distribution uni(-1.0, 1.0); for(int i = 0; i < D_; i++) x[i] = uni(rng_); - return x; + sigmaStr = "uniform"; + } else { + // Start from MAP optimum, add decaying Gaussian noise for exploration + model_.mapOptimum(x.data()); + double progress = (double)trial_count_ / max(1, total_trials_ - 1); + double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_); + + // When the fit has convex dimensions (noise-dominated), keep exploration + // wide to avoid premature convergence around the unreliable origin. 
+ if(model_.hasConvexDim()) + sigma = sigma_initial_; + + normal_distribution noise(0.0, sigma); + for(int i = 0; i < D_; i++) + x[i] = max(-1.0, min(1.0, x[i] + noise(rng_))); + sigmaStr = Global::strprintf("%.4f", sigma); } - // Start from MAP optimum, add decaying Gaussian noise for exploration - model_.mapOptimum(x.data()); - double progress = (double)trial_count_ / max(1, total_trials_ - 1); - double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_); - normal_distribution noise(0.0, sigma); - for(int i = 0; i < D_; i++) - x[i] = max(-1.0, min(1.0, x[i] + noise(rng_))); + if(logger_) { + string msg = "QRS sample trial=" + to_string(trial_count_) + " sigma=" + sigmaStr + " x=["; + for(int i = 0; i < D_; i++) { + if(i > 0) msg += ","; + msg += Global::strprintf("%.3f", x[i]); + } + msg += "]"; + logger_->write(msg); + } return x; } @@ -414,10 +462,42 @@ void QRSTune::QRSTuner::addResult(const vector& x, double y) { trial_count_++; if(trial_count_ % refit_every_ == 0 && buffer_.size() >= model_.features() + 1) { + int sizeBefore = buffer_.size(); model_.fit(buffer_.xs(), buffer_.ys()); int refit_count = trial_count_ / refit_every_; - if(refit_count % prune_every_ == 0) - buffer_.prune(model_); + if(refit_count % prune_every_ == 0) { + if(model_.hasConvexDim()) { + if(logger_) + logger_->write("QRS prune skipped: model has convex dims, predictions unreliable"); + } else { + buffer_.prune(model_); + if(logger_) + logger_->write("QRS prune: " + to_string(sizeBefore) + " -> " + to_string(buffer_.size()) + " samples"); + } + } + if(logger_) { + auto best = bestCoords(); + double winP = model_.predict(best.data()); + const auto& b = model_.beta(); + string diag = "QRS refit trial=" + to_string(trial_count_); + diag += " buf=" + to_string(buffer_.size()); + diag += " intercept=" + Global::strprintf("%.4f", b[0]); + diag += " quadDiag=["; + for(int d = 0; d < D_; d++) { + if(d > 0) diag += ","; + diag += Global::strprintf("%.4f", b[1 + D_ + d]); + } + 
diag += "]"; + diag += " convex=" + string(model_.hasConvexDim() ? "Y" : "N"); + diag += " bestQRS=["; + for(int d = 0; d < D_; d++) { + if(d > 0) diag += ","; + diag += Global::strprintf("%.3f", best[d]); + } + diag += "]"; + diag += " winP=" + Global::strprintf("%.4f", winP); + logger_->write(diag); + } } } @@ -589,7 +669,8 @@ void QRSTune::runTests() { } testAssert(tuner.trialCount() == numTrials); // The fitted model should recognize that positive x is better - testAssert(tuner.bestWinProb() > 0.5); + double posOne = 1.0, negOne = -1.0; + testAssert(tuner.model().predict(&posOne) > tuner.model().predict(&negOne)); } // Test computeOptimumSE: 1D with a concave peak (wins near center, losses at edges). @@ -698,4 +779,67 @@ void QRSTune::runTests() { testAssert(fabs(best[0] - trueOpt) < 0.15); testAssert(tuner.bestWinProb() > 0.7); } + + // Test: Nearly-flat 3D landscape exposes convex-fitting bug. + // + // When the true function is nearly flat and we have only ~128 stochastic + // trials fitting 10 parameters, noise can make the fitted quadratic convex + // (positive coefficient) in some dimensions. mapOptimum() then returns the + // MINIMUM in those dimensions instead of the maximum. + { + const int D = 3; + const int numTrials = 128; + const double trueOpt[3] = {0.3, -0.2, 0.4}; + const double curvature = 0.1; // very weak — winrate spans only ~0.39-0.50 + + mt19937_64 outcomeRng(0); + uniform_real_distribution uni01(0.0, 1.0); + + QRSTuner tuner(D, /*seed=*/42, numTrials, + /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5, + /*sigma_init=*/0.60, /*sigma_fin=*/0.20); + + for(int trial = 0; trial < numTrials; trial++) { + vector sample = tuner.nextSample(); + double sc = 0.0; + for(int d = 0; d < D; d++) { + double dx = sample[d] - trueOpt[d]; + sc -= curvature * dx * dx; + } + double winProb = sigmoid(sc); + double outcome = (uni01(outcomeRng) < winProb) ? 
1.0 : 0.0; + tuner.addResult(sample, outcome); + } + + // Probe fitted quadratic coefficients: + // quadCoeff_k = (score(e_k) + score(-e_k) - 2*score(0)) / 2 + const QRSModel& model = tuner.model(); + double origin[3] = {0.0, 0.0, 0.0}; + double s0 = model.score(origin); + bool anyConvex = false; + double probe[3] = {0.0, 0.0, 0.0}; + for(int d = 0; d < D; d++) { + probe[d] = 1.0; + double sp = model.score(probe); + probe[d] = -1.0; + double sn = model.score(probe); + probe[d] = 0.0; + double quadCoeff = (sp + sn - 2.0 * s0) / 2.0; + if(quadCoeff > 0.0) { + anyConvex = true; + break; + } + } + // With these seeds, noise overwhelms the weak signal and at least one + // fitted dimension ends up convex (positive quadratic coefficient). + testAssert(anyConvex); + + // Invariant: the optimizer's "best" should predict at least as well as + // an arbitrary point like the origin. Currently fails because + // mapOptimum() returns the critical point of the fitted quadratic + // without checking whether it is a maximum or minimum. + double probAtBest = tuner.bestWinProb(); + double probAtOrigin = model.predict(origin); + testAssert(probAtBest >= probAtOrigin); + } } diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index f8bd2ca02..8236d9ed1 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -7,6 +7,8 @@ #include #include +class Logger; + namespace QRSTune { // ============================================================ @@ -67,8 +69,13 @@ class QRSModel { // The solution is clamped to [-1,+1]^D. void mapOptimum(double* out_x) const; + // Returns true if any fitted quadratic coefficient is non-negative (convex), + // indicating the fit is unreliable due to noise. + bool hasConvexDim() const; + int dims() const { return D_; } int features() const { return F_; } + const std::vector& beta() const { return beta_; } // Compute standard errors of the MAP optimum x* via the delta method. 
// Rebuilds the Fisher information matrix from xs at current beta, inverts @@ -132,6 +139,8 @@ class QRSTuner { double sigma_initial_; double sigma_final_; + Logger* logger_; // non-null enables verbose diagnostic logging + public: // D : number of dimensions // seed : RNG seed for reproducibility @@ -161,6 +170,9 @@ class QRSTuner { // Estimated win probability at the MAP optimum double bestWinProb() const; + // Enable diagnostic logging (refits, pruning, sample coords). + void setLogger(Logger* logger); + int trialCount() const { return trial_count_; } int dims() const { return D_; } const QRSModel& model() const { return model_; } From 7a9f39b9920e66d2e7a3d72d1daa17ad740df773 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Fri, 3 Apr 2026 08:44:28 +0800 Subject: [PATCH 27/41] Remove cpuctExploration from tune-params tuned dimensions cpuctExploration and cpuctExplorationLog are strongly correlated in the PUCT formula (both additively scale exploration), making the optimizer unable to distinguish their individual effects. Dropping cpuctExploration reduces dimensionality from 3 to 2, improving optimizer convergence. The experiment bot's cpuctExploration now stays fixed at the config value. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 39 ++++++++++++----------------- cpp/configs/tune_params_example.cfg | 18 ++++++------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index bbe6258d9..9208ce21d 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -32,30 +32,28 @@ static void signalHandler(int signal) } //Number of dimensions = number of PUCT params being tuned -static const int nDims = 3; +static const int nDims = 2; static const char* paramNames[nDims] = { - "cpuctExploration", "cpuctExplorationLog", "cpuctUtilityStdevPrior" }; static const char* paramShortNames[nDims] = { - "E", "Log", "Stdev" }; //Default search ranges (used when config keys are absent) -static const double qrsDefaultMins[nDims] = {0.5, 0.05, 0.1}; -static const double qrsDefaultMaxs[nDims] = {2.0, 1.0, 0.8}; +static const double qrsDefaultMins[nDims] = {0.05, 0.1}; +static const double qrsDefaultMaxs[nDims] = {1.0, 0.8}; //Config keys for per-dimension search ranges static const char* rangeMinKeys[nDims] = { - "cpuctExplorationMin", "cpuctExplorationLogMin", "cpuctUtilityStdevPriorMin" + "cpuctExplorationLogMin", "cpuctUtilityStdevPriorMin" }; static const char* rangeMaxKeys[nDims] = { - "cpuctExplorationMax", "cpuctExplorationLogMax", "cpuctUtilityStdevPriorMax" + "cpuctExplorationLogMax", "cpuctUtilityStdevPriorMax" }; //Map QRS-Tune normalized coordinate x in [-1,+1] to real PUCT value. 
@@ -67,14 +65,12 @@ static double qrsDimToReal(int dim, double x, const double* mins, const double* static void qrsToPUCT( const vector& x, - double& cpuctExploration, double& cpuctExplorationLog, double& cpuctUtilityStdevPrior, const double* mins, const double* maxs ) { - cpuctExploration = qrsDimToReal(0, x[0], mins, maxs); - cpuctExplorationLog = qrsDimToReal(1, x[1], mins, maxs); - cpuctUtilityStdevPrior = qrsDimToReal(2, x[2], mins, maxs); + cpuctExplorationLog = qrsDimToReal(0, x[0], mins, maxs); + cpuctUtilityStdevPrior = qrsDimToReal(1, x[1], mins, maxs); } static const double Z_95 = 1.96; @@ -220,12 +216,10 @@ int MainCmds::tuneparams(const vector& args) { string("tune-params: ") + rangeMinKeys[d] + " must be < " + rangeMaxKeys[d]); } logger.write( - "QRS ranges: cpuctExploration=[" + + "QRS ranges: cpuctExplorationLog=[" + Global::strprintf("%.4f", qrsMins[0]) + "," + Global::strprintf("%.4f", qrsMaxs[0]) + - "] cpuctExplorationLog=[" + - Global::strprintf("%.4f", qrsMins[1]) + "," + Global::strprintf("%.4f", qrsMaxs[1]) + "] cpuctUtilityStdevPrior=[" + - Global::strprintf("%.4f", qrsMins[2]) + "," + Global::strprintf("%.4f", qrsMaxs[2]) + "]" + Global::strprintf("%.4f", qrsMins[1]) + "," + Global::strprintf("%.4f", qrsMaxs[1]) + "]" ); //Load search params for both bots @@ -286,11 +280,10 @@ int MainCmds::tuneparams(const vector& args) { for(int trial = 0; trial < numTrials; trial++) { vector sample = tuner.nextSample(); - double cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior; - qrsToPUCT(sample, cpuctExploration, cpuctExplorationLog, cpuctUtilityStdevPrior, qrsMins, qrsMaxs); + double cpuctExplorationLog, cpuctUtilityStdevPrior; + qrsToPUCT(sample, cpuctExplorationLog, cpuctUtilityStdevPrior, qrsMins, qrsMaxs); SearchParams expParams = paramss[1]; - expParams.cpuctExploration = cpuctExploration; expParams.cpuctExplorationLog = cpuctExplorationLog; expParams.cpuctUtilityStdevPrior = cpuctUtilityStdevPrior; @@ -394,8 +387,8 @@ int 
MainCmds::tuneparams(const vector& args) { //Final result vector vBest = tuner.bestCoords(); - double bestE, bestLog, bestStdev; - qrsToPUCT(vBest, bestE, bestLog, bestStdev, qrsMins, qrsMaxs); + double bestLog, bestStdev; + qrsToPUCT(vBest, bestLog, bestStdev, qrsMins, qrsMaxs); logger.write(""); logger.write("=== tune-params Results ==="); @@ -424,7 +417,7 @@ int MainCmds::tuneparams(const vector& args) { } logger.write( "QRS raw coordinates: [" + Global::doubleToString(vBest[0]) + ", " + - Global::doubleToString(vBest[1]) + ", " + Global::doubleToString(vBest[2]) + "]" + Global::doubleToString(vBest[1]) + "]" ); //ASCII-art regression curves (one per PUCT dimension) @@ -434,9 +427,9 @@ int MainCmds::tuneparams(const vector& args) { { string overrides = Global::strprintf( "botName0=tuned,botName1=default," - "cpuctExploration0=%.4f,cpuctExplorationLog0=%.4f,cpuctUtilityStdevPrior0=%.4f," + "cpuctExplorationLog0=%.4f,cpuctUtilityStdevPrior0=%.4f," "numGameThreads=8,numGamesTotal=200", - bestE, bestLog, bestStdev + bestLog, bestStdev ); overrides += ",nnModelFile0=" + nnModelFile0 + ",nnModelFile1=" + nnModelFile1; logger.write(""); diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 19d6f0a54..4b0aeba76 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -6,9 +6,9 @@ # and an experiment bot (bot1) whose PUCT parameters are adapted each trial using # QRS-Tune (Quadratic Regression Sequential optimization). # -# After all trials, it reports the best-found values for cpuctExploration, -# cpuctExplorationLog, and cpuctUtilityStdevPrior, along with ASCII regression curves -# showing each parameter's estimated effect on win rate. +# After all trials, it reports the best-found values for cpuctExplorationLog +# and cpuctUtilityStdevPrior, along with ASCII regression curves showing each +# parameter's estimated effect on win rate. 
# # See gtp config and match config for descriptions of most search and GPU params. @@ -20,14 +20,12 @@ numTrials = 500 # Search ranges for PUCT parameters being tuned. # The optimizer explores within [Min, Max] for each parameter. -# If omitted, defaults are used: cpuctExploration [0.5, 2.0], cpuctExplorationLog [0.05, 1.0], +# If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0], # cpuctUtilityStdevPrior [0.1, 0.8]. -# cpuctExplorationMin = 0.5 -cpuctExplorationMax = 4.0 # cpuctExplorationLogMin = 0.05 -# cpuctExplorationLogMax = 1.0 +cpuctExplorationLogMax = 10.0 # cpuctUtilityStdevPriorMin = 0.1 -cpuctUtilityStdevPriorMax = 2.0 +cpuctUtilityStdevPriorMax = 10.0 # Logs------------------------------------------------------------------------------------ @@ -98,8 +96,8 @@ chosenMoveTemperatureEarly = 0.60 chosenMoveTemperature = 0.20 # Internal params------------------------------------------------------------------------------ -# These are the FIXED params for bot0 (reference). Bot1's cpuctExploration, -# cpuctExplorationLog, and cpuctUtilityStdevPrior will be overridden by the optimizer. +# These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog +# and cpuctUtilityStdevPrior will be overridden by the optimizer. # cpuctExploration = 0.9 # cpuctExplorationLog = 0.4 From c349aa6792275f011bdf68030460286c9bd47e32 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Fri, 3 Apr 2026 19:16:38 +0800 Subject: [PATCH 28/41] Add intercept divergence regression test and enable GTP-equivalent defaults in tune config The QRS tuner's Newton-Raphson can diverge when warm-starting from saturated beta values: the Hessian degenerates to just the L2 prior, causing oscillating overshoots that grow across refits. The new test reproduces this by running a flat 2D landscape and scanning seeds until one triggers |intercept| > 50 (observed values: 400-500 vs < 1 normal). 
Also update tune_params_example.cfg to use default parameter ranges and enable GTP-equivalent search features so tuning reflects actual play. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/configs/tune_params_example.cfg | 18 ++++++++++++--- cpp/qrstune/QRSOptimizer.cpp | 36 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 4b0aeba76..3c942b7c2 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -23,9 +23,9 @@ numTrials = 500 # If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0], # cpuctUtilityStdevPrior [0.1, 0.8]. # cpuctExplorationLogMin = 0.05 -cpuctExplorationLogMax = 10.0 +# cpuctExplorationLogMax = 1.0 # cpuctUtilityStdevPriorMin = 0.1 -cpuctUtilityStdevPriorMax = 10.0 +# cpuctUtilityStdevPriorMax = 0.8 # Logs------------------------------------------------------------------------------------ @@ -107,4 +107,16 @@ chosenMoveTemperature = 0.20 # valueWeightExponent = 0.25 # subtreeValueBiasFactor = 0.45 # subtreeValueBiasWeightExponent = 0.85 -# useGraphSearch = true + +# GTP-equivalent defaults---------------------------------------------------------------------- +# These features default to off in match mode but on in GTP mode. +# Enable them so tuning results reflect actual GTP-strength play. 
+ +cpuctUtilityStdevScale = 0.85 +policyOptimism = 1.0 +useUncertainty = true +useNoisePruning = true +useNonBuggyLcb = true +useGraphSearch = true +rootSymmetryPruning = true +fpuParentWeightByVisitedPolicy = true diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index a546a7c27..9fdaf0c6b 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -842,4 +842,40 @@ void QRSTune::runTests() { double probAtOrigin = model.predict(origin); testAssert(probAtBest >= probAtOrigin); } + + // Regression test for Newton-Raphson intercept divergence on a flat 2D + // landscape (true winrate = 50% everywhere, so correct intercept = 0). + // Scans seeds until one triggers the warm-start saturation cascade. + { + const int D = 2; + const int numTrials = 100; + // Diverged intercepts are 400-500; non-diverged are < 1. + const double DIVERGE_THRESHOLD = 50.0; + + bool diverged = false; + uniform_real_distribution uni01(0.0, 1.0); + + for(uint64_t tunerSeed = 0; tunerSeed < 20 && !diverged; tunerSeed++) { + mt19937_64 outcomeRng(tunerSeed * 1000 + 7); + + QRSTuner tuner(D, /*seed=*/tunerSeed, numTrials, + /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5, + /*sigma_init=*/0.40, /*sigma_fin=*/0.05); + + for(int trial = 0; trial < numTrials; trial++) { + vector sample = tuner.nextSample(); + double outcome = (uni01(outcomeRng) < 0.5) ? 1.0 : 0.0; + tuner.addResult(sample, outcome); + } + + if(fabs(tuner.model().beta()[0]) > DIVERGE_THRESHOLD) + diverged = true; + } + + // BUG: at least one seed triggers intercept divergence. 
+ testAssert(diverged); + + // After fix, uncomment: + // testAssert(!diverged); + } } From 29dba7ab09cec646bff95cfa9f364698f827eece Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Fri, 3 Apr 2026 20:54:50 +0800 Subject: [PATCH 29/41] Remove cpuctUtilityStdevPrior from tune-params tuned dimensions Reduce QRS-Tune optimization from 2D to 1D, tuning only cpuctExplorationLog. Also fix intercept divergence in QRSModel::fit by resetting beta to prior mean before each IRLS iteration. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 42 ++++++++++++----------------- cpp/configs/tune_params_example.cfg | 17 +++++------- cpp/qrstune/QRSOptimizer.cpp | 12 +++++---- 3 files changed, 31 insertions(+), 40 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 9208ce21d..382dd8134 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -32,28 +32,26 @@ static void signalHandler(int signal) } //Number of dimensions = number of PUCT params being tuned -static const int nDims = 2; +static const int nDims = 1; static const char* paramNames[nDims] = { - "cpuctExplorationLog", - "cpuctUtilityStdevPrior" + "cpuctExplorationLog" }; static const char* paramShortNames[nDims] = { - "Log", - "Stdev" + "Log" }; //Default search ranges (used when config keys are absent) -static const double qrsDefaultMins[nDims] = {0.05, 0.1}; -static const double qrsDefaultMaxs[nDims] = {1.0, 0.8}; +static const double qrsDefaultMins[nDims] = {0.05}; +static const double qrsDefaultMaxs[nDims] = {1.0}; //Config keys for per-dimension search ranges static const char* rangeMinKeys[nDims] = { - "cpuctExplorationLogMin", "cpuctUtilityStdevPriorMin" + "cpuctExplorationLogMin" }; static const char* rangeMaxKeys[nDims] = { - "cpuctExplorationLogMax", "cpuctUtilityStdevPriorMax" + "cpuctExplorationLogMax" }; //Map QRS-Tune normalized coordinate x in [-1,+1] to real PUCT value. 
@@ -66,11 +64,9 @@ static double qrsDimToReal(int dim, double x, const double* mins, const double* static void qrsToPUCT( const vector& x, double& cpuctExplorationLog, - double& cpuctUtilityStdevPrior, const double* mins, const double* maxs ) { - cpuctExplorationLog = qrsDimToReal(0, x[0], mins, maxs); - cpuctUtilityStdevPrior = qrsDimToReal(1, x[1], mins, maxs); + cpuctExplorationLog = qrsDimToReal(0, x[0], mins, maxs); } static const double Z_95 = 1.96; @@ -217,9 +213,7 @@ int MainCmds::tuneparams(const vector& args) { } logger.write( "QRS ranges: cpuctExplorationLog=[" + - Global::strprintf("%.4f", qrsMins[0]) + "," + Global::strprintf("%.4f", qrsMaxs[0]) + - "] cpuctUtilityStdevPrior=[" + - Global::strprintf("%.4f", qrsMins[1]) + "," + Global::strprintf("%.4f", qrsMaxs[1]) + "]" + Global::strprintf("%.4f", qrsMins[0]) + "," + Global::strprintf("%.4f", qrsMaxs[0]) + "]" ); //Load search params for both bots @@ -280,12 +274,11 @@ int MainCmds::tuneparams(const vector& args) { for(int trial = 0; trial < numTrials; trial++) { vector sample = tuner.nextSample(); - double cpuctExplorationLog, cpuctUtilityStdevPrior; - qrsToPUCT(sample, cpuctExplorationLog, cpuctUtilityStdevPrior, qrsMins, qrsMaxs); + double cpuctExplorationLog; + qrsToPUCT(sample, cpuctExplorationLog, qrsMins, qrsMaxs); SearchParams expParams = paramss[1]; - expParams.cpuctExplorationLog = cpuctExplorationLog; - expParams.cpuctUtilityStdevPrior = cpuctUtilityStdevPrior; + expParams.cpuctExplorationLog = cpuctExplorationLog; //Alternate colors to remove first-move advantage bias bool expIsBlack = (trial % 2 == 0); @@ -387,8 +380,8 @@ int MainCmds::tuneparams(const vector& args) { //Final result vector vBest = tuner.bestCoords(); - double bestLog, bestStdev; - qrsToPUCT(vBest, bestLog, bestStdev, qrsMins, qrsMaxs); + double bestLog; + qrsToPUCT(vBest, bestLog, qrsMins, qrsMaxs); logger.write(""); logger.write("=== tune-params Results ==="); @@ -416,8 +409,7 @@ int MainCmds::tuneparams(const vector& 
args) { } } logger.write( - "QRS raw coordinates: [" + Global::doubleToString(vBest[0]) + ", " + - Global::doubleToString(vBest[1]) + "]" + "QRS raw coordinates: [" + Global::doubleToString(vBest[0]) + "]" ); //ASCII-art regression curves (one per PUCT dimension) @@ -427,9 +419,9 @@ int MainCmds::tuneparams(const vector& args) { { string overrides = Global::strprintf( "botName0=tuned,botName1=default," - "cpuctExplorationLog0=%.4f,cpuctUtilityStdevPrior0=%.4f," + "cpuctExplorationLog0=%.4f," "numGameThreads=8,numGamesTotal=200", - bestLog, bestStdev + bestLog ); overrides += ",nnModelFile0=" + nnModelFile0 + ",nnModelFile1=" + nnModelFile1; logger.write(""); diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 3c942b7c2..fee301ece 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -6,9 +6,9 @@ # and an experiment bot (bot1) whose PUCT parameters are adapted each trial using # QRS-Tune (Quadratic Regression Sequential optimization). # -# After all trials, it reports the best-found values for cpuctExplorationLog -# and cpuctUtilityStdevPrior, along with ASCII regression curves showing each -# parameter's estimated effect on win rate. +# After all trials, it reports the best-found value for cpuctExplorationLog, +# along with an ASCII regression curve showing the parameter's estimated +# effect on win rate. # # See gtp config and match config for descriptions of most search and GPU params. @@ -18,14 +18,11 @@ # A few hundred trials is a reasonable starting point; 1000+ for higher confidence. numTrials = 500 -# Search ranges for PUCT parameters being tuned. -# The optimizer explores within [Min, Max] for each parameter. -# If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0], -# cpuctUtilityStdevPrior [0.1, 0.8]. +# Search range for the PUCT parameter being tuned. +# The optimizer explores within [Min, Max]. +# If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0]. 
# cpuctExplorationLogMin = 0.05 # cpuctExplorationLogMax = 1.0 -# cpuctUtilityStdevPriorMin = 0.1 -# cpuctUtilityStdevPriorMax = 0.8 # Logs------------------------------------------------------------------------------------ @@ -97,7 +94,7 @@ chosenMoveTemperature = 0.20 # Internal params------------------------------------------------------------------------------ # These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog -# and cpuctUtilityStdevPrior will be overridden by the optimizer. +# will be overridden by the optimizer. # cpuctExploration = 0.9 # cpuctExplorationLog = 0.4 diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index 9fdaf0c6b..2a1421506 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -163,6 +163,11 @@ void QRSTune::QRSModel::fit(const vector>& xs, int N = (int)xs.size(); if(N < F_) return; // underdetermined; keep prior beta = 0 + // Reset to prior mean to avoid warm-start saturation cascade: a + // previously-extreme intercept makes w = p*(1-p) ≈ 0 for all samples, + // degenerating the Hessian to l2_*I and producing unbounded Newton steps. + fill(beta_.begin(), beta_.end(), 0.0); + vector phi(F_); vector grad(F_); vector> negH(F_, vector(F_)); @@ -872,10 +877,7 @@ void QRSTune::runTests() { diverged = true; } - // BUG: at least one seed triggers intercept divergence. - testAssert(diverged); - - // After fix, uncomment: - // testAssert(!diverged); + // Fixed: no seed triggers intercept divergence after removing warm-start. + testAssert(!diverged); } } From 5381fbda689a7918f6d51770491f2f8fca445770 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Fri, 3 Apr 2026 21:05:17 +0800 Subject: [PATCH 30/41] Append game outcome to QRS sample log line in tune-params verbose mode Defer the QRS sample log message from nextSample() to addResult(), so the experiment outcome (exp wins / exp loses / draw) can be appended to the same line. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/qrstune/QRSOptimizer.cpp | 10 +++++++++- cpp/qrstune/QRSOptimizer.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index 2a1421506..ae227017d 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -456,13 +456,21 @@ vector QRSTune::QRSTuner::nextSample() { msg += Global::strprintf("%.3f", x[i]); } msg += "]"; - logger_->write(msg); + pendingLogMsg_ = msg; } return x; } void QRSTune::QRSTuner::addResult(const vector& x, double y) { + if(!pendingLogMsg_.empty() && logger_) { + string label; + if(y == 1.0) label = "exp wins"; + else if(y == 0.0) label = "exp loses"; + else label = "draw"; + logger_->write(pendingLogMsg_ + " -> " + label); + pendingLogMsg_.clear(); + } buffer_.add(x, y); trial_count_++; diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 8236d9ed1..12d017588 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -140,6 +140,7 @@ class QRSTuner { double sigma_final_; Logger* logger_; // non-null enables verbose diagnostic logging + std::string pendingLogMsg_; // assembled in nextSample(), flushed in addResult() public: // D : number of dimensions From 406048b8bda769792f4eeebd19674f815956a6b7 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Sat, 4 Apr 2026 06:40:40 +0800 Subject: [PATCH 31/41] Switch tune-params example config to Tromp-Taylor rules with komi 7 Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/configs/tune_params_example.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index fee301ece..28b014cc0 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -58,15 +58,15 @@ resignConsecTurns = 6 # 
Rules------------------------------------------------------------------------------------ # Use a single fixed ruleset for consistent tuning results. -koRules = SIMPLE +koRules = POSITIONAL scoringRules = AREA taxRules = NONE -multiStoneSuicideLegals = false +multiStoneSuicideLegals = true hasButtons = false bSizes = 19 bSizeRelProbs = 1 -komiAuto = True +komiMean = 7 handicapProb = 0.0 handicapCompensateKomiProb = 1.0 From 0fadb78e56861701fe38422d3d35645846897af6 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Sat, 4 Apr 2026 07:34:19 +0800 Subject: [PATCH 32/41] Add cpuctUtilityStdevPrior and cpuctUtilityStdevPriorWeight to tune-params Expand tune-params from 1 to 3 tuned dimensions using a data-driven TuneDimension struct with pointer-to-member fields, so adding future parameters requires only a single table entry. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 107 +++++++++++++--------------- cpp/configs/tune_params_example.cfg | 25 ++++--- 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 382dd8134..b1698f366 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -31,28 +31,22 @@ static void signalHandler(int signal) } } -//Number of dimensions = number of PUCT params being tuned -static const int nDims = 1; - -static const char* paramNames[nDims] = { - "cpuctExplorationLog" -}; - -static const char* paramShortNames[nDims] = { - "Log" +struct TuneDimension { + const char* name; + const char* shortName; + double defaultMin; + double defaultMax; + const char* minKey; + const char* maxKey; + double SearchParams::* field; }; -//Default search ranges (used when config keys are absent) -static const double qrsDefaultMins[nDims] = {0.05}; -static const double qrsDefaultMaxs[nDims] = {1.0}; - -//Config keys for per-dimension search ranges -static const char* rangeMinKeys[nDims] = { - 
"cpuctExplorationLogMin" -}; -static const char* rangeMaxKeys[nDims] = { - "cpuctExplorationLogMax" +static const TuneDimension tuneDims[] = { + {"cpuctExplorationLog", "Log", 0.05, 1.0, "cpuctExplorationLogMin", "cpuctExplorationLogMax", &SearchParams::cpuctExplorationLog}, + {"cpuctUtilityStdevPrior", "Prior", 0.10, 0.80, "cpuctUtilityStdevPriorMin", "cpuctUtilityStdevPriorMax", &SearchParams::cpuctUtilityStdevPrior}, + {"cpuctUtilityStdevPriorWeight","PriorW",0.5, 5.0, "cpuctUtilityStdevPriorWeightMin","cpuctUtilityStdevPriorWeightMax",&SearchParams::cpuctUtilityStdevPriorWeight}, }; +static const int nDims = sizeof(tuneDims) / sizeof(tuneDims[0]); //Map QRS-Tune normalized coordinate x in [-1,+1] to real PUCT value. static double qrsDimToReal(int dim, double x, const double* mins, const double* maxs) { @@ -61,14 +55,6 @@ static double qrsDimToReal(int dim, double x, const double* mins, const double* return center + x * radius; } -static void qrsToPUCT( - const vector& x, - double& cpuctExplorationLog, - const double* mins, const double* maxs -) { - cpuctExplorationLog = qrsDimToReal(0, x[0], mins, maxs); -} - static const double Z_95 = 1.96; // Compute 95% CI bounds for each parameter in real (non-normalized) coordinates. @@ -121,7 +107,7 @@ static void printRegressionCurves(const QRSTune::QRSTuner& tuner, double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs); logger.write(""); logger.write( - "[Dim " + Global::intToString(dim) + "] " + paramNames[dim] + + "[Dim " + Global::intToString(dim) + "] " + tuneDims[dim].name + " (best QRS=" + Global::strprintf("%.3f", vBest[dim]) + " -> real=" + Global::strprintf("%.3f", bestReal) + ", est.winrate=" + Global::strprintf("%.3f", bestWinRate) + ")" @@ -201,20 +187,25 @@ int MainCmds::tuneparams(const vector& args) { //Search ranges (configurable; defaults preserve prior behaviour) double qrsMins[nDims], qrsMaxs[nDims]; for(int d = 0; d < nDims; d++) { - qrsMins[d] = cfg.contains(rangeMinKeys[d]) - ? 
cfg.getDouble(rangeMinKeys[d], -1e9, 1e9) - : qrsDefaultMins[d]; - qrsMaxs[d] = cfg.contains(rangeMaxKeys[d]) - ? cfg.getDouble(rangeMaxKeys[d], -1e9, 1e9) - : qrsDefaultMaxs[d]; + qrsMins[d] = cfg.contains(tuneDims[d].minKey) + ? cfg.getDouble(tuneDims[d].minKey, -1e9, 1e9) + : tuneDims[d].defaultMin; + qrsMaxs[d] = cfg.contains(tuneDims[d].maxKey) + ? cfg.getDouble(tuneDims[d].maxKey, -1e9, 1e9) + : tuneDims[d].defaultMax; if(qrsMins[d] >= qrsMaxs[d]) throw StringError( - string("tune-params: ") + rangeMinKeys[d] + " must be < " + rangeMaxKeys[d]); + string("tune-params: ") + tuneDims[d].minKey + " must be < " + tuneDims[d].maxKey); + } + { + string rangeStr; + for(int d = 0; d < nDims; d++) { + if(d > 0) rangeStr += ", "; + rangeStr += string(tuneDims[d].name) + "=[" + + Global::strprintf("%.4f", qrsMins[d]) + "," + Global::strprintf("%.4f", qrsMaxs[d]) + "]"; + } + logger.write("QRS ranges: " + rangeStr); } - logger.write( - "QRS ranges: cpuctExplorationLog=[" + - Global::strprintf("%.4f", qrsMins[0]) + "," + Global::strprintf("%.4f", qrsMaxs[0]) + "]" - ); //Load search params for both bots vector paramss = Setup::loadParams(cfg, Setup::SETUP_FOR_MATCH); @@ -274,11 +265,9 @@ int MainCmds::tuneparams(const vector& args) { for(int trial = 0; trial < numTrials; trial++) { vector sample = tuner.nextSample(); - double cpuctExplorationLog; - qrsToPUCT(sample, cpuctExplorationLog, qrsMins, qrsMaxs); - SearchParams expParams = paramss[1]; - expParams.cpuctExplorationLog = cpuctExplorationLog; + for(int d = 0; d < nDims; d++) + expParams.*(tuneDims[d].field) = qrsDimToReal(d, sample[d], qrsMins, qrsMaxs); //Alternate colors to remove first-move advantage bias bool expIsBlack = (trial % 2 == 0); @@ -363,12 +352,12 @@ int MainCmds::tuneparams(const vector& args) { bool clampedDims[nDims]; if(computeParamCIs(tuner, vBest, qrsMins, qrsMaxs, ciLo, ciHi, clampedDims)) { for(int d = 0; d < nDims; d++) { - paramStr += Global::strprintf(" %s=[%.4f, %.4f]", paramShortNames[d], 
ciLo[d], ciHi[d]); + paramStr += Global::strprintf(" %s=[%.4f, %.4f]", tuneDims[d].shortName, ciLo[d], ciHi[d]); if(clampedDims[d]) paramStr += "*"; } } else { for(int d = 0; d < nDims; d++) - paramStr += Global::strprintf(" %s=%.4f", paramShortNames[d], qrsDimToReal(d, vBest[d], qrsMins, qrsMaxs)); + paramStr += Global::strprintf(" %s=%.4f", tuneDims[d].shortName, qrsDimToReal(d, vBest[d], qrsMins, qrsMaxs)); } logger.write(Global::strprintf( "[%d%%] %d/%d | W=%d L=%d D=%d |%s | ETA %s", @@ -380,8 +369,6 @@ int MainCmds::tuneparams(const vector& args) { //Final result vector vBest = tuner.bestCoords(); - double bestLog; - qrsToPUCT(vBest, bestLog, qrsMins, qrsMaxs); logger.write(""); logger.write("=== tune-params Results ==="); @@ -401,28 +388,32 @@ int MainCmds::tuneparams(const vector& args) { if(hasCIs) { string warn = clampedDims[d] ? " [boundary - CI may be unreliable]" : ""; logger.write(Global::strprintf("Best %-25s = %.4f 95%%CI [%.4f, %.4f]%s", - paramNames[d], bestReal, ciLo[d], ciHi[d], warn.c_str())); + tuneDims[d].name, bestReal, ciLo[d], ciHi[d], warn.c_str())); } else { logger.write(Global::strprintf("Best %-25s = %.4f (CI unavailable)", - paramNames[d], bestReal)); + tuneDims[d].name, bestReal)); } } } - logger.write( - "QRS raw coordinates: [" + Global::doubleToString(vBest[0]) + "]" - ); + { + string rawStr; + for(int d = 0; d < nDims; d++) { + if(d > 0) rawStr += ", "; + rawStr += Global::doubleToString(vBest[d]); + } + logger.write("QRS raw coordinates: [" + rawStr + "]"); + } //ASCII-art regression curves (one per PUCT dimension) printRegressionCurves(tuner, vBest, qrsMins, qrsMaxs, logger); //Suggested match command for verification { - string overrides = Global::strprintf( - "botName0=tuned,botName1=default," - "cpuctExplorationLog0=%.4f," - "numGameThreads=8,numGamesTotal=200", - bestLog - ); + string overrides = "botName0=tuned,botName1=default,"; + for(int d = 0; d < nDims; d++) + overrides += string(tuneDims[d].name) + "0=" + + 
Global::strprintf("%.4f", qrsDimToReal(d, vBest[d], qrsMins, qrsMaxs)) + ","; + overrides += "numGameThreads=8,numGamesTotal=200"; overrides += ",nnModelFile0=" + nnModelFile0 + ",nnModelFile1=" + nnModelFile1; logger.write(""); logger.write("To verify, run a match of tuned (bot0) vs default (bot1):"); diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 28b014cc0..bc66f740a 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -6,9 +6,9 @@ # and an experiment bot (bot1) whose PUCT parameters are adapted each trial using # QRS-Tune (Quadratic Regression Sequential optimization). # -# After all trials, it reports the best-found value for cpuctExplorationLog, -# along with an ASCII regression curve showing the parameter's estimated -# effect on win rate. +# After all trials, it reports the best-found values for cpuctExplorationLog, +# cpuctUtilityStdevPrior, and cpuctUtilityStdevPriorWeight, along with ASCII +# regression curves showing each parameter's estimated effect on win rate. # # See gtp config and match config for descriptions of most search and GPU params. @@ -18,11 +18,18 @@ # A few hundred trials is a reasonable starting point; 1000+ for higher confidence. numTrials = 500 -# Search range for the PUCT parameter being tuned. -# The optimizer explores within [Min, Max]. -# If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0]. +# Search ranges for the PUCT parameters being tuned. +# The optimizer explores within [Min, Max] for each parameter. 
+# If omitted, defaults are used: +# cpuctExplorationLog [0.05, 1.0] +# cpuctUtilityStdevPrior [0.10, 0.80] +# cpuctUtilityStdevPriorWeight [0.5, 5.0] # cpuctExplorationLogMin = 0.05 # cpuctExplorationLogMax = 1.0 +# cpuctUtilityStdevPriorMin = 0.10 +# cpuctUtilityStdevPriorMax = 0.80 +# cpuctUtilityStdevPriorWeightMin = 0.5 +# cpuctUtilityStdevPriorWeightMax = 5.0 # Logs------------------------------------------------------------------------------------ @@ -93,12 +100,14 @@ chosenMoveTemperatureEarly = 0.60 chosenMoveTemperature = 0.20 # Internal params------------------------------------------------------------------------------ -# These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog -# will be overridden by the optimizer. +# These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog, +# cpuctUtilityStdevPrior, and cpuctUtilityStdevPriorWeight will be overridden +# by the optimizer. # cpuctExploration = 0.9 # cpuctExplorationLog = 0.4 # cpuctUtilityStdevPrior = 0.40 +# cpuctUtilityStdevPriorWeight = 2.0 # fpuReductionMax = 0.2 # rootFpuReductionMax = 0.1 # valueWeightExponent = 0.25 From 41139eed6264909dcb0232873c602ec8ba5cb4f2 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Sat, 4 Apr 2026 10:31:00 +0800 Subject: [PATCH 33/41] Rename verbose log label from convex=Y/N to concave=Y/N Y now indicates the normal case (proper concave maximum found), matching user expectation that Y = good and N = problematic. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/qrstune/QRSOptimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index ae227017d..b21bac094 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -501,7 +501,7 @@ void QRSTune::QRSTuner::addResult(const vector& x, double y) { diag += Global::strprintf("%.4f", b[1 + D_ + d]); } diag += "]"; - diag += " convex=" + string(model_.hasConvexDim() ? "Y" : "N"); + diag += " concave=" + string(model_.hasConvexDim() ? "N" : "Y"); diag += " bestQRS=["; for(int d = 0; d < D_; d++) { if(d > 0) diag += ","; From eb8ca5e385f3d068fd026c51d96e49b2b32bb65e Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:03:54 +0800 Subject: [PATCH 34/41] Remove cpuctUtilityStdevPriorWeight from tune-params and soften pruning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop cpuctUtilityStdevPriorWeight from the tuned dimensions — its peak effect on explore scaling is ~4% (at ~15-visit nodes), far too small to detect in 1000 binary-outcome games, and its persistently convex quadratic coefficient prevents QRS convergence. This reduces the model from 10 parameters (3D) to 6 (2D), stabilizing the quadratic fit. Also soften buffer pruning to never remove more than half the buffer in one pass, avoiding sudden data cliffs that destabilize the fit. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 1 - cpp/configs/tune_params_example.cfg | 14 +++++--------- cpp/qrstune/QRSOptimizer.cpp | 7 +++++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index b1698f366..609427d09 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -44,7 +44,6 @@ struct TuneDimension { static const TuneDimension tuneDims[] = { {"cpuctExplorationLog", "Log", 0.05, 1.0, "cpuctExplorationLogMin", "cpuctExplorationLogMax", &SearchParams::cpuctExplorationLog}, {"cpuctUtilityStdevPrior", "Prior", 0.10, 0.80, "cpuctUtilityStdevPriorMin", "cpuctUtilityStdevPriorMax", &SearchParams::cpuctUtilityStdevPrior}, - {"cpuctUtilityStdevPriorWeight","PriorW",0.5, 5.0, "cpuctUtilityStdevPriorWeightMin","cpuctUtilityStdevPriorWeightMax",&SearchParams::cpuctUtilityStdevPriorWeight}, }; static const int nDims = sizeof(tuneDims) / sizeof(tuneDims[0]); diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index bc66f740a..4b17c28d5 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -6,9 +6,9 @@ # and an experiment bot (bot1) whose PUCT parameters are adapted each trial using # QRS-Tune (Quadratic Regression Sequential optimization). # -# After all trials, it reports the best-found values for cpuctExplorationLog, -# cpuctUtilityStdevPrior, and cpuctUtilityStdevPriorWeight, along with ASCII -# regression curves showing each parameter's estimated effect on win rate. +# After all trials, it reports the best-found values for cpuctExplorationLog +# and cpuctUtilityStdevPrior, along with ASCII regression curves showing each +# parameter's estimated effect on win rate. # # See gtp config and match config for descriptions of most search and GPU params. 
@@ -23,13 +23,10 @@ numTrials = 500 # If omitted, defaults are used: # cpuctExplorationLog [0.05, 1.0] # cpuctUtilityStdevPrior [0.10, 0.80] -# cpuctUtilityStdevPriorWeight [0.5, 5.0] # cpuctExplorationLogMin = 0.05 # cpuctExplorationLogMax = 1.0 # cpuctUtilityStdevPriorMin = 0.10 # cpuctUtilityStdevPriorMax = 0.80 -# cpuctUtilityStdevPriorWeightMin = 0.5 -# cpuctUtilityStdevPriorWeightMax = 5.0 # Logs------------------------------------------------------------------------------------ @@ -100,9 +97,8 @@ chosenMoveTemperatureEarly = 0.60 chosenMoveTemperature = 0.20 # Internal params------------------------------------------------------------------------------ -# These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog, -# cpuctUtilityStdevPrior, and cpuctUtilityStdevPriorWeight will be overridden -# by the optimizer. +# These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog +# and cpuctUtilityStdevPrior will be overridden by the optimizer. # cpuctExploration = 0.9 # cpuctExplorationLog = 0.4 diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index b21bac094..176a551ef 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -376,7 +376,10 @@ void QRSTune::QRSBuffer::add(const vector& x, double y) { // preserving spatial diversity from early uniform exploration. void QRSTune::QRSBuffer::prune(const QRSModel& model) { int N = (int)xs_.size(); - if(N <= min_keep_ * 2) return; + // Never remove more than half the buffer in one pass to avoid + // destabilizing the quadratic fit with a sudden data cliff. 
+ int keepFloor = max(min_keep_, N / 2); + if(N <= keepFloor) return; double bestPrediction = 0.0; vector preds(N); @@ -389,7 +392,7 @@ void QRSTune::QRSBuffer::prune(const QRSModel& model) { vector> newXs; vector newYs; for(int i = 0; i < N; i++) { - if(preds[i] >= threshold || (int)newXs.size() < min_keep_) { + if(preds[i] >= threshold || (int)newXs.size() < keepFloor) { newXs.push_back(std::move(xs_[i])); newYs.push_back(ys_[i]); } From 0bc3dfea4ebeba94e5044a8b2348ab65fc388709 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Sun, 5 Apr 2026 09:21:39 +0800 Subject: [PATCH 35/41] Append experiment bot color to QRS sample verbose log Makes tune-params verbose output show whether the experiment bot played as black or white, aiding diagnosis of color-dependent effects. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 2 +- cpp/qrstune/QRSOptimizer.cpp | 4 ++-- cpp/qrstune/QRSOptimizer.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 609427d09..bf36e932b 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -324,7 +324,7 @@ int MainCmds::tuneparams(const vector& args) { logger.write("Warning: trial " + Global::intToString(trial) + " returned null game data"); } - tuner.addResult(sample, outcome); + tuner.addResult(sample, outcome, expIsBlack ? 
" as black" : " as white"); if(shouldStop.load()) break; diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index 176a551ef..4715e4532 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -465,13 +465,13 @@ vector QRSTune::QRSTuner::nextSample() { return x; } -void QRSTune::QRSTuner::addResult(const vector& x, double y) { +void QRSTune::QRSTuner::addResult(const vector& x, double y, const string& logSuffix) { if(!pendingLogMsg_.empty() && logger_) { string label; if(y == 1.0) label = "exp wins"; else if(y == 0.0) label = "exp loses"; else label = "draw"; - logger_->write(pendingLogMsg_ + " -> " + label); + logger_->write(pendingLogMsg_ + " -> " + label + logSuffix); pendingLogMsg_.clear(); } buffer_.add(x, y); diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 12d017588..61003bb81 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -163,7 +163,7 @@ class QRSTuner { // Record the outcome of a trial. // y: 1.0 = win, 0.0 = loss, 0.5 = draw - void addResult(const std::vector& x, double y); + void addResult(const std::vector& x, double y, const std::string& logSuffix = ""); // Return current MAP optimum in [-1,+1]^D std::vector bestCoords() const; From 16748a3f37ffef0a0cca33606a5c83e825ff2f05 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Mon, 6 Apr 2026 19:56:04 +0800 Subject: [PATCH 36/41] Remove cpuctUtilityStdevPrior from tune-params tuned dimensions 1000-trial tuning run showed cpuctUtilityStdevPrior has a convex (monotonically increasing) response with no optimum in range. A 200-game match confirmed stdev-scaling (0.85 vs 0.0) produces no significant difference (p=0.69) with the b6c64 model at 4kv. The parameter wastes half the tuning budget and causes the QRS optimizer to fall back to non-concave mode. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 1 - cpp/configs/tune_params_example.cfg | 20 ++++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index bf36e932b..25dd64005 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -43,7 +43,6 @@ struct TuneDimension { static const TuneDimension tuneDims[] = { {"cpuctExplorationLog", "Log", 0.05, 1.0, "cpuctExplorationLogMin", "cpuctExplorationLogMax", &SearchParams::cpuctExplorationLog}, - {"cpuctUtilityStdevPrior", "Prior", 0.10, 0.80, "cpuctUtilityStdevPriorMin", "cpuctUtilityStdevPriorMax", &SearchParams::cpuctUtilityStdevPrior}, }; static const int nDims = sizeof(tuneDims) / sizeof(tuneDims[0]); diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 4b17c28d5..73e2bb6e5 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -6,9 +6,9 @@ # and an experiment bot (bot1) whose PUCT parameters are adapted each trial using # QRS-Tune (Quadratic Regression Sequential optimization). # -# After all trials, it reports the best-found values for cpuctExplorationLog -# and cpuctUtilityStdevPrior, along with ASCII regression curves showing each -# parameter's estimated effect on win rate. +# After all trials, it reports the best-found value for cpuctExplorationLog, +# along with an ASCII regression curve showing the parameter's estimated +# effect on win rate. # # See gtp config and match config for descriptions of most search and GPU params. @@ -18,15 +18,12 @@ # A few hundred trials is a reasonable starting point; 1000+ for higher confidence. numTrials = 500 -# Search ranges for the PUCT parameters being tuned. -# The optimizer explores within [Min, Max] for each parameter. 
-# If omitted, defaults are used: -# cpuctExplorationLog [0.05, 1.0] -# cpuctUtilityStdevPrior [0.10, 0.80] +# Search range for the PUCT parameter being tuned. +# The optimizer explores within [Min, Max]. +# If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0] # cpuctExplorationLogMin = 0.05 # cpuctExplorationLogMax = 1.0 -# cpuctUtilityStdevPriorMin = 0.10 -# cpuctUtilityStdevPriorMax = 0.80 +cpuctExplorationLogMax = 10.0 # Logs------------------------------------------------------------------------------------ @@ -98,11 +95,10 @@ chosenMoveTemperature = 0.20 # Internal params------------------------------------------------------------------------------ # These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog -# and cpuctUtilityStdevPrior will be overridden by the optimizer. +# will be overridden by the optimizer. # cpuctExploration = 0.9 # cpuctExplorationLog = 0.4 -# cpuctUtilityStdevPrior = 0.40 # cpuctUtilityStdevPriorWeight = 2.0 # fpuReductionMax = 0.2 # rootFpuReductionMax = 0.1 From af204774ea80c8f13302a133c22e908ec642e58f Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:32:00 +0800 Subject: [PATCH 37/41] Add convergence scaling tests for QRSOptimizer with 100, 1000, and 10000 trials Verify that QRSTuner converges to the true optimum of a shared 2D quadratic landscape with progressively tighter accuracy as trial budget increases. Asserts distance-to-optimum, win probability, monotonic SE shrinkage, and that the true optimum falls within each run's 95% confidence interval. Also lower cpuctExplorationLogMax from 10.0 to 5.0 in tune_params_example.cfg. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/configs/tune_params_example.cfg | 3 +- cpp/qrstune/QRSOptimizer.cpp | 82 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 73e2bb6e5..3cfd9a25f 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -22,8 +22,7 @@ numTrials = 500 # The optimizer explores within [Min, Max]. # If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0] # cpuctExplorationLogMin = 0.05 -# cpuctExplorationLogMax = 1.0 -cpuctExplorationLogMax = 10.0 +cpuctExplorationLogMax = 5.0 # Logs------------------------------------------------------------------------------------ diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index 4715e4532..ac916b276 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -891,4 +891,86 @@ void QRSTune::runTests() { // Fixed: no seed triggers intercept divergence after removing warm-start. testAssert(!diverged); } + + // Test convergence scaling: same 2D quadratic landscape with 100, 1000, + // and 10000 trials. More trials should yield tighter standard errors + // and a closer estimate of the true optimum. + { + // Shared true function: score = 1.5 - 3.0*((x0-0.25)^2 + (x1+0.40)^2) + // Peak winrate = sigmoid(1.5) ~ 0.818, optimum at (0.25, -0.40). 
+ const double trueOpt0 = 0.25; + const double trueOpt1 = -0.40; + const int D = 2; + + const uint64_t tunerSeed = 77; + const double l2_reg = 0.1; + const int refit_every = 10; + const int prune_every = 5; + const double sigma_init = 0.50; + const double sigma_fin = 0.15; + + struct TrialResult { double dist, winProb, se0, se1; }; + + auto runTrials = [&](int numTrials, uint64_t outcomeSeed) -> TrialResult { + mt19937_64 outcomeRng(outcomeSeed); + uniform_real_distribution uni01(0.0, 1.0); + + QRSTuner tuner(D, tunerSeed, numTrials, + l2_reg, refit_every, prune_every, + sigma_init, sigma_fin); + for(int trial = 0; trial < numTrials; trial++) { + vector sample = tuner.nextSample(); + double dx0 = sample[0] - trueOpt0; + double dx1 = sample[1] - trueOpt1; + double winProb = sigmoid(1.5 - 3.0 * (dx0 * dx0 + dx1 * dx1)); + double outcome = (uni01(outcomeRng) < winProb) ? 1.0 : 0.0; + tuner.addResult(sample, outcome); + } + vector best = tuner.bestCoords(); + double dist = hypot(best[0] - trueOpt0, best[1] - trueOpt1); + double wp = tuner.bestWinProb(); + + double se[2]; + bool clamped[2]; + bool ok = tuner.model().computeOptimumSE(tuner.buffer().xs(), se, clamped); + testAssert(ok); + + // True optimum should fall within the 95% CI + testAssert(fabs(best[0] - trueOpt0) < 1.96 * se[0]); + testAssert(fabs(best[1] - trueOpt1) < 1.96 * se[1]); + + cout << " Trials=" << numTrials + << " best=(" << best[0] << ", " << best[1] << ")" + << " dist=" << dist + << " winProb=" << wp + << " SE=(" << se[0] << ", " << se[1] << ")" + << " 95%CI_x0=[" << (best[0] - 1.96 * se[0]) << ", " << (best[0] + 1.96 * se[0]) << "]" + << " 95%CI_x1=[" << (best[1] - 1.96 * se[1]) << ", " << (best[1] + 1.96 * se[1]) << "]" + << endl; + + return {dist, wp, se[0], se[1]}; + }; + + cout << "Convergence scaling (2D quadratic, true optimum at (0.25, -0.40)):" << endl; + TrialResult small = runTrials(100, /*outcomeSeed=*/1001); + TrialResult med = runTrials(1000, /*outcomeSeed=*/1002); + TrialResult large 
= runTrials(10000, /*outcomeSeed=*/1003); + + // 100 trials: rough convergence + testAssert(small.dist < 0.30); + testAssert(small.winProb > 0.60); + + // 1000 trials: solid convergence + testAssert(med.dist < 0.10); + testAssert(med.winProb > 0.75); + + // 10000 trials: tight convergence + testAssert(large.dist < 0.05); + testAssert(large.winProb > 0.75); + + testAssert(large.se0 < med.se0); + testAssert(med.se0 < small.se0); + testAssert(large.se1 < med.se1); + testAssert(med.se1 < small.se1); + } } From f0659aa22dfe7cc333879ccc6a45694d970431ce Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Tue, 7 Apr 2026 07:59:59 +0800 Subject: [PATCH 38/41] Use uniform sampling in QRS optimizer when model has convex dimensions When the fitted quadratic surface has convex dimensions (no reliable peak), sample uniformly across [-1,+1]^D instead of clustering around the origin via N(0, sigma_initial). This avoids center-biased exploration on flat or noise-dominated landscapes where the MAP optimum is meaningless. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/qrstune/QRSOptimizer.cpp | 7 +------ cpp/qrstune/QRSOptimizer.h | 3 ++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index ac916b276..9ce2f07be 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -430,7 +430,7 @@ vector QRSTune::QRSTuner::nextSample() { int F = model_.features(); string sigmaStr; - if(buffer_.size() < F + 1) { + if(buffer_.size() < F + 1 || model_.hasConvexDim()) { // Insufficient data for reliable fit — explore uniformly uniform_real_distribution uni(-1.0, 1.0); for(int i = 0; i < D_; i++) x[i] = uni(rng_); @@ -441,11 +441,6 @@ vector QRSTune::QRSTuner::nextSample() { double progress = (double)trial_count_ / max(1, total_trials_ - 1); double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_); - // When the fit has convex dimensions (noise-dominated), keep exploration - // wide to avoid premature convergence around the unreliable origin. - if(model_.hasConvexDim()) - sigma = sigma_initial_; - normal_distribution noise(0.0, sigma); for(int i = 0; i < D_; i++) x[i] = max(-1.0, min(1.0, x[i] + noise(rng_))); diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h index 61003bb81..9f9c14713 100644 --- a/cpp/qrstune/QRSOptimizer.h +++ b/cpp/qrstune/QRSOptimizer.h @@ -157,7 +157,8 @@ class QRSTuner { double sigma_fin = 0.05); // Propose next point to evaluate. - // During early exploration (< F samples) returns a random point. + // During early exploration (< F+1 samples) or when the model has convex + // dimensions (noise-dominated landscape), returns a uniform random point. // Afterwards: MAP optimum + decaying Gaussian noise clamped to [-1,+1]^D. 
std::vector nextSample(); From 2f07ba0d9e079d62dac4c5c843c7139b29641a78 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 10 Apr 2026 02:14:37 +0000 Subject: [PATCH 39/41] Fix review findings: Bradley-Terry stderr, null game handling, test portability - elo.cpp: Set outStderr to 1e18 (not 0) when Fisher info is zero, so bots with no pairwise data don't falsely appear perfectly confident. Add comment explaining singular-column skip in Gaussian elimination. - tuneparams.cpp: Stop feeding null game outcomes to the optimizer as draws. Track null games separately and abort early if >5% of games return null. - fancymath.h: Fix oneTailedPValue comment to correctly describe the frequentist hypothesis test direction. - QRSOptimizer.cpp: Make convex-fitting test scan multiple seeds instead of relying on a single seed that may not trigger convexity on all platforms. https://claude.ai/code/session_01YPvah124CjKUchjoKrsxe5 --- cpp/command/tuneparams.cpp | 12 +++++- cpp/core/elo.cpp | 6 +++ cpp/core/fancymath.h | 3 +- cpp/qrstune/QRSOptimizer.cpp | 78 +++++++++++++++--------------------- 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 25dd64005..9085d98e9 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -254,7 +254,7 @@ int MainCmds::tuneparams(const vector& args) { const string gameSeedBase = Global::uint64ToHexString(seedRand.nextUInt64()); - int wins = 0, losses = 0, draws = 0; + int wins = 0, losses = 0, draws = 0, nullGames = 0; int reportInterval = std::max(1, numTrials / 10); ClockTimer timer; @@ -319,8 +319,16 @@ int MainCmds::tuneparams(const vector& args) { } delete gameData; } else { - draws++; + nullGames++; logger.write("Warning: trial " + Global::intToString(trial) + " returned null game data"); + //Too many null games corrupt the optimizer with noise — abort early. 
+ if(nullGames * 20 > trial + 1) { + logger.write("Error: >5% of games returned null data (" + + Global::intToString(nullGames) + "/" + Global::intToString(trial + 1) + + "), aborting. Check model files and config."); + break; + } + continue; //Do not feed null outcomes to the optimizer } tuner.addResult(sample, outcome, expIsBlack ? " as black" : " as white"); diff --git a/cpp/core/elo.cpp b/cpp/core/elo.cpp index 4b66ba23e..a2c12909d 100644 --- a/cpp/core/elo.cpp +++ b/cpp/core/elo.cpp @@ -327,6 +327,8 @@ bool ComputeElos::computeBradleyTerryElo( for(int r = col+1; r < M; r++) if(fabs(aug[r][col]) > fabs(aug[piv][col])) piv = r; swap(aug[col], aug[piv]); + //Singular column: bot has no games against others in this subproblem. + //Skip elimination and leave delta[col]=0 (no Elo update for this bot). if(fabs(aug[col][col]) < 1e-12) continue; double inv = 1.0 / aug[col][col]; for(int r = col+1; r < M; r++) { @@ -356,6 +358,9 @@ bool ComputeElos::computeBradleyTerryElo( outElo[i] = (theta[i] - theta[0]) * ELO_PER_LOG_GAMMA; //Fisher information diagonal -> stderr + //A fish of 0 means no pairwise data for this bot, so the Elo is + //unconstrained. Use a large sentinel value rather than 0 (which + //would falsely imply perfect confidence). for(int i = 1; i < N; i++) { double fish = 0.0; for(int j = 0; j < N; j++) { @@ -366,6 +371,7 @@ bool ComputeElos::computeBradleyTerryElo( fish += nij * sigma * (1.0 - sigma); } if(fish > 0.0) outStderr[i] = ELO_PER_LOG_GAMMA / sqrt(fish); + else outStderr[i] = 1e18; } return converged; } diff --git a/cpp/core/fancymath.h b/cpp/core/fancymath.h index 363403eb3..8afca5725 100644 --- a/cpp/core/fancymath.h +++ b/cpp/core/fancymath.h @@ -30,7 +30,8 @@ namespace FancyMath { //Draws should be counted as 0.5 wins before calling. void wilsonCI95(double wins, double n, double& lo, double& hi); - //One-tailed p-value: P(observed winrate <= 0.5 | data), using normal approximation. 
+ //One-tailed p-value for H0: winrate=0.5 vs H1: winrate>0.5, using normal approximation. + //Small values indicate the first player wins significantly more than 50%. double oneTailedPValue(double wins, double n); void runTests(); diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index 9ce2f07be..e596cfeee 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -791,67 +791,53 @@ void QRSTune::runTests() { testAssert(tuner.bestWinProb() > 0.7); } - // Test: Nearly-flat 3D landscape exposes convex-fitting bug. + // Test: Nearly-flat 3D landscape can expose convex-fitting. // // When the true function is nearly flat and we have only ~128 stochastic // trials fitting 10 parameters, noise can make the fitted quadratic convex - // (positive coefficient) in some dimensions. mapOptimum() then returns the - // MINIMUM in those dimensions instead of the maximum. + // (positive coefficient) in some dimensions. We scan seeds to find one + // that triggers this, then verify the invariant: the optimizer's "best" + // should predict at least as well as the origin regardless. 
{ const int D = 3; const int numTrials = 128; const double trueOpt[3] = {0.3, -0.2, 0.4}; const double curvature = 0.1; // very weak — winrate spans only ~0.39-0.50 - mt19937_64 outcomeRng(0); - uniform_real_distribution uni01(0.0, 1.0); + bool foundConvex = false; + for(uint64_t tunerSeed = 0; tunerSeed < 50 && !foundConvex; tunerSeed++) { + mt19937_64 outcomeRng(tunerSeed * 1000); + uniform_real_distribution uni01(0.0, 1.0); - QRSTuner tuner(D, /*seed=*/42, numTrials, - /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5, - /*sigma_init=*/0.60, /*sigma_fin=*/0.20); + QRSTuner tuner(D, /*seed=*/tunerSeed, numTrials, + /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5, + /*sigma_init=*/0.60, /*sigma_fin=*/0.20); - for(int trial = 0; trial < numTrials; trial++) { - vector sample = tuner.nextSample(); - double sc = 0.0; - for(int d = 0; d < D; d++) { - double dx = sample[d] - trueOpt[d]; - sc -= curvature * dx * dx; + for(int trial = 0; trial < numTrials; trial++) { + vector sample = tuner.nextSample(); + double sc = 0.0; + for(int d = 0; d < D; d++) { + double dx = sample[d] - trueOpt[d]; + sc -= curvature * dx * dx; + } + double winProb = sigmoid(sc); + double outcome = (uni01(outcomeRng) < winProb) ? 1.0 : 0.0; + tuner.addResult(sample, outcome); } - double winProb = sigmoid(sc); - double outcome = (uni01(outcomeRng) < winProb) ? 
1.0 : 0.0; - tuner.addResult(sample, outcome); - } - // Probe fitted quadratic coefficients: - // quadCoeff_k = (score(e_k) + score(-e_k) - 2*score(0)) / 2 - const QRSModel& model = tuner.model(); - double origin[3] = {0.0, 0.0, 0.0}; - double s0 = model.score(origin); - bool anyConvex = false; - double probe[3] = {0.0, 0.0, 0.0}; - for(int d = 0; d < D; d++) { - probe[d] = 1.0; - double sp = model.score(probe); - probe[d] = -1.0; - double sn = model.score(probe); - probe[d] = 0.0; - double quadCoeff = (sp + sn - 2.0 * s0) / 2.0; - if(quadCoeff > 0.0) { - anyConvex = true; - break; + if(tuner.model().hasConvexDim()) { + foundConvex = true; + // Invariant: the optimizer's "best" should predict at least as well + // as the origin, even when some dimensions are convex-fitted. + const QRSModel& model = tuner.model(); + double origin[3] = {0.0, 0.0, 0.0}; + double probAtBest = tuner.bestWinProb(); + double probAtOrigin = model.predict(origin); + testAssert(probAtBest >= probAtOrigin); } } - // With these seeds, noise overwhelms the weak signal and at least one - // fitted dimension ends up convex (positive quadratic coefficient). - testAssert(anyConvex); - - // Invariant: the optimizer's "best" should predict at least as well as - // an arbitrary point like the origin. Currently fails because - // mapOptimum() returns the critical point of the fitted quadratic - // without checking whether it is a maximum or minimum. - double probAtBest = tuner.bestWinProb(); - double probAtOrigin = model.predict(origin); - testAssert(probAtBest >= probAtOrigin); + // At least one seed should trigger convex fitting in a nearly-flat landscape. 
+ testAssert(foundConvex); } // Regression test for Newton-Raphson intercept divergence on a flat 2D From e7b63a509f6d8513eca1fcf49d60a0b99a21ebf7 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Mon, 13 Apr 2026 06:52:00 +0800 Subject: [PATCH 40/41] Simplify convergence tests to 1D and add flat landscape test case Refactor QRS convergence scaling tests from 2D quadratic to 1D, testing both steep (curvature=3.0) and flat (curvature=0.2) landscapes. The flat landscape validates behavior when the signal is weak and adaptive sampling may push the model toward boundaries. Also comment out cpuctExplorationLogMax override in example config to use the default range [0.05, 1.0]. Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/configs/tune_params_example.cfg | 2 +- cpp/qrstune/QRSOptimizer.cpp | 117 +++++++++++++++++----------- 2 files changed, 73 insertions(+), 46 deletions(-) diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index 3cfd9a25f..b8fb0bae5 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -22,7 +22,7 @@ numTrials = 500 # The optimizer explores within [Min, Max]. # If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0] # cpuctExplorationLogMin = 0.05 -cpuctExplorationLogMax = 5.0 +# cpuctExplorationLogMax = 1.0 # Logs------------------------------------------------------------------------------------ diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp index e596cfeee..6c3bafd78 100644 --- a/cpp/qrstune/QRSOptimizer.cpp +++ b/cpp/qrstune/QRSOptimizer.cpp @@ -873,16 +873,16 @@ void QRSTune::runTests() { testAssert(!diverged); } - // Test convergence scaling: same 2D quadratic landscape with 100, 1000, + // Test convergence scaling: 1D quadratic landscapes with 100, 1000, // and 10000 trials. More trials should yield tighter standard errors // and a closer estimate of the true optimum. 
+ // + // Two landscapes are tested: + // Steep: score = 1.5 - 3.0*(x-0.25)^2, peak winrate ~0.818 + // Flat: score = 0.1 - 0.2*(x-0.25)^2, peak winrate ~0.525 (15x weaker curvature) { - // Shared true function: score = 1.5 - 3.0*((x0-0.25)^2 + (x1+0.40)^2) - // Peak winrate = sigmoid(1.5) ~ 0.818, optimum at (0.25, -0.40). - const double trueOpt0 = 0.25; - const double trueOpt1 = -0.40; - const int D = 2; - + const double trueOpt = 0.25; + const int D = 1; const uint64_t tunerSeed = 77; const double l2_reg = 0.1; const int refit_every = 10; @@ -890,9 +890,10 @@ void QRSTune::runTests() { const double sigma_init = 0.50; const double sigma_fin = 0.15; - struct TrialResult { double dist, winProb, se0, se1; }; + struct TrialResult { double dist, winProb, se; bool seOk; }; - auto runTrials = [&](int numTrials, uint64_t outcomeSeed) -> TrialResult { + auto runTrials = [&](double intercept, double curvature, + int numTrials, uint64_t outcomeSeed) -> TrialResult { mt19937_64 outcomeRng(outcomeSeed); uniform_real_distribution uni01(0.0, 1.0); @@ -901,57 +902,83 @@ void QRSTune::runTests() { sigma_init, sigma_fin); for(int trial = 0; trial < numTrials; trial++) { vector sample = tuner.nextSample(); - double dx0 = sample[0] - trueOpt0; - double dx1 = sample[1] - trueOpt1; - double winProb = sigmoid(1.5 - 3.0 * (dx0 * dx0 + dx1 * dx1)); + double dx = sample[0] - trueOpt; + double winProb = sigmoid(intercept - curvature * dx * dx); double outcome = (uni01(outcomeRng) < winProb) ? 
1.0 : 0.0; tuner.addResult(sample, outcome); } vector best = tuner.bestCoords(); - double dist = hypot(best[0] - trueOpt0, best[1] - trueOpt1); + double dist = fabs(best[0] - trueOpt); double wp = tuner.bestWinProb(); - double se[2]; - bool clamped[2]; - bool ok = tuner.model().computeOptimumSE(tuner.buffer().xs(), se, clamped); - testAssert(ok); - - // True optimum should fall within the 95% CI - testAssert(fabs(best[0] - trueOpt0) < 1.96 * se[0]); - testAssert(fabs(best[1] - trueOpt1) < 1.96 * se[1]); + double se[1] = {-1.0}; + bool clamped[1]; + bool seOk = tuner.model().computeOptimumSE(tuner.buffer().xs(), se, clamped); cout << " Trials=" << numTrials - << " best=(" << best[0] << ", " << best[1] << ")" + << " best=" << best[0] << " dist=" << dist << " winProb=" << wp - << " SE=(" << se[0] << ", " << se[1] << ")" - << " 95%CI_x0=[" << (best[0] - 1.96 * se[0]) << ", " << (best[0] + 1.96 * se[0]) << "]" - << " 95%CI_x1=[" << (best[1] - 1.96 * se[1]) << ", " << (best[1] + 1.96 * se[1]) << "]" - << endl; - - return {dist, wp, se[0], se[1]}; + << " convex=" << (tuner.model().hasConvexDim() ? "Y" : "N") + << " seOk=" << (seOk ? "Y" : "N"); + if(seOk) + cout << " SE=" << se[0] + << " 95%CI=[" << (best[0] - 1.96 * se[0]) << ", " << (best[0] + 1.96 * se[0]) << "]"; + cout << endl; + + return {dist, wp, seOk ? 
se[0] : -1.0, seOk}; }; - cout << "Convergence scaling (2D quadratic, true optimum at (0.25, -0.40)):" << endl; - TrialResult small = runTrials(100, /*outcomeSeed=*/1001); - TrialResult med = runTrials(1000, /*outcomeSeed=*/1002); - TrialResult large = runTrials(10000, /*outcomeSeed=*/1003); + // --- Steep landscape: curvature=3.0, peak winrate ~0.818 --- + cout << "Convergence scaling (1D quadratic, true optimum at 0.25):" << endl; + { + TrialResult small = runTrials(1.5, 3.0, 100, /*outcomeSeed=*/1001); + TrialResult med = runTrials(1.5, 3.0, 1000, /*outcomeSeed=*/1002); + TrialResult large = runTrials(1.5, 3.0, 10000, /*outcomeSeed=*/1003); - // 100 trials: rough convergence - testAssert(small.dist < 0.30); - testAssert(small.winProb > 0.60); + testAssert(small.seOk); + testAssert(med.seOk); + testAssert(large.seOk); + + // True optimum should fall within the 95% CI + testAssert(fabs(small.dist) < 1.96 * small.se); + testAssert(fabs(med.dist) < 1.96 * med.se); + testAssert(fabs(large.dist) < 1.96 * large.se); - // 1000 trials: solid convergence - testAssert(med.dist < 0.10); - testAssert(med.winProb > 0.75); + // 100 trials: rough convergence + testAssert(small.dist < 0.30); + testAssert(small.winProb > 0.60); - // 10000 trials: tight convergence - testAssert(large.dist < 0.05); - testAssert(large.winProb > 0.75); + // 1000 trials: solid convergence + testAssert(med.dist < 0.10); + testAssert(med.winProb > 0.75); - testAssert(large.se0 < med.se0); - testAssert(med.se0 < small.se0); - testAssert(large.se1 < med.se1); - testAssert(med.se1 < small.se1); + // 10000 trials: tight convergence + testAssert(large.dist < 0.05); + testAssert(large.winProb > 0.75); + + testAssert(large.se < med.se); + testAssert(med.se < small.se); + } + + // --- Flat landscape: curvature=0.2, peak winrate ~0.525 --- + // With weak curvature, adaptive sampling can lead the model to the boundary, + // making SE non-monotonic across trial counts. 
+ cout << "Flat convergence scaling (1D, curvature=0.2, true optimum at 0.25):" << endl; + { + TrialResult small = runTrials(0.10, 0.20, 100, /*outcomeSeed=*/1001); + TrialResult med = runTrials(0.10, 0.20, 1000, /*outcomeSeed=*/1002); + TrialResult large = runTrials(0.10, 0.20, 10000, /*outcomeSeed=*/1003); + + // 100 trials on a flat function: may not converge at all + testAssert(small.winProb > 0.40); + + // 1000 trials: rough convergence (may still be far off on flat landscape) + testAssert(med.winProb > 0.45); + + // 10000 trials: moderate convergence (peak winrate is only 0.525) + testAssert(large.dist < 0.50); + testAssert(large.winProb > 0.48); + } } } From 54833cbeceb3b7dfaa5a73b0b95adc5f05daaaea Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Thu, 16 Apr 2026 07:24:09 +0800 Subject: [PATCH 41/41] Switch tune-params dimension from cpuctExplorationLog to cpuctExploration Tune the constant cpuctExploration factor directly in linear space over [0.5, 1.5] instead of the log-scaling cpuctExplorationLog coefficient over [0.05, 1.0]. Update the example config's doc comments, range keys, and drop the stale cpuctExplorationLog entry from the fixed-bot0 reference list. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/command/tuneparams.cpp | 2 +- cpp/configs/tune_params_example.cfg | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp index 9085d98e9..cd4809a92 100644 --- a/cpp/command/tuneparams.cpp +++ b/cpp/command/tuneparams.cpp @@ -42,7 +42,7 @@ struct TuneDimension { }; static const TuneDimension tuneDims[] = { - {"cpuctExplorationLog", "Log", 0.05, 1.0, "cpuctExplorationLogMin", "cpuctExplorationLogMax", &SearchParams::cpuctExplorationLog}, + {"cpuctExploration", "Cpuct", 0.5, 1.5, "cpuctExplorationMin", "cpuctExplorationMax", &SearchParams::cpuctExploration}, }; static const int nDims = sizeof(tuneDims) / sizeof(tuneDims[0]); diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg index b8fb0bae5..da3774fbd 100644 --- a/cpp/configs/tune_params_example.cfg +++ b/cpp/configs/tune_params_example.cfg @@ -6,7 +6,7 @@ # and an experiment bot (bot1) whose PUCT parameters are adapted each trial using # QRS-Tune (Quadratic Regression Sequential optimization). # -# After all trials, it reports the best-found value for cpuctExplorationLog, +# After all trials, it reports the best-found value for cpuctExploration, # along with an ASCII regression curve showing the parameter's estimated # effect on win rate. # @@ -20,9 +20,9 @@ numTrials = 500 # Search range for the PUCT parameter being tuned. # The optimizer explores within [Min, Max]. 
-# If omitted, defaults are used: cpuctExplorationLog [0.05, 1.0] -# cpuctExplorationLogMin = 0.05 -# cpuctExplorationLogMax = 1.0 +# If omitted, defaults are used: cpuctExploration [0.5, 1.5] +# cpuctExplorationMin = 0.5 +# cpuctExplorationMax = 1.5 # Logs------------------------------------------------------------------------------------ @@ -93,11 +93,10 @@ chosenMoveTemperatureEarly = 0.60 chosenMoveTemperature = 0.20 # Internal params------------------------------------------------------------------------------ -# These are the FIXED params for bot0 (reference). Bot1's cpuctExplorationLog +# These are the FIXED params for bot0 (reference). Bot1's cpuctExploration # will be overridden by the optimizer. # cpuctExploration = 0.9 -# cpuctExplorationLog = 0.4 # cpuctUtilityStdevPriorWeight = 2.0 # fpuReductionMax = 0.2 # rootFpuReductionMax = 0.1