diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index e1097455f..01867a4c4 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -20,63 +20,64 @@ #define CUOPT_INSTANTIATE_INT64 0 /* @brief LP/MIP parameter string constants */ -#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" -#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" -#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" -#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" -#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" -#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" -#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" -#define CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" -#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" -#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" -#define CUOPT_ITERATION_LIMIT "iteration_limit" -#define CUOPT_TIME_LIMIT "time_limit" -#define CUOPT_WORK_LIMIT "work_limit" -#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" -#define CUOPT_METHOD "method" -#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" -#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" -#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" -#define CUOPT_LOG_FILE "log_file" -#define CUOPT_LOG_TO_CONSOLE "log_to_console" -#define CUOPT_CROSSOVER "crossover" -#define CUOPT_FOLDING "folding" -#define CUOPT_AUGMENTED "augmented" -#define CUOPT_DUALIZE "dualize" -#define CUOPT_ORDERING "ordering" -#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" -#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" -#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" -#define CUOPT_PRESOLVE "presolve" -#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" -#define CUOPT_MIP_DETERMINISM_MODE 
"mip_determinism_mode" -#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" -#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" -#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" -#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" -#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" -#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" -#define CUOPT_MIP_SCALING "mip_scaling" -#define CUOPT_MIP_PRESOLVE "mip_presolve" -#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" -#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" -#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" -#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" -#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" -#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" -#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" -#define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" -#define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" -#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY "mip_cut_min_orthogonality" -#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching" -#define CUOPT_SOLUTION_FILE "solution_file" -#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" -#define CUOPT_NUM_GPUS "num_gpus" -#define CUOPT_USER_PROBLEM_FILE "user_problem_file" -#define CUOPT_PRESOLVE_FILE "presolve_file" -#define CUOPT_RANDOM_SEED "random_seed" -#define CUOPT_PDLP_PRECISION "pdlp_precision" +#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" +#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" +#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" +#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" +#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" +#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" +#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" +#define 
CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" +#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" +#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" +#define CUOPT_ITERATION_LIMIT "iteration_limit" +#define CUOPT_TIME_LIMIT "time_limit" +#define CUOPT_WORK_LIMIT "work_limit" +#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" +#define CUOPT_METHOD "method" +#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" +#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" +#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" +#define CUOPT_LOG_FILE "log_file" +#define CUOPT_LOG_TO_CONSOLE "log_to_console" +#define CUOPT_CROSSOVER "crossover" +#define CUOPT_FOLDING "folding" +#define CUOPT_AUGMENTED "augmented" +#define CUOPT_DUALIZE "dualize" +#define CUOPT_ORDERING "ordering" +#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" +#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" +#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" +#define CUOPT_PRESOLVE "presolve" +#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" +#define CUOPT_MIP_DETERMINISM_MODE "mip_determinism_mode" +#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" +#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" +#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" +#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" +#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" +#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" +#define CUOPT_MIP_SCALING "mip_scaling" +#define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" +#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" +#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" +#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" +#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" +#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" 
+#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" +#define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" +#define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" +#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY "mip_cut_min_orthogonality" +#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching" +#define CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING "mip_batch_pdlp_reliability_branching" +#define CUOPT_SOLUTION_FILE "solution_file" +#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" +#define CUOPT_NUM_GPUS "num_gpus" +#define CUOPT_USER_PROBLEM_FILE "user_problem_file" +#define CUOPT_PRESOLVE_FILE "presolve_file" +#define CUOPT_RANDOM_SEED "random_seed" +#define CUOPT_PDLP_PRECISION "pdlp_precision" /* @brief MIP determinism mode constants */ #define CUOPT_MODE_OPPORTUNISTIC 0 diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 8f4a6c1d7..3da9ea8f1 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -94,13 +94,16 @@ class mip_solver_settings_t { i_t mixed_integer_gomory_cuts = -1; i_t knapsack_cuts = -1; i_t clique_cuts = -1; - i_t strong_chvatal_gomory_cuts = -1; - i_t reduced_cost_strengthening = -1; - f_t cut_change_threshold = -1.0; - f_t cut_min_orthogonality = 0.5; - i_t mip_batch_pdlp_strong_branching = 0; - i_t num_gpus = 1; - bool log_to_console = true; + i_t strong_chvatal_gomory_cuts = -1; + i_t reduced_cost_strengthening = -1; + f_t cut_change_threshold = -1.0; + f_t cut_min_orthogonality = 0.5; + i_t mip_batch_pdlp_strong_branching{ + 0}; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t mip_batch_pdlp_reliability_branching{ + 0}; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t num_gpus = 1; + bool log_to_console = true; std::string log_file; std::string 
sol_file; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 86ce4d8db..6abefb2d5 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -17,6 +17,7 @@ #include #include +#include namespace cuopt::linear_programming { @@ -147,6 +148,12 @@ class pdlp_solver_settings_t { * @param[in] initial_primal_weight Initial primal weight. */ void set_initial_primal_weight(f_t initial_primal_weight); + /** + * @brief Set an initial pdlp iteration. + * + * @param[in] initial_pdlp_iteration Initial pdlp iteration. + */ + void set_initial_pdlp_iteration(i_t initial_pdlp_iteration); /** * @brief Set the pdlp warm start data. This allows to restart PDLP with a @@ -213,6 +220,8 @@ class pdlp_solver_settings_t { std::optional get_initial_step_size() const; // TODO batch mode: tmp std::optional get_initial_primal_weight() const; + // TODO batch mode: tmp + std::optional get_initial_pdlp_iteration() const; const rmm::device_uvector& get_initial_primal_solution() const; const rmm::device_uvector& get_initial_dual_solution() const; @@ -265,6 +274,8 @@ class pdlp_solver_settings_t { bool inside_mip{false}; // For concurrent termination std::atomic* concurrent_halt{nullptr}; + // Shared strong branching solved flags for cooperative DS + PDLP + std::span> shared_sb_solved; static constexpr f_t minimal_absolute_tolerance = 1.0e-12; pdlp_hyper_params::pdlp_hyper_params_t hyper_params; // Holds the information of new variable lower and upper bounds for each climber in the format: @@ -273,6 +284,12 @@ class pdlp_solver_settings_t { // concurrently i.e. 
if new_bounds.size() == 2, then 2 versions of the problem with updated bounds // will be solved concurrently std::vector> new_bounds; + // By default to save memory and speed we don't store and copy each climber's primal and dual + // solutions We only retrieve termination statistics and the objective values + bool generate_batch_primal_dual_solution{false}; + // Used to force batch PDLP to solve a subbatch of the problems at a time + // The 0 default value will make the solver use its heuristic to determine the subbatch size + i_t sub_batch_size{0}; private: /** Initial primal solution */ @@ -285,6 +302,9 @@ class pdlp_solver_settings_t { /** Initial primal weight */ // TODO batch mode: tmp std::optional initial_primal_weight_; + /** Initial pdlp iteration */ + // TODO batch mode: tmp + std::optional initial_pdlp_iteration_; /** GPU-backed warm start data (device_uvector), used by C++ API and local GPU solves */ pdlp_warm_start_data_t pdlp_warm_start_data_; /** Warm start data as spans over external memory, used by Cython/Python interface */ diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ba2b63983..b940134fb 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -846,7 +846,9 @@ branch_variable_t branch_and_bound_t::variable_selection( exploration_stats_, upper_bound_, worker_pool_.num_idle_workers(), - log); + log, + new_slacks_, + original_lp_); } else { branch_var = pc_.variable_selection(fractional, solution, log); } @@ -2448,12 +2450,14 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut pc_.resize(original_lp_.num_cols); { raft::common::nvtx::range scope_sb("BB::strong_branching"); - strong_branching(original_problem_, - original_lp_, + strong_branching(original_lp_, settings_, exploration_stats_.start_time, + new_slacks_, var_types_, root_relax_soln_.x, + root_relax_soln_.y, + root_relax_soln_.z, fractional, 
root_objective_, root_vstatus_, diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index ee7e2f780..4625b4343 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -6,14 +6,19 @@ /* clang-format on */ #include +#include #include #include #include #include +#include + #include +#include + #include #include @@ -22,6 +27,12 @@ namespace cuopt::linear_programming::dual_simplex { namespace { +static bool is_dual_simplex_done(dual::status_t status) +{ + return status == dual::status_t::DUAL_UNBOUNDED || status == dual::status_t::OPTIMAL || + status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF; +} + template void strong_branch_helper(i_t start, i_t end, @@ -34,11 +45,19 @@ void strong_branch_helper(i_t start, const std::vector& root_soln, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc) + pseudo_costs_t& pc, + std::vector& ds_obj_down, + std::vector& ds_obj_up, + std::vector& ds_status_down, + std::vector& ds_status_up, + std::atomic* concurrent_halt, + shared_strong_branching_context_view_t& sb_view) { raft::common::nvtx::range scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; + assert(concurrent_halt != nullptr && "Concurrent halt pointer cannot be nullptr"); + constexpr bool verbose = false; f_t last_log = tic(); i_t thread_id = omp_get_thread_num(); @@ -47,6 +66,19 @@ void strong_branch_helper(i_t start, for (i_t branch = 0; branch < 2; branch++) { // Do the down branch + const i_t shared_idx = (branch == 0) ? k : k + static_cast(fractional.size()); + // Batch PDLP has already solved this subproblem, skip it + if (sb_view.is_valid() && sb_view.is_solved(shared_idx)) { + settings.log.printf( + "[COOP SB] DS thread %d skipping variable %d branch %s (shared_idx %d): already solved " + "by PDLP\n", + thread_id, + j, + branch == 0 ? 
"down" : "up", + shared_idx); + continue; + } + if (branch == 0) { child_problem.lower[j] = original_lp.lower[j]; child_problem.upper[j] = std::floor(root_soln[j]); @@ -59,7 +91,7 @@ void strong_branch_helper(i_t start, child_settings.set_log(false); f_t lp_start_time = tic(); f_t elapsed_time = toc(start_time); - if (elapsed_time > settings.time_limit) { break; } + if (elapsed_time > settings.time_limit || *concurrent_halt == 1) { break; } child_settings.time_limit = std::max(0.0, settings.time_limit - elapsed_time); child_settings.iteration_limit = 200; lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); @@ -80,7 +112,8 @@ void strong_branch_helper(i_t start, if (status == dual::status_t::DUAL_UNBOUNDED) { // LP was infeasible obj = std::numeric_limits::infinity(); - } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT) { + } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || + status == dual::status_t::CUTOFF) { obj = compute_objective(child_problem, solution.x); } else { settings.log.debug("Thread id %2d remaining %d variable %d branch %d status %d\n", @@ -93,6 +126,8 @@ void strong_branch_helper(i_t start, if (branch == 0) { pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + ds_obj_down[k] = std::max(obj - root_obj, 0.0); + ds_status_down[k] = status; if (verbose) { settings.log.printf("Thread id %2d remaining %d variable %d branch %d obj %e time %.2f\n", thread_id, @@ -104,6 +139,8 @@ void strong_branch_helper(i_t start, } } else { pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + ds_obj_up[k] = std::max(obj - root_obj, 0.0); + ds_status_up[k] = status; if (verbose) { settings.log.printf( "Thread id %2d remaining %d variable %d branch %d obj %e change down %e change up %e " @@ -113,14 +150,29 @@ void strong_branch_helper(i_t start, j, branch, obj, - pc.strong_branch_down[k], - pc.strong_branch_up[k], + ds_obj_down[k], + ds_obj_up[k], toc(start_time)); 
} } - if (toc(start_time) > settings.time_limit) { break; } + // Mark the subproblem as solved so that batch PDLP removes it from the batch + if (sb_view.is_valid()) { + // We could not mark as solved nodes hitting iteration limit in DS + if ((branch == 0 && is_dual_simplex_done(ds_status_down[k])) || + (branch == 1 && is_dual_simplex_done(ds_status_up[k]))) { + sb_view.mark_solved(shared_idx); + settings.log.printf( + "[COOP SB] DS thread %d solved variable %d branch %s (shared_idx %d), marking in " + "shared context\n", + thread_id, + j, + branch == 0 ? "down" : "up", + shared_idx); + } + } + if (toc(start_time) > settings.time_limit || *concurrent_halt == 1) { break; } } - if (toc(start_time) > settings.time_limit) { break; } + if (toc(start_time) > settings.time_limit || *concurrent_halt == 1) { break; } const i_t completed = pc.num_strong_branches_completed++; @@ -135,28 +187,28 @@ void strong_branch_helper(i_t start, child_problem.lower[j] = original_lp.lower[j]; child_problem.upper[j] = original_lp.upper[j]; - if (toc(start_time) > settings.time_limit) { break; } + if (toc(start_time) > settings.time_limit || *concurrent_halt == 1) { break; } } } template -f_t trial_branching(const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings, - const std::vector& var_types, - const std::vector& vstatus, - const std::vector& edge_norms, - const basis_update_mpf_t& basis_factors, - const std::vector& basic_list, - const std::vector& nonbasic_list, - i_t branch_var, - f_t branch_var_lower, - f_t branch_var_upper, - f_t upper_bound, - i_t bnb_lp_iter_per_node, - f_t start_time, - i_t upper_max_lp_iter, - i_t lower_max_lp_iter, - omp_atomic_t& total_lp_iter) +std::pair trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& 
nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + f_t start_time, + i_t upper_max_lp_iter, + i_t lower_max_lp_iter, + omp_atomic_t& total_lp_iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -207,12 +259,12 @@ f_t trial_branching(const lp_problem_t& original_lp, if (status == dual::status_t::DUAL_UNBOUNDED) { // LP was infeasible - return std::numeric_limits::infinity(); + return {std::numeric_limits::infinity(), dual::status_t::DUAL_UNBOUNDED}; } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) { - return compute_objective(child_problem, solution.x); + return {compute_objective(child_problem, solution.x), status}; } else { - return std::numeric_limits::quiet_NaN(); + return {std::numeric_limits::quiet_NaN(), dual::status_t::NUMERICAL}; } } @@ -220,15 +272,44 @@ f_t trial_branching(const lp_problem_t& original_lp, template static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data_model( - const dual_simplex::user_problem_t& user_problem) + const dual_simplex::lp_problem_t& lp, + const std::vector& new_slacks, + const std::vector& root_soln, + std::vector& original_root_soln_x) { + // Branch and bound has a problem of the form: + // minimize c^T x + // subject to A*x + Es = b + // l <= x <= u + // E_{jj} = sigma_j, where sigma_j is +1 or -1 + + // We need to convert this into a problem that is better for PDLP + // to solve. PDLP perfers inequality constraints. 
Thus, we want + // to convert the above into the problem: + // minimize c^T x + // subject to lb <= A*x <= ub + // l <= x <= u + cuopt::mps_parser::mps_data_model_t mps_model; - int m = user_problem.num_rows; - int n = user_problem.num_cols; + int m = lp.num_rows; + int n = lp.num_cols - new_slacks.size(); + original_root_soln_x.resize(n); + + // Remove slacks from A + dual_simplex::csc_matrix_t A_no_slacks = lp.A; + std::vector cols_to_remove(lp.A.n, 0); + for (i_t j : new_slacks) { + cols_to_remove[j] = 1; + } + A_no_slacks.remove_columns(cols_to_remove); + + for (i_t j = 0; j < n; j++) { + original_root_soln_x[j] = root_soln[j]; + } // Convert CSC to CSR using built-in method dual_simplex::csr_matrix_t csr_A(m, n, 0); - user_problem.A.to_compressed_row(csr_A); + A_no_slacks.to_compressed_row(csr_A); int nz = csr_A.row_start[m]; @@ -237,72 +318,418 @@ static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data csr_A.x.data(), nz, csr_A.j.data(), nz, csr_A.row_start.data(), m + 1); // Set objective coefficients - mps_model.set_objective_coefficients(user_problem.objective.data(), n); + mps_model.set_objective_coefficients(lp.objective.data(), n); // Set objective scaling and offset - mps_model.set_objective_scaling_factor(user_problem.obj_scale); - mps_model.set_objective_offset(user_problem.obj_constant); + mps_model.set_objective_scaling_factor(lp.obj_scale); + mps_model.set_objective_offset(lp.obj_constant); // Set variable bounds - mps_model.set_variable_lower_bounds(user_problem.lower.data(), n); - mps_model.set_variable_upper_bounds(user_problem.upper.data(), n); + mps_model.set_variable_lower_bounds(lp.lower.data(), n); + mps_model.set_variable_upper_bounds(lp.upper.data(), n); // Convert row sense and RHS to constraint bounds std::vector constraint_lower(m); std::vector constraint_upper(m); - for (i_t i = 0; i < m; ++i) { - if (user_problem.row_sense[i] == 'L') { - constraint_lower[i] = -std::numeric_limits::infinity(); - constraint_upper[i] = 
user_problem.rhs[i]; - } else if (user_problem.row_sense[i] == 'G') { - constraint_lower[i] = user_problem.rhs[i]; - constraint_upper[i] = std::numeric_limits::infinity(); - } else { - constraint_lower[i] = user_problem.rhs[i]; - constraint_upper[i] = user_problem.rhs[i]; - } + std::vector slack_map(m, -1); + for (i_t j : new_slacks) { + const i_t col_start = lp.A.col_start[j]; + const i_t i = lp.A.i[col_start]; + slack_map[i] = j; } - for (i_t k = 0; k < user_problem.num_range_rows; ++k) { - i_t i = user_problem.range_rows[k]; - f_t r = user_problem.range_value[k]; - f_t b = user_problem.rhs[i]; - f_t h = -std::numeric_limits::infinity(); - f_t u = std::numeric_limits::infinity(); - if (user_problem.row_sense[i] == 'L') { - h = b - std::abs(r); - u = b; - } else if (user_problem.row_sense[i] == 'G') { - h = b; - u = b + std::abs(r); - } else if (user_problem.row_sense[i] == 'E') { - if (r > 0) { - h = b; - u = b + std::abs(r); - } else { - h = b - std::abs(r); - u = b; - } + for (i_t i = 0; i < m; ++i) { + // Each row is of the form a_i^T x + sigma * s_i = b_i + // with sigma = +1 or -1 + // and l_i <= s_i <= u_i + // We have that a_i^T x - b_i = -sigma * s_i + // If sigma = -1, then we have + // a_i^T x - b_i = s_i + // l_i <= a_i^T x - b_i <= u_i + // l_i + b_i <= a_i^T x <= u_i + b_i + // + // If sigma = +1, then we have + // a_i^T x - b_i = -s_i + // -a_i^T x + b_i = s_i + // l_i <= -a_i^T x + b_i <= u_i + // l_i - b_i <= -a_i^T x <= u_i - b_i + // -u_i + b_i <= a_i^T x <= -l_i + b_i + + const i_t slack = slack_map[i]; + assert(slack != -1); + const i_t col_start = lp.A.col_start[slack]; + const f_t sigma = lp.A.x[col_start]; + const f_t slack_lower = lp.lower[slack]; + const f_t slack_upper = lp.upper[slack]; + + if (sigma == -1) { + constraint_lower[i] = slack_lower + lp.rhs[i]; + constraint_upper[i] = slack_upper + lp.rhs[i]; + } else if (sigma == 1) { + constraint_lower[i] = -slack_upper + lp.rhs[i]; + constraint_upper[i] = -slack_lower + lp.rhs[i]; + } 
else { + assert(sigma == 1.0 || sigma == -1.0); } - constraint_lower[i] = h; - constraint_upper[i] = u; } mps_model.set_constraint_lower_bounds(constraint_lower.data(), m); mps_model.set_constraint_upper_bounds(constraint_upper.data(), m); - mps_model.set_maximize(user_problem.obj_scale < 0); + mps_model.set_maximize(lp.obj_scale < 0); return mps_model; } +enum class sb_source_t { DUAL_SIMPLEX, PDLP, NONE }; + +// Merge a single strong branching result from Dual Simplex and PDLP. +// Rules: +// 1. If both found optimal -> keep DS (higher quality vertex solution) +// 2. Else if Dual Simplex found infeasible -> declare infeasible +// 3. Else if one is optimal -> keep the optimal one +// 4. Else if Dual Simplex hit iteration limit -> keep DS +// 5. Else if none converged -> NaN (original objective) template -void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +static std::pair merge_sb_result(f_t ds_val, + dual::status_t ds_status, + f_t pdlp_dual_obj, + bool pdlp_optimal) +{ + // Dual simplex always maintains dual feasibility, so OPTIMAL and ITERATION_LIMIT both qualify + + // Rule 1: Both optimal -> keep DS + if (ds_status == dual::status_t::OPTIMAL && pdlp_optimal) { + return {ds_val, sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 2: Dual Simplex found infeasible -> declare infeasible + if (ds_status == dual::status_t::DUAL_UNBOUNDED) { + return {std::numeric_limits::infinity(), sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 3: Only one converged -> keep that + if (ds_status == dual::status_t::OPTIMAL && !pdlp_optimal) { + return {ds_val, sb_source_t::DUAL_SIMPLEX}; + } + if (pdlp_optimal && ds_status != dual::status_t::OPTIMAL) { + return {pdlp_dual_obj, sb_source_t::PDLP}; + } + + // Rule 4: Dual Simplex hit iteration limit or work limit or cutoff -> keep DS + if (ds_status == dual::status_t::ITERATION_LIMIT || ds_status == dual::status_t::WORK_LIMIT || + ds_status == dual::status_t::CUTOFF) { + return {ds_val, 
sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 5: None converged -> NaN + return {std::numeric_limits::quiet_NaN(), sb_source_t::NONE}; +} + +template +static void batch_pdlp_strong_branching_task( + const simplex_solver_settings_t& settings, + i_t effective_batch_pdlp, + f_t start_time, + std::atomic& concurrent_halt, + const lp_problem_t& original_lp, + const std::vector& new_slacks, + const std::vector& root_soln, + const std::vector& fractional, + f_t root_obj, + pseudo_costs_t& pc, + shared_strong_branching_context_view_t& sb_view, + std::vector& pdlp_obj_down, + std::vector& pdlp_obj_up) +{ + settings.log.printf(effective_batch_pdlp == 2 + ? "Batch PDLP only for strong branching\n" + : "Cooperative batch PDLP and Dual Simplex for strong branching\n"); + + f_t start_batch = tic(); + std::vector original_root_soln_x; + + if (concurrent_halt.load() == 1) { return; } + + const auto mps_model = + simplex_problem_to_mps_data_model(original_lp, new_slacks, root_soln, original_root_soln_x); + + std::vector fraction_values; + + std::vector original_root_soln_y, original_root_soln_z; + // TODO put back later once Chris has this part + /*uncrush_dual_solution( + original_problem, original_lp, root_soln_y, root_soln_z, original_root_soln_y, + original_root_soln_z);*/ + + for (i_t k = 0; k < fractional.size(); k++) { + const i_t j = fractional[k]; + fraction_values.push_back(original_root_soln_x[j]); + } + + if (concurrent_halt.load() == 1) { return; } + + f_t batch_elapsed_time = toc(start_time); + const f_t warm_start_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (warm_start_remaining_time <= 0.0) { return; } + + assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point"); + + if (!pc.pdlp_warm_cache.populated) { + pdlp_solver_settings_t ws_settings; + ws_settings.method = method_t::PDLP; + ws_settings.presolver = presolver_t::None; + ws_settings.pdlp_solver_mode = 
pdlp_solver_mode_t::Stable3; + ws_settings.detect_infeasibility = false; + // Since the warm start will be used over and over again we want to maximize the chance of + // convergeance Batch PDLP is very compute intensive so we want to minimize the number of + // iterations + constexpr int warm_start_iteration_limit = 500000; + ws_settings.iteration_limit = warm_start_iteration_limit; + ws_settings.time_limit = warm_start_remaining_time; + constexpr f_t pdlp_tolerance = 1e-5; + ws_settings.tolerances.relative_dual_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_dual_tolerance = pdlp_tolerance; + ws_settings.tolerances.relative_primal_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_primal_tolerance = pdlp_tolerance; + ws_settings.tolerances.relative_gap_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_gap_tolerance = pdlp_tolerance; + ws_settings.inside_mip = true; + if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; } + +#ifdef BATCH_VERBOSE_MODE + auto start_time = std::chrono::high_resolution_clock::now(); +#endif + + auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings); + +#ifdef BATCH_VERBOSE_MODE + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end_time - start_time).count(); + std::cout << "Original problem solved in " << duration << " milliseconds" + << " and iterations: " + << ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_ << std::endl; +#endif + + if (ws_solution.get_termination_status() == pdlp_termination_status_t::Optimal) { + auto& cache = pc.pdlp_warm_cache; + const auto& ws_primal = ws_solution.get_primal_solution(); + const auto& ws_dual = ws_solution.get_dual_solution(); + // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm + // start + cache.initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); + cache.initial_dual = 
rmm::device_uvector(ws_dual, ws_dual.stream()); + cache.step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; + cache.primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; + cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + cache.populated = true; + + settings.log.printf( + "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n", + cache.initial_primal.size(), + cache.initial_dual.size(), + cache.step_size, + cache.primal_weight, + cache.pdlp_iteration); + } else { + settings.log.printf( + "PDLP warm start solve did not reach optimality (%s), skipping cache and batch PDLP\n", + ws_solution.get_termination_status_string().c_str()); + return; + } + } + + if (concurrent_halt.load() == 1) { return; } + + pdlp_solver_settings_t pdlp_settings; + if (effective_batch_pdlp == 1) { + pdlp_settings.concurrent_halt = &concurrent_halt; + pdlp_settings.shared_sb_solved = sb_view.solved; + } + + batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + pdlp_settings.time_limit = batch_remaining_time; + + if (pc.pdlp_warm_cache.populated) { + auto& cache = pc.pdlp_warm_cache; + pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(), + cache.initial_primal.size(), + cache.batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution( + cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache.step_size); + pdlp_settings.set_initial_primal_weight(cache.primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + } + + if (concurrent_halt.load() == 1) { return; } + + const auto solutions = batch_pdlp_solve( + &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); + f_t 
batch_pdlp_strong_branching_time = toc(start_batch); + + // Fail safe in case the batch PDLP failed and produced no solutions + if (solutions.get_additional_termination_informations().size() != fractional.size() * 2) { + settings.log.printf("Batch PDLP failed and produced no solutions\n"); + return; + } + + // Find max iteration on how many are done accross the batch + i_t max_iterations = 0; + i_t amount_done = 0; + for (i_t k = 0; k < solutions.get_additional_termination_informations().size(); k++) { + max_iterations = std::max( + max_iterations, solutions.get_additional_termination_information(k).number_of_steps_taken); + // TODO batch mode infeasible: should also count as done if infeasible + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + amount_done++; + } + } + + settings.log.printf( + "Batch PDLP strong branching completed in %.2fs. Solved %d/%d with max %d iterations\n", + batch_pdlp_strong_branching_time, + amount_done, + fractional.size() * 2, + max_iterations); + + for (i_t k = 0; k < fractional.size(); k++) { + f_t obj_down = (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) + ? solutions.get_dual_objective_value(k) + : std::numeric_limits::quiet_NaN(); + + f_t obj_up = (solutions.get_termination_status(k + fractional.size()) == + pdlp_termination_status_t::Optimal) + ? 
solutions.get_dual_objective_value(k + fractional.size()) + : std::numeric_limits::quiet_NaN(); + + pdlp_obj_down[k] = std::max(obj_down - root_obj, f_t(0.0)); + pdlp_obj_up[k] = std::max(obj_up - root_obj, f_t(0.0)); + } +} + +template +static void batch_pdlp_reliability_branching_task( + logger_t& log, + i_t rb_mode, + i_t num_candidates, + f_t start_time, + std::atomic& concurrent_halt, + const lp_problem_t& original_lp, + const std::vector& new_slacks, + const std::vector& solution, + branch_and_bound_worker_t* worker, + const std::vector& candidate_vars, + const simplex_solver_settings_t& settings, + shared_strong_branching_context_view_t& sb_view, + batch_pdlp_warm_cache_t& pdlp_warm_cache, + std::vector& pdlp_obj_down, + std::vector& pdlp_obj_up) +{ + log.printf(rb_mode == 2 ? "RB batch PDLP only for %d candidates\n" + : "RB cooperative batch PDLP and DS for %d candidates\n", + num_candidates); + + f_t start_batch = tic(); + + std::vector original_soln_x; + + if (concurrent_halt.load() == 1) { return; } + + auto mps_model = + simplex_problem_to_mps_data_model(original_lp, new_slacks, solution, original_soln_x); + { + const i_t n_orig = original_lp.num_cols - new_slacks.size(); + for (i_t j = 0; j < n_orig; j++) { + mps_model.variable_lower_bounds_[j] = worker->leaf_problem.lower[j]; + mps_model.variable_upper_bounds_[j] = worker->leaf_problem.upper[j]; + } + } + + std::vector fraction_values; + fraction_values.reserve(num_candidates); + for (i_t j : candidate_vars) { + fraction_values.push_back(original_soln_x[j]); + } + + if (concurrent_halt.load() == 1) { return; } + + const f_t batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + + // One handle per batch PDLP since there can be concurrent calls + const raft::handle_t batch_pdlp_handle; + + pdlp_solver_settings_t pdlp_settings; + if (rb_mode == 1) { + 
pdlp_settings.concurrent_halt = &concurrent_halt; + pdlp_settings.shared_sb_solved = sb_view.solved; + } + pdlp_settings.time_limit = batch_remaining_time; + + if (pdlp_warm_cache.populated) { + auto& cache = pdlp_warm_cache; + pdlp_settings.set_initial_primal_solution( + cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution( + cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache.step_size); + pdlp_settings.set_initial_primal_weight(cache.primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + } + + if (concurrent_halt.load() == 1) { return; } + + const auto solutions = + batch_pdlp_solve(&batch_pdlp_handle, mps_model, candidate_vars, fraction_values, pdlp_settings); + + f_t batch_pdlp_time = toc(start_batch); + + if (solutions.get_additional_termination_informations().size() != + static_cast(num_candidates) * 2) { + log.printf("RB batch PDLP failed and produced no solutions\n"); + return; + } + + i_t amount_done = 0; + for (i_t k = 0; k < num_candidates * 2; k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + amount_done++; + } + } + + log.printf("RB batch PDLP completed in %.2fs. 
Solved %d/%d\n", + batch_pdlp_time, + amount_done, + num_candidates * 2); + + for (i_t k = 0; k < num_candidates; k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + pdlp_obj_down[k] = solutions.get_dual_objective_value(k); + } + if (solutions.get_termination_status(k + num_candidates) == + pdlp_termination_status_t::Optimal) { + pdlp_obj_up[k] = solutions.get_dual_objective_value(k + num_candidates); + } + } +} + +template +void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, f_t start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const std::vector& root_soln, + const std::vector& root_soln_y, + const std::vector& root_soln_z, const std::vector& fractional, f_t root_obj, const std::vector& root_vstatus, @@ -317,89 +744,54 @@ void strong_branching(const user_problem_t& original_problem, const f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { return; } - if (settings.mip_batch_pdlp_strong_branching) { - settings.log.printf("Batch PDLP strong branching enabled\n"); - - f_t start_batch = tic(); - - // Use original_problem to create the BatchLP problem - csr_matrix_t A_row(original_problem.A.m, original_problem.A.n, 0); - original_problem.A.to_compressed_row(A_row); - - // Convert the root_soln to the original problem space - std::vector original_root_soln_x; - uncrush_primal_solution(original_problem, original_lp, root_soln, original_root_soln_x); - - std::vector fraction_values; - - for (i_t k = 0; k < fractional.size(); k++) { - const i_t j = fractional[k]; - fraction_values.push_back(original_root_soln_x[j]); - } - - const auto mps_model = simplex_problem_to_mps_data_model(original_problem); - const f_t batch_elapsed_time = toc(start_time); - const f_t batch_remaining_time = - std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); - if (batch_remaining_time <= 0.0) { return; } - 
pdlp_solver_settings_t pdlp_settings; - pdlp_settings.time_limit = batch_remaining_time; - const raft::handle_t batch_pdlp_handle; - const auto solutions = - batch_pdlp_solve(&batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); - f_t batch_pdlp_strong_branching_time = toc(start_batch); - - // Find max iteration on how many are done accross the batch - i_t max_iterations = 0; - i_t amount_done = 0; - for (i_t k = 0; k < solutions.get_additional_termination_informations().size(); k++) { - max_iterations = std::max( - max_iterations, solutions.get_additional_termination_information(k).number_of_steps_taken); - // TODO batch mode infeasible: should also count as done if infeasible - if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { - amount_done++; - } - } + const i_t effective_batch_pdlp = + (settings.sub_mip || (settings.deterministic && settings.mip_batch_pdlp_strong_branching == 1)) + ? 0 + : settings.mip_batch_pdlp_strong_branching; + if (settings.mip_batch_pdlp_strong_branching != 0 && + (settings.sub_mip || settings.deterministic)) { settings.log.printf( - "Batch PDLP strong branching completed in %.2fs. Solved %d/%d with max %d iterations\n", - batch_pdlp_strong_branching_time, - amount_done, - fractional.size() * 2, - max_iterations); + "Batch PDLP strong branching is disabled because sub-MIP or deterministic mode is enabled\n"); + } - for (i_t k = 0; k < fractional.size(); k++) { - // Call BatchLP solver. Solve 2*fractional.size() subproblems. - // Let j = fractional[k]. We want to solve the two trial branching problems - // Branch down: - // minimize c^T x - // subject to lb <= A*x <= ub - // x_j <= floor(root_soln[j]) - // l <= x < u - // Let the optimal objective value of thie problem be obj_down - f_t obj_down = (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) - ? 
solutions.get_dual_objective_value(k) - : std::numeric_limits::quiet_NaN(); - - // Branch up: - // minimize c^T x - // subject to lb <= A*x <= ub - // x_j >= ceil(root_soln[j]) - // Let the optimal objective value of thie problem be obj_up - f_t obj_up = (solutions.get_termination_status(k + fractional.size()) == - pdlp_termination_status_t::Optimal) - ? solutions.get_dual_objective_value(k + fractional.size()) - : std::numeric_limits::quiet_NaN(); + settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", + settings.num_threads, + fractional.size()); + + // Cooperative DS + PDLP: shared context tracks which subproblems are solved + shared_strong_branching_context_t shared_ctx(2 * fractional.size()); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + std::atomic concurrent_halt{0}; + + std::vector pdlp_obj_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector pdlp_obj_up(fractional.size(), std::numeric_limits::quiet_NaN()); + + if (effective_batch_pdlp != 0) { +#pragma omp task default(shared) + batch_pdlp_strong_branching_task(settings, + effective_batch_pdlp, + start_time, + concurrent_halt, + original_lp, + new_slacks, + root_soln, + fractional, + root_obj, + pc, + sb_view, + pdlp_obj_down, + pdlp_obj_up); + } - pc.strong_branch_down[k] = obj_down - root_obj; - pc.strong_branch_up[k] = obj_up - root_obj; - } - } else { - settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", - settings.num_threads, - fractional.size()); - f_t strong_branching_start_time = tic(); + std::vector ds_status_down(fractional.size(), dual::status_t::UNSET); + std::vector ds_status_up(fractional.size(), dual::status_t::UNSET); + std::vector ds_obj_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector ds_obj_up(fractional.size(), std::numeric_limits::quiet_NaN()); + f_t dual_simplex_strong_branching_time = tic(); + if (effective_batch_pdlp != 2) { #pragma omp 
parallel num_threads(settings.num_threads) { i_t n = std::min(4 * settings.num_threads, fractional.size()); @@ -432,10 +824,135 @@ void strong_branching(const user_problem_t& original_problem, root_soln, root_vstatus, edge_norms, - pc); + pc, + ds_obj_down, + ds_obj_up, + ds_status_down, + ds_status_up, + &concurrent_halt, + sb_view); + } + } + + // DS done: signal PDLP to stop (time-limit or all work done) and wait + concurrent_halt.store(1); + } + + if (effective_batch_pdlp != 0) { +#pragma omp taskwait + } + + settings.log.printf("Strong branching took %.2fs\n", toc(dual_simplex_strong_branching_time)); + + // Collect Dual Simplex statistics + i_t ds_optimal = 0, ds_infeasible = 0, ds_iter_limit = 0; + i_t ds_numerical = 0, ds_cutoff = 0, ds_time_limit = 0; + i_t ds_concurrent = 0, ds_work_limit = 0, ds_unset = 0; + const i_t total_subproblems = fractional.size() * 2; + for (i_t k = 0; k < fractional.size(); k++) { + for (auto st : {ds_status_down[k], ds_status_up[k]}) { + switch (st) { + case dual::status_t::OPTIMAL: ds_optimal++; break; + case dual::status_t::DUAL_UNBOUNDED: ds_infeasible++; break; + case dual::status_t::ITERATION_LIMIT: ds_iter_limit++; break; + case dual::status_t::NUMERICAL: ds_numerical++; break; + case dual::status_t::CUTOFF: ds_cutoff++; break; + case dual::status_t::TIME_LIMIT: ds_time_limit++; break; + case dual::status_t::CONCURRENT_LIMIT: ds_concurrent++; break; + case dual::status_t::WORK_LIMIT: ds_work_limit++; break; + case dual::status_t::UNSET: ds_unset++; break; } } - settings.log.printf("Strong branching completed in %.2fs\n", toc(strong_branching_start_time)); + } + + settings.log.printf("Dual Simplex: %d/%d optimal, %d infeasible, %d iter-limit", + ds_optimal, + total_subproblems, + ds_infeasible, + ds_iter_limit); + if (ds_cutoff) settings.log.printf(", %d cutoff", ds_cutoff); + if (ds_time_limit) settings.log.printf(", %d time-limit", ds_time_limit); + if (ds_numerical) settings.log.printf(", %d numerical", ds_numerical); 
+ if (ds_concurrent) settings.log.printf(", %d concurrent-halt", ds_concurrent); + if (ds_work_limit) settings.log.printf(", %d work-limit", ds_work_limit); + if (ds_unset) settings.log.printf(", %d unset/skipped", ds_unset); + settings.log.printf("\n"); + + if (effective_batch_pdlp != 0) { + i_t pdlp_optimal_count = 0; + for (i_t k = 0; k < fractional.size(); k++) { + if (!std::isnan(pdlp_obj_down[k])) pdlp_optimal_count++; + if (!std::isnan(pdlp_obj_up[k])) pdlp_optimal_count++; + } + + settings.log.printf("Batch PDLP found %d/%d optimal solutions\n", + pdlp_optimal_count, + static_cast(fractional.size() * 2)); + } + + i_t merged_from_ds = 0; + i_t merged_from_pdlp = 0; + i_t merged_nan = 0; + i_t solved_by_both_down = 0; + i_t solved_by_both_up = 0; + for (i_t k = 0; k < fractional.size(); k++) { + bool ds_has_down = ds_status_down[k] != dual::status_t::UNSET; + bool pdlp_has_down = !std::isnan(pdlp_obj_down[k]); + const auto [value_down, source_down] = + merge_sb_result(ds_obj_down[k], ds_status_down[k], pdlp_obj_down[k], pdlp_has_down); + pc.strong_branch_down[k] = value_down; + if (source_down == sb_source_t::DUAL_SIMPLEX) + merged_from_ds++; + else if (source_down == sb_source_t::PDLP) + merged_from_pdlp++; + else + merged_nan++; + if (ds_has_down && pdlp_has_down) { + solved_by_both_down++; + settings.log.printf( + "[COOP SB] Merge: variable %d DOWN solved by BOTH (DS=%e PDLP=%e) -> kept %s\n", + fractional[k], + ds_obj_down[k], + pdlp_obj_down[k], + source_down == sb_source_t::DUAL_SIMPLEX ? 
"DS" : "PDLP"); + } + + bool ds_has_up = ds_status_up[k] != dual::status_t::UNSET; + bool pdlp_has_up = !std::isnan(pdlp_obj_up[k]); + const auto [value_up, source_up] = + merge_sb_result(ds_obj_up[k], ds_status_up[k], pdlp_obj_up[k], pdlp_has_up); + pc.strong_branch_up[k] = value_up; + if (source_up == sb_source_t::DUAL_SIMPLEX) + merged_from_ds++; + else if (source_up == sb_source_t::PDLP) + merged_from_pdlp++; + else + merged_nan++; + if (ds_has_up && pdlp_has_up) { + solved_by_both_up++; + settings.log.printf( + "[COOP SB] Merge: variable %d UP solved by BOTH (DS=%e PDLP=%e) -> kept %s\n", + fractional[k], + ds_obj_up[k], + pdlp_obj_up[k], + source_up == sb_source_t::DUAL_SIMPLEX ? "DS" : "PDLP"); + } + } + + if (effective_batch_pdlp != 0) { + pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root = + (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0; + settings.log.printf( + "Batch PDLP only for strong branching. percent solved by batch PDLP at root: %f\n", + pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root); + settings.log.printf( + "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d/%d solved by both " + "(down/up)\n", + merged_from_ds, + merged_from_pdlp, + merged_nan, + solved_by_both_down, + solved_by_both_up); } pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); @@ -538,7 +1055,9 @@ i_t pseudo_costs_t::reliable_variable_selection( const branch_and_bound_stats_t& bnb_stats, f_t upper_bound, int max_num_tasks, - logger_t& log) + logger_t& log, + const std::vector& new_slacks, + const lp_problem_t& original_lp) { constexpr f_t eps = 1e-6; f_t start_time = bnb_stats.start_time; @@ -605,9 +1124,47 @@ i_t pseudo_costs_t::reliable_variable_selection( return branch_var; } - const int num_tasks = std::max(max_num_tasks, 1); - const int task_priority = reliability_branching_settings.task_priority; - const i_t max_num_candidates = reliability_branching_settings.max_num_candidates; + const i_t rb_mode = 
settings.mip_batch_pdlp_reliability_branching; + // We don't use batch PDLP in reliability branching if the PDLP warm start data was not filled + // This indicates that PDLP alone (not batched) couldn't even run at the root node + // So it will most likely perform poorly compared to DS + // It is also off if the number of candidates is very small + // If warm start could run but almost none of the BPDLP results were used, we also want to avoid + // using batch PDLP + constexpr i_t min_num_candidates_for_pdlp = 5; + constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; + const bool use_pdlp = (rb_mode != 0) && !settings.sub_mip && !settings.deterministic && + pdlp_warm_cache.populated && + unreliable_list.size() > min_num_candidates_for_pdlp && + pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root > + min_percent_solved_by_batch_pdlp_at_root_for_pdlp; + + if (rb_mode != 0 && !pdlp_warm_cache.populated) { + log.printf("PDLP warm start data not populated, using DS only\n"); + } else if (rb_mode != 0 && settings.sub_mip) { + log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); + } else if (rb_mode != 0 && settings.deterministic) { + log.printf( + "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); + } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { + log.printf("Not enough candidates to use batch PDLP, using DS only\n"); + } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) { + log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); + } else if (use_pdlp) { + log.printf( + "Using batch PDLP because populated, unreliable list size is %d (> %d), and percent solved " + "by batch PDLP at root is %f%% (> %f%%)\n", + static_cast(unreliable_list.size()), + min_num_candidates_for_pdlp, + pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root, + min_percent_solved_by_batch_pdlp_at_root_for_pdlp); + } 
+ + const int num_tasks = std::max(max_num_tasks, 1); + const int task_priority = reliability_branching_settings.task_priority; + // If both batch PDLP and DS are used we double the max number of candidates + const i_t max_num_candidates = use_pdlp ? 2 * reliability_branching_settings.max_num_candidates + : reliability_branching_settings.max_num_candidates; const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); assert(task_priority > 0); @@ -626,89 +1183,246 @@ i_t pseudo_costs_t::reliable_variable_selection( // Shuffle the unreliable list so every variable has the same chance to be selected. if (unreliable_list.size() > max_num_candidates) { worker->rng.shuffle(unreliable_list); } + // Both DS and PDLP work on the same candidate set + std::vector candidate_vars(unreliable_list.begin(), + unreliable_list.begin() + num_candidates); + + // Shared context for cooperative work-stealing (mode 1) + // [0..num_candidates) = down, [num_candidates..2*num_candidates) = up + shared_strong_branching_context_t shared_ctx(2 * num_candidates); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + std::vector pdlp_obj_down(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector pdlp_obj_up(num_candidates, std::numeric_limits::quiet_NaN()); + + std::atomic concurrent_halt{0}; + + if (use_pdlp) { +#pragma omp task default(shared) + batch_pdlp_reliability_branching_task(log, + rb_mode, + num_candidates, + start_time, + concurrent_halt, + original_lp, + new_slacks, + solution, + worker, + candidate_vars, + settings, + sb_view, + pdlp_warm_cache, + pdlp_obj_down, + pdlp_obj_up); + } + if (toc(start_time) > settings.time_limit) { - log.printf("Time limit reached"); + log.printf("Time limit reached\n"); + if (use_pdlp) { + concurrent_halt.store(1); +#pragma omp taskwait + } return branch_var; } + std::vector ds_obj_down(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector ds_obj_up(num_candidates, 
std::numeric_limits::quiet_NaN()); + std::vector ds_status_down(num_candidates, dual::status_t::UNSET); + std::vector ds_status_up(num_candidates, dual::status_t::UNSET); + + f_t ds_start_time = tic(); + + if (rb_mode != 2) { #pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ - shared(score_mutex) - for (i_t i = 0; i < num_candidates; ++i) { - const i_t j = unreliable_list[i]; - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_down[j].lock(); - if (pseudo_cost_num_down[j] < reliable_threshold) { - // Do trial branching on the down branch - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - worker->leaf_problem.lower[j], - std::floor(solution[j]), - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = solution[j] - std::floor(solution[j]); - pseudo_cost_sum_down[j] += change_in_obj / change_in_x; - pseudo_cost_num_down[j]++; + shared(score_mutex, sb_view) + for (i_t i = 0; i < num_candidates; ++i) { + const i_t j = unreliable_list[i]; + + if (toc(start_time) > settings.time_limit) { continue; } + + if (rb_mode == 1 && sb_view.is_solved(i)) { + log.printf( + "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); + } else { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + // Do trial branching on the down branch + const auto [obj, status] = + trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, 
+ worker->leaf_problem.lower[j], + std::floor(solution[j]), + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + ds_obj_down[i] = obj; + ds_status_down[i] = status; + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + // Should be valid if we are already here + if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(i); } + } + } else { + // Variable became reliable, mark it as solved so that batch PDLP does not solve it again + if (rb_mode == 1) sb_view.mark_solved(i); + } + pseudo_cost_mutex_down[j].unlock(); + } - } - pseudo_cost_mutex_down[j].unlock(); - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_up[j].lock(); - if (pseudo_cost_num_up[j] < reliable_threshold) { - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - std::ceil(solution[j]), - worker->leaf_problem.upper[j], - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = std::ceil(solution[j]) - solution[j]; - pseudo_cost_sum_up[j] += change_in_obj / change_in_x; - pseudo_cost_num_up[j]++; + + if (toc(start_time) > settings.time_limit) { continue; } + + const i_t shared_idx = i + num_candidates; + if (rb_mode == 1 && sb_view.is_solved(shared_idx)) { + log.printf("DS skipping variable %d branch up (shared_idx %d): already 
solved by PDLP\n", + j, + shared_idx); + } else { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + const auto [obj, status] = + trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + std::ceil(solution[j]), + worker->leaf_problem.upper[j], + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + ds_obj_up[i] = obj; + ds_status_up[i] = status; + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + // Should be valid if we are already here + if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(shared_idx); } + } + } else { + // Variable became reliable, mark it as solved so that batch PDLP does not solve it again + if (rb_mode == 1) sb_view.mark_solved(shared_idx); + } + pseudo_cost_mutex_up[j].unlock(); + } + + if (toc(start_time) > settings.time_limit) { continue; } + + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + score_mutex.lock(); + if (score > max_score) { + max_score = score; + branch_var = j; } + score_mutex.unlock(); } - pseudo_cost_mutex_up[j].unlock(); - if (toc(start_time) > settings.time_limit) { continue; } + concurrent_halt.store(1); + } - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t ds_elapsed = toc(ds_start_time); + + // TODO put back + // if (rb_mode != 2) { + // if (rb_mode == 1) { + // log.printf( + // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed, %d skipped + // (PDLP) in %.2fs\n", 
num_candidates, ds_optimal.load(), num_candidates * 2, + // ds_infeasible.load(), num_candidates * 2, + // ds_failed.load(), num_candidates * 2, + // ds_skipped.load(), ds_elapsed); + // } else { + // log.printf( + // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed in + // %.2fs\n", num_candidates, ds_optimal.load(), num_candidates * 2, ds_infeasible.load(), + // num_candidates * 2, ds_failed.load(), num_candidates * 2, ds_elapsed); + // } + //} + + if (use_pdlp) { +#pragma omp taskwait + + i_t pdlp_applied = 0; + i_t pdlp_optimal = 0; + for (i_t i = 0; i < num_candidates; i++) { + const i_t j = candidate_vars[i]; + + // Down: check if PDLP should override DS + if (!std::isnan(pdlp_obj_down[i])) { + pdlp_optimal++; + const auto [merged_obj, source] = + merge_sb_result(ds_obj_down[i], ds_status_down[i], pdlp_obj_down[i], true); + // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent + // calls may have made it reliable) + if (source == sb_source_t::PDLP) { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + pdlp_applied++; + } + pseudo_cost_mutex_down[j].unlock(); + } + } - score_mutex.lock(); - if (score > max_score) { - max_score = score; - branch_var = j; + // Up: check if PDLP should override DS + if (!std::isnan(pdlp_obj_up[i])) { + pdlp_optimal++; + const auto [merged_obj, source] = + merge_sb_result(ds_obj_up[i], ds_status_up[i], pdlp_obj_up[i], true); + // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent + // calls may have made it reliable) + if (source == sb_source_t::PDLP) { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + f_t change_in_obj = 
std::max(merged_obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + pdlp_applied++; + } + pseudo_cost_mutex_up[j].unlock(); + } + } + + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + if (score > max_score) { + max_score = score; + branch_var = j; + } } - score_mutex.unlock(); + + log.printf("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", + num_candidates, + pdlp_optimal, + num_candidates * 2, + pdlp_applied); } log.printf( @@ -776,12 +1490,14 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( template class pseudo_costs_t; -template void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, double start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const std::vector& root_soln, + const std::vector& root_soln_y, + const std::vector& root_soln_z, const std::vector& fractional, double root_obj, const std::vector& root_vstatus, diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 6b6c6917b..322daa890 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -20,7 +20,10 @@ #include #include +#include + #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -405,6 +408,18 @@ struct reliability_branching_settings_t { i_t min_reliable_threshold = 1; }; +template +struct batch_pdlp_warm_cache_t { + const raft::handle_t batch_pdlp_handle{}; + rmm::device_uvector initial_primal{0, batch_pdlp_handle.get_stream()}; + rmm::device_uvector initial_dual{0, batch_pdlp_handle.get_stream()}; + f_t step_size{std::numeric_limits::signaling_NaN()}; + f_t 
primal_weight{std::numeric_limits::signaling_NaN()}; + i_t pdlp_iteration{-1}; + f_t percent_solved_by_batch_pdlp_at_root{f_t(0.0)}; + bool populated{false}; +}; + template class pseudo_costs_t { public: @@ -481,7 +496,9 @@ class pseudo_costs_t { const branch_and_bound_stats_t& bnb_stats, f_t upper_bound, int max_num_tasks, - logger_t& log); + logger_t& log, + const std::vector& new_slacks, + const lp_problem_t& original_lp); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); @@ -514,15 +531,19 @@ class pseudo_costs_t { std::vector pseudo_cost_mutex_down; omp_atomic_t num_strong_branches_completed = 0; omp_atomic_t strong_branching_lp_iter = 0; + + batch_pdlp_warm_cache_t pdlp_warm_cache; }; template -void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, f_t start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const std::vector& root_soln, + const std::vector& root_soln_y, + const std::vector& root_soln_z, const std::vector& fractional, f_t root_obj, const std::vector& root_vstatus, diff --git a/cpp/src/branch_and_bound/shared_strong_branching_context.hpp b/cpp/src/branch_and_bound/shared_strong_branching_context.hpp new file mode 100644 index 000000000..a9e697ae5 --- /dev/null +++ b/cpp/src/branch_and_bound/shared_strong_branching_context.hpp @@ -0,0 +1,60 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct shared_strong_branching_context_t { + std::vector> solved; + + explicit shared_strong_branching_context_t(size_t num_subproblems) : solved(num_subproblems) + { + for (auto& s : solved) + s.store(0); + } +}; + +template +struct shared_strong_branching_context_view_t { + std::span> solved; + + shared_strong_branching_context_view_t() = default; + + shared_strong_branching_context_view_t(std::span> s) : solved(s) {} + + bool is_valid() const { return !solved.empty(); } + + bool is_solved(i_t local_idx) const + { + assert(local_idx >= 0 && static_cast(local_idx) < solved.size() && + "local_idx out of bounds"); + return solved[local_idx].load() != 0; + } + + void mark_solved(i_t local_idx) const + { + assert(local_idx >= 0 && static_cast(local_idx) < solved.size() && + "local_idx out of bounds"); + solved[local_idx].store(1); + } + + shared_strong_branching_context_view_t subview(i_t offset, i_t count) const + { + assert(offset >= 0 && count >= 0 && static_cast(offset + count) <= solved.size() && + "subview out of bounds"); + return {solved.subspan(offset, count)}; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index eadd93040..882f7a14f 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -187,8 +187,10 @@ struct simplex_solver_settings_t { // strengthening f_t cut_change_threshold; // threshold for cut change f_t cut_min_orthogonality; // minimum orthogonality for cuts - i_t mip_batch_pdlp_strong_branching{0}; // 0 if not using batch PDLP for strong branching, 1 if - // using batch PDLP for strong branching + i_t + mip_batch_pdlp_strong_branching; // 0 = DS only, 1 = 
cooperative DS + PDLP, 2 = batch PDLP only + i_t mip_batch_pdlp_reliability_branching; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch + // PDLP only diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index a8bc0ea33..9d933f3c9 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -99,7 +99,8 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_REDUCED_COST_STRENGTHENING, &mip_settings.reduced_cost_strengthening, -1, std::numeric_limits::max(), -1}, {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}, - {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0}, + {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 2, 0}, + {CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING, &mip_settings.mip_batch_pdlp_reliability_branching, 0, 2, 0}, {CUOPT_PRESOLVE, reinterpret_cast(&pdlp_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_PRESOLVE, reinterpret_cast(&mip_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_MIP_DETERMINISM_MODE, &mip_settings.determinism_mode, CUOPT_MODE_OPPORTUNISTIC, CUOPT_MODE_DETERMINISTIC, CUOPT_MODE_OPPORTUNISTIC}, diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index a200d4265..5e3d19c8b 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -14,6 +14,7 @@ set(MIP_LP_NECESSARY_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/third_party_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/presolve/gf2_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/solution/solution.cu + ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ) # 
Files that are MIP-specific and not needed for pure LP @@ -38,7 +39,6 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu - ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index f25c093af..d58f20876 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -229,6 +229,8 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality; branch_and_bound_settings.mip_batch_pdlp_strong_branching = context.settings.mip_batch_pdlp_strong_branching; + branch_and_bound_settings.mip_batch_pdlp_reliability_branching = + context.settings.mip_batch_pdlp_reliability_branching; branch_and_bound_settings.reduced_cost_strengthening = context.settings.reduced_cost_strengthening == -1 ? 
2 diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 82e79098a..33c080ee3 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -500,8 +500,7 @@ std::optional> pdlp_solver_t } // Check for concurrent limit - if (settings_.method == method_t::Concurrent && settings_.concurrent_halt != nullptr && - *settings_.concurrent_halt == 1) { + if (settings_.concurrent_halt != nullptr && settings_.concurrent_halt->load() == 1) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Concurrent Limit reached, returning current solution" << std::endl; @@ -771,13 +770,34 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) if (current_termination_strategy_.is_done(term)) { std::cout << "[BATCH MODE]: Climber " << i << " is done with " << optimization_problem_solution_t::get_termination_status_string(term) - << " at step " << total_pdlp_iterations_ << ". It's original index is " + << " at step " << internal_solver_iterations_ << ". It's original index is " << climber_strategies_[i].original_index << std::endl; } } #endif - // All are optimal or infeasible + // Sync external solved status into internal termination strategy before all_done() check + if (sb_view_.is_valid()) { + for (size_t i = 0; i < climber_strategies_.size(); ++i) { + // If PDLP has solved it to optimality we want to keep it and resolved both solvers having + // solved the problem later + if (current_termination_strategy_.is_done( + current_termination_strategy_.get_termination_status(i))) + continue; + const i_t local_idx = climber_strategies_[i].original_index; + if (sb_view_.is_solved(local_idx)) { + current_termination_strategy_.set_termination_status( + i, pdlp_termination_status_t::ConcurrentLimit); +#ifdef BATCH_VERBOSE_MODE + std::cout << "[COOP SB] DS already solved climber " << i << " (original_index " << local_idx + << "), synced to ConcurrentLimit at step " << internal_solver_iterations_ + << std::endl; +#endif + } + } + } + + // All are optimal, 
infeasible, or externally solved if (current_termination_strategy_.all_done()) { const auto original_batch_size = settings_.new_bounds.size(); // Some climber got removed from the batch while the optimization was running @@ -824,6 +844,7 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) .get_additional_termination_informations()[climber_strategies_[i].original_index] .solved_by_pdlp = (current_termination_strategy_.get_termination_status(i) != pdlp_termination_status_t::ConcurrentLimit); + if (sb_view_.is_valid()) { sb_view_.mark_solved(climber_strategies_[i].original_index); } } current_termination_strategy_.fill_gpu_terms_stats(total_pdlp_iterations_); RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); @@ -840,6 +861,11 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) std::move(batch_solution_to_return_.get_additional_termination_informations()), std::move(batch_solution_to_return_.get_terminations_status())}; } + if (sb_view_.is_valid()) { + for (size_t i = 0; i < climber_strategies_.size(); ++i) { + sb_view_.mark_solved(climber_strategies_[i].original_index); + } + } RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, @@ -858,8 +884,11 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) current_termination_strategy_.get_termination_status(i))) { raft::common::nvtx::range fun_scope("remove_done_climber"); #ifdef BATCH_VERBOSE_MODE - std::cout << "Removing climber " << i << " because it is done. Its original index is " - << climber_strategies_[i].original_index << std::endl; + const bool externally_solved = (current_termination_strategy_.get_termination_status(i) == + pdlp_termination_status_t::ConcurrentLimit); + std::cout << "Removing climber " << i << " (original_index " + << climber_strategies_[i].original_index << ") because it is done" + << (externally_solved ? 
" [solved by DS]" : " [solved by PDLP]") << std::endl; #endif to_remove.emplace(i); // Copy current climber solution information @@ -892,6 +921,7 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) .get_additional_termination_informations()[climber_strategies_[i].original_index] .solved_by_pdlp = (current_termination_strategy_.get_termination_status(i) != pdlp_termination_status_t::ConcurrentLimit); + if (sb_view_.is_valid()) { sb_view_.mark_solved(climber_strategies_[i].original_index); } } } if (to_remove.size() > 0) { @@ -965,7 +995,7 @@ std::optional> pdlp_solver_t // To avoid that we allow at least two iterations at first before checking (in practice 0 wasn't // enough) We still need to check iteration and time limit prior without breaking the logic below // of first checking termination before the limit - if (total_pdlp_iterations_ <= 1) { + if (internal_solver_iterations_ <= 1) { print_termination_criteria(timer); return check_limits(timer); } @@ -1507,9 +1537,6 @@ HDI void fixed_error_computation(const f_t norm_squared_delta_primal, norm_squared_delta_primal * primal_weight + norm_squared_delta_dual / primal_weight; const f_t computed_interaction = f_t(2.0) * interaction * step_size; - cuopt_assert(movement + computed_interaction >= f_t(0.0), - "Movement + computed interaction must be >= 0"); - // Clamp to 0 to avoid NaN *fixed_point_error = cuda::std::sqrt(cuda::std::max(f_t(0.0), movement + computed_interaction)); @@ -1768,6 +1795,90 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( pdhg_solver_.get_primal_tmp_resource().data(), CUSPARSE_ORDER_COL); + // Recalculate SpMM buffer sizes for the new batch dimensions. + // cuSparse may require different buffer sizes when the number of columns changes + // (e.g. SpMM with 1 column may internally fall back to SpMV with larger buffer needs). 
+ { + size_t new_buf_size = 0; + + // PDHG row-row: A_T * batch_dual_solutions -> batch_current_AtYs + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A_T, + pdhg_cusparse_view.batch_dual_solutions, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_current_AtYs, + (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_transpose_batch_row_row_.resize(new_buf_size, stream_view_); + + // PDHG row-row: A * batch_reflected_primal_solutions -> batch_dual_gradients + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A, + pdhg_cusparse_view.batch_reflected_primal_solutions, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_dual_gradients, + (deterministic_batch_pdlp) ? 
CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_non_transpose_batch_row_row_.resize(new_buf_size, stream_view_); + + // Adaptive step size: A_T * batch_potential_next_dual_solution -> batch_next_AtYs + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A_T, + pdhg_cusparse_view.batch_potential_next_dual_solution, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_next_AtYs, + CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_transpose_batch.resize(new_buf_size, stream_view_); + + // Convergence info: A_T * batch_dual_solutions -> batch_tmp_primals + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + current_op_problem_evaluation_cusparse_view_.A_T, + current_op_problem_evaluation_cusparse_view_.batch_dual_solutions, + reusable_device_scalar_value_0_.data(), + current_op_problem_evaluation_cusparse_view_.batch_tmp_primals, + CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + current_op_problem_evaluation_cusparse_view_.buffer_transpose_batch.resize(new_buf_size, + stream_view_); + + // Convergence info: A * batch_primal_solutions -> batch_tmp_duals + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + current_op_problem_evaluation_cusparse_view_.A, + current_op_problem_evaluation_cusparse_view_.batch_primal_solutions, + reusable_device_scalar_value_0_.data(), + current_op_problem_evaluation_cusparse_view_.batch_tmp_duals, + 
CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + current_op_problem_evaluation_cusparse_view_.buffer_non_transpose_batch.resize(new_buf_size, + stream_view_); + } + // Rerun preprocess // PDHG SpMM preprocess @@ -2199,6 +2310,22 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.total_pdhg_iterations_ = initial_k_.value(); pdhg_solver_.get_d_total_pdhg_iterations().set_value_async(initial_k_.value(), stream_view_); } + if (settings_.get_initial_pdlp_iteration().has_value()) { + total_pdlp_iterations_ = settings_.get_initial_pdlp_iteration().value(); + // This is meaningless in batch mode since pdhg step is never used, set it just to avoid + // assertions + pdhg_solver_.get_d_total_pdhg_iterations().set_value_async(total_pdlp_iterations_, + stream_view_); + pdhg_solver_.total_pdhg_iterations_ = total_pdlp_iterations_; + // Reset the fixed point error since at this pdlp iteration it is expected to already be + // initialized to some value + std::fill(restart_strategy_.initial_fixed_point_error_.begin(), + restart_strategy_.initial_fixed_point_error_.end(), + f_t(0.0)); + std::fill(restart_strategy_.fixed_point_error_.begin(), + restart_strategy_.fixed_point_error_.end(), + f_t(0.0)); + } // Only the primal_weight_ and step_size_ variables are initialized during the initial phase // The associated primal/dual step_size (computed using the two firstly mentionned) are not @@ -2320,13 +2447,6 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co bool warm_start_was_given = settings_.get_pdlp_warm_start_data().is_populated(); - // In batch mode, before running the solver, we need to transpose the primal and dual solution to - // row format - if (batch_mode_) - transpose_primal_dual_to_row(pdhg_solver_.get_potential_next_primal_solution(), - pdhg_solver_.get_potential_next_dual_solution(), - pdhg_solver_.get_dual_slack()); - if (!inside_mip_) { CUOPT_LOG_INFO( " Iter Primal Obj. Dual Obj. Gap Primal Res. Dual Res. 
Time"); @@ -2389,13 +2509,6 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co } } -#ifdef CUPDLP_DEBUG_MODE - print("before scale slack", pdhg_solver_.get_dual_slack()); - print("before scale potential next primal", - pdhg_solver_.get_potential_next_primal_solution()); - print("before scale potential next dual", pdhg_solver_.get_potential_next_dual_solution()); -#endif - // In case of batch mode, primal and dual matrices are in row format // We need to transpose them to column format before doing any checks if (batch_mode_) { @@ -2411,6 +2524,13 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.get_primal_solution(), pdhg_solver_.get_dual_solution(), dummy); } +#ifdef CUPDLP_DEBUG_MODE + print("before scale slack", pdhg_solver_.get_dual_slack()); + print("before scale potential next primal", + pdhg_solver_.get_potential_next_primal_solution()); + print("before scale potential next dual", pdhg_solver_.get_potential_next_dual_solution()); +#endif + // We go back to the unscaled problem here. 
It ensures that we do not terminate 'too early' // because of the error margin being evaluated on the scaled problem diff --git a/cpp/src/pdlp/pdlp.cuh b/cpp/src/pdlp/pdlp.cuh index de0cf69c9..d03430f15 100644 --- a/cpp/src/pdlp/pdlp.cuh +++ b/cpp/src/pdlp/pdlp.cuh @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -138,6 +139,8 @@ class pdlp_solver_t { rmm::cuda_stream_view stream_view_; // Intentionnaly take a copy to avoid an unintentional modification in the calling context const pdlp_solver_settings_t settings_; + dual_simplex::shared_strong_branching_context_view_t sb_view_{ + settings_.shared_sb_solved}; problem_t* problem_ptr; // Combined bounds in op_problem_scaled_ will only be scaled if diff --git a/cpp/src/pdlp/pdlp_constants.hpp b/cpp/src/pdlp/pdlp_constants.hpp index cf17cc985..568d7d00b 100644 --- a/cpp/src/pdlp/pdlp_constants.hpp +++ b/cpp/src/pdlp/pdlp_constants.hpp @@ -7,8 +7,6 @@ #pragma once -#include - #include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 149e99a43..2b1031026 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -691,6 +691,12 @@ void pdlp_restart_strategy_t::should_cupdlpx_restart(i_t total_number_ { std::fill(should_restart.begin(), should_restart.end(), 0); +#ifdef CUPDLP_DEBUG_MODE + // Print the current stats of initial fixed point error and fixed point error + print("initial_fixed_point_error", initial_fixed_point_error_); + print("fixed_point_error", fixed_point_error_); +#endif + if (total_number_of_iterations == hyper_params_.major_iteration) { #ifdef CUPDLP_DEBUG_MODE printf("forced restart at first major\n"); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 19e0b93a8..341edb2c1 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -838,17 +838,17 @@ 
optimization_problem_solution_t run_pdlp(detail::problem_t& return sol; } +// Compute in double as some cases overflow when using size_t template -static size_t batch_pdlp_memory_estimator(const optimization_problem_t& problem, - int trial_batch_size, - int max_batch_size) +static double batch_pdlp_memory_estimator(const optimization_problem_t& problem, + double trial_batch_size) { - size_t total_memory = 0; + double total_memory = 0.0; // In PDLP we store the scaled version of the problem which contains all of those total_memory += problem.get_constraint_matrix_indices().size() * sizeof(i_t); total_memory += problem.get_constraint_matrix_offsets().size() * sizeof(i_t); total_memory += problem.get_constraint_matrix_values().size() * sizeof(f_t); - total_memory *= 2; // To account for the A_t matrix + total_memory *= 2.0; // To account for the A_t matrix total_memory += problem.get_objective_coefficients().size() * sizeof(f_t); total_memory += problem.get_constraint_bounds().size() * sizeof(f_t); total_memory += problem.get_variable_lower_bounds().size() * sizeof(f_t); @@ -887,13 +887,13 @@ static size_t batch_pdlp_memory_estimator(const optimization_problem_t total_memory += trial_batch_size * problem.get_n_constraints() * sizeof(f_t); // Data for the solution - total_memory += problem.get_n_variables() * max_batch_size * sizeof(f_t); - total_memory += problem.get_n_constraints() * max_batch_size * sizeof(f_t); - total_memory += problem.get_n_variables() * max_batch_size * sizeof(f_t); + total_memory += problem.get_n_variables() * trial_batch_size * sizeof(f_t); + total_memory += problem.get_n_constraints() * trial_batch_size * sizeof(f_t); + total_memory += problem.get_n_variables() * trial_batch_size * sizeof(f_t); - // Add a 50% overhead to make sure we have enough memory considering other parts of the solver may - // allocate at the same time - total_memory *= 1.5; + // Add a 70% overhead to make sure we have enough memory considering other parts of the solver 
may + // need memory later while the batch PDLP is running + total_memory *= 1.7; // Data from saddle point state return total_memory; @@ -904,125 +904,167 @@ optimization_problem_solution_t run_batch_pdlp( optimization_problem_t& problem, pdlp_solver_settings_t const& settings) { // Hyper parameter than can be changed, I have put what I believe to be the best - bool primal_dual_init = true; - bool primal_weight_init = true; - bool use_optimal_batch_size = false; - constexpr int iteration_limit = 100000; - // Shouldn't we work on the unpresolved and/or unscaled problem for PDLP? - // Shouldn't we put an iteration limit? If yes what should we do with the partial solutions? + constexpr bool pdlp_primal_dual_init = true; + constexpr bool primal_weight_init = true; + constexpr bool use_initial_pdlp_iterations = true; + bool use_optimal_batch_size = false; + constexpr int batch_iteration_limit = 100000; + constexpr f_t pdlp_tolerance = 1e-5; rmm::cuda_stream_view stream = problem.get_handle_ptr()->get_stream(); rmm::device_uvector initial_primal(0, stream); rmm::device_uvector initial_dual(0, stream); - f_t initial_step_size = std::numeric_limits::signaling_NaN(); - f_t initial_primal_weight = std::numeric_limits::signaling_NaN(); + f_t initial_step_size = std::numeric_limits::signaling_NaN(); + f_t initial_primal_weight = std::numeric_limits::signaling_NaN(); + i_t initial_pdlp_iteration = -1; cuopt_assert(settings.new_bounds.size() > 0, "Batch size should be greater than 0"); - const int max_batch_size = settings.new_bounds.size(); - int memory_max_batch_size = max_batch_size; + const size_t max_batch_size = settings.new_bounds.size(); + size_t memory_max_batch_size = max_batch_size; // Check if we don't hit the limit using max_batch_size - const size_t memory_estimate = - batch_pdlp_memory_estimator(problem, max_batch_size, max_batch_size); - size_t free_mem, total_mem; - RAFT_CUDA_TRY(cudaMemGetInfo(&free_mem, &total_mem)); + const double memory_estimate = 
batch_pdlp_memory_estimator(problem, max_batch_size); + size_t st_free_mem, st_total_mem; + RAFT_CUDA_TRY(cudaMemGetInfo(&st_free_mem, &st_total_mem)); + const double free_mem = static_cast(st_free_mem); + const double total_mem = static_cast(st_total_mem); + +#ifdef BATCH_VERBOSE_MODE + std::cout << "Memory estimate: " << memory_estimate << std::endl; + std::cout << "Free memory: " << free_mem << std::endl; + std::cout << "Total memory: " << total_mem << std::endl; +#endif if (memory_estimate > free_mem) { use_optimal_batch_size = true; // Decrement batch size iteratively until we find a batch size that fits while (memory_max_batch_size > 1) { - const size_t memory_estimate = - batch_pdlp_memory_estimator(problem, memory_max_batch_size, max_batch_size); + const double memory_estimate = batch_pdlp_memory_estimator(problem, memory_max_batch_size); if (memory_estimate <= free_mem) { break; } +#ifdef BATCH_VERBOSE_MODE + std::cout << "Memory estimate: " << memory_estimate << std::endl; + std::cout << "Memory max batch size: " << memory_max_batch_size << std::endl; + std::cout << "Free memory: " << free_mem << std::endl; + std::cout << "Total memory: " << total_mem << std::endl; + std::cout << "--------------------------------" << std::endl; +#endif memory_max_batch_size--; } - const size_t min_estimate = - batch_pdlp_memory_estimator(problem, memory_max_batch_size, max_batch_size); - cuopt_expects(min_estimate <= free_mem, - error_type_t::OutOfMemoryError, - "Insufficient GPU memory for batch PDLP (min batch size still too large)"); + const double min_estimate = batch_pdlp_memory_estimator(problem, memory_max_batch_size); + if (min_estimate > free_mem) { + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, + stream); + } } - int optimal_batch_size = use_optimal_batch_size - ? detail::optimal_batch_size_handler(problem, memory_max_batch_size) - : max_batch_size; + size_t optimal_batch_size = use_optimal_batch_size + ? 
detail::optimal_batch_size_handler(problem, memory_max_batch_size) + : max_batch_size; + if (settings.sub_batch_size > 0) { optimal_batch_size = settings.sub_batch_size; } cuopt_assert(optimal_batch_size != 0 && optimal_batch_size <= max_batch_size, "Optimal batch size should be between 1 and max batch size"); - using f_t2 = typename type_2::type; - - // If need warm start, solve the LP alone - if (primal_dual_init || primal_weight_init) { - pdlp_solver_settings_t warm_start_settings = settings; - warm_start_settings.new_bounds.clear(); - warm_start_settings.method = cuopt::linear_programming::method_t::PDLP; - warm_start_settings.presolver = cuopt::linear_programming::presolver_t::None; - warm_start_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; - warm_start_settings.detect_infeasibility = false; - warm_start_settings.iteration_limit = iteration_limit; - warm_start_settings.inside_mip = true; - optimization_problem_solution_t original_solution = - solve_lp(problem, warm_start_settings); - if (primal_dual_init) { - initial_primal = rmm::device_uvector(original_solution.get_primal_solution(), - original_solution.get_primal_solution().stream()); - initial_dual = rmm::device_uvector(original_solution.get_dual_solution(), - original_solution.get_dual_solution().stream()); - initial_step_size = original_solution.get_pdlp_warm_start_data().initial_step_size_; + + const bool warm_start_from_settings = settings.has_initial_primal_solution() || + settings.has_initial_dual_solution() || + settings.get_initial_step_size().has_value() || + settings.get_initial_primal_weight().has_value() || + settings.get_initial_pdlp_iteration().has_value(); + + if (warm_start_from_settings) { +#ifdef BATCH_VERBOSE_MODE + std::cout << "Using warm start from settings" << std::endl; +#endif + if (settings.has_initial_primal_solution() && pdlp_primal_dual_init) { + initial_primal = rmm::device_uvector(settings.get_initial_primal_solution(), + 
settings.get_initial_primal_solution().stream()); + } + if (settings.has_initial_dual_solution() && pdlp_primal_dual_init) { + initial_dual = rmm::device_uvector(settings.get_initial_dual_solution(), + settings.get_initial_dual_solution().stream()); + } + if (settings.get_initial_step_size().has_value() && pdlp_primal_dual_init) { + initial_step_size = *settings.get_initial_step_size(); + } + if (settings.get_initial_primal_weight().has_value() && primal_weight_init) { + initial_primal_weight = *settings.get_initial_primal_weight(); } - if (primal_weight_init) { - initial_primal_weight = original_solution.get_pdlp_warm_start_data().initial_primal_weight_; + if (settings.get_initial_pdlp_iteration().has_value() && use_initial_pdlp_iterations) { + initial_pdlp_iteration = *settings.get_initial_pdlp_iteration(); } } - rmm::device_uvector full_primal_solution(problem.get_n_variables() * max_batch_size, stream); - rmm::device_uvector full_dual_solution(problem.get_n_constraints() * max_batch_size, stream); - rmm::device_uvector full_reduced_cost(problem.get_n_variables() * max_batch_size, stream); + // Only used in tests + const bool collect_solutions = settings.generate_batch_primal_dual_solution; + + rmm::device_uvector full_primal_solution( + (collect_solutions) ? problem.get_n_variables() * max_batch_size : 0, stream); + rmm::device_uvector full_dual_solution( + (collect_solutions) ? problem.get_n_constraints() * max_batch_size : 0, stream); + rmm::device_uvector full_reduced_cost( + (collect_solutions) ? 
problem.get_n_variables() * max_batch_size : 0, stream); std::vector< typename optimization_problem_solution_t::additional_termination_information_t> full_info; std::vector full_status; - pdlp_solver_settings_t batch_settings = settings; - const auto original_new_bounds = batch_settings.new_bounds; - batch_settings.method = cuopt::linear_programming::method_t::PDLP; - batch_settings.presolver = presolver_t::None; - batch_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; - batch_settings.detect_infeasibility = false; - batch_settings.iteration_limit = iteration_limit; - batch_settings.inside_mip = true; - if (primal_dual_init) { + pdlp_solver_settings_t batch_settings = settings; + const auto original_new_bounds = batch_settings.new_bounds; + batch_settings.method = cuopt::linear_programming::method_t::PDLP; + batch_settings.presolver = presolver_t::None; + batch_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + batch_settings.detect_infeasibility = false; + batch_settings.iteration_limit = batch_iteration_limit; + batch_settings.inside_mip = true; + batch_settings.tolerances.absolute_dual_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_dual_tolerance = pdlp_tolerance; + batch_settings.tolerances.absolute_primal_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_primal_tolerance = pdlp_tolerance; + batch_settings.tolerances.absolute_gap_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_gap_tolerance = pdlp_tolerance; + if (initial_primal.size() > 0) { batch_settings.set_initial_primal_solution( initial_primal.data(), initial_primal.size(), initial_primal.stream()); + } + if (initial_dual.size() > 0) { batch_settings.set_initial_dual_solution( initial_dual.data(), initial_dual.size(), initial_dual.stream()); - batch_settings.set_initial_step_size(initial_step_size); } - if (primal_weight_init) { batch_settings.set_initial_primal_weight(initial_primal_weight); } + if (!std::isnan(initial_step_size)) { 
batch_settings.set_initial_step_size(initial_step_size); } + if (initial_pdlp_iteration != -1) { + batch_settings.set_initial_pdlp_iteration(initial_pdlp_iteration); + } + if (!std::isnan(initial_primal_weight)) { + batch_settings.set_initial_primal_weight(initial_primal_weight); + } - for (int i = 0; i < max_batch_size; i += optimal_batch_size) { - const int current_batch_size = std::min(optimal_batch_size, max_batch_size - i); + for (size_t i = 0; i < max_batch_size; i += optimal_batch_size) { + const size_t current_batch_size = std::min(optimal_batch_size, max_batch_size - i); // Only take the new bounds from [i, i + current_batch_size) batch_settings.new_bounds = std::vector>( original_new_bounds.begin() + i, original_new_bounds.begin() + i + current_batch_size); - auto sol = solve_lp(problem, batch_settings); + if (!settings.shared_sb_solved.empty()) { + batch_settings.shared_sb_solved = settings.shared_sb_solved.subspan(i, current_batch_size); + } - // Copy results - raft::copy(full_primal_solution.data() + i * problem.get_n_variables(), - sol.get_primal_solution().data(), - problem.get_n_variables() * current_batch_size, - stream); - raft::copy(full_dual_solution.data() + i * problem.get_n_constraints(), - sol.get_dual_solution().data(), - problem.get_n_constraints() * current_batch_size, - stream); - raft::copy(full_reduced_cost.data() + i * problem.get_n_variables(), - sol.get_reduced_cost().data(), - problem.get_n_variables() * current_batch_size, - stream); + auto sol = solve_lp(problem, batch_settings); + if (collect_solutions) { + raft::copy(full_primal_solution.data() + i * problem.get_n_variables(), + sol.get_primal_solution().data(), + sol.get_primal_solution().size(), + stream); + raft::copy(full_dual_solution.data() + i * problem.get_n_constraints(), + sol.get_dual_solution().data(), + sol.get_dual_solution().size(), + stream); + raft::copy(full_reduced_cost.data() + i * problem.get_n_variables(), + sol.get_reduced_cost().data(), + 
sol.get_reduced_cost().size(), + stream); + } auto info = sol.get_additional_termination_informations(); full_info.insert(full_info.end(), info.begin(), info.end()); diff --git a/cpp/src/pdlp/solver_settings.cu b/cpp/src/pdlp/solver_settings.cu index 7acfc7481..ac2564bb1 100644 --- a/cpp/src/pdlp/solver_settings.cu +++ b/cpp/src/pdlp/solver_settings.cu @@ -61,12 +61,30 @@ void pdlp_solver_settings_t::set_initial_dual_solution(const f_t* init template void pdlp_solver_settings_t::set_initial_step_size(f_t initial_step_size) { + cuopt_expects(initial_step_size > f_t(0), + error_type_t::ValidationError, + "Initial step size must be greater than 0"); + cuopt_expects(!std::isinf(initial_step_size), + error_type_t::ValidationError, + "Initial step size must be finite"); + cuopt_expects(!std::isnan(initial_step_size), + error_type_t::ValidationError, + "Initial step size must be a number"); initial_step_size_ = std::make_optional(initial_step_size); } template void pdlp_solver_settings_t::set_initial_primal_weight(f_t initial_primal_weight) { + cuopt_expects(initial_primal_weight > f_t(0), + error_type_t::ValidationError, + "Initial primal weight must be greater than 0"); + cuopt_expects(!std::isinf(initial_primal_weight), + error_type_t::ValidationError, + "Initial primal weight must be finite"); + cuopt_expects(!std::isnan(initial_primal_weight), + error_type_t::ValidationError, + "Initial primal weight must be a number"); initial_primal_weight_ = std::make_optional(initial_primal_weight); } @@ -348,6 +366,21 @@ std::optional pdlp_solver_settings_t::get_initial_primal_weight() return initial_primal_weight_; } +template +void pdlp_solver_settings_t::set_initial_pdlp_iteration(i_t initial_pdlp_iteration) +{ + cuopt_expects(initial_pdlp_iteration >= 0, + error_type_t::ValidationError, + "Initial pdlp iteration must be greater than or equal to 0"); + initial_pdlp_iteration_ = std::make_optional(initial_pdlp_iteration); +} + +template +std::optional 
pdlp_solver_settings_t::get_initial_pdlp_iteration() const +{ + return initial_pdlp_iteration_; +} + template const pdlp_warm_start_data_t& pdlp_solver_settings_t::get_pdlp_warm_start_data() const noexcept diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.cu b/cpp/src/pdlp/termination_strategy/termination_strategy.cu index 7179df6a4..167cf33e7 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.cu +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.cu @@ -124,6 +124,14 @@ pdlp_termination_status_t pdlp_termination_strategy_t::get_termination return (pdlp_termination_status_t)termination_status_[id]; } +template +void pdlp_termination_strategy_t::set_termination_status(i_t id, + pdlp_termination_status_t status) +{ + cuopt_assert(id < termination_status_.size(), "id too big for batch size"); + termination_status_[id] = (i_t)status; +} + template std::vector pdlp_termination_strategy_t::get_terminations_status() @@ -389,7 +397,8 @@ __host__ __device__ bool pdlp_termination_strategy_t::is_done( { return termination_status == pdlp_termination_status_t::Optimal || termination_status == pdlp_termination_status_t::PrimalInfeasible || - termination_status == pdlp_termination_status_t::DualInfeasible; + termination_status == pdlp_termination_status_t::DualInfeasible || + termination_status == pdlp_termination_status_t::ConcurrentLimit; } template diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.hpp b/cpp/src/pdlp/termination_strategy/termination_strategy.hpp index 6fe118c48..efb7a41d7 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.hpp +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.hpp @@ -140,6 +140,7 @@ class pdlp_termination_strategy_t { f_t get_relative_primal_tolerance_factor() const; pdlp_termination_status_t get_termination_status(i_t id) const; + void set_termination_status(i_t id, pdlp_termination_status_t status); std::vector get_terminations_status(); bool 
all_optimal_status() const; bool all_done() const; diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index b143a206d..1e0f46c8d 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -9,6 +9,8 @@ #include +#include + #include #include diff --git a/cpp/src/pdlp/utilities/ping_pong_graph.cu b/cpp/src/pdlp/utilities/ping_pong_graph.cu index 4ec5bff8c..0df3861b5 100644 --- a/cpp/src/pdlp/utilities/ping_pong_graph.cu +++ b/cpp/src/pdlp/utilities/ping_pong_graph.cu @@ -8,6 +8,7 @@ #include #include +#include #include diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index d5a8d6900..5c6edad27 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -5,6 +5,7 @@ */ /* clang-format on */ +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include #include namespace cuopt::linear_programming::test { @@ -1681,6 +1683,7 @@ TEST(pdlp_class, strong_branching_test) solver_settings.method = cuopt::linear_programming::method_t::PDLP; solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.generate_batch_primal_dual_solution = true; const int n_fractional = fractional.size(); const int batch_size = n_fractional * 2; @@ -2043,6 +2046,301 @@ TEST(pdlp_class, precision_single_pslp_presolve) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +// --------------------------------------------------------------------------- +// Cooperative strong branching tests +// --------------------------------------------------------------------------- + +TEST(pdlp_class, shared_sb_context_unit) +{ + using namespace cuopt::linear_programming::dual_simplex; + + constexpr int N = 10; + shared_strong_branching_context_t ctx(N); + shared_strong_branching_context_view_t view(ctx.solved); + + 
EXPECT_TRUE(view.is_valid()); + + shared_strong_branching_context_view_t empty_view; + EXPECT_FALSE(empty_view.is_valid()); + + for (int i = 0; i < N; ++i) { + EXPECT_FALSE(view.is_solved(i)); + } + + view.mark_solved(0); + view.mark_solved(3); + view.mark_solved(7); + + EXPECT_TRUE(view.is_solved(0)); + EXPECT_FALSE(view.is_solved(1)); + EXPECT_FALSE(view.is_solved(2)); + EXPECT_TRUE(view.is_solved(3)); + EXPECT_FALSE(view.is_solved(4)); + EXPECT_FALSE(view.is_solved(5)); + EXPECT_FALSE(view.is_solved(6)); + EXPECT_TRUE(view.is_solved(7)); + EXPECT_FALSE(view.is_solved(8)); + EXPECT_FALSE(view.is_solved(9)); + + // subview(2, 5) covers global indices [2..6] + auto sv = view.subview(2, 5); + EXPECT_TRUE(sv.is_valid()); + EXPECT_FALSE(sv.is_solved(0)); // global 2 + EXPECT_TRUE(sv.is_solved(1)); // global 3 + EXPECT_FALSE(sv.is_solved(2)); // global 4 + EXPECT_FALSE(sv.is_solved(3)); // global 5 + EXPECT_FALSE(sv.is_solved(4)); // global 6 + + // Mark through subview: local 4 -> global 6 + sv.mark_solved(4); + EXPECT_TRUE(view.is_solved(6)); + EXPECT_TRUE(sv.is_solved(4)); +} + +TEST(pdlp_class, shared_sb_view_batch_pre_solved) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; // 6 + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + + // Build new_bounds: down branches [0..2], up branches [3..5] + for (int i = 0; i < n_fractional; ++i) + 
solver_settings.new_bounds.push_back({fractional[i], + op_problem.get_variable_lower_bounds()[fractional[i]], + std::floor(root_soln_x[i])}); + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + std::ceil(root_soln_x[i]), + op_problem.get_variable_upper_bounds()[fractional[i]]}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + // Pre-mark entries 1 and 4 as solved (simulating DS) + sb_view.mark_solved(1); + sb_view.mark_solved(4); + + solver_settings.shared_sb_solved = sb_view.solved; + + auto solution = solve_lp(&handle_, op_problem, solver_settings); + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + // Pre-solved entries should have ConcurrentLimit + EXPECT_EQ(solution.get_termination_status(1), pdlp_termination_status_t::ConcurrentLimit); + EXPECT_EQ(solution.get_termination_status(4), pdlp_termination_status_t::ConcurrentLimit); + + // Others should be Optimal + EXPECT_EQ(solution.get_termination_status(0), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(2), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(3), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(5), pdlp_termination_status_t::Optimal); + + // All entries should now be marked solved in the shared context + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } +} + +TEST(pdlp_class, shared_sb_view_subbatch) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int 
n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.sub_batch_size = 2; + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + // Pre-mark one entry in each sub-batch of size 2: indices 1, 4 + sb_view.mark_solved(1); + sb_view.mark_solved(4); + + solver_settings.shared_sb_solved = sb_view.solved; + + auto solution = batch_pdlp_solve(&handle_, op_problem, fractional, root_soln_x, solver_settings); + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + // Pre-solved entries should have ConcurrentLimit + EXPECT_EQ(solution.get_termination_status(1), pdlp_termination_status_t::ConcurrentLimit); + EXPECT_EQ(solution.get_termination_status(4), pdlp_termination_status_t::ConcurrentLimit); + + // Others should be Optimal + for (int i = 0; i < batch_size; ++i) { + if (i == 1 || i == 4) continue; + EXPECT_EQ(solution.get_termination_status(i), pdlp_termination_status_t::Optimal) + << "Entry " << i << " should be Optimal"; + } + + // All should be marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } +} + +TEST(pdlp_class, shared_sb_view_concurrent_mark) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = 
n_fractional * 2; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.iteration_limit = 1000000; + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[0], -5, -5}); + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + std::ceil(root_soln_x[i]), + op_problem.get_variable_upper_bounds()[fractional[i]]}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + solver_settings.shared_sb_solved = sb_view.solved; + + optimization_problem_solution_t* result_ptr = nullptr; + + auto pdlp_thread = std::thread([&]() { + auto sol = new optimization_problem_solution_t( + solve_lp(&handle_, op_problem, solver_settings)); + result_ptr = sol; + }); + + // Wait a bit then mark entries 0, 2, 4 as solved (simulating DS) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < n_fractional; ++i) + sb_view.mark_solved(i); + + pdlp_thread.join(); + + ASSERT_NE(result_ptr, nullptr); + auto& solution = *result_ptr; + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + for (int i = 0; i < batch_size; ++i) { + auto status = solution.get_termination_status(i); + // Each entry should be either Optimal (PDLP solved it first) or ConcurrentLimit (DS marked it) + EXPECT_TRUE(status == pdlp_termination_status_t::Optimal || + status == pdlp_termination_status_t::ConcurrentLimit) + << "Entry " << i << " has unexpected status " + << cuopt::linear_programming::optimization_problem_solution_t:: + get_termination_status_string(status); + } + + // All entries should end up marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << 
"Entry " << i << " should be solved"; + } + + delete result_ptr; +} + +TEST(pdlp_class, shared_sb_view_all_infeasible) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.iteration_limit = 1000000; + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[0], -5, -5}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + solver_settings.shared_sb_solved = sb_view.solved; + + optimization_problem_solution_t* result_ptr = nullptr; + + auto pdlp_thread = std::thread([&]() { + auto sol = new optimization_problem_solution_t( + solve_lp(&handle_, op_problem, solver_settings)); + result_ptr = sol; + }); + + // Wait a bit then mark entries 0, 2, 4 as solved (simulating DS) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < n_fractional; ++i) + sb_view.mark_solved(i); + + pdlp_thread.join(); + + ASSERT_NE(result_ptr, nullptr); + auto& solution = *result_ptr; + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + for (int i = 0; i < batch_size; ++i) { + auto status = solution.get_termination_status(i); + // Each entry should be either Optimal (PDLP solved it first) or ConcurrentLimit (DS marked it) + EXPECT_TRUE(status == 
pdlp_termination_status_t::ConcurrentLimit) + << "Entry " << i << " has unexpected status " + << cuopt::linear_programming::optimization_problem_solution_t:: + get_termination_status_string(status); + } + + // All entries should end up marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } + + delete result_ptr; +} + } // namespace cuopt::linear_programming::test CUOPT_TEST_PROGRAM_MAIN() diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 59ea62089..32cf860f2 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -452,8 +452,15 @@ class SolverConfig(BaseModel): ) mip_batch_pdlp_strong_branching: Optional[int] = Field( default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", + description="Strong branching mode: 0 = Dual Simplex only, " + "1 = cooperative work-stealing (DS + batch PDLP), " + "2 = batch PDLP only.", + ) + mip_batch_pdlp_reliability_branching: Optional[int] = Field( + default=0, + description="Reliability branching mode: 0 = Dual Simplex only, " + "1 = cooperative work-stealing (DS + batch PDLP), " + "2 = batch PDLP only.", ) num_cpu_threads: Optional[int] = Field( default=None,