diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index e1097455f..01867a4c4 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -20,63 +20,64 @@ #define CUOPT_INSTANTIATE_INT64 0 /* @brief LP/MIP parameter string constants */ -#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" -#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" -#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" -#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" -#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" -#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" -#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" -#define CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" -#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" -#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" -#define CUOPT_ITERATION_LIMIT "iteration_limit" -#define CUOPT_TIME_LIMIT "time_limit" -#define CUOPT_WORK_LIMIT "work_limit" -#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" -#define CUOPT_METHOD "method" -#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" -#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" -#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" -#define CUOPT_LOG_FILE "log_file" -#define CUOPT_LOG_TO_CONSOLE "log_to_console" -#define CUOPT_CROSSOVER "crossover" -#define CUOPT_FOLDING "folding" -#define CUOPT_AUGMENTED "augmented" -#define CUOPT_DUALIZE "dualize" -#define CUOPT_ORDERING "ordering" -#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" -#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" -#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" -#define CUOPT_PRESOLVE "presolve" -#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" -#define CUOPT_MIP_DETERMINISM_MODE 
"mip_determinism_mode" -#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" -#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" -#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" -#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" -#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" -#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" -#define CUOPT_MIP_SCALING "mip_scaling" -#define CUOPT_MIP_PRESOLVE "mip_presolve" -#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" -#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" -#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" -#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" -#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" -#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" -#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" -#define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" -#define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" -#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY "mip_cut_min_orthogonality" -#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching" -#define CUOPT_SOLUTION_FILE "solution_file" -#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" -#define CUOPT_NUM_GPUS "num_gpus" -#define CUOPT_USER_PROBLEM_FILE "user_problem_file" -#define CUOPT_PRESOLVE_FILE "presolve_file" -#define CUOPT_RANDOM_SEED "random_seed" -#define CUOPT_PDLP_PRECISION "pdlp_precision" +#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" +#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" +#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" +#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" +#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" +#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" +#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" +#define 
CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" +#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" +#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" +#define CUOPT_ITERATION_LIMIT "iteration_limit" +#define CUOPT_TIME_LIMIT "time_limit" +#define CUOPT_WORK_LIMIT "work_limit" +#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" +#define CUOPT_METHOD "method" +#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" +#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" +#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" +#define CUOPT_LOG_FILE "log_file" +#define CUOPT_LOG_TO_CONSOLE "log_to_console" +#define CUOPT_CROSSOVER "crossover" +#define CUOPT_FOLDING "folding" +#define CUOPT_AUGMENTED "augmented" +#define CUOPT_DUALIZE "dualize" +#define CUOPT_ORDERING "ordering" +#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" +#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" +#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" +#define CUOPT_PRESOLVE "presolve" +#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" +#define CUOPT_MIP_DETERMINISM_MODE "mip_determinism_mode" +#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" +#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" +#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" +#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" +#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" +#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" +#define CUOPT_MIP_SCALING "mip_scaling" +#define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" +#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" +#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" +#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" +#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" +#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" 
+#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" +#define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" +#define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" +#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY "mip_cut_min_orthogonality" +#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching" +#define CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING "mip_batch_pdlp_reliability_branching" +#define CUOPT_SOLUTION_FILE "solution_file" +#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" +#define CUOPT_NUM_GPUS "num_gpus" +#define CUOPT_USER_PROBLEM_FILE "user_problem_file" +#define CUOPT_PRESOLVE_FILE "presolve_file" +#define CUOPT_RANDOM_SEED "random_seed" +#define CUOPT_PDLP_PRECISION "pdlp_precision" /* @brief MIP determinism mode constants */ #define CUOPT_MODE_OPPORTUNISTIC 0 diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 8f4a6c1d7..3da9ea8f1 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -94,13 +94,16 @@ class mip_solver_settings_t { i_t mixed_integer_gomory_cuts = -1; i_t knapsack_cuts = -1; i_t clique_cuts = -1; - i_t strong_chvatal_gomory_cuts = -1; - i_t reduced_cost_strengthening = -1; - f_t cut_change_threshold = -1.0; - f_t cut_min_orthogonality = 0.5; - i_t mip_batch_pdlp_strong_branching = 0; - i_t num_gpus = 1; - bool log_to_console = true; + i_t strong_chvatal_gomory_cuts = -1; + i_t reduced_cost_strengthening = -1; + f_t cut_change_threshold = -1.0; + f_t cut_min_orthogonality = 0.5; + i_t mip_batch_pdlp_strong_branching{ + 0}; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t mip_batch_pdlp_reliability_branching{ + 0}; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t num_gpus = 1; + bool log_to_console = true; std::string log_file; std::string 
sol_file; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 86ce4d8db..6abefb2d5 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -17,6 +17,7 @@ #include #include +#include namespace cuopt::linear_programming { @@ -147,6 +148,12 @@ class pdlp_solver_settings_t { * @param[in] initial_primal_weight Initial primal weight. */ void set_initial_primal_weight(f_t initial_primal_weight); + /** + * @brief Set an initial pdlp iteration. + * + * @param[in] initial_pdlp_iteration Initial pdlp iteration. + */ + void set_initial_pdlp_iteration(i_t initial_pdlp_iteration); /** * @brief Set the pdlp warm start data. This allows to restart PDLP with a @@ -213,6 +220,8 @@ class pdlp_solver_settings_t { std::optional get_initial_step_size() const; // TODO batch mode: tmp std::optional get_initial_primal_weight() const; + // TODO batch mode: tmp + std::optional get_initial_pdlp_iteration() const; const rmm::device_uvector& get_initial_primal_solution() const; const rmm::device_uvector& get_initial_dual_solution() const; @@ -265,6 +274,8 @@ class pdlp_solver_settings_t { bool inside_mip{false}; // For concurrent termination std::atomic* concurrent_halt{nullptr}; + // Shared strong branching solved flags for cooperative DS + PDLP + std::span> shared_sb_solved; static constexpr f_t minimal_absolute_tolerance = 1.0e-12; pdlp_hyper_params::pdlp_hyper_params_t hyper_params; // Holds the information of new variable lower and upper bounds for each climber in the format: @@ -273,6 +284,12 @@ class pdlp_solver_settings_t { // concurrently i.e. 
if new_bounds.size() == 2, then 2 versions of the problem with updated bounds // will be solved concurrently std::vector> new_bounds; + // By default to save memory and speed we don't store and copy each climber's primal and dual + // solutions We only retrieve termination statistics and the objective values + bool generate_batch_primal_dual_solution{false}; + // Used to force batch PDLP to solve a subbatch of the problems at a time + // The 0 default value will make the solver use its heuristic to determine the subbatch size + i_t sub_batch_size{0}; private: /** Initial primal solution */ @@ -285,6 +302,9 @@ class pdlp_solver_settings_t { /** Initial primal weight */ // TODO batch mode: tmp std::optional initial_primal_weight_; + /** Initial pdlp iteration */ + // TODO batch mode: tmp + std::optional initial_pdlp_iteration_; /** GPU-backed warm start data (device_uvector), used by C++ API and local GPU solves */ pdlp_warm_start_data_t pdlp_warm_start_data_; /** Warm start data as spans over external memory, used by Cython/Python interface */ diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ba2b63983..b940134fb 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -846,7 +846,9 @@ branch_variable_t branch_and_bound_t::variable_selection( exploration_stats_, upper_bound_, worker_pool_.num_idle_workers(), - log); + log, + new_slacks_, + original_lp_); } else { branch_var = pc_.variable_selection(fractional, solution, log); } @@ -2448,12 +2450,14 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut pc_.resize(original_lp_.num_cols); { raft::common::nvtx::range scope_sb("BB::strong_branching"); - strong_branching(original_problem_, - original_lp_, + strong_branching(original_lp_, settings_, exploration_stats_.start_time, + new_slacks_, var_types_, root_relax_soln_.x, + root_relax_soln_.y, + root_relax_soln_.z, fractional, 
root_objective_, root_vstatus_, diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index ee7e2f780..4625b4343 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -6,14 +6,19 @@ /* clang-format on */ #include +#include #include #include #include #include +#include + #include +#include + #include #include @@ -22,6 +27,12 @@ namespace cuopt::linear_programming::dual_simplex { namespace { +static bool is_dual_simplex_done(dual::status_t status) +{ + return status == dual::status_t::DUAL_UNBOUNDED || status == dual::status_t::OPTIMAL || + status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF; +} + template void strong_branch_helper(i_t start, i_t end, @@ -34,11 +45,19 @@ void strong_branch_helper(i_t start, const std::vector& root_soln, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc) + pseudo_costs_t& pc, + std::vector& ds_obj_down, + std::vector& ds_obj_up, + std::vector& ds_status_down, + std::vector& ds_status_up, + std::atomic* concurrent_halt, + shared_strong_branching_context_view_t& sb_view) { raft::common::nvtx::range scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; + assert(concurrent_halt != nullptr && "Concurrent halt pointer cannot be nullptr"); + constexpr bool verbose = false; f_t last_log = tic(); i_t thread_id = omp_get_thread_num(); @@ -47,6 +66,19 @@ void strong_branch_helper(i_t start, for (i_t branch = 0; branch < 2; branch++) { // Do the down branch + const i_t shared_idx = (branch == 0) ? k : k + static_cast(fractional.size()); + // Batch PDLP has already solved this subproblem, skip it + if (sb_view.is_valid() && sb_view.is_solved(shared_idx)) { + settings.log.printf( + "[COOP SB] DS thread %d skipping variable %d branch %s (shared_idx %d): already solved " + "by PDLP\n", + thread_id, + j, + branch == 0 ? 
"down" : "up", + shared_idx); + continue; + } + if (branch == 0) { child_problem.lower[j] = original_lp.lower[j]; child_problem.upper[j] = std::floor(root_soln[j]); @@ -59,7 +91,7 @@ void strong_branch_helper(i_t start, child_settings.set_log(false); f_t lp_start_time = tic(); f_t elapsed_time = toc(start_time); - if (elapsed_time > settings.time_limit) { break; } + if (elapsed_time > settings.time_limit || *concurrent_halt == 1) { break; } child_settings.time_limit = std::max(0.0, settings.time_limit - elapsed_time); child_settings.iteration_limit = 200; lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); @@ -80,7 +112,8 @@ void strong_branch_helper(i_t start, if (status == dual::status_t::DUAL_UNBOUNDED) { // LP was infeasible obj = std::numeric_limits::infinity(); - } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT) { + } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || + status == dual::status_t::CUTOFF) { obj = compute_objective(child_problem, solution.x); } else { settings.log.debug("Thread id %2d remaining %d variable %d branch %d status %d\n", @@ -93,6 +126,8 @@ void strong_branch_helper(i_t start, if (branch == 0) { pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + ds_obj_down[k] = std::max(obj - root_obj, 0.0); + ds_status_down[k] = status; if (verbose) { settings.log.printf("Thread id %2d remaining %d variable %d branch %d obj %e time %.2f\n", thread_id, @@ -104,6 +139,8 @@ void strong_branch_helper(i_t start, } } else { pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + ds_obj_up[k] = std::max(obj - root_obj, 0.0); + ds_status_up[k] = status; if (verbose) { settings.log.printf( "Thread id %2d remaining %d variable %d branch %d obj %e change down %e change up %e " @@ -113,14 +150,29 @@ void strong_branch_helper(i_t start, j, branch, obj, - pc.strong_branch_down[k], - pc.strong_branch_up[k], + ds_obj_down[k], + ds_obj_up[k], toc(start_time)); 
} } - if (toc(start_time) > settings.time_limit) { break; } + // Mark the subproblem as solved so that batch PDLP removes it from the batch + if (sb_view.is_valid()) { + // We could not mark as solved nodes hitting iteration limit in DS + if ((branch == 0 && is_dual_simplex_done(ds_status_down[k])) || + (branch == 1 && is_dual_simplex_done(ds_status_up[k]))) { + sb_view.mark_solved(shared_idx); + settings.log.printf( + "[COOP SB] DS thread %d solved variable %d branch %s (shared_idx %d), marking in " + "shared context\n", + thread_id, + j, + branch == 0 ? "down" : "up", + shared_idx); + } + } + if (toc(start_time) > settings.time_limit || *concurrent_halt == 1) { break; } } - if (toc(start_time) > settings.time_limit) { break; } + if (toc(start_time) > settings.time_limit || *concurrent_halt == 1) { break; } const i_t completed = pc.num_strong_branches_completed++; @@ -135,28 +187,28 @@ void strong_branch_helper(i_t start, child_problem.lower[j] = original_lp.lower[j]; child_problem.upper[j] = original_lp.upper[j]; - if (toc(start_time) > settings.time_limit) { break; } + if (toc(start_time) > settings.time_limit || *concurrent_halt == 1) { break; } } } template -f_t trial_branching(const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings, - const std::vector& var_types, - const std::vector& vstatus, - const std::vector& edge_norms, - const basis_update_mpf_t& basis_factors, - const std::vector& basic_list, - const std::vector& nonbasic_list, - i_t branch_var, - f_t branch_var_lower, - f_t branch_var_upper, - f_t upper_bound, - i_t bnb_lp_iter_per_node, - f_t start_time, - i_t upper_max_lp_iter, - i_t lower_max_lp_iter, - omp_atomic_t& total_lp_iter) +std::pair trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& 
nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + f_t start_time, + i_t upper_max_lp_iter, + i_t lower_max_lp_iter, + omp_atomic_t& total_lp_iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -207,12 +259,12 @@ f_t trial_branching(const lp_problem_t& original_lp, if (status == dual::status_t::DUAL_UNBOUNDED) { // LP was infeasible - return std::numeric_limits::infinity(); + return {std::numeric_limits::infinity(), dual::status_t::DUAL_UNBOUNDED}; } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) { - return compute_objective(child_problem, solution.x); + return {compute_objective(child_problem, solution.x), status}; } else { - return std::numeric_limits::quiet_NaN(); + return {std::numeric_limits::quiet_NaN(), dual::status_t::NUMERICAL}; } } @@ -220,15 +272,44 @@ f_t trial_branching(const lp_problem_t& original_lp, template static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data_model( - const dual_simplex::user_problem_t& user_problem) + const dual_simplex::lp_problem_t& lp, + const std::vector& new_slacks, + const std::vector& root_soln, + std::vector& original_root_soln_x) { + // Branch and bound has a problem of the form: + // minimize c^T x + // subject to A*x + Es = b + // l <= x <= u + // E_{jj} = sigma_j, where sigma_j is +1 or -1 + + // We need to convert this into a problem that is better for PDLP + // to solve. PDLP perfers inequality constraints. 
Thus, we want + // to convert the above into the problem: + // minimize c^T x + // subject to lb <= A*x <= ub + // l <= x <= u + cuopt::mps_parser::mps_data_model_t mps_model; - int m = user_problem.num_rows; - int n = user_problem.num_cols; + int m = lp.num_rows; + int n = lp.num_cols - new_slacks.size(); + original_root_soln_x.resize(n); + + // Remove slacks from A + dual_simplex::csc_matrix_t A_no_slacks = lp.A; + std::vector cols_to_remove(lp.A.n, 0); + for (i_t j : new_slacks) { + cols_to_remove[j] = 1; + } + A_no_slacks.remove_columns(cols_to_remove); + + for (i_t j = 0; j < n; j++) { + original_root_soln_x[j] = root_soln[j]; + } // Convert CSC to CSR using built-in method dual_simplex::csr_matrix_t csr_A(m, n, 0); - user_problem.A.to_compressed_row(csr_A); + A_no_slacks.to_compressed_row(csr_A); int nz = csr_A.row_start[m]; @@ -237,72 +318,418 @@ static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data csr_A.x.data(), nz, csr_A.j.data(), nz, csr_A.row_start.data(), m + 1); // Set objective coefficients - mps_model.set_objective_coefficients(user_problem.objective.data(), n); + mps_model.set_objective_coefficients(lp.objective.data(), n); // Set objective scaling and offset - mps_model.set_objective_scaling_factor(user_problem.obj_scale); - mps_model.set_objective_offset(user_problem.obj_constant); + mps_model.set_objective_scaling_factor(lp.obj_scale); + mps_model.set_objective_offset(lp.obj_constant); // Set variable bounds - mps_model.set_variable_lower_bounds(user_problem.lower.data(), n); - mps_model.set_variable_upper_bounds(user_problem.upper.data(), n); + mps_model.set_variable_lower_bounds(lp.lower.data(), n); + mps_model.set_variable_upper_bounds(lp.upper.data(), n); // Convert row sense and RHS to constraint bounds std::vector constraint_lower(m); std::vector constraint_upper(m); - for (i_t i = 0; i < m; ++i) { - if (user_problem.row_sense[i] == 'L') { - constraint_lower[i] = -std::numeric_limits::infinity(); - constraint_upper[i] = 
user_problem.rhs[i]; - } else if (user_problem.row_sense[i] == 'G') { - constraint_lower[i] = user_problem.rhs[i]; - constraint_upper[i] = std::numeric_limits::infinity(); - } else { - constraint_lower[i] = user_problem.rhs[i]; - constraint_upper[i] = user_problem.rhs[i]; - } + std::vector slack_map(m, -1); + for (i_t j : new_slacks) { + const i_t col_start = lp.A.col_start[j]; + const i_t i = lp.A.i[col_start]; + slack_map[i] = j; } - for (i_t k = 0; k < user_problem.num_range_rows; ++k) { - i_t i = user_problem.range_rows[k]; - f_t r = user_problem.range_value[k]; - f_t b = user_problem.rhs[i]; - f_t h = -std::numeric_limits::infinity(); - f_t u = std::numeric_limits::infinity(); - if (user_problem.row_sense[i] == 'L') { - h = b - std::abs(r); - u = b; - } else if (user_problem.row_sense[i] == 'G') { - h = b; - u = b + std::abs(r); - } else if (user_problem.row_sense[i] == 'E') { - if (r > 0) { - h = b; - u = b + std::abs(r); - } else { - h = b - std::abs(r); - u = b; - } + for (i_t i = 0; i < m; ++i) { + // Each row is of the form a_i^T x + sigma * s_i = b_i + // with sigma = +1 or -1 + // and l_i <= s_i <= u_i + // We have that a_i^T x - b_i = -sigma * s_i + // If sigma = -1, then we have + // a_i^T x - b_i = s_i + // l_i <= a_i^T x - b_i <= u_i + // l_i + b_i <= a_i^T x <= u_i + b_i + // + // If sigma = +1, then we have + // a_i^T x - b_i = -s_i + // -a_i^T x + b_i = s_i + // l_i <= -a_i^T x + b_i <= u_i + // l_i - b_i <= -a_i^T x <= u_i - b_i + // -u_i + b_i <= a_i^T x <= -l_i + b_i + + const i_t slack = slack_map[i]; + assert(slack != -1); + const i_t col_start = lp.A.col_start[slack]; + const f_t sigma = lp.A.x[col_start]; + const f_t slack_lower = lp.lower[slack]; + const f_t slack_upper = lp.upper[slack]; + + if (sigma == -1) { + constraint_lower[i] = slack_lower + lp.rhs[i]; + constraint_upper[i] = slack_upper + lp.rhs[i]; + } else if (sigma == 1) { + constraint_lower[i] = -slack_upper + lp.rhs[i]; + constraint_upper[i] = -slack_lower + lp.rhs[i]; + } 
else { + assert(sigma == 1.0 || sigma == -1.0); } - constraint_lower[i] = h; - constraint_upper[i] = u; } mps_model.set_constraint_lower_bounds(constraint_lower.data(), m); mps_model.set_constraint_upper_bounds(constraint_upper.data(), m); - mps_model.set_maximize(user_problem.obj_scale < 0); + mps_model.set_maximize(lp.obj_scale < 0); return mps_model; } +enum class sb_source_t { DUAL_SIMPLEX, PDLP, NONE }; + +// Merge a single strong branching result from Dual Simplex and PDLP. +// Rules: +// 1. If both found optimal -> keep DS (higher quality vertex solution) +// 2. Else if Dual Simplex found infeasible -> declare infeasible +// 3. Else if one is optimal -> keep the optimal one +// 4. Else if Dual Simplex hit iteration limit -> keep DS +// 5. Else if none converged -> NaN (original objective) template -void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +static std::pair merge_sb_result(f_t ds_val, + dual::status_t ds_status, + f_t pdlp_dual_obj, + bool pdlp_optimal) +{ + // Dual simplex always maintains dual feasibility, so OPTIMAL and ITERATION_LIMIT both qualify + + // Rule 1: Both optimal -> keep DS + if (ds_status == dual::status_t::OPTIMAL && pdlp_optimal) { + return {ds_val, sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 2: Dual Simplex found infeasible -> declare infeasible + if (ds_status == dual::status_t::DUAL_UNBOUNDED) { + return {std::numeric_limits::infinity(), sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 3: Only one converged -> keep that + if (ds_status == dual::status_t::OPTIMAL && !pdlp_optimal) { + return {ds_val, sb_source_t::DUAL_SIMPLEX}; + } + if (pdlp_optimal && ds_status != dual::status_t::OPTIMAL) { + return {pdlp_dual_obj, sb_source_t::PDLP}; + } + + // Rule 4: Dual Simplex hit iteration limit or work limit or cutoff -> keep DS + if (ds_status == dual::status_t::ITERATION_LIMIT || ds_status == dual::status_t::WORK_LIMIT || + ds_status == dual::status_t::CUTOFF) { + return {ds_val, 
sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 5: None converged -> NaN + return {std::numeric_limits::quiet_NaN(), sb_source_t::NONE}; +} + +template +static void batch_pdlp_strong_branching_task( + const simplex_solver_settings_t& settings, + i_t effective_batch_pdlp, + f_t start_time, + std::atomic& concurrent_halt, + const lp_problem_t& original_lp, + const std::vector& new_slacks, + const std::vector& root_soln, + const std::vector& fractional, + f_t root_obj, + pseudo_costs_t& pc, + shared_strong_branching_context_view_t& sb_view, + std::vector& pdlp_obj_down, + std::vector& pdlp_obj_up) +{ + settings.log.printf(effective_batch_pdlp == 2 + ? "Batch PDLP only for strong branching\n" + : "Cooperative batch PDLP and Dual Simplex for strong branching\n"); + + f_t start_batch = tic(); + std::vector original_root_soln_x; + + if (concurrent_halt.load() == 1) { return; } + + const auto mps_model = + simplex_problem_to_mps_data_model(original_lp, new_slacks, root_soln, original_root_soln_x); + + std::vector fraction_values; + + std::vector original_root_soln_y, original_root_soln_z; + // TODO put back later once Chris has this part + /*uncrush_dual_solution( + original_problem, original_lp, root_soln_y, root_soln_z, original_root_soln_y, + original_root_soln_z);*/ + + for (i_t k = 0; k < fractional.size(); k++) { + const i_t j = fractional[k]; + fraction_values.push_back(original_root_soln_x[j]); + } + + if (concurrent_halt.load() == 1) { return; } + + f_t batch_elapsed_time = toc(start_time); + const f_t warm_start_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (warm_start_remaining_time <= 0.0) { return; } + + assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point"); + + if (!pc.pdlp_warm_cache.populated) { + pdlp_solver_settings_t ws_settings; + ws_settings.method = method_t::PDLP; + ws_settings.presolver = presolver_t::None; + ws_settings.pdlp_solver_mode = 
pdlp_solver_mode_t::Stable3; + ws_settings.detect_infeasibility = false; + // Since the warm start will be used over and over again we want to maximize the chance of + // convergeance Batch PDLP is very compute intensive so we want to minimize the number of + // iterations + constexpr int warm_start_iteration_limit = 500000; + ws_settings.iteration_limit = warm_start_iteration_limit; + ws_settings.time_limit = warm_start_remaining_time; + constexpr f_t pdlp_tolerance = 1e-5; + ws_settings.tolerances.relative_dual_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_dual_tolerance = pdlp_tolerance; + ws_settings.tolerances.relative_primal_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_primal_tolerance = pdlp_tolerance; + ws_settings.tolerances.relative_gap_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_gap_tolerance = pdlp_tolerance; + ws_settings.inside_mip = true; + if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; } + +#ifdef BATCH_VERBOSE_MODE + auto start_time = std::chrono::high_resolution_clock::now(); +#endif + + auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings); + +#ifdef BATCH_VERBOSE_MODE + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end_time - start_time).count(); + std::cout << "Original problem solved in " << duration << " milliseconds" + << " and iterations: " + << ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_ << std::endl; +#endif + + if (ws_solution.get_termination_status() == pdlp_termination_status_t::Optimal) { + auto& cache = pc.pdlp_warm_cache; + const auto& ws_primal = ws_solution.get_primal_solution(); + const auto& ws_dual = ws_solution.get_dual_solution(); + // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm + // start + cache.initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); + cache.initial_dual = 
rmm::device_uvector(ws_dual, ws_dual.stream()); + cache.step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; + cache.primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; + cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + cache.populated = true; + + settings.log.printf( + "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n", + cache.initial_primal.size(), + cache.initial_dual.size(), + cache.step_size, + cache.primal_weight, + cache.pdlp_iteration); + } else { + settings.log.printf( + "PDLP warm start solve did not reach optimality (%s), skipping cache and batch PDLP\n", + ws_solution.get_termination_status_string().c_str()); + return; + } + } + + if (concurrent_halt.load() == 1) { return; } + + pdlp_solver_settings_t pdlp_settings; + if (effective_batch_pdlp == 1) { + pdlp_settings.concurrent_halt = &concurrent_halt; + pdlp_settings.shared_sb_solved = sb_view.solved; + } + + batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + pdlp_settings.time_limit = batch_remaining_time; + + if (pc.pdlp_warm_cache.populated) { + auto& cache = pc.pdlp_warm_cache; + pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(), + cache.initial_primal.size(), + cache.batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution( + cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache.step_size); + pdlp_settings.set_initial_primal_weight(cache.primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + } + + if (concurrent_halt.load() == 1) { return; } + + const auto solutions = batch_pdlp_solve( + &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); + f_t 
batch_pdlp_strong_branching_time = toc(start_batch); + + // Fail safe in case the batch PDLP failed and produced no solutions + if (solutions.get_additional_termination_informations().size() != fractional.size() * 2) { + settings.log.printf("Batch PDLP failed and produced no solutions\n"); + return; + } + + // Find max iteration on how many are done accross the batch + i_t max_iterations = 0; + i_t amount_done = 0; + for (i_t k = 0; k < solutions.get_additional_termination_informations().size(); k++) { + max_iterations = std::max( + max_iterations, solutions.get_additional_termination_information(k).number_of_steps_taken); + // TODO batch mode infeasible: should also count as done if infeasible + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + amount_done++; + } + } + + settings.log.printf( + "Batch PDLP strong branching completed in %.2fs. Solved %d/%d with max %d iterations\n", + batch_pdlp_strong_branching_time, + amount_done, + fractional.size() * 2, + max_iterations); + + for (i_t k = 0; k < fractional.size(); k++) { + f_t obj_down = (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) + ? solutions.get_dual_objective_value(k) + : std::numeric_limits::quiet_NaN(); + + f_t obj_up = (solutions.get_termination_status(k + fractional.size()) == + pdlp_termination_status_t::Optimal) + ? 
solutions.get_dual_objective_value(k + fractional.size()) + : std::numeric_limits::quiet_NaN(); + + pdlp_obj_down[k] = std::max(obj_down - root_obj, f_t(0.0)); + pdlp_obj_up[k] = std::max(obj_up - root_obj, f_t(0.0)); + } +} + +template +static void batch_pdlp_reliability_branching_task( + logger_t& log, + i_t rb_mode, + i_t num_candidates, + f_t start_time, + std::atomic& concurrent_halt, + const lp_problem_t& original_lp, + const std::vector& new_slacks, + const std::vector& solution, + branch_and_bound_worker_t* worker, + const std::vector& candidate_vars, + const simplex_solver_settings_t& settings, + shared_strong_branching_context_view_t& sb_view, + batch_pdlp_warm_cache_t& pdlp_warm_cache, + std::vector& pdlp_obj_down, + std::vector& pdlp_obj_up) +{ + log.printf(rb_mode == 2 ? "RB batch PDLP only for %d candidates\n" + : "RB cooperative batch PDLP and DS for %d candidates\n", + num_candidates); + + f_t start_batch = tic(); + + std::vector original_soln_x; + + if (concurrent_halt.load() == 1) { return; } + + auto mps_model = + simplex_problem_to_mps_data_model(original_lp, new_slacks, solution, original_soln_x); + { + const i_t n_orig = original_lp.num_cols - new_slacks.size(); + for (i_t j = 0; j < n_orig; j++) { + mps_model.variable_lower_bounds_[j] = worker->leaf_problem.lower[j]; + mps_model.variable_upper_bounds_[j] = worker->leaf_problem.upper[j]; + } + } + + std::vector fraction_values; + fraction_values.reserve(num_candidates); + for (i_t j : candidate_vars) { + fraction_values.push_back(original_soln_x[j]); + } + + if (concurrent_halt.load() == 1) { return; } + + const f_t batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + + // One handle per batch PDLP since there can be concurrent calls + const raft::handle_t batch_pdlp_handle; + + pdlp_solver_settings_t pdlp_settings; + if (rb_mode == 1) { + 
pdlp_settings.concurrent_halt = &concurrent_halt; + pdlp_settings.shared_sb_solved = sb_view.solved; + } + pdlp_settings.time_limit = batch_remaining_time; + + if (pdlp_warm_cache.populated) { + auto& cache = pdlp_warm_cache; + pdlp_settings.set_initial_primal_solution( + cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution( + cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache.step_size); + pdlp_settings.set_initial_primal_weight(cache.primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + } + + if (concurrent_halt.load() == 1) { return; } + + const auto solutions = + batch_pdlp_solve(&batch_pdlp_handle, mps_model, candidate_vars, fraction_values, pdlp_settings); + + f_t batch_pdlp_time = toc(start_batch); + + if (solutions.get_additional_termination_informations().size() != + static_cast(num_candidates) * 2) { + log.printf("RB batch PDLP failed and produced no solutions\n"); + return; + } + + i_t amount_done = 0; + for (i_t k = 0; k < num_candidates * 2; k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + amount_done++; + } + } + + log.printf("RB batch PDLP completed in %.2fs. 
Solved %d/%d\n", + batch_pdlp_time, + amount_done, + num_candidates * 2); + + for (i_t k = 0; k < num_candidates; k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + pdlp_obj_down[k] = solutions.get_dual_objective_value(k); + } + if (solutions.get_termination_status(k + num_candidates) == + pdlp_termination_status_t::Optimal) { + pdlp_obj_up[k] = solutions.get_dual_objective_value(k + num_candidates); + } + } +} + +template +void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, f_t start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const std::vector& root_soln, + const std::vector& root_soln_y, + const std::vector& root_soln_z, const std::vector& fractional, f_t root_obj, const std::vector& root_vstatus, @@ -317,89 +744,54 @@ void strong_branching(const user_problem_t& original_problem, const f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { return; } - if (settings.mip_batch_pdlp_strong_branching) { - settings.log.printf("Batch PDLP strong branching enabled\n"); - - f_t start_batch = tic(); - - // Use original_problem to create the BatchLP problem - csr_matrix_t A_row(original_problem.A.m, original_problem.A.n, 0); - original_problem.A.to_compressed_row(A_row); - - // Convert the root_soln to the original problem space - std::vector original_root_soln_x; - uncrush_primal_solution(original_problem, original_lp, root_soln, original_root_soln_x); - - std::vector fraction_values; - - for (i_t k = 0; k < fractional.size(); k++) { - const i_t j = fractional[k]; - fraction_values.push_back(original_root_soln_x[j]); - } - - const auto mps_model = simplex_problem_to_mps_data_model(original_problem); - const f_t batch_elapsed_time = toc(start_time); - const f_t batch_remaining_time = - std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); - if (batch_remaining_time <= 0.0) { return; } - 
pdlp_solver_settings_t pdlp_settings; - pdlp_settings.time_limit = batch_remaining_time; - const raft::handle_t batch_pdlp_handle; - const auto solutions = - batch_pdlp_solve(&batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); - f_t batch_pdlp_strong_branching_time = toc(start_batch); - - // Find max iteration on how many are done accross the batch - i_t max_iterations = 0; - i_t amount_done = 0; - for (i_t k = 0; k < solutions.get_additional_termination_informations().size(); k++) { - max_iterations = std::max( - max_iterations, solutions.get_additional_termination_information(k).number_of_steps_taken); - // TODO batch mode infeasible: should also count as done if infeasible - if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { - amount_done++; - } - } + const i_t effective_batch_pdlp = + (settings.sub_mip || (settings.deterministic && settings.mip_batch_pdlp_strong_branching == 1)) + ? 0 + : settings.mip_batch_pdlp_strong_branching; + if (settings.mip_batch_pdlp_strong_branching != 0 && + (settings.sub_mip || settings.deterministic)) { settings.log.printf( - "Batch PDLP strong branching completed in %.2fs. Solved %d/%d with max %d iterations\n", - batch_pdlp_strong_branching_time, - amount_done, - fractional.size() * 2, - max_iterations); + "Batch PDLP strong branching is disabled because sub-MIP or deterministic mode is enabled\n"); + } - for (i_t k = 0; k < fractional.size(); k++) { - // Call BatchLP solver. Solve 2*fractional.size() subproblems. - // Let j = fractional[k]. We want to solve the two trial branching problems - // Branch down: - // minimize c^T x - // subject to lb <= A*x <= ub - // x_j <= floor(root_soln[j]) - // l <= x < u - // Let the optimal objective value of thie problem be obj_down - f_t obj_down = (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) - ? 
solutions.get_dual_objective_value(k) - : std::numeric_limits::quiet_NaN(); - - // Branch up: - // minimize c^T x - // subject to lb <= A*x <= ub - // x_j >= ceil(root_soln[j]) - // Let the optimal objective value of thie problem be obj_up - f_t obj_up = (solutions.get_termination_status(k + fractional.size()) == - pdlp_termination_status_t::Optimal) - ? solutions.get_dual_objective_value(k + fractional.size()) - : std::numeric_limits::quiet_NaN(); + settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", + settings.num_threads, + fractional.size()); + + // Cooperative DS + PDLP: shared context tracks which subproblems are solved + shared_strong_branching_context_t shared_ctx(2 * fractional.size()); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + std::atomic concurrent_halt{0}; + + std::vector pdlp_obj_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector pdlp_obj_up(fractional.size(), std::numeric_limits::quiet_NaN()); + + if (effective_batch_pdlp != 0) { +#pragma omp task default(shared) + batch_pdlp_strong_branching_task(settings, + effective_batch_pdlp, + start_time, + concurrent_halt, + original_lp, + new_slacks, + root_soln, + fractional, + root_obj, + pc, + sb_view, + pdlp_obj_down, + pdlp_obj_up); + } - pc.strong_branch_down[k] = obj_down - root_obj; - pc.strong_branch_up[k] = obj_up - root_obj; - } - } else { - settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", - settings.num_threads, - fractional.size()); - f_t strong_branching_start_time = tic(); + std::vector ds_status_down(fractional.size(), dual::status_t::UNSET); + std::vector ds_status_up(fractional.size(), dual::status_t::UNSET); + std::vector ds_obj_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector ds_obj_up(fractional.size(), std::numeric_limits::quiet_NaN()); + f_t dual_simplex_strong_branching_time = tic(); + if (effective_batch_pdlp != 2) { #pragma omp 
parallel num_threads(settings.num_threads) { i_t n = std::min(4 * settings.num_threads, fractional.size()); @@ -432,10 +824,135 @@ void strong_branching(const user_problem_t& original_problem, root_soln, root_vstatus, edge_norms, - pc); + pc, + ds_obj_down, + ds_obj_up, + ds_status_down, + ds_status_up, + &concurrent_halt, + sb_view); + } + } + + // DS done: signal PDLP to stop (time-limit or all work done) and wait + concurrent_halt.store(1); + } + + if (effective_batch_pdlp != 0) { +#pragma omp taskwait + } + + settings.log.printf("Strong branching took %.2fs\n", toc(dual_simplex_strong_branching_time)); + + // Collect Dual Simplex statistics + i_t ds_optimal = 0, ds_infeasible = 0, ds_iter_limit = 0; + i_t ds_numerical = 0, ds_cutoff = 0, ds_time_limit = 0; + i_t ds_concurrent = 0, ds_work_limit = 0, ds_unset = 0; + const i_t total_subproblems = fractional.size() * 2; + for (i_t k = 0; k < fractional.size(); k++) { + for (auto st : {ds_status_down[k], ds_status_up[k]}) { + switch (st) { + case dual::status_t::OPTIMAL: ds_optimal++; break; + case dual::status_t::DUAL_UNBOUNDED: ds_infeasible++; break; + case dual::status_t::ITERATION_LIMIT: ds_iter_limit++; break; + case dual::status_t::NUMERICAL: ds_numerical++; break; + case dual::status_t::CUTOFF: ds_cutoff++; break; + case dual::status_t::TIME_LIMIT: ds_time_limit++; break; + case dual::status_t::CONCURRENT_LIMIT: ds_concurrent++; break; + case dual::status_t::WORK_LIMIT: ds_work_limit++; break; + case dual::status_t::UNSET: ds_unset++; break; } } - settings.log.printf("Strong branching completed in %.2fs\n", toc(strong_branching_start_time)); + } + + settings.log.printf("Dual Simplex: %d/%d optimal, %d infeasible, %d iter-limit", + ds_optimal, + total_subproblems, + ds_infeasible, + ds_iter_limit); + if (ds_cutoff) settings.log.printf(", %d cutoff", ds_cutoff); + if (ds_time_limit) settings.log.printf(", %d time-limit", ds_time_limit); + if (ds_numerical) settings.log.printf(", %d numerical", ds_numerical); 
+ if (ds_concurrent) settings.log.printf(", %d concurrent-halt", ds_concurrent); + if (ds_work_limit) settings.log.printf(", %d work-limit", ds_work_limit); + if (ds_unset) settings.log.printf(", %d unset/skipped", ds_unset); + settings.log.printf("\n"); + + if (effective_batch_pdlp != 0) { + i_t pdlp_optimal_count = 0; + for (i_t k = 0; k < fractional.size(); k++) { + if (!std::isnan(pdlp_obj_down[k])) pdlp_optimal_count++; + if (!std::isnan(pdlp_obj_up[k])) pdlp_optimal_count++; + } + + settings.log.printf("Batch PDLP found %d/%d optimal solutions\n", + pdlp_optimal_count, + static_cast(fractional.size() * 2)); + } + + i_t merged_from_ds = 0; + i_t merged_from_pdlp = 0; + i_t merged_nan = 0; + i_t solved_by_both_down = 0; + i_t solved_by_both_up = 0; + for (i_t k = 0; k < fractional.size(); k++) { + bool ds_has_down = ds_status_down[k] != dual::status_t::UNSET; + bool pdlp_has_down = !std::isnan(pdlp_obj_down[k]); + const auto [value_down, source_down] = + merge_sb_result(ds_obj_down[k], ds_status_down[k], pdlp_obj_down[k], pdlp_has_down); + pc.strong_branch_down[k] = value_down; + if (source_down == sb_source_t::DUAL_SIMPLEX) + merged_from_ds++; + else if (source_down == sb_source_t::PDLP) + merged_from_pdlp++; + else + merged_nan++; + if (ds_has_down && pdlp_has_down) { + solved_by_both_down++; + settings.log.printf( + "[COOP SB] Merge: variable %d DOWN solved by BOTH (DS=%e PDLP=%e) -> kept %s\n", + fractional[k], + ds_obj_down[k], + pdlp_obj_down[k], + source_down == sb_source_t::DUAL_SIMPLEX ? 
"DS" : "PDLP"); + } + + bool ds_has_up = ds_status_up[k] != dual::status_t::UNSET; + bool pdlp_has_up = !std::isnan(pdlp_obj_up[k]); + const auto [value_up, source_up] = + merge_sb_result(ds_obj_up[k], ds_status_up[k], pdlp_obj_up[k], pdlp_has_up); + pc.strong_branch_up[k] = value_up; + if (source_up == sb_source_t::DUAL_SIMPLEX) + merged_from_ds++; + else if (source_up == sb_source_t::PDLP) + merged_from_pdlp++; + else + merged_nan++; + if (ds_has_up && pdlp_has_up) { + solved_by_both_up++; + settings.log.printf( + "[COOP SB] Merge: variable %d UP solved by BOTH (DS=%e PDLP=%e) -> kept %s\n", + fractional[k], + ds_obj_up[k], + pdlp_obj_up[k], + source_up == sb_source_t::DUAL_SIMPLEX ? "DS" : "PDLP"); + } + } + + if (effective_batch_pdlp != 0) { + pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root = + (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0; + settings.log.printf( + "Batch PDLP only for strong branching. percent solved by batch PDLP at root: %f\n", + pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root); + settings.log.printf( + "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d/%d solved by both " + "(down/up)\n", + merged_from_ds, + merged_from_pdlp, + merged_nan, + solved_by_both_down, + solved_by_both_up); } pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); @@ -538,7 +1055,9 @@ i_t pseudo_costs_t::reliable_variable_selection( const branch_and_bound_stats_t& bnb_stats, f_t upper_bound, int max_num_tasks, - logger_t& log) + logger_t& log, + const std::vector& new_slacks, + const lp_problem_t& original_lp) { constexpr f_t eps = 1e-6; f_t start_time = bnb_stats.start_time; @@ -605,9 +1124,47 @@ i_t pseudo_costs_t::reliable_variable_selection( return branch_var; } - const int num_tasks = std::max(max_num_tasks, 1); - const int task_priority = reliability_branching_settings.task_priority; - const i_t max_num_candidates = reliability_branching_settings.max_num_candidates; + const i_t rb_mode = 
settings.mip_batch_pdlp_reliability_branching; + // We don't use batch PDLP in reliability branching if the PDLP warm start data was not filled + // This indicates that PDLP alone (not batched) couldn't even run at the root node + // So it will most likely perform poorly compared to DS + // It is also off if the number of candidates is very small + // If warm start could run but almost none of the BPDLP results were used, we also want to avoid + // using batch PDLP + constexpr i_t min_num_candidates_for_pdlp = 5; + constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; + const bool use_pdlp = (rb_mode != 0) && !settings.sub_mip && !settings.deterministic && + pdlp_warm_cache.populated && + unreliable_list.size() > min_num_candidates_for_pdlp && + pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root > + min_percent_solved_by_batch_pdlp_at_root_for_pdlp; + + if (rb_mode != 0 && !pdlp_warm_cache.populated) { + log.printf("PDLP warm start data not populated, using DS only\n"); + } else if (rb_mode != 0 && settings.sub_mip) { + log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); + } else if (rb_mode != 0 && settings.deterministic) { + log.printf( + "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); + } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { + log.printf("Not enough candidates to use batch PDLP, using DS only\n"); + } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) { + log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); + } else if (use_pdlp) { + log.printf( + "Using batch PDLP because populated, unreliable list size is %d (> %d), and percent solved " + "by batch PDLP at root is %f%% (> %f%%)\n", + static_cast(unreliable_list.size()), + min_num_candidates_for_pdlp, + pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root, + min_percent_solved_by_batch_pdlp_at_root_for_pdlp); + } 
+ + const int num_tasks = std::max(max_num_tasks, 1); + const int task_priority = reliability_branching_settings.task_priority; + // If both batch PDLP and DS are used we double the max number of candidates + const i_t max_num_candidates = use_pdlp ? 2 * reliability_branching_settings.max_num_candidates + : reliability_branching_settings.max_num_candidates; const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); assert(task_priority > 0); @@ -626,89 +1183,246 @@ i_t pseudo_costs_t::reliable_variable_selection( // Shuffle the unreliable list so every variable has the same chance to be selected. if (unreliable_list.size() > max_num_candidates) { worker->rng.shuffle(unreliable_list); } + // Both DS and PDLP work on the same candidate set + std::vector candidate_vars(unreliable_list.begin(), + unreliable_list.begin() + num_candidates); + + // Shared context for cooperative work-stealing (mode 1) + // [0..num_candidates) = down, [num_candidates..2*num_candidates) = up + shared_strong_branching_context_t shared_ctx(2 * num_candidates); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + std::vector pdlp_obj_down(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector pdlp_obj_up(num_candidates, std::numeric_limits::quiet_NaN()); + + std::atomic concurrent_halt{0}; + + if (use_pdlp) { +#pragma omp task default(shared) + batch_pdlp_reliability_branching_task(log, + rb_mode, + num_candidates, + start_time, + concurrent_halt, + original_lp, + new_slacks, + solution, + worker, + candidate_vars, + settings, + sb_view, + pdlp_warm_cache, + pdlp_obj_down, + pdlp_obj_up); + } + if (toc(start_time) > settings.time_limit) { - log.printf("Time limit reached"); + log.printf("Time limit reached\n"); + if (use_pdlp) { + concurrent_halt.store(1); +#pragma omp taskwait + } return branch_var; } + std::vector ds_obj_down(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector ds_obj_up(num_candidates, 
std::numeric_limits::quiet_NaN()); + std::vector ds_status_down(num_candidates, dual::status_t::UNSET); + std::vector ds_status_up(num_candidates, dual::status_t::UNSET); + + f_t ds_start_time = tic(); + + if (rb_mode != 2) { #pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ - shared(score_mutex) - for (i_t i = 0; i < num_candidates; ++i) { - const i_t j = unreliable_list[i]; - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_down[j].lock(); - if (pseudo_cost_num_down[j] < reliable_threshold) { - // Do trial branching on the down branch - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - worker->leaf_problem.lower[j], - std::floor(solution[j]), - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = solution[j] - std::floor(solution[j]); - pseudo_cost_sum_down[j] += change_in_obj / change_in_x; - pseudo_cost_num_down[j]++; + shared(score_mutex, sb_view) + for (i_t i = 0; i < num_candidates; ++i) { + const i_t j = unreliable_list[i]; + + if (toc(start_time) > settings.time_limit) { continue; } + + if (rb_mode == 1 && sb_view.is_solved(i)) { + log.printf( + "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); + } else { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + // Do trial branching on the down branch + const auto [obj, status] = + trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, 
+ worker->leaf_problem.lower[j], + std::floor(solution[j]), + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + ds_obj_down[i] = obj; + ds_status_down[i] = status; + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + // Should be valid if we are already here + if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(i); } + } + } else { + // Variable became reliable, mark it as solved so that batch PDLP does not solve it again + if (rb_mode == 1) sb_view.mark_solved(i); + } + pseudo_cost_mutex_down[j].unlock(); + } - } - pseudo_cost_mutex_down[j].unlock(); - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_up[j].lock(); - if (pseudo_cost_num_up[j] < reliable_threshold) { - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - std::ceil(solution[j]), - worker->leaf_problem.upper[j], - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = std::ceil(solution[j]) - solution[j]; - pseudo_cost_sum_up[j] += change_in_obj / change_in_x; - pseudo_cost_num_up[j]++; + + if (toc(start_time) > settings.time_limit) { continue; } + + const i_t shared_idx = i + num_candidates; + if (rb_mode == 1 && sb_view.is_solved(shared_idx)) { + log.printf("DS skipping variable %d branch up (shared_idx %d): already 
solved by PDLP\n", + j, + shared_idx); + } else { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + const auto [obj, status] = + trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + std::ceil(solution[j]), + worker->leaf_problem.upper[j], + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + ds_obj_up[i] = obj; + ds_status_up[i] = status; + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + // Should be valid if we are already here + if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(shared_idx); } + } + } else { + // Variable became reliable, mark it as solved so that batch PDLP does not solve it again + if (rb_mode == 1) sb_view.mark_solved(shared_idx); + } + pseudo_cost_mutex_up[j].unlock(); + } + + if (toc(start_time) > settings.time_limit) { continue; } + + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + score_mutex.lock(); + if (score > max_score) { + max_score = score; + branch_var = j; } + score_mutex.unlock(); } - pseudo_cost_mutex_up[j].unlock(); - if (toc(start_time) > settings.time_limit) { continue; } + concurrent_halt.store(1); + } - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t ds_elapsed = toc(ds_start_time); + + // TODO put back + // if (rb_mode != 2) { + // if (rb_mode == 1) { + // log.printf( + // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed, %d skipped + // (PDLP) in %.2fs\n", 
num_candidates, ds_optimal.load(), num_candidates * 2, + // ds_infeasible.load(), num_candidates * 2, + // ds_failed.load(), num_candidates * 2, + // ds_skipped.load(), ds_elapsed); + // } else { + // log.printf( + // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed in + // %.2fs\n", num_candidates, ds_optimal.load(), num_candidates * 2, ds_infeasible.load(), + // num_candidates * 2, ds_failed.load(), num_candidates * 2, ds_elapsed); + // } + //} + + if (use_pdlp) { +#pragma omp taskwait + + i_t pdlp_applied = 0; + i_t pdlp_optimal = 0; + for (i_t i = 0; i < num_candidates; i++) { + const i_t j = candidate_vars[i]; + + // Down: check if PDLP should override DS + if (!std::isnan(pdlp_obj_down[i])) { + pdlp_optimal++; + const auto [merged_obj, source] = + merge_sb_result(ds_obj_down[i], ds_status_down[i], pdlp_obj_down[i], true); + // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent + // calls may have made it reliable) + if (source == sb_source_t::PDLP) { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + pdlp_applied++; + } + pseudo_cost_mutex_down[j].unlock(); + } + } - score_mutex.lock(); - if (score > max_score) { - max_score = score; - branch_var = j; + // Up: check if PDLP should override DS + if (!std::isnan(pdlp_obj_up[i])) { + pdlp_optimal++; + const auto [merged_obj, source] = + merge_sb_result(ds_obj_up[i], ds_status_up[i], pdlp_obj_up[i], true); + // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent + // calls may have made it reliable) + if (source == sb_source_t::PDLP) { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + f_t change_in_obj = 
std::max(merged_obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + pdlp_applied++; + } + pseudo_cost_mutex_up[j].unlock(); + } + } + + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + if (score > max_score) { + max_score = score; + branch_var = j; + } } - score_mutex.unlock(); + + log.printf("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", + num_candidates, + pdlp_optimal, + num_candidates * 2, + pdlp_applied); } log.printf( @@ -776,12 +1490,14 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( template class pseudo_costs_t; -template void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, double start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const std::vector& root_soln, + const std::vector& root_soln_y, + const std::vector& root_soln_z, const std::vector& fractional, double root_obj, const std::vector& root_vstatus, diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 6b6c6917b..322daa890 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -20,7 +20,10 @@ #include #include +#include + #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -405,6 +408,18 @@ struct reliability_branching_settings_t { i_t min_reliable_threshold = 1; }; +template +struct batch_pdlp_warm_cache_t { + const raft::handle_t batch_pdlp_handle{}; + rmm::device_uvector initial_primal{0, batch_pdlp_handle.get_stream()}; + rmm::device_uvector initial_dual{0, batch_pdlp_handle.get_stream()}; + f_t step_size{std::numeric_limits::signaling_NaN()}; + f_t 
primal_weight{std::numeric_limits::signaling_NaN()}; + i_t pdlp_iteration{-1}; + f_t percent_solved_by_batch_pdlp_at_root{f_t(0.0)}; + bool populated{false}; +}; + template class pseudo_costs_t { public: @@ -481,7 +496,9 @@ class pseudo_costs_t { const branch_and_bound_stats_t& bnb_stats, f_t upper_bound, int max_num_tasks, - logger_t& log); + logger_t& log, + const std::vector& new_slacks, + const lp_problem_t& original_lp); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); @@ -514,15 +531,19 @@ class pseudo_costs_t { std::vector pseudo_cost_mutex_down; omp_atomic_t num_strong_branches_completed = 0; omp_atomic_t strong_branching_lp_iter = 0; + + batch_pdlp_warm_cache_t pdlp_warm_cache; }; template -void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, f_t start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const std::vector& root_soln, + const std::vector& root_soln_y, + const std::vector& root_soln_z, const std::vector& fractional, f_t root_obj, const std::vector& root_vstatus, diff --git a/cpp/src/branch_and_bound/shared_strong_branching_context.hpp b/cpp/src/branch_and_bound/shared_strong_branching_context.hpp new file mode 100644 index 000000000..a9e697ae5 --- /dev/null +++ b/cpp/src/branch_and_bound/shared_strong_branching_context.hpp @@ -0,0 +1,60 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct shared_strong_branching_context_t { + std::vector> solved; + + explicit shared_strong_branching_context_t(size_t num_subproblems) : solved(num_subproblems) + { + for (auto& s : solved) + s.store(0); + } +}; + +template +struct shared_strong_branching_context_view_t { + std::span> solved; + + shared_strong_branching_context_view_t() = default; + + shared_strong_branching_context_view_t(std::span> s) : solved(s) {} + + bool is_valid() const { return !solved.empty(); } + + bool is_solved(i_t local_idx) const + { + assert(local_idx >= 0 && static_cast(local_idx) < solved.size() && + "local_idx out of bounds"); + return solved[local_idx].load() != 0; + } + + void mark_solved(i_t local_idx) const + { + assert(local_idx >= 0 && static_cast(local_idx) < solved.size() && + "local_idx out of bounds"); + solved[local_idx].store(1); + } + + shared_strong_branching_context_view_t subview(i_t offset, i_t count) const + { + assert(offset >= 0 && count >= 0 && static_cast(offset + count) <= solved.size() && + "subview out of bounds"); + return {solved.subspan(offset, count)}; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index eadd93040..882f7a14f 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -187,8 +187,10 @@ struct simplex_solver_settings_t { // strengthening f_t cut_change_threshold; // threshold for cut change f_t cut_min_orthogonality; // minimum orthogonality for cuts - i_t mip_batch_pdlp_strong_branching{0}; // 0 if not using batch PDLP for strong branching, 1 if - // using batch PDLP for strong branching + i_t + mip_batch_pdlp_strong_branching; // 0 = DS only, 1 = 
cooperative DS + PDLP, 2 = batch PDLP only + i_t mip_batch_pdlp_reliability_branching; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch + // PDLP only diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index a8bc0ea33..9d933f3c9 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -99,7 +99,8 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_REDUCED_COST_STRENGTHENING, &mip_settings.reduced_cost_strengthening, -1, std::numeric_limits::max(), -1}, {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}, - {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0}, + {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 2, 0}, + {CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING, &mip_settings.mip_batch_pdlp_reliability_branching, 0, 2, 0}, {CUOPT_PRESOLVE, reinterpret_cast(&pdlp_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_PRESOLVE, reinterpret_cast(&mip_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_MIP_DETERMINISM_MODE, &mip_settings.determinism_mode, CUOPT_MODE_OPPORTUNISTIC, CUOPT_MODE_DETERMINISTIC, CUOPT_MODE_OPPORTUNISTIC}, diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index a200d4265..5e3d19c8b 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -14,6 +14,7 @@ set(MIP_LP_NECESSARY_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/third_party_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/presolve/gf2_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/solution/solution.cu + ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ) # 
Files that are MIP-specific and not needed for pure LP @@ -38,7 +39,6 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu - ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index f25c093af..d58f20876 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -229,6 +229,8 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality; branch_and_bound_settings.mip_batch_pdlp_strong_branching = context.settings.mip_batch_pdlp_strong_branching; + branch_and_bound_settings.mip_batch_pdlp_reliability_branching = + context.settings.mip_batch_pdlp_reliability_branching; branch_and_bound_settings.reduced_cost_strengthening = context.settings.reduced_cost_strengthening == -1 ? 
2 diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 82e79098a..33c080ee3 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -500,8 +500,7 @@ std::optional> pdlp_solver_t } // Check for concurrent limit - if (settings_.method == method_t::Concurrent && settings_.concurrent_halt != nullptr && - *settings_.concurrent_halt == 1) { + if (settings_.concurrent_halt != nullptr && settings_.concurrent_halt->load() == 1) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Concurrent Limit reached, returning current solution" << std::endl; @@ -771,13 +770,34 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) if (current_termination_strategy_.is_done(term)) { std::cout << "[BATCH MODE]: Climber " << i << " is done with " << optimization_problem_solution_t::get_termination_status_string(term) - << " at step " << total_pdlp_iterations_ << ". It's original index is " + << " at step " << internal_solver_iterations_ << ". It's original index is " << climber_strategies_[i].original_index << std::endl; } } #endif - // All are optimal or infeasible + // Sync external solved status into internal termination strategy before all_done() check + if (sb_view_.is_valid()) { + for (size_t i = 0; i < climber_strategies_.size(); ++i) { + // If PDLP has solved it to optimality we want to keep it and resolved both solvers having + // solved the problem later + if (current_termination_strategy_.is_done( + current_termination_strategy_.get_termination_status(i))) + continue; + const i_t local_idx = climber_strategies_[i].original_index; + if (sb_view_.is_solved(local_idx)) { + current_termination_strategy_.set_termination_status( + i, pdlp_termination_status_t::ConcurrentLimit); +#ifdef BATCH_VERBOSE_MODE + std::cout << "[COOP SB] DS already solved climber " << i << " (original_index " << local_idx + << "), synced to ConcurrentLimit at step " << internal_solver_iterations_ + << std::endl; +#endif + } + } + } + + // All are optimal, 
infeasible, or externally solved if (current_termination_strategy_.all_done()) { const auto original_batch_size = settings_.new_bounds.size(); // Some climber got removed from the batch while the optimization was running @@ -824,6 +844,7 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) .get_additional_termination_informations()[climber_strategies_[i].original_index] .solved_by_pdlp = (current_termination_strategy_.get_termination_status(i) != pdlp_termination_status_t::ConcurrentLimit); + if (sb_view_.is_valid()) { sb_view_.mark_solved(climber_strategies_[i].original_index); } } current_termination_strategy_.fill_gpu_terms_stats(total_pdlp_iterations_); RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); @@ -840,6 +861,11 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) std::move(batch_solution_to_return_.get_additional_termination_informations()), std::move(batch_solution_to_return_.get_terminations_status())}; } + if (sb_view_.is_valid()) { + for (size_t i = 0; i < climber_strategies_.size(); ++i) { + sb_view_.mark_solved(climber_strategies_[i].original_index); + } + } RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, @@ -858,8 +884,11 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) current_termination_strategy_.get_termination_status(i))) { raft::common::nvtx::range fun_scope("remove_done_climber"); #ifdef BATCH_VERBOSE_MODE - std::cout << "Removing climber " << i << " because it is done. Its original index is " - << climber_strategies_[i].original_index << std::endl; + const bool externally_solved = (current_termination_strategy_.get_termination_status(i) == + pdlp_termination_status_t::ConcurrentLimit); + std::cout << "Removing climber " << i << " (original_index " + << climber_strategies_[i].original_index << ") because it is done" + << (externally_solved ? 
" [solved by DS]" : " [solved by PDLP]") << std::endl; #endif to_remove.emplace(i); // Copy current climber solution information @@ -892,6 +921,7 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) .get_additional_termination_informations()[climber_strategies_[i].original_index] .solved_by_pdlp = (current_termination_strategy_.get_termination_status(i) != pdlp_termination_status_t::ConcurrentLimit); + if (sb_view_.is_valid()) { sb_view_.mark_solved(climber_strategies_[i].original_index); } } } if (to_remove.size() > 0) { @@ -965,7 +995,7 @@ std::optional> pdlp_solver_t // To avoid that we allow at least two iterations at first before checking (in practice 0 wasn't // enough) We still need to check iteration and time limit prior without breaking the logic below // of first checking termination before the limit - if (total_pdlp_iterations_ <= 1) { + if (internal_solver_iterations_ <= 1) { print_termination_criteria(timer); return check_limits(timer); } @@ -1507,9 +1537,6 @@ HDI void fixed_error_computation(const f_t norm_squared_delta_primal, norm_squared_delta_primal * primal_weight + norm_squared_delta_dual / primal_weight; const f_t computed_interaction = f_t(2.0) * interaction * step_size; - cuopt_assert(movement + computed_interaction >= f_t(0.0), - "Movement + computed interaction must be >= 0"); - // Clamp to 0 to avoid NaN *fixed_point_error = cuda::std::sqrt(cuda::std::max(f_t(0.0), movement + computed_interaction)); @@ -1768,6 +1795,90 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( pdhg_solver_.get_primal_tmp_resource().data(), CUSPARSE_ORDER_COL); + // Recalculate SpMM buffer sizes for the new batch dimensions. + // cuSparse may require different buffer sizes when the number of columns changes + // (e.g. SpMM with 1 column may internally fall back to SpMV with larger buffer needs). 
+ { + size_t new_buf_size = 0; + + // PDHG row-row: A_T * batch_dual_solutions -> batch_current_AtYs + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A_T, + pdhg_cusparse_view.batch_dual_solutions, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_current_AtYs, + (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_transpose_batch_row_row_.resize(new_buf_size, stream_view_); + + // PDHG row-row: A * batch_reflected_primal_solutions -> batch_dual_gradients + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A, + pdhg_cusparse_view.batch_reflected_primal_solutions, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_dual_gradients, + (deterministic_batch_pdlp) ? 
CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_non_transpose_batch_row_row_.resize(new_buf_size, stream_view_); + + // Adaptive step size: A_T * batch_potential_next_dual_solution -> batch_next_AtYs + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A_T, + pdhg_cusparse_view.batch_potential_next_dual_solution, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_next_AtYs, + CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_transpose_batch.resize(new_buf_size, stream_view_); + + // Convergence info: A_T * batch_dual_solutions -> batch_tmp_primals + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + current_op_problem_evaluation_cusparse_view_.A_T, + current_op_problem_evaluation_cusparse_view_.batch_dual_solutions, + reusable_device_scalar_value_0_.data(), + current_op_problem_evaluation_cusparse_view_.batch_tmp_primals, + CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + current_op_problem_evaluation_cusparse_view_.buffer_transpose_batch.resize(new_buf_size, + stream_view_); + + // Convergence info: A * batch_primal_solutions -> batch_tmp_duals + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + current_op_problem_evaluation_cusparse_view_.A, + current_op_problem_evaluation_cusparse_view_.batch_primal_solutions, + reusable_device_scalar_value_0_.data(), + current_op_problem_evaluation_cusparse_view_.batch_tmp_duals, + 
CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + current_op_problem_evaluation_cusparse_view_.buffer_non_transpose_batch.resize(new_buf_size, + stream_view_); + } + // Rerun preprocess // PDHG SpMM preprocess @@ -2199,6 +2310,22 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.total_pdhg_iterations_ = initial_k_.value(); pdhg_solver_.get_d_total_pdhg_iterations().set_value_async(initial_k_.value(), stream_view_); } + if (settings_.get_initial_pdlp_iteration().has_value()) { + total_pdlp_iterations_ = settings_.get_initial_pdlp_iteration().value(); + // This is meaningless in batch mode since pdhg step is never used, set it just to avoid + // assertions + pdhg_solver_.get_d_total_pdhg_iterations().set_value_async(total_pdlp_iterations_, + stream_view_); + pdhg_solver_.total_pdhg_iterations_ = total_pdlp_iterations_; + // Reset the fixed point error since at this pdlp iteration it is expected to already be + // initialized to some value + std::fill(restart_strategy_.initial_fixed_point_error_.begin(), + restart_strategy_.initial_fixed_point_error_.end(), + f_t(0.0)); + std::fill(restart_strategy_.fixed_point_error_.begin(), + restart_strategy_.fixed_point_error_.end(), + f_t(0.0)); + } // Only the primal_weight_ and step_size_ variables are initialized during the initial phase // The associated primal/dual step_size (computed using the two firstly mentionned) are not @@ -2320,13 +2447,6 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co bool warm_start_was_given = settings_.get_pdlp_warm_start_data().is_populated(); - // In batch mode, before running the solver, we need to transpose the primal and dual solution to - // row format - if (batch_mode_) - transpose_primal_dual_to_row(pdhg_solver_.get_potential_next_primal_solution(), - pdhg_solver_.get_potential_next_dual_solution(), - pdhg_solver_.get_dual_slack()); - if (!inside_mip_) { CUOPT_LOG_INFO( " Iter Primal Obj. Dual Obj. Gap Primal Res. Dual Res. 
Time"); @@ -2389,13 +2509,6 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co } } -#ifdef CUPDLP_DEBUG_MODE - print("before scale slack", pdhg_solver_.get_dual_slack()); - print("before scale potential next primal", - pdhg_solver_.get_potential_next_primal_solution()); - print("before scale potential next dual", pdhg_solver_.get_potential_next_dual_solution()); -#endif - // In case of batch mode, primal and dual matrices are in row format // We need to transpose them to column format before doing any checks if (batch_mode_) { @@ -2411,6 +2524,13 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.get_primal_solution(), pdhg_solver_.get_dual_solution(), dummy); } +#ifdef CUPDLP_DEBUG_MODE + print("before scale slack", pdhg_solver_.get_dual_slack()); + print("before scale potential next primal", + pdhg_solver_.get_potential_next_primal_solution()); + print("before scale potential next dual", pdhg_solver_.get_potential_next_dual_solution()); +#endif + // We go back to the unscaled problem here. 
It ensures that we do not terminate 'too early' // because of the error margin being evaluated on the scaled problem diff --git a/cpp/src/pdlp/pdlp.cuh b/cpp/src/pdlp/pdlp.cuh index de0cf69c9..d03430f15 100644 --- a/cpp/src/pdlp/pdlp.cuh +++ b/cpp/src/pdlp/pdlp.cuh @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -138,6 +139,8 @@ class pdlp_solver_t { rmm::cuda_stream_view stream_view_; // Intentionnaly take a copy to avoid an unintentional modification in the calling context const pdlp_solver_settings_t settings_; + dual_simplex::shared_strong_branching_context_view_t sb_view_{ + settings_.shared_sb_solved}; problem_t* problem_ptr; // Combined bounds in op_problem_scaled_ will only be scaled if diff --git a/cpp/src/pdlp/pdlp_constants.hpp b/cpp/src/pdlp/pdlp_constants.hpp index cf17cc985..568d7d00b 100644 --- a/cpp/src/pdlp/pdlp_constants.hpp +++ b/cpp/src/pdlp/pdlp_constants.hpp @@ -7,8 +7,6 @@ #pragma once -#include - #include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 149e99a43..2b1031026 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -691,6 +691,12 @@ void pdlp_restart_strategy_t::should_cupdlpx_restart(i_t total_number_ { std::fill(should_restart.begin(), should_restart.end(), 0); +#ifdef CUPDLP_DEBUG_MODE + // Print the current stats of initial fixed point error and fixed point error + print("initial_fixed_point_error", initial_fixed_point_error_); + print("fixed_point_error", fixed_point_error_); +#endif + if (total_number_of_iterations == hyper_params_.major_iteration) { #ifdef CUPDLP_DEBUG_MODE printf("forced restart at first major\n"); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 19e0b93a8..341edb2c1 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -838,17 +838,17 @@ 
optimization_problem_solution_t run_pdlp(detail::problem_t& return sol; } +// Compute in double as some cases overflow when using size_t template -static size_t batch_pdlp_memory_estimator(const optimization_problem_t& problem, - int trial_batch_size, - int max_batch_size) +static double batch_pdlp_memory_estimator(const optimization_problem_t& problem, + double trial_batch_size) { - size_t total_memory = 0; + double total_memory = 0.0; // In PDLP we store the scaled version of the problem which contains all of those total_memory += problem.get_constraint_matrix_indices().size() * sizeof(i_t); total_memory += problem.get_constraint_matrix_offsets().size() * sizeof(i_t); total_memory += problem.get_constraint_matrix_values().size() * sizeof(f_t); - total_memory *= 2; // To account for the A_t matrix + total_memory *= 2.0; // To account for the A_t matrix total_memory += problem.get_objective_coefficients().size() * sizeof(f_t); total_memory += problem.get_constraint_bounds().size() * sizeof(f_t); total_memory += problem.get_variable_lower_bounds().size() * sizeof(f_t); @@ -887,13 +887,13 @@ static size_t batch_pdlp_memory_estimator(const optimization_problem_t total_memory += trial_batch_size * problem.get_n_constraints() * sizeof(f_t); // Data for the solution - total_memory += problem.get_n_variables() * max_batch_size * sizeof(f_t); - total_memory += problem.get_n_constraints() * max_batch_size * sizeof(f_t); - total_memory += problem.get_n_variables() * max_batch_size * sizeof(f_t); + total_memory += problem.get_n_variables() * trial_batch_size * sizeof(f_t); + total_memory += problem.get_n_constraints() * trial_batch_size * sizeof(f_t); + total_memory += problem.get_n_variables() * trial_batch_size * sizeof(f_t); - // Add a 50% overhead to make sure we have enough memory considering other parts of the solver may - // allocate at the same time - total_memory *= 1.5; + // Add a 70% overhead to make sure we have enough memory considering other parts of the solver 
may + // need memory later while the batch PDLP is running + total_memory *= 1.7; // Data from saddle point state return total_memory; @@ -904,125 +904,167 @@ optimization_problem_solution_t run_batch_pdlp( optimization_problem_t& problem, pdlp_solver_settings_t const& settings) { // Hyper parameter than can be changed, I have put what I believe to be the best - bool primal_dual_init = true; - bool primal_weight_init = true; - bool use_optimal_batch_size = false; - constexpr int iteration_limit = 100000; - // Shouldn't we work on the unpresolved and/or unscaled problem for PDLP? - // Shouldn't we put an iteration limit? If yes what should we do with the partial solutions? + constexpr bool pdlp_primal_dual_init = true; + constexpr bool primal_weight_init = true; + constexpr bool use_initial_pdlp_iterations = true; + bool use_optimal_batch_size = false; + constexpr int batch_iteration_limit = 100000; + constexpr f_t pdlp_tolerance = 1e-5; rmm::cuda_stream_view stream = problem.get_handle_ptr()->get_stream(); rmm::device_uvector initial_primal(0, stream); rmm::device_uvector initial_dual(0, stream); - f_t initial_step_size = std::numeric_limits::signaling_NaN(); - f_t initial_primal_weight = std::numeric_limits::signaling_NaN(); + f_t initial_step_size = std::numeric_limits::signaling_NaN(); + f_t initial_primal_weight = std::numeric_limits::signaling_NaN(); + i_t initial_pdlp_iteration = -1; cuopt_assert(settings.new_bounds.size() > 0, "Batch size should be greater than 0"); - const int max_batch_size = settings.new_bounds.size(); - int memory_max_batch_size = max_batch_size; + const size_t max_batch_size = settings.new_bounds.size(); + size_t memory_max_batch_size = max_batch_size; // Check if we don't hit the limit using max_batch_size - const size_t memory_estimate = - batch_pdlp_memory_estimator(problem, max_batch_size, max_batch_size); - size_t free_mem, total_mem; - RAFT_CUDA_TRY(cudaMemGetInfo(&free_mem, &total_mem)); + const double memory_estimate = 
batch_pdlp_memory_estimator(problem, max_batch_size); + size_t st_free_mem, st_total_mem; + RAFT_CUDA_TRY(cudaMemGetInfo(&st_free_mem, &st_total_mem)); + const double free_mem = static_cast(st_free_mem); + const double total_mem = static_cast(st_total_mem); + +#ifdef BATCH_VERBOSE_MODE + std::cout << "Memory estimate: " << memory_estimate << std::endl; + std::cout << "Free memory: " << free_mem << std::endl; + std::cout << "Total memory: " << total_mem << std::endl; +#endif if (memory_estimate > free_mem) { use_optimal_batch_size = true; // Decrement batch size iteratively until we find a batch size that fits while (memory_max_batch_size > 1) { - const size_t memory_estimate = - batch_pdlp_memory_estimator(problem, memory_max_batch_size, max_batch_size); + const double memory_estimate = batch_pdlp_memory_estimator(problem, memory_max_batch_size); if (memory_estimate <= free_mem) { break; } +#ifdef BATCH_VERBOSE_MODE + std::cout << "Memory estimate: " << memory_estimate << std::endl; + std::cout << "Memory max batch size: " << memory_max_batch_size << std::endl; + std::cout << "Free memory: " << free_mem << std::endl; + std::cout << "Total memory: " << total_mem << std::endl; + std::cout << "--------------------------------" << std::endl; +#endif memory_max_batch_size--; } - const size_t min_estimate = - batch_pdlp_memory_estimator(problem, memory_max_batch_size, max_batch_size); - cuopt_expects(min_estimate <= free_mem, - error_type_t::OutOfMemoryError, - "Insufficient GPU memory for batch PDLP (min batch size still too large)"); + const double min_estimate = batch_pdlp_memory_estimator(problem, memory_max_batch_size); + if (min_estimate > free_mem) { + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, + stream); + } } - int optimal_batch_size = use_optimal_batch_size - ? detail::optimal_batch_size_handler(problem, memory_max_batch_size) - : max_batch_size; + size_t optimal_batch_size = use_optimal_batch_size + ? 
detail::optimal_batch_size_handler(problem, memory_max_batch_size) + : max_batch_size; + if (settings.sub_batch_size > 0) { optimal_batch_size = settings.sub_batch_size; } cuopt_assert(optimal_batch_size != 0 && optimal_batch_size <= max_batch_size, "Optimal batch size should be between 1 and max batch size"); - using f_t2 = typename type_2::type; - - // If need warm start, solve the LP alone - if (primal_dual_init || primal_weight_init) { - pdlp_solver_settings_t warm_start_settings = settings; - warm_start_settings.new_bounds.clear(); - warm_start_settings.method = cuopt::linear_programming::method_t::PDLP; - warm_start_settings.presolver = cuopt::linear_programming::presolver_t::None; - warm_start_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; - warm_start_settings.detect_infeasibility = false; - warm_start_settings.iteration_limit = iteration_limit; - warm_start_settings.inside_mip = true; - optimization_problem_solution_t original_solution = - solve_lp(problem, warm_start_settings); - if (primal_dual_init) { - initial_primal = rmm::device_uvector(original_solution.get_primal_solution(), - original_solution.get_primal_solution().stream()); - initial_dual = rmm::device_uvector(original_solution.get_dual_solution(), - original_solution.get_dual_solution().stream()); - initial_step_size = original_solution.get_pdlp_warm_start_data().initial_step_size_; + + const bool warm_start_from_settings = settings.has_initial_primal_solution() || + settings.has_initial_dual_solution() || + settings.get_initial_step_size().has_value() || + settings.get_initial_primal_weight().has_value() || + settings.get_initial_pdlp_iteration().has_value(); + + if (warm_start_from_settings) { +#ifdef BATCH_VERBOSE_MODE + std::cout << "Using warm start from settings" << std::endl; +#endif + if (settings.has_initial_primal_solution() && pdlp_primal_dual_init) { + initial_primal = rmm::device_uvector(settings.get_initial_primal_solution(), + 
settings.get_initial_primal_solution().stream()); + } + if (settings.has_initial_dual_solution() && pdlp_primal_dual_init) { + initial_dual = rmm::device_uvector(settings.get_initial_dual_solution(), + settings.get_initial_dual_solution().stream()); + } + if (settings.get_initial_step_size().has_value() && pdlp_primal_dual_init) { + initial_step_size = *settings.get_initial_step_size(); + } + if (settings.get_initial_primal_weight().has_value() && primal_weight_init) { + initial_primal_weight = *settings.get_initial_primal_weight(); } - if (primal_weight_init) { - initial_primal_weight = original_solution.get_pdlp_warm_start_data().initial_primal_weight_; + if (settings.get_initial_pdlp_iteration().has_value() && use_initial_pdlp_iterations) { + initial_pdlp_iteration = *settings.get_initial_pdlp_iteration(); } } - rmm::device_uvector full_primal_solution(problem.get_n_variables() * max_batch_size, stream); - rmm::device_uvector full_dual_solution(problem.get_n_constraints() * max_batch_size, stream); - rmm::device_uvector full_reduced_cost(problem.get_n_variables() * max_batch_size, stream); + // Only used in tests + const bool collect_solutions = settings.generate_batch_primal_dual_solution; + + rmm::device_uvector full_primal_solution( + (collect_solutions) ? problem.get_n_variables() * max_batch_size : 0, stream); + rmm::device_uvector full_dual_solution( + (collect_solutions) ? problem.get_n_constraints() * max_batch_size : 0, stream); + rmm::device_uvector full_reduced_cost( + (collect_solutions) ? 
problem.get_n_variables() * max_batch_size : 0, stream); std::vector< typename optimization_problem_solution_t::additional_termination_information_t> full_info; std::vector full_status; - pdlp_solver_settings_t batch_settings = settings; - const auto original_new_bounds = batch_settings.new_bounds; - batch_settings.method = cuopt::linear_programming::method_t::PDLP; - batch_settings.presolver = presolver_t::None; - batch_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; - batch_settings.detect_infeasibility = false; - batch_settings.iteration_limit = iteration_limit; - batch_settings.inside_mip = true; - if (primal_dual_init) { + pdlp_solver_settings_t batch_settings = settings; + const auto original_new_bounds = batch_settings.new_bounds; + batch_settings.method = cuopt::linear_programming::method_t::PDLP; + batch_settings.presolver = presolver_t::None; + batch_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + batch_settings.detect_infeasibility = false; + batch_settings.iteration_limit = batch_iteration_limit; + batch_settings.inside_mip = true; + batch_settings.tolerances.absolute_dual_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_dual_tolerance = pdlp_tolerance; + batch_settings.tolerances.absolute_primal_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_primal_tolerance = pdlp_tolerance; + batch_settings.tolerances.absolute_gap_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_gap_tolerance = pdlp_tolerance; + if (initial_primal.size() > 0) { batch_settings.set_initial_primal_solution( initial_primal.data(), initial_primal.size(), initial_primal.stream()); + } + if (initial_dual.size() > 0) { batch_settings.set_initial_dual_solution( initial_dual.data(), initial_dual.size(), initial_dual.stream()); - batch_settings.set_initial_step_size(initial_step_size); } - if (primal_weight_init) { batch_settings.set_initial_primal_weight(initial_primal_weight); } + if (!std::isnan(initial_step_size)) { 
batch_settings.set_initial_step_size(initial_step_size); } + if (initial_pdlp_iteration != -1) { + batch_settings.set_initial_pdlp_iteration(initial_pdlp_iteration); + } + if (!std::isnan(initial_primal_weight)) { + batch_settings.set_initial_primal_weight(initial_primal_weight); + } - for (int i = 0; i < max_batch_size; i += optimal_batch_size) { - const int current_batch_size = std::min(optimal_batch_size, max_batch_size - i); + for (size_t i = 0; i < max_batch_size; i += optimal_batch_size) { + const size_t current_batch_size = std::min(optimal_batch_size, max_batch_size - i); // Only take the new bounds from [i, i + current_batch_size) batch_settings.new_bounds = std::vector>( original_new_bounds.begin() + i, original_new_bounds.begin() + i + current_batch_size); - auto sol = solve_lp(problem, batch_settings); + if (!settings.shared_sb_solved.empty()) { + batch_settings.shared_sb_solved = settings.shared_sb_solved.subspan(i, current_batch_size); + } - // Copy results - raft::copy(full_primal_solution.data() + i * problem.get_n_variables(), - sol.get_primal_solution().data(), - problem.get_n_variables() * current_batch_size, - stream); - raft::copy(full_dual_solution.data() + i * problem.get_n_constraints(), - sol.get_dual_solution().data(), - problem.get_n_constraints() * current_batch_size, - stream); - raft::copy(full_reduced_cost.data() + i * problem.get_n_variables(), - sol.get_reduced_cost().data(), - problem.get_n_variables() * current_batch_size, - stream); + auto sol = solve_lp(problem, batch_settings); + if (collect_solutions) { + raft::copy(full_primal_solution.data() + i * problem.get_n_variables(), + sol.get_primal_solution().data(), + sol.get_primal_solution().size(), + stream); + raft::copy(full_dual_solution.data() + i * problem.get_n_constraints(), + sol.get_dual_solution().data(), + sol.get_dual_solution().size(), + stream); + raft::copy(full_reduced_cost.data() + i * problem.get_n_variables(), + sol.get_reduced_cost().data(), + 
sol.get_reduced_cost().size(), + stream); + } auto info = sol.get_additional_termination_informations(); full_info.insert(full_info.end(), info.begin(), info.end()); diff --git a/cpp/src/pdlp/solver_settings.cu b/cpp/src/pdlp/solver_settings.cu index 7acfc7481..ac2564bb1 100644 --- a/cpp/src/pdlp/solver_settings.cu +++ b/cpp/src/pdlp/solver_settings.cu @@ -61,12 +61,30 @@ void pdlp_solver_settings_t::set_initial_dual_solution(const f_t* init template void pdlp_solver_settings_t::set_initial_step_size(f_t initial_step_size) { + cuopt_expects(initial_step_size > f_t(0), + error_type_t::ValidationError, + "Initial step size must be greater than 0"); + cuopt_expects(!std::isinf(initial_step_size), + error_type_t::ValidationError, + "Initial step size must be finite"); + cuopt_expects(!std::isnan(initial_step_size), + error_type_t::ValidationError, + "Initial step size must be a number"); initial_step_size_ = std::make_optional(initial_step_size); } template void pdlp_solver_settings_t::set_initial_primal_weight(f_t initial_primal_weight) { + cuopt_expects(initial_primal_weight > f_t(0), + error_type_t::ValidationError, + "Initial primal weight must be greater than 0"); + cuopt_expects(!std::isinf(initial_primal_weight), + error_type_t::ValidationError, + "Initial primal weight must be finite"); + cuopt_expects(!std::isnan(initial_primal_weight), + error_type_t::ValidationError, + "Initial primal weight must be a number"); initial_primal_weight_ = std::make_optional(initial_primal_weight); } @@ -348,6 +366,21 @@ std::optional pdlp_solver_settings_t::get_initial_primal_weight() return initial_primal_weight_; } +template +void pdlp_solver_settings_t::set_initial_pdlp_iteration(i_t initial_pdlp_iteration) +{ + cuopt_expects(initial_pdlp_iteration >= 0, + error_type_t::ValidationError, + "Initial pdlp iteration must be greater than or equal to 0"); + initial_pdlp_iteration_ = std::make_optional(initial_pdlp_iteration); +} + +template +std::optional 
pdlp_solver_settings_t::get_initial_pdlp_iteration() const +{ + return initial_pdlp_iteration_; +} + template const pdlp_warm_start_data_t& pdlp_solver_settings_t::get_pdlp_warm_start_data() const noexcept diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.cu b/cpp/src/pdlp/termination_strategy/termination_strategy.cu index 7179df6a4..167cf33e7 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.cu +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.cu @@ -124,6 +124,14 @@ pdlp_termination_status_t pdlp_termination_strategy_t::get_termination return (pdlp_termination_status_t)termination_status_[id]; } +template +void pdlp_termination_strategy_t::set_termination_status(i_t id, + pdlp_termination_status_t status) +{ + cuopt_assert(id < termination_status_.size(), "id too big for batch size"); + termination_status_[id] = (i_t)status; +} + template std::vector pdlp_termination_strategy_t::get_terminations_status() @@ -389,7 +397,8 @@ __host__ __device__ bool pdlp_termination_strategy_t::is_done( { return termination_status == pdlp_termination_status_t::Optimal || termination_status == pdlp_termination_status_t::PrimalInfeasible || - termination_status == pdlp_termination_status_t::DualInfeasible; + termination_status == pdlp_termination_status_t::DualInfeasible || + termination_status == pdlp_termination_status_t::ConcurrentLimit; } template diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.hpp b/cpp/src/pdlp/termination_strategy/termination_strategy.hpp index 6fe118c48..efb7a41d7 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.hpp +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.hpp @@ -140,6 +140,7 @@ class pdlp_termination_strategy_t { f_t get_relative_primal_tolerance_factor() const; pdlp_termination_status_t get_termination_status(i_t id) const; + void set_termination_status(i_t id, pdlp_termination_status_t status); std::vector get_terminations_status(); bool 
all_optimal_status() const; bool all_done() const; diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index b143a206d..1e0f46c8d 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -9,6 +9,8 @@ #include +#include + #include #include diff --git a/cpp/src/pdlp/utilities/ping_pong_graph.cu b/cpp/src/pdlp/utilities/ping_pong_graph.cu index 4ec5bff8c..0df3861b5 100644 --- a/cpp/src/pdlp/utilities/ping_pong_graph.cu +++ b/cpp/src/pdlp/utilities/ping_pong_graph.cu @@ -8,6 +8,7 @@ #include #include +#include #include diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index d5a8d6900..5c6edad27 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -5,6 +5,7 @@ */ /* clang-format on */ +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include #include namespace cuopt::linear_programming::test { @@ -1681,6 +1683,7 @@ TEST(pdlp_class, strong_branching_test) solver_settings.method = cuopt::linear_programming::method_t::PDLP; solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.generate_batch_primal_dual_solution = true; const int n_fractional = fractional.size(); const int batch_size = n_fractional * 2; @@ -2043,6 +2046,301 @@ TEST(pdlp_class, precision_single_pslp_presolve) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +// --------------------------------------------------------------------------- +// Cooperative strong branching tests +// --------------------------------------------------------------------------- + +TEST(pdlp_class, shared_sb_context_unit) +{ + using namespace cuopt::linear_programming::dual_simplex; + + constexpr int N = 10; + shared_strong_branching_context_t ctx(N); + shared_strong_branching_context_view_t view(ctx.solved); + + 
EXPECT_TRUE(view.is_valid()); + + shared_strong_branching_context_view_t empty_view; + EXPECT_FALSE(empty_view.is_valid()); + + for (int i = 0; i < N; ++i) { + EXPECT_FALSE(view.is_solved(i)); + } + + view.mark_solved(0); + view.mark_solved(3); + view.mark_solved(7); + + EXPECT_TRUE(view.is_solved(0)); + EXPECT_FALSE(view.is_solved(1)); + EXPECT_FALSE(view.is_solved(2)); + EXPECT_TRUE(view.is_solved(3)); + EXPECT_FALSE(view.is_solved(4)); + EXPECT_FALSE(view.is_solved(5)); + EXPECT_FALSE(view.is_solved(6)); + EXPECT_TRUE(view.is_solved(7)); + EXPECT_FALSE(view.is_solved(8)); + EXPECT_FALSE(view.is_solved(9)); + + // subview(2, 5) covers global indices [2..6] + auto sv = view.subview(2, 5); + EXPECT_TRUE(sv.is_valid()); + EXPECT_FALSE(sv.is_solved(0)); // global 2 + EXPECT_TRUE(sv.is_solved(1)); // global 3 + EXPECT_FALSE(sv.is_solved(2)); // global 4 + EXPECT_FALSE(sv.is_solved(3)); // global 5 + EXPECT_FALSE(sv.is_solved(4)); // global 6 + + // Mark through subview: local 4 -> global 6 + sv.mark_solved(4); + EXPECT_TRUE(view.is_solved(6)); + EXPECT_TRUE(sv.is_solved(4)); +} + +TEST(pdlp_class, shared_sb_view_batch_pre_solved) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; // 6 + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + + // Build new_bounds: down branches [0..2], up branches [3..5] + for (int i = 0; i < n_fractional; ++i) + 
solver_settings.new_bounds.push_back({fractional[i], + op_problem.get_variable_lower_bounds()[fractional[i]], + std::floor(root_soln_x[i])}); + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + std::ceil(root_soln_x[i]), + op_problem.get_variable_upper_bounds()[fractional[i]]}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + // Pre-mark entries 1 and 4 as solved (simulating DS) + sb_view.mark_solved(1); + sb_view.mark_solved(4); + + solver_settings.shared_sb_solved = sb_view.solved; + + auto solution = solve_lp(&handle_, op_problem, solver_settings); + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + // Pre-solved entries should have ConcurrentLimit + EXPECT_EQ(solution.get_termination_status(1), pdlp_termination_status_t::ConcurrentLimit); + EXPECT_EQ(solution.get_termination_status(4), pdlp_termination_status_t::ConcurrentLimit); + + // Others should be Optimal + EXPECT_EQ(solution.get_termination_status(0), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(2), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(3), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(5), pdlp_termination_status_t::Optimal); + + // All entries should now be marked solved in the shared context + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } +} + +TEST(pdlp_class, shared_sb_view_subbatch) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int 
n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.sub_batch_size = 2; + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + // Pre-mark one entry in each sub-batch of size 2: indices 1, 4 + sb_view.mark_solved(1); + sb_view.mark_solved(4); + + solver_settings.shared_sb_solved = sb_view.solved; + + auto solution = batch_pdlp_solve(&handle_, op_problem, fractional, root_soln_x, solver_settings); + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + // Pre-solved entries should have ConcurrentLimit + EXPECT_EQ(solution.get_termination_status(1), pdlp_termination_status_t::ConcurrentLimit); + EXPECT_EQ(solution.get_termination_status(4), pdlp_termination_status_t::ConcurrentLimit); + + // Others should be Optimal + for (int i = 0; i < batch_size; ++i) { + if (i == 1 || i == 4) continue; + EXPECT_EQ(solution.get_termination_status(i), pdlp_termination_status_t::Optimal) + << "Entry " << i << " should be Optimal"; + } + + // All should be marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } +} + +TEST(pdlp_class, shared_sb_view_concurrent_mark) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = 
n_fractional * 2; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.iteration_limit = 1000000; + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[0], -5, -5}); + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + std::ceil(root_soln_x[i]), + op_problem.get_variable_upper_bounds()[fractional[i]]}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + solver_settings.shared_sb_solved = sb_view.solved; + + optimization_problem_solution_t* result_ptr = nullptr; + + auto pdlp_thread = std::thread([&]() { + auto sol = new optimization_problem_solution_t( + solve_lp(&handle_, op_problem, solver_settings)); + result_ptr = sol; + }); + + // Wait a bit then mark entries 0, 2, 4 as solved (simulating DS) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < n_fractional; ++i) + sb_view.mark_solved(i); + + pdlp_thread.join(); + + ASSERT_NE(result_ptr, nullptr); + auto& solution = *result_ptr; + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + for (int i = 0; i < batch_size; ++i) { + auto status = solution.get_termination_status(i); + // Each entry should be either Optimal (PDLP solved it first) or ConcurrentLimit (DS marked it) + EXPECT_TRUE(status == pdlp_termination_status_t::Optimal || + status == pdlp_termination_status_t::ConcurrentLimit) + << "Entry " << i << " has unexpected status " + << cuopt::linear_programming::optimization_problem_solution_t:: + get_termination_status_string(status); + } + + // All entries should end up marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << 
"Entry " << i << " should be solved"; + } + + delete result_ptr; +} + +TEST(pdlp_class, shared_sb_view_all_infeasible) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.iteration_limit = 1000000; + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[0], -5, -5}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + solver_settings.shared_sb_solved = sb_view.solved; + + optimization_problem_solution_t* result_ptr = nullptr; + + auto pdlp_thread = std::thread([&]() { + auto sol = new optimization_problem_solution_t( + solve_lp(&handle_, op_problem, solver_settings)); + result_ptr = sol; + }); + + // Wait a bit then mark entries 0, 2, 4 as solved (simulating DS) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < n_fractional; ++i) + sb_view.mark_solved(i); + + pdlp_thread.join(); + + ASSERT_NE(result_ptr, nullptr); + auto& solution = *result_ptr; + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + for (int i = 0; i < batch_size; ++i) { + auto status = solution.get_termination_status(i); + // Each entry should be either Optimal (PDLP solved it first) or ConcurrentLimit (DS marked it) + EXPECT_TRUE(status == 
pdlp_termination_status_t::ConcurrentLimit) + << "Entry " << i << " has unexpected status " + << cuopt::linear_programming::optimization_problem_solution_t:: + get_termination_status_string(status); + } + + // All entries should end up marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } + + delete result_ptr; +} + } // namespace cuopt::linear_programming::test CUOPT_TEST_PROGRAM_MAIN() diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 59ea62089..32cf860f2 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -452,8 +452,15 @@ class SolverConfig(BaseModel): ) mip_batch_pdlp_strong_branching: Optional[int] = Field( default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", + description="Strong branching mode: 0 = Dual Simplex only, " + "1 = cooperative work-stealing (DS + batch PDLP), " + "2 = batch PDLP only.", + ) + mip_batch_pdlp_reliability_branching: Optional[int] = Field( + default=0, + description="Reliability branching mode: 0 = Dual Simplex only, " + "1 = cooperative work-stealing (DS + batch PDLP), " + "2 = batch PDLP only.", ) num_cpu_threads: Optional[int] = Field( default=None,