From dcc3d1fd4c67ab77ab59e7a726adf7a205d1b463 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 29 Mar 2026 17:51:01 -0400 Subject: [PATCH] Add 2025 ACA and Medicaid target coverage --- .gitignore | 3 + .../ecps-2025-target-coverage.changed.md | 1 + .../calibration/unified_matrix_builder.py | 15 ++- policyengine_us_data/storage/README.md | 11 ++ .../storage/aca_ptc_multipliers_2022_2025.csv | 52 ++++++++ .../aca_spending_and_enrollment_2025.csv | 52 ++++++++ .../medicaid_enrollment_2025.csv | 52 ++++++++ .../test_calibration/test_loss_targets.py | 45 +++++++ .../test_unified_matrix_builder.py | 10 +- policyengine_us_data/utils/loss.py | 117 +++++++++++++++--- 10 files changed, 337 insertions(+), 21 deletions(-) create mode 100644 changelog.d/ecps-2025-target-coverage.changed.md create mode 100644 policyengine_us_data/storage/aca_ptc_multipliers_2022_2025.csv create mode 100644 policyengine_us_data/storage/calibration_targets/aca_spending_and_enrollment_2025.csv create mode 100644 policyengine_us_data/storage/calibration_targets/medicaid_enrollment_2025.csv create mode 100644 policyengine_us_data/tests/test_calibration/test_loss_targets.py diff --git a/.gitignore b/.gitignore index 5418f2090..b53ecb473 100644 --- a/.gitignore +++ b/.gitignore @@ -13,10 +13,12 @@ node_modules ## old (not clean) targets !healthcare_spending.csv !medicaid_enrollment_2024.csv +!medicaid_enrollment_2025.csv !eitc.csv !spm_threshold_agi.csv !population_by_state.csv !aca_spending_and_enrollment_2024.csv +!aca_spending_and_enrollment_2025.csv !real_estate_taxes_by_state_acs.csv !snap_state.csv !age_state.csv @@ -29,6 +31,7 @@ docs/.ipynb_checkpoints/ ## ACA PTC state-level uprating factors !policyengine_us_data/storage/aca_ptc_multipliers_2022_2024.csv +!policyengine_us_data/storage/aca_ptc_multipliers_2022_2025.csv ## Calibration run outputs (weights, diagnostics, packages, config) policyengine_us_data/storage/calibration/ diff --git a/changelog.d/ecps-2025-target-coverage.changed.md 
b/changelog.d/ecps-2025-target-coverage.changed.md new file mode 100644 index 000000000..b16f84d36 --- /dev/null +++ b/changelog.d/ecps-2025-target-coverage.changed.md @@ -0,0 +1 @@ +Add 2025 ACA and Medicaid calibration target artifacts, plus year-aware ACA and Medicaid target loading and state uprating factors for 2025 builds. diff --git a/policyengine_us_data/calibration/unified_matrix_builder.py b/policyengine_us_data/calibration/unified_matrix_builder.py index fb8865b80..94d8b2c2b 100644 --- a/policyengine_us_data/calibration/unified_matrix_builder.py +++ b/policyengine_us_data/calibration/unified_matrix_builder.py @@ -21,7 +21,7 @@ from policyengine_us_data.db.create_database_tables import create_or_replace_views from policyengine_us_data.storage import STORAGE_FOLDER -from policyengine_us_data.utils.census import STATE_NAME_TO_FIPS +from policyengine_us_data.utils.census import STATE_ABBREV_TO_FIPS, STATE_NAME_TO_FIPS from policyengine_us_data.calibration.calibration_utils import ( get_calculated_variables, apply_op, @@ -1685,11 +1685,22 @@ def _get_uprating_info( def _load_aca_ptc_factors( self, ) -> Dict[int, Dict[str, float]]: - csv_path = STORAGE_FOLDER / "aca_ptc_multipliers_2022_2024.csv" + csv_candidates = {} + for path in STORAGE_FOLDER.glob("aca_ptc_multipliers_2022_*.csv"): + suffix = path.stem.removeprefix("aca_ptc_multipliers_2022_") + if suffix.isdigit(): + csv_candidates[int(suffix)] = path + + eligible_years = [ + year for year in csv_candidates if year <= self.time_period + ] or sorted(csv_candidates) + csv_path = csv_candidates[max(eligible_years)] df = pd.read_csv(csv_path) result = {} for _, row in df.iterrows(): fips_str = STATE_NAME_TO_FIPS.get(row["state"]) + if fips_str is None: + fips_str = STATE_ABBREV_TO_FIPS.get(row["state"]) if fips_str is None: continue fips_int = int(fips_str) diff --git a/policyengine_us_data/storage/README.md b/policyengine_us_data/storage/README.md index 80d4c1cdc..b8df36fca 100644 --- 
a/policyengine_us_data/storage/README.md +++ b/policyengine_us_data/storage/README.md @@ -5,11 +5,22 @@ • Date: 2024 • Location: https://www.cms.gov/files/document/health-insurance-exchanges-2024-open-enrollment-report-final.pdf +- **aca_spending_and_enrollment_2025.csv** + • Source: CMS “Effectuated Enrollment: Early 2025 Snapshot and Full Year 2024 Average”, Table 2 and Table 3 + • Date: March 15, 2025 snapshot + • Location: https://www.cms.gov/files/document/effectuated-enrollment-early-snapshot-2025-and-full-year-2024-average.pdf + • Notes: `enrollment` is APTC enrollment by state; `spending` is monthly APTC enrollment multiplied by average monthly APTC for APTC recipients + - **medicaid_enrollment_2024.csv** • Source: MACPAC Enrollment Tables, FFY 2024 • Date: 2024 • Location: https://www.medicaid.gov/resources-for-states/downloads/eligib-oper-and-enrol-snap-december2024.pdf#page=26 +- **medicaid_enrollment_2025.csv** + • Source: Medicaid.gov performance indicator dataset, latest final-report month available in the March 2026 release + • Date: November 2025 final reports + • Location: https://data.medicaid.gov/dataset/State-Medicaid-and-CHIP-Applications-Eligibility-Deter/pi-dataset-march-2026release + - **district_mapping.csv** • Source: created by the script `policyengine_us/storage/calibration_targets/make_district_mapping.py` • Notes: this script is not part of `make data` because of the length of time it takes to run and the diff --git a/policyengine_us_data/storage/aca_ptc_multipliers_2022_2025.csv b/policyengine_us_data/storage/aca_ptc_multipliers_2022_2025.csv new file mode 100644 index 000000000..1abb451fd --- /dev/null +++ b/policyengine_us_data/storage/aca_ptc_multipliers_2022_2025.csv @@ -0,0 +1,52 @@ +state,enroll_2022,aptc_2022,enroll_2025,aptc_2025,vol_mult,val_mult +AK,18576,695.4,24249,1008.66,1.3053940568475453,1.450474547023296 +AL,200963,713.19,446956,611.48,2.224071097664744,0.8573872320139093 
+AR,73925,451.83,143734,508.4,1.9443219479201894,1.1252019564880598 +AZ,159471,448.69,370817,458.26,2.32529425412771,1.021328757048296 +CA,1586076,476.02,1751717,559.49,1.1044344659398415,1.1753497752195285 +CO,132564,373.95,202916,465.8,1.5307021514136567,1.2456210723358738 +CT,78832,657.22,128440,783.8,1.629287598944591,1.1925991296673868 +DC,2232,343.0,3458,489.13,1.549283154121864,1.4260349854227405 +DE,27345,607.2,45885,588.14,1.678003291278113,0.9686100131752304 +FL,2544476,552.78,4472539,591.39,1.7577446201103881,1.069846955389124 +GA,612951,466.22,1408787,573.3,2.2983680587844706,1.229676976534683 +HI,17724,576.23,19676,577.9,1.1101331527871812,1.0028981483088348 +IA,62355,598.02,117890,471.05,1.8906262529067437,0.787682686197786 +ID,57637,453.64,100605,406.51,1.7454933462879747,0.8961070452341063 +IL,266563,508.89,395850,542.1,1.485014799503307,1.0652596828391205 +IN,121953,459.49,289683,427.8,2.375365919657573,0.9310322313869726 +KS,93793,531.71,182303,579.53,1.9436738349343767,1.0899362434409734 +KY,56614,484.54,77424,526.97,1.367576924435652,1.0875675898790607 +LA,89229,658.46,276294,593.53,3.0964596711831356,0.9013911247456184 +MA,179791,377.37,294241,409.73,1.6365724646951183,1.0857513845827702 +MD,134906,374.94,181274,405.13,1.3437059878730375,1.0805195497946338 +ME,52938,470.92,53808,607.03,1.016434319392497,1.2890299838613777 +MI,248623,392.06,470840,449.38,1.8937910008325858,1.1462021119216446 +MN,69218,342.85,91520,363.43,1.3221994278944784,1.0600262505468863 +MO,216351,548.21,383029,564.46,1.7704054984723898,1.029641925539483 +MS,129385,569.78,320193,575.98,2.474730455616957,1.0108813928182807 +MT,43431,501.35,67185,544.77,1.546936519997237,1.0866061633589308 +NC,608078,580.69,888059,573.88,1.460435996697792,0.9882725722846957 +ND,26577,452.14,38239,485.74,1.4388004665688376,1.0743132658026275 +NE,90106,614.95,127135,602.33,1.4109493263489667,0.979478006341979 +NH,35528,336.67,47900,340.5,1.3482323800945732,1.0113761249888615 
+NJ,272035,490.07,457128,546.69,1.6804014189350636,1.1155345154773808 +NM,27522,472.35,60742,591.28,2.2070343725019983,1.251783635016407 +NV,86938,434.76,94596,464.67,1.088085762267363,1.068796577422026 +NY,118805,361.6,118526,433.79,0.9976516139893102,1.1996404867256636 +OH,196185,438.57,513517,500.44,2.6175140810969237,1.1410721207560937 +OK,173176,558.99,289763,559.31,1.673228391924978,1.0005724610458147 +OR,109031,491.48,106167,531.17,0.9737322412891747,1.0807560836656627 +PA,325333,499.67,421996,538.87,1.2971201814755957,1.0784517781735945 +RI,27386,401.71,36438,471.54,1.3305338494121084,1.1738318687610467 +SC,271359,513.06,587715,523.46,2.165820923573569,1.0202705336607807 +SD,38076,641.7,50340,599.92,1.3220926567916798,0.9348916939379771 +TN,238632,544.86,592946,592.36,2.4847715310603773,1.0871783577432734 +TX,1656384,503.86,3713574,540.82,2.2419764982033152,1.0733537093637122 +UT,235188,370.03,395357,462.92,1.6810253924519958,1.251033699970273 +VA,264517,508.29,326268,405.46,1.23344813376834,0.7976942296720375 +VT,22625,566.36,30280,946.45,1.338342541436464,1.6711102478988629 +WA,147712,420.54,226813,445.26,1.5355082863951472,1.0587815665572835 +WI,181848,531.3,271589,585.19,1.4934945668910298,1.1014304536043669 +WV,20466,1007.49,63036,1103.76,3.0800351802990327,1.0955542983056903 +WY,32278,813.47,43457,915.98,1.3463349649916352,1.1260157104748791 diff --git a/policyengine_us_data/storage/calibration_targets/aca_spending_and_enrollment_2025.csv b/policyengine_us_data/storage/calibration_targets/aca_spending_and_enrollment_2025.csv new file mode 100644 index 000000000..0ac75b8f2 --- /dev/null +++ b/policyengine_us_data/storage/calibration_targets/aca_spending_and_enrollment_2025.csv @@ -0,0 +1,52 @@ +state,enrollment,spending +AK,24249,24458996.34 +AL,446956,273304654.88 +AR,143734,73074365.6 +AZ,370817,169930598.42 +CA,1751717,980068144.33 +CO,202916,94518272.8 +CT,128440,100671272.0 +DC,3458,1691411.54 +DE,45885,26986803.9 +FL,4472539,2645014839.21 
+GA,1408787,807657587.1 +HI,19676,11370760.4 +IA,117890,55532084.5 +ID,100605,40896938.55 +IL,395850,214590285.0 +IN,289683,123926387.4 +KS,182303,105650057.59 +KY,77424,40800125.28 +LA,276294,163988777.82 +MA,294241,120559364.93 +MD,181274,73439535.62 +ME,53808,32663070.24 +MI,470840,211586079.2 +MN,91520,33261113.6 +MO,383029,216204549.34 +MS,320193,184424764.14 +MT,67185,36600372.45 +NC,888059,509639298.92 +ND,38239,18574211.86 +NE,127135,76577224.55 +NH,47900,16309950.0 +NJ,457128,249907306.32 +NM,60742,35915529.76 +NV,94596,43955923.32 +NY,118526,51415393.54 +OH,513517,256984447.48 +OK,289763,162067343.53 +OR,106167,56392725.39 +PA,421996,227400984.52 +RI,36438,17181974.52 +SC,587715,307645293.9 +SD,50340,30199972.8 +TN,592946,351237492.56 +TX,3713574,2008375090.68 +UT,395357,183018662.44 +VA,326268,132288623.28 +VT,30280,28658506.0 +WA,226813,100990756.38 +WI,271589,158931166.91 +WV,63036,69576615.36 +WY,43457,39805742.86 diff --git a/policyengine_us_data/storage/calibration_targets/medicaid_enrollment_2025.csv b/policyengine_us_data/storage/calibration_targets/medicaid_enrollment_2025.csv new file mode 100644 index 000000000..985ac7f9a --- /dev/null +++ b/policyengine_us_data/storage/calibration_targets/medicaid_enrollment_2025.csv @@ -0,0 +1,52 @@ +state,enrollment +AK,199460 +AL,752535 +AR,723536 +AZ,1579905 +CA,11554412 +CO,1046707 +CT,882013 +DC,242274 +DE,227234 +FL,3447907 +GA,1698374 +HI,364787 +IA,585282 +ID,293777 +IL,2719408 +IN,1431868 +KS,333620 +KY,1200890 +LA,1274536 +MA,1405793 +MD,1209670 +ME,314237 +MI,2131207 +MN,1158706 +MO,1133140 +MS,509631 +MT,192806 +NC,2527040 +ND,100854 +NE,299026 +NH,159398 +NJ,1514284 +NM,640061 +NV,679632 +NY,5894332 +OH,2508352 +OK,913130 +OR,1124039 +PA,2732233 +RI,263719 +SC,875777 +SD,122031 +TN,1233717 +TX,3776984 +UT,298758 +VA,1503054 +VT,146362 +WA,1722640 +WI,1030040 +WV,451904 +WY,54143 diff --git a/policyengine_us_data/tests/test_calibration/test_loss_targets.py 
b/policyengine_us_data/tests/test_calibration/test_loss_targets.py new file mode 100644 index 000000000..8beefb7d1 --- /dev/null +++ b/policyengine_us_data/tests/test_calibration/test_loss_targets.py @@ -0,0 +1,45 @@ +import pytest + +from policyengine_us_data.utils.loss import ( + _get_aca_national_targets, + _get_medicaid_national_targets, + _load_aca_spending_and_enrollment_targets, + _load_medicaid_enrollment_targets, +) + + +def test_aca_targets_roll_forward_to_2025(): + targets, data_year = _load_aca_spending_and_enrollment_targets(2025) + + assert data_year == 2025 + assert len(targets) == 51 + assert int(targets["enrollment"].sum()) == 21_822_894 + + +def test_aca_targets_use_latest_available_year(): + _, data_year = _load_aca_spending_and_enrollment_targets(2026) + assert data_year == 2025 + + +def test_aca_national_targets_annualize_2025_state_file(): + spending, enrollment, data_year = _get_aca_national_targets(2025) + + assert data_year == 2025 + assert enrollment == 21_822_894 + assert spending == pytest.approx(143_951_057_388.72) + + +def test_medicaid_targets_roll_forward_to_2025(): + targets, data_year = _load_medicaid_enrollment_targets(2025) + + assert data_year == 2025 + assert len(targets) == 51 + assert int(targets["enrollment"].sum()) == 69_185_225 + + +def test_medicaid_national_targets_use_2025_values(): + spending, enrollment, data_year = _get_medicaid_national_targets(2025) + + assert data_year == 2025 + assert enrollment == 69_185_225 + assert spending == pytest.approx(1_000_645_800_000.0001) diff --git a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py index 60164d5d2..7ae3f726e 100644 --- a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py +++ b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py @@ -275,9 +275,10 @@ def test_inactive_targets_are_excluded(self): 
self.assertEqual(float(baseline_rows.iloc[0]["value"]), 10000.0) def test_legacy_target_overview_without_reform_id(self): + b = self._make_builder() _create_legacy_target_overview(self.engine) try: - b = self._make_builder() + b._target_overview_columns = None df = b._query_targets({"domain_variables": ["aca_ptc"]}) self.assertGreater(len(df), 0) self.assertIn("reform_id", df.columns) @@ -439,6 +440,13 @@ def test_aca_ptc_uses_csv_factors(self): self.assertIn(6, result) self.assertIn(37, result) + def test_aca_ptc_2025_uses_2025_csv_factors(self): + b = UnifiedMatrixBuilder(db_uri=self.db_uri, time_period=2025) + result = b._load_aca_ptc_factors() + + self.assertAlmostEqual(result[1]["tax_unit_count"], 2.224071097664744) + self.assertAlmostEqual(result[1]["aca_ptc"], 1.906890162228912) + def test_non_aca_uses_national_factors(self): b = UnifiedMatrixBuilder(db_uri=self.db_uri, time_period=2024) df = pd.DataFrame( diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 8588c3263..54d18b27f 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -103,6 +103,26 @@ ], } +ACA_SPENDING_TARGETS = { + 2024: 98e9, +} + +ACA_ENROLLMENT_TARGETS = { + 2024: 19_743_689, +} + +MEDICAID_SPENDING_TARGETS = { + 2024: 9e11, + # CMS projects Medicaid spending growth of 7.4% in 2025. + # Apply that projection to 2024 Medicaid spending of $931.7B. + # Source: CMS National Health Expenditure projections, 2024-2033. 
+ 2025: 931.7e9 * 1.074, +} + +MEDICAID_ENROLLMENT_TARGETS = { + 2024: 72_429_055, +} + def fmt(x): if x == -np.inf: @@ -118,6 +138,69 @@ def fmt(x): return f"{x / 1e9:.1f}bn" +def _best_available_year(targets_by_year: dict, requested_year: int) -> int: + eligible_years = [year for year in targets_by_year if year <= requested_year] + if not eligible_years: + raise ValueError(f"No target year available for request year {requested_year}") + return max(eligible_years) + + +def _load_yeared_target_csv( + prefix: str, requested_year: int +) -> tuple[pd.DataFrame, int]: + candidates = {} + for path in CALIBRATION_FOLDER.glob(f"{prefix}_*.csv"): + suffix = path.stem.removeprefix(f"{prefix}_") + if suffix.isdigit(): + candidates[int(suffix)] = path + + data_year = _best_available_year(candidates, requested_year) + return pd.read_csv(candidates[data_year]), data_year + + +def _load_aca_spending_and_enrollment_targets( + requested_year: int, +) -> tuple[pd.DataFrame, int]: + return _load_yeared_target_csv("aca_spending_and_enrollment", requested_year) + + +def _load_medicaid_enrollment_targets( + requested_year: int, +) -> tuple[pd.DataFrame, int]: + return _load_yeared_target_csv("medicaid_enrollment", requested_year) + + +def _get_aca_national_targets(requested_year: int) -> tuple[float, float, int]: + targets, data_year = _load_aca_spending_and_enrollment_targets(requested_year) + if data_year in ACA_SPENDING_TARGETS and data_year in ACA_ENROLLMENT_TARGETS: + return ( + ACA_SPENDING_TARGETS[data_year], + ACA_ENROLLMENT_TARGETS[data_year], + data_year, + ) + + # Newer CMS ACA state files encode monthly total APTC spending by state and + # APTC enrollment counts. Annualize the spending for the national target. 
+ return ( + float(targets["spending"].sum() * 12), + float(targets["enrollment"].sum()), + data_year, + ) + + +def _get_medicaid_national_targets(requested_year: int) -> tuple[float, float, int]: + targets, data_year = _load_medicaid_enrollment_targets(requested_year) + spending_year = _best_available_year(MEDICAID_SPENDING_TARGETS, data_year) + enrollment_target = MEDICAID_ENROLLMENT_TARGETS.get( + data_year, float(targets["enrollment"].sum()) + ) + return ( + MEDICAID_SPENDING_TARGETS[spending_year], + enrollment_target, + data_year, + ) + + def build_loss_matrix(dataset: type, time_period): loss_matrix = pd.DataFrame() df = pe_to_soi(dataset, time_period) @@ -289,10 +372,13 @@ def build_loss_matrix(dataset: type, time_period): ) # 1. Medicaid Spending + medicaid_spending_target, medicaid_enrollment_target, _ = ( + _get_medicaid_national_targets(time_period) + ) + label = "nation/hhs/medicaid_spending" loss_matrix[label] = sim.calculate("medicaid", map_to="household").values - MEDICAID_SPENDING_2024 = 9e11 - targets_array.append(MEDICAID_SPENDING_2024) + targets_array.append(medicaid_spending_target) # 2. 
Medicaid Enrollment label = "nation/hhs/medicaid_enrollment" @@ -305,16 +391,18 @@ def build_loss_matrix(dataset: type, time_period): > 0 ).astype(int) loss_matrix[label] = sim.map_result(on_medicaid, "person", "household") - MEDICAID_ENROLLMENT_2024 = 72_429_055 # target lives (not thousands) - targets_array.append(MEDICAID_ENROLLMENT_2024) + targets_array.append(medicaid_enrollment_target) # National ACA Spending + aca_spending_target, aca_enrollment_target, _ = _get_aca_national_targets( + time_period + ) + label = "nation/gov/aca_spending" loss_matrix[label] = sim.calculate( "aca_ptc", map_to="household", period=2025 ).values - ACA_SPENDING_2024 = 9.8e10 # 2024 outlays on PTC - targets_array.append(ACA_SPENDING_2024) + targets_array.append(aca_spending_target) # National ACA Enrollment (people receiving a PTC) label = "nation/gov/aca_enrollment" @@ -323,8 +411,7 @@ def build_loss_matrix(dataset: type, time_period): ) loss_matrix[label] = sim.map_result(on_ptc, "person", "household") - ACA_PTC_ENROLLMENT_2024 = 19_743_689 # people enrolled - targets_array.append(ACA_PTC_ENROLLMENT_2024) + targets_array.append(aca_enrollment_target) # Treasury EITC @@ -540,14 +627,12 @@ def build_loss_matrix(dataset: type, time_period): targets_array.append(target_count) # ACA spending by state - spending_by_state = pd.read_csv( - CALIBRATION_FOLDER / "aca_spending_and_enrollment_2024.csv" - ) + spending_by_state, _ = _load_aca_spending_and_enrollment_targets(time_period) # Monthly to yearly spending_by_state["spending"] = spending_by_state["spending"] * 12 # Adjust to match national target spending_by_state["spending"] = spending_by_state["spending"] * ( - ACA_SPENDING_2024 / spending_by_state["spending"].sum() + aca_spending_target / spending_by_state["spending"].sum() ) for _, row in spending_by_state.iterrows(): @@ -568,9 +653,7 @@ def build_loss_matrix(dataset: type, time_period): targets_array.append(annual_target) # Marketplace enrollment by state (targets in thousands) - 
enrollment_by_state = pd.read_csv( - CALIBRATION_FOLDER / "aca_spending_and_enrollment_2024.csv" - ) + enrollment_by_state, _ = _load_aca_spending_and_enrollment_targets(time_period) # One-time pulls so we don’t re-compute inside the loop state_person = sim.calculate("state_code", map_to="person").values @@ -599,9 +682,7 @@ def build_loss_matrix(dataset: type, time_period): # Medicaid enrollment by state - enrollment_by_state = pd.read_csv( - CALIBRATION_FOLDER / "medicaid_enrollment_2024.csv" - ) + enrollment_by_state, _ = _load_medicaid_enrollment_targets(time_period) # One-time pulls so we don’t re-compute inside the loop state_person = sim.calculate("state_code", map_to="person").values