Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
e82164f
Add anymodel directories to feature/puzzletron
danielkorzekwa Mar 4, 2026
2099df3
Make any_model conversion work.
danielkorzekwa Mar 5, 2026
eb5cf8a
Update child_init.py with anymodel version
danielkorzekwa Mar 5, 2026
c9de41c
fix attention pruning
danielkorzekwa Mar 5, 2026
3c1bc1f
Add trust_remote_code to load_model_config (default to false)
danielkorzekwa Mar 5, 2026
8357136
Make activation scoring work
danielkorzekwa Mar 5, 2026
6cc2194
Comment out all tested models aside from llama_3_1_8b_instruct
danielkorzekwa Mar 5, 2026
ee4e1e3
Delete unneeded decilm test
danielkorzekwa Mar 5, 2026
449b523
Fix broken tests
danielkorzekwa Mar 5, 2026
fb27bba
Update puzzletron_nas_plugin to any_model version
danielkorzekwa Mar 5, 2026
b350f82
Correct test resources used by tests.
danielkorzekwa Mar 5, 2026
fafe5a3
Disable puzzletron tests (will be enabled after all any_model logic i…
danielkorzekwa Mar 5, 2026
e988248
Merge branch 'dkorzekwa/anymodel_core' into dkorzekwa/anymodel_activa…
danielkorzekwa Mar 6, 2026
c717852
Comment out not implemented models.
danielkorzekwa Mar 6, 2026
030f126
format python docs
danielkorzekwa Mar 6, 2026
8dcdfbf
Merge branch 'dkorzekwa/anymodel_core' into dkorzekwa/anymodel_activa…
danielkorzekwa Mar 6, 2026
70df0df
Use trust_remote_code in force_cache_dynamic_modules()
danielkorzekwa Mar 6, 2026
bb56662
Merge branch 'dkorzekwa/anymodel_core' into dkorzekwa/anymodel_activa…
danielkorzekwa Mar 6, 2026
ecd953e
Fix anymodel pruning
danielkorzekwa Mar 6, 2026
ee8f538
Fix build docs issue.
danielkorzekwa Mar 6, 2026
c9b76a1
Merge branch 'dkorzekwa/anymodel_core' into dkorzekwa/anymodel_activa…
danielkorzekwa Mar 6, 2026
6e3af61
Merge branch 'dkorzekwa/anymodel_activation_scoring' into dkorzekwa/a…
danielkorzekwa Mar 6, 2026
0ad6d92
Merging build_library_and_stats
danielkorzekwa Mar 6, 2026
995eb1a
Merging anymodel: calc_one_block_scores
danielkorzekwa Mar 6, 2026
34081c9
Merging any_model: calc_one_block_scores
danielkorzekwa Mar 6, 2026
ed5c00f
merge any_model: mip_and_realize_models
danielkorzekwa Mar 6, 2026
47414d5
Clarify readme and avoid reusing the same reference in llama_converter.
danielkorzekwa Mar 9, 2026
a8305d8
Fix tied-embedding handling before writing the safetensors index.
danielkorzekwa Mar 9, 2026
68421a5
Fix NaN ranking that currently selects NaNs as “best” experts by default.
danielkorzekwa Mar 9, 2026
d6b8028
Code clean up.
danielkorzekwa Mar 9, 2026
ecd2341
Code clean up.
danielkorzekwa Mar 10, 2026
f9d845d
code clean up
danielkorzekwa Mar 10, 2026
d171b01
Merge branch 'dkorzekwa/anymodel_core' into dkorzekwa/anymodel_activa…
danielkorzekwa Mar 10, 2026
722da90
Merge branch 'dkorzekwa/anymodel_activation_scoring' into dkorzekwa/a…
danielkorzekwa Mar 10, 2026
934ab2f
code clean up
danielkorzekwa Mar 10, 2026
0f14ec3
Merge branch 'dkorzekwa/anymodel_pruning' into dkorzekwa/anymodel_bui…
danielkorzekwa Mar 10, 2026
dcb9e02
remove not needed comment
danielkorzekwa Mar 10, 2026
0c9ea5d
Merge branch 'dkorzekwa/anymodel_build_library_and_stats' into dkorze…
danielkorzekwa Mar 10, 2026
5b310e2
Merge branch 'dkorzekwa/any_model_calc_one_block_scores' into dkorzek…
danielkorzekwa Mar 10, 2026
176a435
Fix a broken test_puzzletron test on 2 gpus.
danielkorzekwa Mar 10, 2026
02e2c9b
Merge branch 'dkorzekwa/anymodel_activation_scoring' into dkorzekwa/a…
danielkorzekwa Mar 10, 2026
92c4419
Merge branch 'dkorzekwa/anymodel_pruning' into dkorzekwa/anymodel_bui…
danielkorzekwa Mar 10, 2026
aa1eb3e
Merge branch 'dkorzekwa/anymodel_build_library_and_stats' into dkorze…
danielkorzekwa Mar 10, 2026
2b84a96
Merge branch 'dkorzekwa/any_model_calc_one_block_scores' into dkorzek…
danielkorzekwa Mar 10, 2026
313260d
Merge branch 'feature/puzzletron' into dkorzekwa/mip_and_realize_models
danielkorzekwa Mar 13, 2026
47b4479
Fix comments
danielkorzekwa Mar 13, 2026
1c1e983
fix tox -e build-docs issue
danielkorzekwa Mar 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions modelopt/torch/puzzletron/mip/run_puzzle.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,9 +688,7 @@ def _get_block_stats(
not (block_config.attention.no_op and block_config.ffn.no_op)
)
block_stats["num_kv_heads"] = (
subblock_stats["args"]["n_head"] // block_config.attention.n_heads_in_group
if block_stats["has_attention"]
else 0
block_config.attention.num_key_value_heads if block_stats["has_attention"] else 0
)
block_stats["num_local_experts"] = (
block_config.ffn.moe.num_local_experts if block_stats["has_moe"] else 0
Expand Down
4 changes: 2 additions & 2 deletions modelopt/torch/puzzletron/puzzletron.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def puzzletron(
# Step 5: calc_one_block_scores (distributed processing)
scoring.launch_scoring(hydra_cfg)

# # Step 6: mip_and_realize_models (distributed processing)
# mip_and_realize_models.launch_mip_and_realize_model(hydra_cfg)
# Step 6: mip_and_realize_models (distributed processing)
mip_and_realize_models.launch_mip_and_realize_model(hydra_cfg)

return hydra_cfg
109 changes: 54 additions & 55 deletions tests/gpu/torch/puzzletron/test_puzzletron.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,66 +120,65 @@ def _test_puzzletron_multiprocess_job(
)
dist.barrier()

# TODO commented for the duration of merging process from dkorzekwa/any_model to feature/puzzletron
# Compress the model using a one-click approach
puzzletron.puzzletron(
str(hydra_config_dir), hydra_config_subdir, str(puzzle_dir), str(dataset_path)
)

# #
# # Check assertions
# #
# if rank == 0:
# if has_moe_layers:
# # assertions for the score_pruning_activations step 1 (MoE models only)
# rank_filepath = (
# f"pruning/pruning_scores/expert_removal/10samples_diverse_mini/rank_{rank}.pth"
# )
# assert (puzzle_dir / rank_filepath).is_file(), f"Expected {rank_filepath} to exist"

# # assertions for the pruning_ckpts step 2
# assert (puzzle_dir / "ckpts/num_experts_8").exists()

# # assertions for the mip_and_realize_models step 6
# # Find the MIP solution directory dynamically (e.g., stats_num_local_experts_*)
# mip_solutions_dir = puzzle_dir / "mip/puzzle_solutions"
# solution_dirs = [
# d
# for d in mip_solutions_dir.iterdir()
# if d.is_dir() and d.name.startswith("stats_num_local_experts_")
# ]
# assert len(solution_dirs) == 1, (
# f"Expected exactly one stats_num_local_experts_* directory, found: {[d.name for d in solution_dirs]}"
# )
# solution_dir = solution_dirs[0]

# solution_0_ckpt_config_path = (
# solution_dir / "solutions--checkpoints/solution_0/config.json"
# )
# assert solution_0_ckpt_config_path.exists()
# assert (solution_dir / "solutions.json").exists()

# # Validate lm_loss
# _assert_lm_loss(puzzle_dir, hf_config_name)
# else:
# # assertions for the score_pruning_activations step 1 (FFN pruning)
# _assert_score_pruning_activations(puzzle_dir, hf_config_name)

# # assertions for the pruning_ckpts step 2
# assert (puzzle_dir / "ckpts/ffn_256_attn_no_op").exists()

# # assertions for the mip_and_realize_models step 6
# _assert_mip_solutions(puzzle_dir, hf_config_name)

# # assertions for the build_library_and_stats step 4
# assert (puzzle_dir / "replacement_library.json").is_file()
# assert (puzzle_dir / "subblock_stats.json").is_file()

# # assertions for the scoring step 5
# solution_0_filepath = (
# puzzle_dir / "single_sequence_replacement_solutions--validation/solution_0.json"
# )
# assert solution_0_filepath.exists()
#
# Check assertions
#
if rank == 0:
if has_moe_layers:
# assertions for the score_pruning_activations step 1 (MoE models only)
rank_filepath = (
f"pruning/pruning_scores/expert_removal/10samples_diverse_mini/rank_{rank}.pth"
)
assert (puzzle_dir / rank_filepath).is_file(), f"Expected {rank_filepath} to exist"

# assertions for the pruning_ckpts step 2
assert (puzzle_dir / "ckpts/num_experts_8").exists()

# assertions for the mip_and_realize_models step 6
# Find the MIP solution directory dynamically (e.g., stats_num_local_experts_*)
mip_solutions_dir = puzzle_dir / "mip/puzzle_solutions"
solution_dirs = [
d
for d in mip_solutions_dir.iterdir()
if d.is_dir() and d.name.startswith("stats_num_local_experts_")
]
assert len(solution_dirs) == 1, (
f"Expected exactly one stats_num_local_experts_* directory, found: {[d.name for d in solution_dirs]}"
)
solution_dir = solution_dirs[0]

solution_0_ckpt_config_path = (
solution_dir / "solutions--checkpoints/solution_0/config.json"
)
assert solution_0_ckpt_config_path.exists()
assert (solution_dir / "solutions.json").exists()

# Validate lm_loss
_assert_lm_loss(puzzle_dir, hf_config_name)
else:
# assertions for the score_pruning_activations step 1 (FFN pruning)
_assert_score_pruning_activations(puzzle_dir, hf_config_name)

# assertions for the pruning_ckpts step 2
assert (puzzle_dir / "ckpts/ffn_256_attn_no_op").exists()

# assertions for the mip_and_realize_models step 6
_assert_mip_solutions(puzzle_dir, hf_config_name)

# assertions for the build_library_and_stats step 4
assert (puzzle_dir / "replacement_library.json").is_file()
assert (puzzle_dir / "subblock_stats.json").is_file()

# assertions for the scoring step 5
solution_0_filepath = (
puzzle_dir / "single_sequence_replacement_solutions--validation/solution_0.json"
)
assert solution_0_filepath.exists()

dist.cleanup()

Expand Down
Loading