From 18e88999885ed10c41b88474ba68f9b3703cf7fb Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Mon, 18 May 2026 17:03:07 +0200 Subject: [PATCH 1/3] refactor: flatten repo structure to match assignment's task numbering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the task-1/ and task-2/ folder split. All pipeline files now live at the repo root, matching the Deliverables layout in the assignment chapter exactly. Students no longer see "task-1/" and wonder if that maps to "Task 1" in the assignment instructions. - Moved task-1/{models,ingest_api,ingest_files,validate,database,pipeline}.py → root - Moved task-1/data/ → data/ - Moved task-1/output/ → output/ - Moved task-1/.env.example and requirements.txt → root - Moved task-2/AI_DEBUG.md → root - Updated .gitignore, devcontainer.json, .hyf/test.sh, and README accordingly --- .devcontainer/devcontainer.json | 2 +- task-1/.env.example => .env.example | 0 .gitignore | 6 +- .hyf/test.sh | 93 ++++++++++----------- task-2/AI_DEBUG.md => AI_DEBUG.md | 0 README.md | 44 +++++----- {task-1/data => data}/weather_stations.csv | 0 task-1/database.py => database.py | 0 task-1/ingest_api.py => ingest_api.py | 0 task-1/ingest_files.py => ingest_files.py | 0 task-1/models.py => models.py | 0 {task-1/output => output}/azure_compare.md | 0 task-1/pipeline.py => pipeline.py | 0 task-1/requirements.txt => requirements.txt | 0 task-1/validate.py => validate.py | 0 15 files changed, 69 insertions(+), 76 deletions(-) rename task-1/.env.example => .env.example (100%) rename task-2/AI_DEBUG.md => AI_DEBUG.md (100%) rename {task-1/data => data}/weather_stations.csv (100%) rename task-1/database.py => database.py (100%) rename task-1/ingest_api.py => ingest_api.py (100%) rename task-1/ingest_files.py => ingest_files.py (100%) rename task-1/models.py => models.py (100%) rename {task-1/output => output}/azure_compare.md (100%) rename task-1/pipeline.py => pipeline.py (100%) rename 
task-1/requirements.txt => requirements.txt (100%) rename task-1/validate.py => validate.py (100%) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 23f91dc..b8753d4 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -4,7 +4,7 @@ "features": { "ghcr.io/devcontainers/features/azure-cli:1": {} }, - "postCreateCommand": "python3 -m pip install -r task-1/requirements.txt", + "postCreateCommand": "python3 -m pip install -r requirements.txt", "customizations": { "vscode": { "extensions": [ diff --git a/task-1/.env.example b/.env.example similarity index 100% rename from task-1/.env.example rename to .env.example diff --git a/.gitignore b/.gitignore index 5a0ee9d..9e117dc 100644 --- a/.gitignore +++ b/.gitignore @@ -25,9 +25,9 @@ env/ !.env.example # Generated pipeline output (committed templates stay; generated files do not) -task-1/output/error_report.json -task-1/output/azure_resource_groups.json -task-1/weather.db +output/error_report.json +output/azure_resource_groups.json +weather.db # Editor and IDE settings .vscode/ diff --git a/.hyf/test.sh b/.hyf/test.sh index 130ee65..18a7cc4 100644 --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -29,16 +29,16 @@ INIT # 70 code uses required patterns (@field_validator, parameterized queries, # ON CONFLICT upsert, time.sleep backoff) task16=0 -task16_msg="missing required files in task-1/" +task16_msg="missing required files" required_files=( - "task-1/models.py" - "task-1/ingest_api.py" - "task-1/ingest_files.py" - "task-1/validate.py" - "task-1/database.py" - "task-1/pipeline.py" - "task-1/.env.example" + "models.py" + "ingest_api.py" + "ingest_files.py" + "validate.py" + "database.py" + "pipeline.py" + ".env.example" ) all_present=true @@ -53,13 +53,13 @@ if [ "$all_present" = true ]; then task16=10 task16_msg="files exist but pipeline failed to run" - if [ -f task-1/requirements.txt ]; then - python3 -m pip install -q -r task-1/requirements.txt || \ + if [ -f 
requirements.txt ]; then + python3 -m pip install -q -r requirements.txt || \ echo "WARN: pip install failed; pipeline may crash with ModuleNotFoundError" >&2 fi PIPELINE_ERR=$(mktemp) - if ( cd task-1 && python3 -m pipeline ) >/dev/null 2>"$PIPELINE_ERR"; then + if python3 -m pipeline >/dev/null 2>"$PIPELINE_ERR"; then task16=20 task16_msg="pipeline ran but output checks failed" @@ -69,7 +69,7 @@ import json, sqlite3 from pathlib import Path # error_report.json must exist and be a non-empty list of error objects -rpt = Path("task-1/output/error_report.json") +rpt = Path("output/error_report.json") assert rpt.exists(), "output/error_report.json was not created" errors = json.loads(rpt.read_text()) assert isinstance(errors, list), "error_report.json must be a JSON list" @@ -81,7 +81,7 @@ for i, e in enumerate(errors[:3]): assert not missing, f"error object {i} missing fields: {missing}" # weather.db must exist and have rows -db = Path("task-1/weather.db") +db = Path("weather.db") assert db.exists(), "weather.db was not created" conn = sqlite3.connect(db) count = conn.execute("SELECT COUNT(*) FROM weather_readings").fetchone()[0] @@ -95,13 +95,13 @@ PY # Idempotency: run a second time, row count must stay the same count_before=$(python3 -c " import sqlite3 -conn = sqlite3.connect('task-1/weather.db') +conn = sqlite3.connect('weather.db') print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0]) ") - if ( cd task-1 && python3 -m pipeline ) >/dev/null 2>&1; then + if python3 -m pipeline >/dev/null 2>&1; then count_after=$(python3 -c " import sqlite3 -conn = sqlite3.connect('task-1/weather.db') +conn = sqlite3.connect('weather.db') print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0]) ") if [ "$count_before" = "$count_after" ]; then @@ -109,19 +109,16 @@ print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0]) task16_msg="output + idempotency pass; checking code patterns" # Code introspection for the final 20 
points. - # These greps target actual code constructs, not docstrings: - # @field_validator / @classmethod — present in scaffold but only - # safe to match after pipeline passes (NotImplementedError in the - # validator would crash the pipeline before we get here) - # execute.*? — SQL parameterized placeholder in a call - # ON CONFLICT — upsert keyword in actual SQL string - # time\.sleep — stdlib sleep call (avoids matching the - # function name "fetch_with_retry" or docstring words) - has_field_validator=$(grep -cE "@field_validator" task-1/models.py || true) - has_classmethod=$(grep -cE "@classmethod" task-1/models.py || true) - has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" task-1/database.py || true) - has_on_conflict=$(grep -ciE "ON CONFLICT" task-1/database.py || true) - has_sleep=$(grep -cE "time\.sleep" task-1/ingest_api.py || true) + # Patterns target actual code constructs, not docstrings: + # execute.*? — SQL placeholder in an execute() call + # ON CONFLICT — upsert keyword in actual SQL string + # time\.sleep — stdlib sleep call (avoids the function + # name "fetch_with_retry" or docstring words) + has_field_validator=$(grep -cE "@field_validator" models.py || true) + has_classmethod=$(grep -cE "@classmethod" models.py || true) + has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" 
database.py || true) + has_on_conflict=$(grep -ciE "ON CONFLICT" database.py || true) + has_sleep=$(grep -cE "time\.sleep" ingest_api.py || true) if [ "$has_field_validator" -gt 0 ] && \ [ "$has_classmethod" -gt 0 ] && \ @@ -157,51 +154,51 @@ fi # --- Task 7: Azure CLI + Portal (15 points) --- # -# 5 pts azure_resource_groups.json exists and is valid JSON -# 10 pts azure_compare.md exists -# 15 pts azure_compare.md has all 3 sections and is filled in (>1200 chars, -# which is above the committed template's ~310 chars of non-comment text) +# 5 pts output/azure_resource_groups.json exists and is valid JSON +# 10 pts output/azure_compare.md exists +# 15 pts output/azure_compare.md has all 3 sections and is filled in (>1200 chars, +# which is above the committed template's ~233 bytes) task7=0 -task7_msg="missing task-1/output/azure_resource_groups.json" +task7_msg="missing output/azure_resource_groups.json" -if [ -s "task-1/output/azure_resource_groups.json" ]; then - if python3 -c "import json; json.load(open('task-1/output/azure_resource_groups.json'))" 2>/dev/null; then +if [ -s "output/azure_resource_groups.json" ]; then + if python3 -c "import json; json.load(open('output/azure_resource_groups.json'))" 2>/dev/null; then task7=5 task7_msg="azure_resource_groups.json is valid JSON; azure_compare.md missing or not filled in" - if [ -s "task-1/output/azure_compare.md" ]; then + if [ -s "output/azure_compare.md" ]; then task7=10 task7_msg="azure_compare.md exists but looks too short or missing sections" - section_count=$(grep -cE "^## " task-1/output/azure_compare.md || true) - char_count=$(wc -c < task-1/output/azure_compare.md) + section_count=$(grep -cE "^## " output/azure_compare.md || true) + char_count=$(wc -c < output/azure_compare.md) if [ "$section_count" -ge 3 ] && [ "$char_count" -gt 1200 ]; then task7=15 task7_msg="azure_resource_groups.json and azure_compare.md both present and filled in" fi fi else - task7_msg="task-1/output/azure_resource_groups.json is 
not valid JSON" + task7_msg="output/azure_resource_groups.json is not valid JSON" fi fi # --- Task 8: AI Debug Report (15 points) --- # -# 5 pts task-2/AI_DEBUG.md exists +# 5 pts AI_DEBUG.md exists # 10 pts all four sections present (## The Error, ## The Prompt, ## The Solution, ## Reflection) # 15 pts file is meaningfully filled in (>1800 chars) task8=0 -task8_msg="missing task-2/AI_DEBUG.md" +task8_msg="missing AI_DEBUG.md" -if [ -s "task-2/AI_DEBUG.md" ]; then +if [ -s "AI_DEBUG.md" ]; then task8=5 task8_msg="AI_DEBUG.md exists but missing required sections" - if grep -q "^## The Error" task-2/AI_DEBUG.md && \ - grep -q "^## The Prompt" task-2/AI_DEBUG.md && \ - grep -q "^## The Solution" task-2/AI_DEBUG.md && \ - grep -q "^## Reflection" task-2/AI_DEBUG.md; then + if grep -q "^## The Error" AI_DEBUG.md && \ + grep -q "^## The Prompt" AI_DEBUG.md && \ + grep -q "^## The Solution" AI_DEBUG.md && \ + grep -q "^## Reflection" AI_DEBUG.md; then task8=10 task8_msg="all sections present but file looks too short to be filled in" - if [ "$(wc -c < task-2/AI_DEBUG.md)" -gt 1800 ]; then + if [ "$(wc -c < AI_DEBUG.md)" -gt 1800 ]; then task8=15 task8_msg="AI_DEBUG.md is filled in" fi diff --git a/task-2/AI_DEBUG.md b/AI_DEBUG.md similarity index 100% rename from task-2/AI_DEBUG.md rename to AI_DEBUG.md diff --git a/README.md b/README.md index 3880f34..07d2f40 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,9 @@ cohort copies. | Task | Folder | Points | What you build | |---|---|---|---| -| **Tasks 1-6** — Ingestion Pipeline | `task-1/` | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. 
| -| **Task 7** — Azure CLI + Portal | `task-1/output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. | -| **Task 8** — AI Debug Report | `task-2/` | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. | +| **Tasks 1-6** — Ingestion Pipeline | root | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. | +| **Task 7** — Azure CLI + Portal | `output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. | +| **Task 8** — AI Debug Report | root | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. | Total: 100 · Passing: 60. @@ -34,27 +34,25 @@ Total: 100 · Passing: 60. ```text . 
-├── task-1/ -│ ├── data/ -│ │ └── weather_stations.csv # messy input dataset (committed; do not edit) -│ ├── output/ # pipeline writes here (gitignored except templates) -│ │ ├── error_report.json # generated by pipeline.py -│ │ ├── azure_resource_groups.json # Task 7: save ARM API response here -│ │ └── azure_compare.md # Task 7: fill in 3 comparison points -│ ├── models.py # Pydantic WeatherReading model — fill in TODOs -│ ├── ingest_api.py # fetch_with_retry + API ingestion — fill in TODOs -│ ├── ingest_files.py # CSV reader — fill in TODOs -│ ├── validate.py # batch validation — fill in TODOs -│ ├── database.py # SQLite create, upsert, query — fill in TODOs -│ ├── pipeline.py # orchestrator — fill in TODOs -│ ├── .env.example # no secrets needed; copy to .env if you extend it -│ └── requirements.txt -├── task-2/ -│ └── AI_DEBUG.md # Task 8: fill in the four sections +├── data/ +│ └── weather_stations.csv # messy input dataset (committed; do not edit) +├── output/ # pipeline writes here (gitignored except templates) +│ ├── error_report.json # generated by pipeline.py +│ ├── azure_resource_groups.json # Task 7: save ARM API response here +│ └── azure_compare.md # Task 7: fill in 3 comparison points +├── models.py # Pydantic WeatherReading model — fill in TODOs +├── ingest_api.py # fetch_with_retry + API ingestion — fill in TODOs +├── ingest_files.py # CSV reader — fill in TODOs +├── validate.py # batch validation — fill in TODOs +├── database.py # SQLite create, upsert, query — fill in TODOs +├── pipeline.py # orchestrator — fill in TODOs +├── AI_DEBUG.md # Task 8: fill in the four sections +├── .env.example # no secrets needed; copy to .env if you extend it +├── requirements.txt ├── .hyf/ -│ └── test.sh # auto-grader (read it to see exactly what it checks) +│ └── test.sh # auto-grader (read it to see exactly what it checks) └── .github/workflows/ - └── grade-assignment.yml # runs .hyf/test.sh on every PR + └── grade-assignment.yml # runs .hyf/test.sh on every 
PR ``` ## Run the grader locally @@ -62,9 +60,7 @@ Total: 100 · Passing: 60. Before opening a PR, run the same checks the auto-grader runs: ```bash -cd task-1 python3 -m pip install -r requirements.txt -cd .. bash .hyf/test.sh cat .hyf/score.json ``` diff --git a/task-1/data/weather_stations.csv b/data/weather_stations.csv similarity index 100% rename from task-1/data/weather_stations.csv rename to data/weather_stations.csv diff --git a/task-1/database.py b/database.py similarity index 100% rename from task-1/database.py rename to database.py diff --git a/task-1/ingest_api.py b/ingest_api.py similarity index 100% rename from task-1/ingest_api.py rename to ingest_api.py diff --git a/task-1/ingest_files.py b/ingest_files.py similarity index 100% rename from task-1/ingest_files.py rename to ingest_files.py diff --git a/task-1/models.py b/models.py similarity index 100% rename from task-1/models.py rename to models.py diff --git a/task-1/output/azure_compare.md b/output/azure_compare.md similarity index 100% rename from task-1/output/azure_compare.md rename to output/azure_compare.md diff --git a/task-1/pipeline.py b/pipeline.py similarity index 100% rename from task-1/pipeline.py rename to pipeline.py diff --git a/task-1/requirements.txt b/requirements.txt similarity index 100% rename from task-1/requirements.txt rename to requirements.txt diff --git a/task-1/validate.py b/validate.py similarity index 100% rename from task-1/validate.py rename to validate.py From 84cd49f2740061ab8ab6a9ed62b96ecbd7c78402 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Mon, 18 May 2026 17:22:23 +0200 Subject: [PATCH 2/3] docs: rewrite README as student navigation guide + add step headers to all Python files - README opens with a 'Why no task folders?' 
explanation and a step-by-step table (Step 1 = models.py through Step 6 = pipeline.py) so students know where to start without numbered folders to lean on - Every Python file now has a 2-3 line header comment naming the step, the task, and the role that file plays in the pipeline - Scoring ladder rewritten as a table for scannability - Student-redirect callout moved to the bottom (instructors read the top) --- README.md | 120 +++++++++++++++++++++++++++++------------------- database.py | 5 ++ ingest_api.py | 3 ++ ingest_files.py | 3 ++ models.py | 3 ++ pipeline.py | 4 ++ validate.py | 4 ++ 7 files changed, 96 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 07d2f40..53a54d9 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,104 @@ -# Data Track — Week 3 Assignment (Template) +# Data Track — Week 3 Assignment -The HackYourFuture Data Track Week 3 assignment: **Build a Validated Ingestion Pipeline**. +**Build a Validated Ingestion Pipeline** · Total: 100 points · Passing: 60 -> 👩‍🎓 **Students:** you are in the wrong place. Do **not** fork or use this template. -> Go to your cohort's assignment repo under -> [`HackYourAssignment`](https://github.com/HackYourAssignment) (e.g. `c55-data-week3`, -> `c56-data-week3`, …). Your teacher posts the exact link in your cohort channel. -> Fork the cohort repo, branch, and open a PR back to it. Full instructions live in the -> Week 3 Assignment chapter in the learning platform. +--- -## For instructors / track maintainers +## Why no task folders? + +Previous assignments split work across `task-1/`, `task-2/`, etc. This assignment drops that structure intentionally. Real Python projects keep all related modules at the root — you navigate by reading the code, not by opening numbered folders. + +Every file you need to touch is listed below, in the order you should work through them. -This repo is the **upstream template** for the Week 3 assignment. 
At the start of each -cohort, generate a cohort-specific repo under the `HackYourAssignment` org from this -template (GitHub: **Use this template → Create a new repository**, owner = -`HackYourAssignment`, name = `c-data-week3`). Students then fork *that* cohort repo -and open PRs back to it; the auto-grader runs on every push. +--- -Edits to the assignment, dataset, or grader belong here on the template, not on the -cohort copies. +## Where to start -## Tasks at a glance +Work through the files in this order. Each one maps to a task in the assignment chapter. -| Task | Folder | Points | What you build | +| Step | File | Task in the chapter | Points | |---|---|---|---| -| **Tasks 1-6** — Ingestion Pipeline | root | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. | -| **Task 7** — Azure CLI + Portal | `output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. | -| **Task 8** — AI Debug Report | root | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. | +| 1 | `models.py` | Task 4: Pydantic Validation | — | +| 2 | `ingest_api.py` | Task 1: Error Handling + Task 2: API Ingestion | — | +| 3 | `ingest_files.py` | Task 3: File Reading | — | +| 4 | `validate.py` | Task 4: Pydantic Validation | — | +| 5 | `database.py` | Task 5: Database Storage | — | +| 6 | `pipeline.py` | Task 6: Pipeline Orchestration | 70 total | +| 7 | `output/azure_compare.md` | Task 7: Azure CLI + Portal | 15 | +| 8 | `AI_DEBUG.md` | Task 8: AI Debug Report | 15 | -Total: 100 · Passing: 60. +Open each file and read the docstrings and TODO comments — they explain exactly what to implement. 
Start with `models.py` and `ingest_api.py`; `pipeline.py` is the last thing you wire together. + +--- ## Repository layout ```text . ├── data/ -│ └── weather_stations.csv # messy input dataset (committed; do not edit) -├── output/ # pipeline writes here (gitignored except templates) -│ ├── error_report.json # generated by pipeline.py -│ ├── azure_resource_groups.json # Task 7: save ARM API response here -│ └── azure_compare.md # Task 7: fill in 3 comparison points -├── models.py # Pydantic WeatherReading model — fill in TODOs -├── ingest_api.py # fetch_with_retry + API ingestion — fill in TODOs -├── ingest_files.py # CSV reader — fill in TODOs -├── validate.py # batch validation — fill in TODOs -├── database.py # SQLite create, upsert, query — fill in TODOs -├── pipeline.py # orchestrator — fill in TODOs -├── AI_DEBUG.md # Task 8: fill in the four sections -├── .env.example # no secrets needed; copy to .env if you extend it +│ └── weather_stations.csv # input dataset — do not edit +├── output/ +│ ├── azure_compare.md # Task 7: fill in your 3 comparison sentences +│ └── azure_resource_groups.json # Task 7: generated by your Python script +├── models.py # Step 1 — Pydantic model (Task 4) +├── ingest_api.py # Step 2 — fetch_with_retry + API call (Tasks 1–2) +├── ingest_files.py # Step 3 — CSV reader (Task 3) +├── validate.py # Step 4 — batch validation (Task 4) +├── database.py # Step 5 — SQLite tables + upsert (Task 5) +├── pipeline.py # Step 6 — orchestrator that calls everything (Task 6) +├── AI_DEBUG.md # Step 8 — your debugging log (Task 8) ├── requirements.txt +├── .env.example ├── .hyf/ -│ └── test.sh # auto-grader (read it to see exactly what it checks) +│ └── test.sh # auto-grader — read this to see exactly what is checked └── .github/workflows/ - └── grade-assignment.yml # runs .hyf/test.sh on every PR + └── grade-assignment.yml ``` -## Run the grader locally +Files the pipeline generates at runtime (gitignored): +- `weather.db` — SQLite database +- 
`output/error_report.json` — invalid records from validation + +--- -Before opening a PR, run the same checks the auto-grader runs: +## Run the pipeline ```bash python3 -m pip install -r requirements.txt +python3 -m pipeline +``` + +--- + +## Check your score locally + +Run the same checks the auto-grader runs on every push to a PR: + +```bash bash .hyf/test.sh cat .hyf/score.json ``` -## Scoring ladder (Tasks 1-6) +--- + +## Scoring ladder (Tasks 1–6) + +Points are awarded incrementally so partial work earns partial credit: + +| Score | What the grader checks | +|---|---| +| 10/70 | All required files exist | +| 20/70 | `python3 -m pipeline` runs without crashing | +| 40/70 | `output/error_report.json` is a valid list with the right fields; `weather.db` has rows | +| 50/70 | Pipeline is idempotent: a second run leaves the same row count (upsert working) | +| 70/70 | Code uses: `@field_validator` + `@classmethod` in `models.py`, `?` placeholders in `database.py`, `ON CONFLICT` upsert in `database.py`, `time.sleep` backoff in `ingest_api.py` | + +--- + +## For instructors / track maintainers + +This repo is the upstream template. At the start of each cohort, generate a cohort repo under `HackYourAssignment` (**Use this template → Create a new repository**, owner = `HackYourAssignment`, name = `cNN-data-week3`, e.g. `c55-data-week3`). Students fork that cohort repo and open PRs back to it; the auto-grader runs on every push. -The grader awards points incrementally so partial credit is meaningful: +Edits to the assignment, dataset, or grader belong here on the template — not on cohort copies. -- **10/70** — required files all exist (`models.py`, `ingest_api.py`, `ingest_files.py`, `validate.py`, `database.py`, `pipeline.py`, `.env.example`). -- **20/70** — `python3 -m pipeline` runs from `task-1/` without crashing. 
-- **40/70** — `output/error_report.json` exists, is a valid JSON list, and contains objects with `index`, `source`, `raw_record`, and `error_details` fields; `weather.db` has rows in `weather_readings`. -- **50/70** — pipeline is idempotent: running it twice leaves the same row count in `weather_readings` (upsert working correctly). -- **70/70** — code uses the required patterns: `@field_validator` + `@classmethod` in `models.py`, parameterized queries (`?` placeholders) in `database.py`, `ON CONFLICT ... DO UPDATE SET` in `database.py`, retry/backoff logic in `ingest_api.py`. +> 👩‍🎓 **Students:** if you landed here, you are in the wrong place. Go to your cohort repo under [`HackYourAssignment`](https://github.com/HackYourAssignment). Your teacher posts the exact link in your cohort channel. diff --git a/database.py b/database.py index f5e2345..8322d17 100644 --- a/database.py +++ b/database.py @@ -1,3 +1,8 @@ +# Step 5 — Task 5: Database Storage +# create_tables() — run once at startup to set up raw_weather and weather_readings. +# insert_raw() — store every record before validation so nothing is lost. +# upsert_readings()— insert valid records; ON CONFLICT updates instead of duplicating. +# count_readings() — query the final row count for the pipeline summary. import sqlite3 from pathlib import Path diff --git a/ingest_api.py b/ingest_api.py index a7acc55..66eb32c 100644 --- a/ingest_api.py +++ b/ingest_api.py @@ -1,3 +1,6 @@ +# Step 2 — Tasks 1 & 2: Error Handling + API Ingestion +# fetch_with_retry handles transient network errors (Task 1). +# fetch_api_records calls it and shapes the response into flat dicts (Task 2). 
import logging import time diff --git a/ingest_files.py b/ingest_files.py index 9f01e15..8b010ff 100644 --- a/ingest_files.py +++ b/ingest_files.py @@ -1,3 +1,6 @@ +# Step 3 — Task 3: File Reading +# Read the messy CSV and normalize each row into the same dict format +# that fetch_api_records() produces, so validate_records() can handle both sources. import csv from pathlib import Path diff --git a/models.py b/models.py index 7a4680f..cae42fe 100644 --- a/models.py +++ b/models.py @@ -1,3 +1,6 @@ +# Step 1 — Task 4: Pydantic Validation +# Define the WeatherReading model that every ingested record must pass. +# Both the API and CSV data flow through this model before reaching the database. from pydantic import BaseModel, Field, field_validator diff --git a/pipeline.py b/pipeline.py index e19de9d..d8dc710 100644 --- a/pipeline.py +++ b/pipeline.py @@ -1,3 +1,7 @@ +# Step 6 — Task 6: Pipeline Orchestration +# This is the entry point. It calls every module you built in steps 1–5 in order. +# Implement run_pipeline() so that `python3 -m pipeline` produces a summary and +# writes output/error_report.json. The auto-grader runs the same command. import json import logging from pathlib import Path diff --git a/validate.py b/validate.py index c2b694d..ada2716 100644 --- a/validate.py +++ b/validate.py @@ -1,3 +1,7 @@ +# Step 4 — Task 4: Pydantic Validation (batch) +# validate_records() runs every record through WeatherReading and splits the +# results into a valid list and an error list. pipeline.py calls this once for +# all records combined, then stores the valid ones and saves the errors to JSON. 
from pydantic import ValidationError from models import WeatherReading From 775c9792209aaca12a7ae97b81e87b994cde75b6 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Mon, 18 May 2026 17:45:35 +0200 Subject: [PATCH 3/3] fix(grader): handle multi-line SQL and clean generated files before each run - Parameterized query check now passes for both inline (execute('...?...')) and multi-line/variable-assignment SQL forms: checks for '?' anywhere in database.py AND an .execute call, rather than requiring both on the same physical line - Remove weather.db and output/error_report.json at grader start so local reruns cannot inflate the score with stale artifacts from a prior successful run --- .hyf/test.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.hyf/test.sh b/.hyf/test.sh index 18a7cc4..52634d5 100644 --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -18,6 +18,11 @@ cat > "$SCRIPT_DIR/score.json" <<'INIT' {"score": 0, "pass": false, "passingScore": 60} INIT +# Remove runtime-generated files so each grader run starts from a clean state. +# In CI the checkout is always clean; locally this prevents stale artifacts from +# a previous successful run inflating the score on a broken re-submission. +rm -f weather.db output/error_report.json + # --- Tasks 1-6: Ingestion Pipeline (70 points) --- # # Scoring ladder (each level requires all previous levels to pass): @@ -116,7 +121,11 @@ print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0]) # name "fetch_with_retry" or docstring words) has_field_validator=$(grep -cE "@field_validator" models.py || true) has_classmethod=$(grep -cE "@classmethod" models.py || true) - has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" database.py || true) + # Parameterized queries: check for ? placeholder anywhere in the file + # AND an execute call — handles both inline and multi-line SQL forms. + has_q=$(grep -c "?" 
database.py || true) + has_exec=$(grep -cE "\.execute" database.py || true) + if [ "$has_q" -gt 0 ] && [ "$has_exec" -gt 0 ]; then has_param_queries=1; else has_param_queries=0; fi has_on_conflict=$(grep -ciE "ON CONFLICT" database.py || true) has_sleep=$(grep -cE "time\.sleep" ingest_api.py || true)