From 18e88999885ed10c41b88474ba68f9b3703cf7fb Mon Sep 17 00:00:00 2001
From: Lasse Benninga <devops.pipeline@example.com>
Date: Mon, 18 May 2026 17:03:07 +0200
Subject: [PATCH 1/3] refactor: flatten repo structure to match assignment's
 task numbering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the task-1/ and task-2/ folder split. All pipeline files now live
at the repo root, matching the Deliverables layout in the assignment chapter
exactly. Students no longer see "task-1/" and wonder if that maps to
"Task 1" in the assignment instructions.

- Moved task-1/{models,ingest_api,ingest_files,validate,database,pipeline}.py → root
- Moved task-1/data/ → data/
- Moved task-1/output/ → output/
- Moved task-1/.env.example and requirements.txt → root
- Moved task-2/AI_DEBUG.md → root
- Updated .gitignore, devcontainer.json, .hyf/test.sh, and README accordingly
---
 .devcontainer/devcontainer.json             |  2 +-
 task-1/.env.example => .env.example         |  0
 .gitignore                                  |  6 +-
 .hyf/test.sh                                | 93 ++++++++++-----------
 task-2/AI_DEBUG.md => AI_DEBUG.md           |  0
 README.md                                   | 44 +++++-----
 {task-1/data => data}/weather_stations.csv  |  0
 task-1/database.py => database.py           |  0
 task-1/ingest_api.py => ingest_api.py       |  0
 task-1/ingest_files.py => ingest_files.py   |  0
 task-1/models.py => models.py               |  0
 {task-1/output => output}/azure_compare.md  |  0
 task-1/pipeline.py => pipeline.py           |  0
 task-1/requirements.txt => requirements.txt |  0
 task-1/validate.py => validate.py           |  0
 15 files changed, 69 insertions(+), 76 deletions(-)
 rename task-1/.env.example => .env.example (100%)
 rename task-2/AI_DEBUG.md => AI_DEBUG.md (100%)
 rename {task-1/data => data}/weather_stations.csv (100%)
 rename task-1/database.py => database.py (100%)
 rename task-1/ingest_api.py => ingest_api.py (100%)
 rename task-1/ingest_files.py => ingest_files.py (100%)
 rename task-1/models.py => models.py (100%)
 rename {task-1/output => output}/azure_compare.md (100%)
 rename task-1/pipeline.py => pipeline.py (100%)
 rename task-1/requirements.txt => requirements.txt (100%)
 rename task-1/validate.py => validate.py (100%)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 23f91dc..b8753d4 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -4,7 +4,7 @@
   "features": {
     "ghcr.io/devcontainers/features/azure-cli:1": {}
   },
-  "postCreateCommand": "python3 -m pip install -r task-1/requirements.txt",
+  "postCreateCommand": "python3 -m pip install -r requirements.txt",
   "customizations": {
     "vscode": {
       "extensions": [
diff --git a/task-1/.env.example b/.env.example
similarity index 100%
rename from task-1/.env.example
rename to .env.example
diff --git a/.gitignore b/.gitignore
index 5a0ee9d..9e117dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,9 +25,9 @@ env/
 !.env.example
 
 # Generated pipeline output (committed templates stay; generated files do not)
-task-1/output/error_report.json
-task-1/output/azure_resource_groups.json
-task-1/weather.db
+output/error_report.json
+output/azure_resource_groups.json
+weather.db
 
 # Editor and IDE settings
 .vscode/
diff --git a/.hyf/test.sh b/.hyf/test.sh
index 130ee65..18a7cc4 100644
--- a/.hyf/test.sh
+++ b/.hyf/test.sh
@@ -29,16 +29,16 @@ INIT
 #   70  code uses required patterns (@field_validator, parameterized queries,
 #       ON CONFLICT upsert, time.sleep backoff)
 task16=0
-task16_msg="missing required files in task-1/"
+task16_msg="missing required files"
 
 required_files=(
-    "task-1/models.py"
-    "task-1/ingest_api.py"
-    "task-1/ingest_files.py"
-    "task-1/validate.py"
-    "task-1/database.py"
-    "task-1/pipeline.py"
-    "task-1/.env.example"
+    "models.py"
+    "ingest_api.py"
+    "ingest_files.py"
+    "validate.py"
+    "database.py"
+    "pipeline.py"
+    ".env.example"
 )
 
 all_present=true
@@ -53,13 +53,13 @@ if [ "$all_present" = true ]; then
     task16=10
     task16_msg="files exist but pipeline failed to run"
 
-    if [ -f task-1/requirements.txt ]; then
-        python3 -m pip install -q -r task-1/requirements.txt || \
+    if [ -f requirements.txt ]; then
+        python3 -m pip install -q -r requirements.txt || \
             echo "WARN: pip install failed; pipeline may crash with ModuleNotFoundError" >&2
     fi
 
     PIPELINE_ERR=$(mktemp)
-    if ( cd task-1 && python3 -m pipeline ) >/dev/null 2>"$PIPELINE_ERR"; then
+    if python3 -m pipeline >/dev/null 2>"$PIPELINE_ERR"; then
         task16=20
         task16_msg="pipeline ran but output checks failed"
 
@@ -69,7 +69,7 @@ import json, sqlite3
 from pathlib import Path
 
 # error_report.json must exist and be a non-empty list of error objects
-rpt = Path("task-1/output/error_report.json")
+rpt = Path("output/error_report.json")
 assert rpt.exists(), "output/error_report.json was not created"
 errors = json.loads(rpt.read_text())
 assert isinstance(errors, list), "error_report.json must be a JSON list"
@@ -81,7 +81,7 @@ for i, e in enumerate(errors[:3]):
     assert not missing, f"error object {i} missing fields: {missing}"
 
 # weather.db must exist and have rows
-db = Path("task-1/weather.db")
+db = Path("weather.db")
 assert db.exists(), "weather.db was not created"
 conn = sqlite3.connect(db)
 count = conn.execute("SELECT COUNT(*) FROM weather_readings").fetchone()[0]
@@ -95,13 +95,13 @@ PY
             # Idempotency: run a second time, row count must stay the same
             count_before=$(python3 -c "
 import sqlite3
-conn = sqlite3.connect('task-1/weather.db')
+conn = sqlite3.connect('weather.db')
 print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0])
 ")
-            if ( cd task-1 && python3 -m pipeline ) >/dev/null 2>&1; then
+            if python3 -m pipeline >/dev/null 2>&1; then
                 count_after=$(python3 -c "
 import sqlite3
-conn = sqlite3.connect('task-1/weather.db')
+conn = sqlite3.connect('weather.db')
 print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0])
 ")
                 if [ "$count_before" = "$count_after" ]; then
@@ -109,19 +109,16 @@ print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0])
                     task16_msg="output + idempotency pass; checking code patterns"
 
                     # Code introspection for the final 20 points.
-                    # These greps target actual code constructs, not docstrings:
-                    #   @field_validator / @classmethod  — present in scaffold but only
-                    #     safe to match after pipeline passes (NotImplementedError in the
-                    #     validator would crash the pipeline before we get here)
-                    #   execute.*?           — SQL parameterized placeholder in a call
-                    #   ON CONFLICT          — upsert keyword in actual SQL string
-                    #   time\.sleep          — stdlib sleep call (avoids matching the
-                    #     function name "fetch_with_retry" or docstring words)
-                    has_field_validator=$(grep -cE "@field_validator" task-1/models.py || true)
-                    has_classmethod=$(grep -cE "@classmethod" task-1/models.py || true)
-                    has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" task-1/database.py || true)
-                    has_on_conflict=$(grep -ciE "ON CONFLICT" task-1/database.py || true)
-                    has_sleep=$(grep -cE "time\.sleep" task-1/ingest_api.py || true)
+                    # Patterns target actual code constructs, not docstrings:
+                    #   execute.*?    — SQL placeholder in an execute() call
+                    #   ON CONFLICT   — upsert keyword in actual SQL string
+                    #   time\.sleep   — stdlib sleep call (avoids the function
+                    #                   name "fetch_with_retry" or docstring words)
+                    has_field_validator=$(grep -cE "@field_validator" models.py || true)
+                    has_classmethod=$(grep -cE "@classmethod" models.py || true)
+                    has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" database.py || true)
+                    has_on_conflict=$(grep -ciE "ON CONFLICT" database.py || true)
+                    has_sleep=$(grep -cE "time\.sleep" ingest_api.py || true)
 
                     if [ "$has_field_validator" -gt 0 ] && \
                        [ "$has_classmethod" -gt 0 ] && \
@@ -157,51 +154,51 @@ fi
 
 # --- Task 7: Azure CLI + Portal (15 points) ---
 #
-#  5 pts  azure_resource_groups.json exists and is valid JSON
-# 10 pts  azure_compare.md exists
-# 15 pts  azure_compare.md has all 3 sections and is filled in (>1200 chars,
-#         which is above the committed template's ~310 chars of non-comment text)
+#  5 pts  output/azure_resource_groups.json exists and is valid JSON
+# 10 pts  output/azure_compare.md exists
+# 15 pts  output/azure_compare.md has all 3 sections and is filled in (>1200 bytes,
+#         well above the committed template's ~233 bytes)
 task7=0
-task7_msg="missing task-1/output/azure_resource_groups.json"
+task7_msg="missing output/azure_resource_groups.json"
 
-if [ -s "task-1/output/azure_resource_groups.json" ]; then
-    if python3 -c "import json; json.load(open('task-1/output/azure_resource_groups.json'))" 2>/dev/null; then
+if [ -s "output/azure_resource_groups.json" ]; then
+    if python3 -c "import json; json.load(open('output/azure_resource_groups.json'))" 2>/dev/null; then
         task7=5
         task7_msg="azure_resource_groups.json is valid JSON; azure_compare.md missing or not filled in"
 
-        if [ -s "task-1/output/azure_compare.md" ]; then
+        if [ -s "output/azure_compare.md" ]; then
             task7=10
             task7_msg="azure_compare.md exists but looks too short or missing sections"
-            section_count=$(grep -cE "^## " task-1/output/azure_compare.md || true)
-            char_count=$(wc -c < task-1/output/azure_compare.md)
+            section_count=$(grep -cE "^## " output/azure_compare.md || true)
+            char_count=$(wc -c < output/azure_compare.md)
             if [ "$section_count" -ge 3 ] && [ "$char_count" -gt 1200 ]; then
                 task7=15
                 task7_msg="azure_resource_groups.json and azure_compare.md both present and filled in"
             fi
         fi
     else
-        task7_msg="task-1/output/azure_resource_groups.json is not valid JSON"
+        task7_msg="output/azure_resource_groups.json is not valid JSON"
     fi
 fi
 
 # --- Task 8: AI Debug Report (15 points) ---
 #
-#  5 pts  task-2/AI_DEBUG.md exists
+#  5 pts  AI_DEBUG.md exists
 # 10 pts  all four sections present (## The Error, ## The Prompt, ## The Solution, ## Reflection)
 # 15 pts  file is meaningfully filled in (>1800 chars)
 task8=0
-task8_msg="missing task-2/AI_DEBUG.md"
+task8_msg="missing AI_DEBUG.md"
 
-if [ -s "task-2/AI_DEBUG.md" ]; then
+if [ -s "AI_DEBUG.md" ]; then
     task8=5
     task8_msg="AI_DEBUG.md exists but missing required sections"
-    if grep -q "^## The Error" task-2/AI_DEBUG.md && \
-       grep -q "^## The Prompt" task-2/AI_DEBUG.md && \
-       grep -q "^## The Solution" task-2/AI_DEBUG.md && \
-       grep -q "^## Reflection" task-2/AI_DEBUG.md; then
+    if grep -q "^## The Error" AI_DEBUG.md && \
+       grep -q "^## The Prompt" AI_DEBUG.md && \
+       grep -q "^## The Solution" AI_DEBUG.md && \
+       grep -q "^## Reflection" AI_DEBUG.md; then
         task8=10
         task8_msg="all sections present but file looks too short to be filled in"
-        if [ "$(wc -c < task-2/AI_DEBUG.md)" -gt 1800 ]; then
+        if [ "$(wc -c < AI_DEBUG.md)" -gt 1800 ]; then
             task8=15
             task8_msg="AI_DEBUG.md is filled in"
         fi
diff --git a/task-2/AI_DEBUG.md b/AI_DEBUG.md
similarity index 100%
rename from task-2/AI_DEBUG.md
rename to AI_DEBUG.md
diff --git a/README.md b/README.md
index 3880f34..07d2f40 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,9 @@ cohort copies.
 
 | Task | Folder | Points | What you build |
 |---|---|---|---|
-| **Tasks 1-6** — Ingestion Pipeline | `task-1/` | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. |
-| **Task 7** — Azure CLI + Portal | `task-1/output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. |
-| **Task 8** — AI Debug Report | `task-2/` | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. |
+| **Tasks 1-6** — Ingestion Pipeline | root | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. |
+| **Task 7** — Azure CLI + Portal | `output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. |
+| **Task 8** — AI Debug Report | root | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. |
 
 Total: 100 · Passing: 60.
 
@@ -34,27 +34,25 @@ Total: 100 · Passing: 60.
 
 ```text
 .
-├── task-1/
-│   ├── data/
-│   │   └── weather_stations.csv        # messy input dataset (committed; do not edit)
-│   ├── output/                          # pipeline writes here (gitignored except templates)
-│   │   ├── error_report.json            # generated by pipeline.py
-│   │   ├── azure_resource_groups.json   # Task 7: save ARM API response here
-│   │   └── azure_compare.md             # Task 7: fill in 3 comparison points
-│   ├── models.py                        # Pydantic WeatherReading model — fill in TODOs
-│   ├── ingest_api.py                    # fetch_with_retry + API ingestion — fill in TODOs
-│   ├── ingest_files.py                  # CSV reader — fill in TODOs
-│   ├── validate.py                      # batch validation — fill in TODOs
-│   ├── database.py                      # SQLite create, upsert, query — fill in TODOs
-│   ├── pipeline.py                      # orchestrator — fill in TODOs
-│   ├── .env.example                     # no secrets needed; copy to .env if you extend it
-│   └── requirements.txt
-├── task-2/
-│   └── AI_DEBUG.md                      # Task 8: fill in the four sections
+├── data/
+│   └── weather_stations.csv        # messy input dataset (committed; do not edit)
+├── output/                          # pipeline writes here (gitignored except templates)
+│   ├── error_report.json            # generated by pipeline.py
+│   ├── azure_resource_groups.json   # Task 7: save ARM API response here
+│   └── azure_compare.md             # Task 7: fill in 3 comparison points
+├── models.py                        # Pydantic WeatherReading model — fill in TODOs
+├── ingest_api.py                    # fetch_with_retry + API ingestion — fill in TODOs
+├── ingest_files.py                  # CSV reader — fill in TODOs
+├── validate.py                      # batch validation — fill in TODOs
+├── database.py                      # SQLite create, upsert, query — fill in TODOs
+├── pipeline.py                      # orchestrator — fill in TODOs
+├── AI_DEBUG.md                      # Task 8: fill in the four sections
+├── .env.example                     # no secrets needed; copy to .env if you extend it
+├── requirements.txt
 ├── .hyf/
-│   └── test.sh                          # auto-grader (read it to see exactly what it checks)
+│   └── test.sh                      # auto-grader (read it to see exactly what it checks)
 └── .github/workflows/
-    └── grade-assignment.yml             # runs .hyf/test.sh on every PR
+    └── grade-assignment.yml         # runs .hyf/test.sh on every PR
 ```
 
 ## Run the grader locally
@@ -62,9 +60,7 @@ Total: 100 · Passing: 60.
 Before opening a PR, run the same checks the auto-grader runs:
 
 ```bash
-cd task-1
 python3 -m pip install -r requirements.txt
-cd ..
 bash .hyf/test.sh
 cat .hyf/score.json
 ```
diff --git a/task-1/data/weather_stations.csv b/data/weather_stations.csv
similarity index 100%
rename from task-1/data/weather_stations.csv
rename to data/weather_stations.csv
diff --git a/task-1/database.py b/database.py
similarity index 100%
rename from task-1/database.py
rename to database.py
diff --git a/task-1/ingest_api.py b/ingest_api.py
similarity index 100%
rename from task-1/ingest_api.py
rename to ingest_api.py
diff --git a/task-1/ingest_files.py b/ingest_files.py
similarity index 100%
rename from task-1/ingest_files.py
rename to ingest_files.py
diff --git a/task-1/models.py b/models.py
similarity index 100%
rename from task-1/models.py
rename to models.py
diff --git a/task-1/output/azure_compare.md b/output/azure_compare.md
similarity index 100%
rename from task-1/output/azure_compare.md
rename to output/azure_compare.md
diff --git a/task-1/pipeline.py b/pipeline.py
similarity index 100%
rename from task-1/pipeline.py
rename to pipeline.py
diff --git a/task-1/requirements.txt b/requirements.txt
similarity index 100%
rename from task-1/requirements.txt
rename to requirements.txt
diff --git a/task-1/validate.py b/validate.py
similarity index 100%
rename from task-1/validate.py
rename to validate.py

From 84cd49f2740061ab8ab6a9ed62b96ecbd7c78402 Mon Sep 17 00:00:00 2001
From: Lasse Benninga <devops.pipeline@example.com>
Date: Mon, 18 May 2026 17:22:23 +0200
Subject: [PATCH 2/3] docs: rewrite README as student navigation guide + add
 step headers to all Python files

- README opens with a 'Why no task folders?' explanation and a step-by-step
  table (Step 1 = models.py through Step 6 = pipeline.py) so students know
  where to start without numbered folders to lean on
- Every Python file now has a 2-3 line header comment naming the step, the
  task, and the role that file plays in the pipeline
- Scoring ladder rewritten as a table for scannability
- Instructor notes and the student-redirect callout moved to the bottom, so the
  student-facing navigation guide comes first
---
 README.md       | 120 +++++++++++++++++++++++++++++-------------------
 database.py     |   5 ++
 ingest_api.py   |   3 ++
 ingest_files.py |   3 ++
 models.py       |   3 ++
 pipeline.py     |   4 ++
 validate.py     |   4 ++
 7 files changed, 96 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 07d2f40..53a54d9 100644
--- a/README.md
+++ b/README.md
@@ -1,76 +1,104 @@
-# Data Track — Week 3 Assignment (Template)
+# Data Track — Week 3 Assignment
 
-The HackYourFuture Data Track Week 3 assignment: **Build a Validated Ingestion Pipeline**.
+**Build a Validated Ingestion Pipeline** · Total: 100 points · Passing: 60
 
-> 👩‍🎓 **Students:** you are in the wrong place. Do **not** fork or use this template.
-> Go to your cohort's assignment repo under
-> [`HackYourAssignment`](https://github.com/HackYourAssignment) (e.g. `c55-data-week3`,
-> `c56-data-week3`, …). Your teacher posts the exact link in your cohort channel.
-> Fork the cohort repo, branch, and open a PR back to it. Full instructions live in the
-> Week 3 Assignment chapter in the learning platform.
+---
 
-## For instructors / track maintainers
+## Why no task folders?
+
+Previous assignments split work across `task-1/`, `task-2/`, etc. This assignment drops that structure intentionally. Small Python projects commonly keep their related modules together at the root — you navigate by reading the code, not by opening numbered folders.
+
+Every file you need to touch is listed below, in the order you should work through them.
 
-This repo is the **upstream template** for the Week 3 assignment. At the start of each
-cohort, generate a cohort-specific repo under the `HackYourAssignment` org from this
-template (GitHub: **Use this template → Create a new repository**, owner =
-`HackYourAssignment`, name = `c<NN>-data-week3`). Students then fork *that* cohort repo
-and open PRs back to it; the auto-grader runs on every push.
+---
 
-Edits to the assignment, dataset, or grader belong here on the template, not on the
-cohort copies.
+## Where to start
 
-## Tasks at a glance
+Work through the files in this order. Each one maps to a task in the assignment chapter.
 
-| Task | Folder | Points | What you build |
+| Step | File | Task in the chapter | Points |
 |---|---|---|---|
-| **Tasks 1-6** — Ingestion Pipeline | root | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. |
-| **Task 7** — Azure CLI + Portal | `output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. |
-| **Task 8** — AI Debug Report | root | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. |
+| 1 | `models.py` | Task 4: Pydantic Validation | — |
+| 2 | `ingest_api.py` | Task 1: Error Handling + Task 2: API Ingestion | — |
+| 3 | `ingest_files.py` | Task 3: File Reading | — |
+| 4 | `validate.py` | Task 4: Pydantic Validation | — |
+| 5 | `database.py` | Task 5: Database Storage | — |
+| 6 | `pipeline.py` | Task 6: Pipeline Orchestration | 70 total |
+| 7 | `output/azure_compare.md` | Task 7: Azure CLI + Portal | 15 |
+| 8 | `AI_DEBUG.md` | Task 8: AI Debug Report | 15 |
 
-Total: 100 · Passing: 60.
+Open each file and read the docstrings and TODO comments — they explain exactly what to implement. Start with `models.py` and `ingest_api.py`; `pipeline.py` is the last thing you wire together.
+
+---
 
 ## Repository layout
 
 ```text
 .
 ├── data/
-│   └── weather_stations.csv        # messy input dataset (committed; do not edit)
-├── output/                          # pipeline writes here (gitignored except templates)
-│   ├── error_report.json            # generated by pipeline.py
-│   ├── azure_resource_groups.json   # Task 7: save ARM API response here
-│   └── azure_compare.md             # Task 7: fill in 3 comparison points
-├── models.py                        # Pydantic WeatherReading model — fill in TODOs
-├── ingest_api.py                    # fetch_with_retry + API ingestion — fill in TODOs
-├── ingest_files.py                  # CSV reader — fill in TODOs
-├── validate.py                      # batch validation — fill in TODOs
-├── database.py                      # SQLite create, upsert, query — fill in TODOs
-├── pipeline.py                      # orchestrator — fill in TODOs
-├── AI_DEBUG.md                      # Task 8: fill in the four sections
-├── .env.example                     # no secrets needed; copy to .env if you extend it
+│   └── weather_stations.csv        # input dataset — do not edit
+├── output/
+│   ├── azure_compare.md            # Task 7: fill in your 3 comparison sentences
+│   └── azure_resource_groups.json  # Task 7: generated by your Python script
+├── models.py          # Step 1 — Pydantic model (Task 4)
+├── ingest_api.py      # Step 2 — fetch_with_retry + API call (Tasks 1–2)
+├── ingest_files.py    # Step 3 — CSV reader (Task 3)
+├── validate.py        # Step 4 — batch validation (Task 4)
+├── database.py        # Step 5 — SQLite tables + upsert (Task 5)
+├── pipeline.py        # Step 6 — orchestrator that calls everything (Task 6)
+├── AI_DEBUG.md        # Step 8 — your debugging log (Task 8)
 ├── requirements.txt
+├── .env.example
 ├── .hyf/
-│   └── test.sh                      # auto-grader (read it to see exactly what it checks)
+│   └── test.sh        # auto-grader — read this to see exactly what is checked
 └── .github/workflows/
-    └── grade-assignment.yml         # runs .hyf/test.sh on every PR
+    └── grade-assignment.yml
 ```
 
-## Run the grader locally
+Files the pipeline generates at runtime (gitignored):
+- `weather.db` — SQLite database
+- `output/error_report.json` — invalid records from validation
+
+---
 
-Before opening a PR, run the same checks the auto-grader runs:
+## Run the pipeline
 
 ```bash
 python3 -m pip install -r requirements.txt
+python3 -m pipeline
+```
+
+---
+
+## Check your score locally
+
+Run the same checks the auto-grader runs on every PR push:
+
+```bash
 bash .hyf/test.sh
 cat .hyf/score.json
 ```
 
-## Scoring ladder (Tasks 1-6)
+---
+
+## Scoring ladder (Tasks 1–6)
+
+Points are awarded incrementally so partial work earns partial credit:
+
+| Score | What the grader checks |
+|---|---|
+| 10/70 | All required files exist |
+| 20/70 | `python3 -m pipeline` runs without crashing |
+| 40/70 | `output/error_report.json` is a valid list with the right fields; `weather.db` has rows |
+| 50/70 | Pipeline is idempotent: a second run leaves the same row count (upsert working) |
+| 70/70 | Code uses: `@field_validator` + `@classmethod` in `models.py`, `?` placeholders in `database.py`, `ON CONFLICT` upsert in `database.py`, `time.sleep` backoff in `ingest_api.py` |
+
+---
+
+## For instructors / track maintainers
+
+This repo is the upstream template. At the start of each cohort, generate a cohort repo under `HackYourAssignment` (**Use this template → Create a new repository**, owner = `HackYourAssignment`, name = `c<NN>-data-week3`). Students fork that cohort repo and open PRs back to it; the auto-grader runs on every push.
 
-The grader awards points incrementally so partial credit is meaningful:
+Edits to the assignment, dataset, or grader belong here on the template — not on cohort copies.
 
-- **10/70** — required files all exist (`models.py`, `ingest_api.py`, `ingest_files.py`, `validate.py`, `database.py`, `pipeline.py`, `.env.example`).
-- **20/70** — `python3 -m pipeline` runs from `task-1/` without crashing.
-- **40/70** — `output/error_report.json` exists, is a valid JSON list, and contains objects with `index`, `source`, `raw_record`, and `error_details` fields; `weather.db` has rows in `weather_readings`.
-- **50/70** — pipeline is idempotent: running it twice leaves the same row count in `weather_readings` (upsert working correctly).
-- **70/70** — code uses the required patterns: `@field_validator` + `@classmethod` in `models.py`, parameterized queries (`?` placeholders) in `database.py`, `ON CONFLICT ... DO UPDATE SET` in `database.py`, retry/backoff logic in `ingest_api.py`.
+> 👩‍🎓 **Students:** if you landed here, you are in the wrong place. Go to your cohort repo under [`HackYourAssignment`](https://github.com/HackYourAssignment). Your teacher posts the exact link in your cohort channel.
diff --git a/database.py b/database.py
index f5e2345..8322d17 100644
--- a/database.py
+++ b/database.py
@@ -1,3 +1,8 @@
+# Step 5 — Task 5: Database Storage
+# create_tables()   — run once at startup to set up raw_weather and weather_readings.
+# insert_raw()      — store every record before validation so nothing is lost.
+# upsert_readings() — insert valid records; ON CONFLICT updates instead of duplicating.
+# count_readings()  — query the final row count for the pipeline summary.
 import sqlite3
 from pathlib import Path
 
diff --git a/ingest_api.py b/ingest_api.py
index a7acc55..66eb32c 100644
--- a/ingest_api.py
+++ b/ingest_api.py
@@ -1,3 +1,6 @@
+# Step 2 — Tasks 1 & 2: Error Handling + API Ingestion
+# fetch_with_retry handles transient network errors (Task 1).
+# fetch_api_records calls it and shapes the response into flat dicts (Task 2).
 import logging
 import time
 
diff --git a/ingest_files.py b/ingest_files.py
index 9f01e15..8b010ff 100644
--- a/ingest_files.py
+++ b/ingest_files.py
@@ -1,3 +1,6 @@
+# Step 3 — Task 3: File Reading
+# Read the messy CSV and normalize each row into the same dict format
+# that fetch_api_records() produces, so validate_records() can handle both sources.
 import csv
 from pathlib import Path
 
diff --git a/models.py b/models.py
index 7a4680f..cae42fe 100644
--- a/models.py
+++ b/models.py
@@ -1,3 +1,6 @@
+# Step 1 — Task 4: Pydantic Validation
+# Define the WeatherReading model that every ingested record must pass.
+# Both the API and CSV data flow through this model before reaching the database.
 from pydantic import BaseModel, Field, field_validator
 
 
diff --git a/pipeline.py b/pipeline.py
index e19de9d..d8dc710 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,3 +1,7 @@
+# Step 6 — Task 6: Pipeline Orchestration
+# This is the entry point. It calls every module you built in steps 1–5 in order.
+# Implement run_pipeline() so that `python3 -m pipeline` produces a summary and
+# writes output/error_report.json. The auto-grader runs that same command.
 import json
 import logging
 from pathlib import Path
diff --git a/validate.py b/validate.py
index c2b694d..ada2716 100644
--- a/validate.py
+++ b/validate.py
@@ -1,3 +1,7 @@
+# Step 4 — Task 4: Pydantic Validation (batch)
+# validate_records() runs every record through WeatherReading and splits the
+# results into a valid list and an error list. pipeline.py calls this once for
+# all records combined, then stores the valid ones and saves the errors to JSON.
 from pydantic import ValidationError
 
 from models import WeatherReading

From 775c9792209aaca12a7ae97b81e87b994cde75b6 Mon Sep 17 00:00:00 2001
From: Lasse Benninga <devops.pipeline@example.com>
Date: Mon, 18 May 2026 17:45:35 +0200
Subject: [PATCH 3/3] fix(grader): handle multi-line SQL and clean generated
 files before each run

- Parameterized query check now passes for both inline (execute('...?...')) and
  multi-line/variable-assignment SQL forms: checks for '?' anywhere in database.py
  AND an .execute call, rather than requiring both on the same physical line
- Remove weather.db and output/error_report.json at grader start so local reruns
  cannot inflate the score with stale artifacts from a prior successful run
---
 .hyf/test.sh | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.hyf/test.sh b/.hyf/test.sh
index 18a7cc4..52634d5 100644
--- a/.hyf/test.sh
+++ b/.hyf/test.sh
@@ -18,6 +18,11 @@ cat > "$SCRIPT_DIR/score.json" <<'INIT'
 {"score": 0, "pass": false, "passingScore": 60}
 INIT
 
+# Remove runtime-generated files so each grader run starts from a clean state.
+# In CI the checkout is always clean; locally this prevents stale artifacts from
+# a previous successful run inflating the score on a broken re-submission.
+rm -f weather.db output/error_report.json
+
 # --- Tasks 1-6: Ingestion Pipeline (70 points) ---
 #
 # Scoring ladder (each level requires all previous levels to pass):
@@ -116,7 +121,11 @@ print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0])
                     #                   name "fetch_with_retry" or docstring words)
                     has_field_validator=$(grep -cE "@field_validator" models.py || true)
                     has_classmethod=$(grep -cE "@classmethod" models.py || true)
-                    has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" database.py || true)
+                    # Parameterized queries (heuristic): any line in database.py containing ?
+                    # (comments included) AND an .execute call — covers inline and multi-line SQL.
+                    has_q=$(grep -c "?" database.py || true)
+                    has_exec=$(grep -cE "\.execute" database.py || true)
+                    if [ "$has_q" -gt 0 ] && [ "$has_exec" -gt 0 ]; then has_param_queries=1; else has_param_queries=0; fi
                     has_on_conflict=$(grep -ciE "ON CONFLICT" database.py || true)
                     has_sleep=$(grep -cE "time\.sleep" ingest_api.py || true)