HackYourFuture · lassebenni · May 18, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -4,7 +4,7 @@
   "features": {
     "ghcr.io/devcontainers/features/azure-cli:1": {}
   },
-  "postCreateCommand": "python3 -m pip install -r task-1/requirements.txt",
+  "postCreateCommand": "python3 -m pip install -r requirements.txt",
   "customizations": {
     "vscode": {
       "extensions": [

diff --git a/task-1/.env.example → .env.example b/task-1/.env.example → .env.example
diff --git a/.gitignore b/.gitignore
@@ -25,9 +25,9 @@ env/
 !.env.example
 
 # Generated pipeline output (committed templates stay; generated files do not)
-task-1/output/error_report.json
-task-1/output/azure_resource_groups.json
-task-1/weather.db
+output/error_report.json
+output/azure_resource_groups.json
+weather.db
 
 # Editor and IDE settings
 .vscode/

diff --git a/.hyf/test.sh b/.hyf/test.sh
@@ -18,6 +18,11 @@ cat > "$SCRIPT_DIR/score.json" <<'INIT'
 {"score": 0, "pass": false, "passingScore": 60}
 INIT
 
+# Remove runtime-generated files so each grader run starts from a clean state.
+# In CI the checkout is always clean; locally this prevents stale artifacts from
+# a previous successful run inflating the score on a broken re-submission.
+rm -f weather.db output/error_report.json
+
 # --- Tasks 1-6: Ingestion Pipeline (70 points) ---
 #
 # Scoring ladder (each level requires all previous levels to pass):
@@ -29,16 +34,16 @@ INIT
 #   70  code uses required patterns (@field_validator, parameterized queries,
 #       ON CONFLICT upsert, time.sleep backoff)
 task16=0
-task16_msg="missing required files in task-1/"
+task16_msg="missing required files"
 
 required_files=(
-    "task-1/models.py"
-    "task-1/ingest_api.py"
-    "task-1/ingest_files.py"
-    "task-1/validate.py"
-    "task-1/database.py"
-    "task-1/pipeline.py"
-    "task-1/.env.example"
+    "models.py"
+    "ingest_api.py"
+    "ingest_files.py"
+    "validate.py"
+    "database.py"
+    "pipeline.py"
+    ".env.example"
 )
 
 all_present=true
@@ -53,13 +58,13 @@ if [ "$all_present" = true ]; then
     task16=10
     task16_msg="files exist but pipeline failed to run"
 
-    if [ -f task-1/requirements.txt ]; then
-        python3 -m pip install -q -r task-1/requirements.txt || \
+    if [ -f requirements.txt ]; then
+        python3 -m pip install -q -r requirements.txt || \
             echo "WARN: pip install failed; pipeline may crash with ModuleNotFoundError" >&2
     fi
 
     PIPELINE_ERR=$(mktemp)
-    if ( cd task-1 && python3 -m pipeline ) >/dev/null 2>"$PIPELINE_ERR"; then
+    if python3 -m pipeline >/dev/null 2>"$PIPELINE_ERR"; then
         task16=20
         task16_msg="pipeline ran but output checks failed"
 
@@ -69,7 +74,7 @@ import json, sqlite3
 from pathlib import Path
 
 # error_report.json must exist and be a non-empty list of error objects
-rpt = Path("task-1/output/error_report.json")
+rpt = Path("output/error_report.json")
 assert rpt.exists(), "output/error_report.json was not created"
 errors = json.loads(rpt.read_text())
 assert isinstance(errors, list), "error_report.json must be a JSON list"
@@ -81,7 +86,7 @@ for i, e in enumerate(errors[:3]):
     assert not missing, f"error object {i} missing fields: {missing}"
 
 # weather.db must exist and have rows
-db = Path("task-1/weather.db")
+db = Path("weather.db")
 assert db.exists(), "weather.db was not created"
 conn = sqlite3.connect(db)
 count = conn.execute("SELECT COUNT(*) FROM weather_readings").fetchone()[0]
@@ -95,33 +100,34 @@ PY
             # Idempotency: run a second time, row count must stay the same
             count_before=$(python3 -c "
 import sqlite3
-conn = sqlite3.connect('task-1/weather.db')
+conn = sqlite3.connect('weather.db')
 print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0])
 ")
-            if ( cd task-1 && python3 -m pipeline ) >/dev/null 2>&1; then
+            if python3 -m pipeline >/dev/null 2>&1; then
                 count_after=$(python3 -c "
 import sqlite3
-conn = sqlite3.connect('task-1/weather.db')
+conn = sqlite3.connect('weather.db')
 print(conn.execute('SELECT COUNT(*) FROM weather_readings').fetchone()[0])
 ")
                 if [ "$count_before" = "$count_after" ]; then
                     task16=50
                     task16_msg="output + idempotency pass; checking code patterns"
 
                     # Code introspection for the final 20 points.
-                    # These greps target actual code constructs, not docstrings:
-                    #   @field_validator / @classmethod  — present in scaffold but only
-                    #     safe to match after pipeline passes (NotImplementedError in the
-                    #     validator would crash the pipeline before we get here)
-                    #   execute.*?           — SQL parameterized placeholder in a call
-                    #   ON CONFLICT          — upsert keyword in actual SQL string
-                    #   time\.sleep          — stdlib sleep call (avoids matching the
-                    #     function name "fetch_with_retry" or docstring words)
-                    has_field_validator=$(grep -cE "@field_validator" task-1/models.py || true)
-                    has_classmethod=$(grep -cE "@classmethod" task-1/models.py || true)
-                    has_param_queries=$(grep -cE "execute[a-z]*\(.*\?" task-1/database.py || true)
-                    has_on_conflict=$(grep -ciE "ON CONFLICT" task-1/database.py || true)
-                    has_sleep=$(grep -cE "time\.sleep" task-1/ingest_api.py || true)
+                    # Patterns target actual code constructs, not docstrings:
+                    #   ? + .execute  — a ? placeholder and an execute() call in database.py
+                    #   ON CONFLICT   — upsert keyword in actual SQL string
+                    #   time\.sleep   — stdlib sleep call (avoids the function
+                    #                   name "fetch_with_retry" or docstring words)
+                    has_field_validator=$(grep -cE "@field_validator" models.py || true)
+                    has_classmethod=$(grep -cE "@classmethod" models.py || true)
+                    # Parameterized queries: check for ? placeholder anywhere in the file
+                    # AND an execute call — handles both inline and multi-line SQL forms.
+                    has_q=$(grep -c "?" database.py || true)
+                    has_exec=$(grep -cE "\.execute" database.py || true)
+                    if [ "$has_q" -gt 0 ] && [ "$has_exec" -gt 0 ]; then has_param_queries=1; else has_param_queries=0; fi
+                    has_on_conflict=$(grep -ciE "ON CONFLICT" database.py || true)
+                    has_sleep=$(grep -cE "time\.sleep" ingest_api.py || true)
 
                     if [ "$has_field_validator" -gt 0 ] && \
                        [ "$has_classmethod" -gt 0 ] && \
@@ -157,51 +163,51 @@ fi
 
 # --- Task 7: Azure CLI + Portal (15 points) ---
 #
-#  5 pts  azure_resource_groups.json exists and is valid JSON
-# 10 pts  azure_compare.md exists
-# 15 pts  azure_compare.md has all 3 sections and is filled in (>1200 chars,
-#         which is above the committed template's ~310 chars of non-comment text)
+#  5 pts  output/azure_resource_groups.json exists and is valid JSON
+# 10 pts  output/azure_compare.md exists
+# 15 pts  output/azure_compare.md has at least 3 "## " sections and is filled in
+#         (>1200 bytes, well above the committed template's ~233 bytes)
 task7=0
-task7_msg="missing task-1/output/azure_resource_groups.json"
+task7_msg="missing output/azure_resource_groups.json"
 
-if [ -s "task-1/output/azure_resource_groups.json" ]; then
-    if python3 -c "import json; json.load(open('task-1/output/azure_resource_groups.json'))" 2>/dev/null; then
+if [ -s "output/azure_resource_groups.json" ]; then
+    if python3 -c "import json; json.load(open('output/azure_resource_groups.json'))" 2>/dev/null; then
         task7=5
         task7_msg="azure_resource_groups.json is valid JSON; azure_compare.md missing or not filled in"
 
-        if [ -s "task-1/output/azure_compare.md" ]; then
+        if [ -s "output/azure_compare.md" ]; then
             task7=10
             task7_msg="azure_compare.md exists but looks too short or missing sections"
-            section_count=$(grep -cE "^## " task-1/output/azure_compare.md || true)
-            char_count=$(wc -c < task-1/output/azure_compare.md)
+            section_count=$(grep -cE "^## " output/azure_compare.md || true)
+            char_count=$(wc -c < output/azure_compare.md)
             if [ "$section_count" -ge 3 ] && [ "$char_count" -gt 1200 ]; then
                 task7=15
                 task7_msg="azure_resource_groups.json and azure_compare.md both present and filled in"
             fi
         fi
     else
-        task7_msg="task-1/output/azure_resource_groups.json is not valid JSON"
+        task7_msg="output/azure_resource_groups.json is not valid JSON"
     fi
 fi
 
 # --- Task 8: AI Debug Report (15 points) ---
 #
-#  5 pts  task-2/AI_DEBUG.md exists
+#  5 pts  AI_DEBUG.md exists
 # 10 pts  all four sections present (## The Error, ## The Prompt, ## The Solution, ## Reflection)
 # 15 pts  file is meaningfully filled in (>1800 chars)
 task8=0
-task8_msg="missing task-2/AI_DEBUG.md"
+task8_msg="missing AI_DEBUG.md"
 
-if [ -s "task-2/AI_DEBUG.md" ]; then
+if [ -s "AI_DEBUG.md" ]; then
     task8=5
     task8_msg="AI_DEBUG.md exists but missing required sections"
-    if grep -q "^## The Error" task-2/AI_DEBUG.md && \
-       grep -q "^## The Prompt" task-2/AI_DEBUG.md && \
-       grep -q "^## The Solution" task-2/AI_DEBUG.md && \
-       grep -q "^## Reflection" task-2/AI_DEBUG.md; then
+    if grep -q "^## The Error" AI_DEBUG.md && \
+       grep -q "^## The Prompt" AI_DEBUG.md && \
+       grep -q "^## The Solution" AI_DEBUG.md && \
+       grep -q "^## Reflection" AI_DEBUG.md; then
         task8=10
         task8_msg="all sections present but file looks too short to be filled in"
-        if [ "$(wc -c < task-2/AI_DEBUG.md)" -gt 1800 ]; then
+        if [ "$(wc -c < AI_DEBUG.md)" -gt 1800 ]; then
             task8=15
             task8_msg="AI_DEBUG.md is filled in"
         fi

diff --git a/task-2/AI_DEBUG.md → AI_DEBUG.md b/task-2/AI_DEBUG.md → AI_DEBUG.md
diff --git a/README.md b/README.md
@@ -1,80 +1,104 @@
-# Data Track — Week 3 Assignment (Template)
+# Data Track — Week 3 Assignment
 
-The HackYourFuture Data Track Week 3 assignment: **Build a Validated Ingestion Pipeline**.
+**Build a Validated Ingestion Pipeline** · Total: 100 points · Passing: 60
 
-> 👩‍🎓 **Students:** you are in the wrong place. Do **not** fork or use this template.
-> Go to your cohort's assignment repo under
-> [`HackYourAssignment`](https://github.com/HackYourAssignment) (e.g. `c55-data-week3`,
-> `c56-data-week3`, …). Your teacher posts the exact link in your cohort channel.
-> Fork the cohort repo, branch, and open a PR back to it. Full instructions live in the
-> Week 3 Assignment chapter in the learning platform.
+---
 
-## For instructors / track maintainers
+## Why no task folders?
+
+Previous assignments split work across `task-1/`, `task-2/`, etc. This assignment drops that structure intentionally. Real Python projects keep all related modules at the root — you navigate by reading the code, not by opening numbered folders.
+
+Every file you need to touch is listed below, in the order you should work through them.
 
-This repo is the **upstream template** for the Week 3 assignment. At the start of each
-cohort, generate a cohort-specific repo under the `HackYourAssignment` org from this
-template (GitHub: **Use this template → Create a new repository**, owner =
-`HackYourAssignment`, name = `c<NN>-data-week3`). Students then fork *that* cohort repo
-and open PRs back to it; the auto-grader runs on every push.
+---
 
-Edits to the assignment, dataset, or grader belong here on the template, not on the
-cohort copies.
+## Where to start
 
-## Tasks at a glance
+Work through the files in this order. Each one maps to a task in the assignment chapter.
 
-| Task | Folder | Points | What you build |
+| Step | File | Task in the chapter | Points |
 |---|---|---|---|
-| **Tasks 1-6** — Ingestion Pipeline | `task-1/` | 70 | A modular pipeline: `fetch_with_retry` with exponential backoff, Open-Meteo API ingestion, CSV file ingestion, Pydantic validation with `@field_validator`, SQLite upsert storage, and a `pipeline.py` orchestrator that produces an error report and pipeline summary. |
-| **Task 7** — Azure CLI + Portal | `task-1/output/` | 15 | Run three `az` CLI commands, call the ARM API with a Bearer token, save `azure_resource_groups.json`, and fill in `azure_compare.md` with three comparison points. |
-| **Task 8** — AI Debug Report | `task-2/` | 15 | Document one LLM-assisted debugging session. Fill in the four sections of `AI_DEBUG.md`. |
+| 1 | `models.py` | Task 4: Pydantic Validation | — |
+| 2 | `ingest_api.py` | Task 1: Error Handling + Task 2: API Ingestion | — |
+| 3 | `ingest_files.py` | Task 3: File Reading | — |
+| 4 | `validate.py` | Task 4: Pydantic Validation | — |
+| 5 | `database.py` | Task 5: Database Storage | — |
+| 6 | `pipeline.py` | Task 6: Pipeline Orchestration | 70 total |
+| 7 | `output/azure_compare.md` | Task 7: Azure CLI + Portal | 15 |
+| 8 | `AI_DEBUG.md` | Task 8: AI Debug Report | 15 |
 
-Total: 100 · Passing: 60.
+Open each file and read the docstrings and TODO comments — they explain exactly what to implement. Start with `models.py` and `ingest_api.py`; `pipeline.py` is the last thing you wire together.
+
+---
 
 ## Repository layout
 
 ```text
 .
-├── task-1/
-│   ├── data/
-│   │   └── weather_stations.csv        # messy input dataset (committed; do not edit)
-│   ├── output/                          # pipeline writes here (gitignored except templates)
-│   │   ├── error_report.json            # generated by pipeline.py
-│   │   ├── azure_resource_groups.json   # Task 7: save ARM API response here
-│   │   └── azure_compare.md             # Task 7: fill in 3 comparison points
-│   ├── models.py                        # Pydantic WeatherReading model — fill in TODOs
-│   ├── ingest_api.py                    # fetch_with_retry + API ingestion — fill in TODOs
-│   ├── ingest_files.py                  # CSV reader — fill in TODOs
-│   ├── validate.py                      # batch validation — fill in TODOs
-│   ├── database.py                      # SQLite create, upsert, query — fill in TODOs
-│   ├── pipeline.py                      # orchestrator — fill in TODOs
-│   ├── .env.example                     # no secrets needed; copy to .env if you extend it
-│   └── requirements.txt
-├── task-2/
-│   └── AI_DEBUG.md                      # Task 8: fill in the four sections
+├── data/
+│   └── weather_stations.csv        # input dataset — do not edit
+├── output/
+│   ├── azure_compare.md            # Step 7 — fill in your 3 comparison points (Task 7)
+│   └── azure_resource_groups.json  # Task 7: generated by your Python script
+├── models.py          # Step 1 — Pydantic model (Task 4)
+├── ingest_api.py      # Step 2 — fetch_with_retry + API call (Tasks 1–2)
+├── ingest_files.py    # Step 3 — CSV reader (Task 3)
+├── validate.py        # Step 4 — batch validation (Task 4)
+├── database.py        # Step 5 — SQLite tables + upsert (Task 5)
+├── pipeline.py        # Step 6 — orchestrator that calls everything (Task 6)
+├── AI_DEBUG.md        # Step 8 — your debugging log (Task 8)
+├── requirements.txt
+├── .env.example
 ├── .hyf/
-│   └── test.sh                          # auto-grader (read it to see exactly what it checks)
+│   └── test.sh        # auto-grader — read this to see exactly what is checked
 └── .github/workflows/
-    └── grade-assignment.yml             # runs .hyf/test.sh on every PR
+    └── grade-assignment.yml
 ```
 
-## Run the grader locally
+Files the pipeline generates at runtime (gitignored):
+- `weather.db` — SQLite database
+- `output/error_report.json` — invalid records from validation
+
+---
 
-Before opening a PR, run the same checks the auto-grader runs:
+## Run the pipeline
 
 ```bash
-cd task-1
 python3 -m pip install -r requirements.txt
-cd ..
+python3 -m pipeline
+```
+
+---
+
+## Check your score locally
+
+Run the same checks the auto-grader runs on every PR push:
+
+```bash
 bash .hyf/test.sh
 cat .hyf/score.json
 ```
 
-## Scoring ladder (Tasks 1-6)
+---
+
+## Scoring ladder (Tasks 1–6)
+
+Points are awarded incrementally so partial work earns partial credit:
+
+| Score | What the grader checks |
+|---|---|
+| 10/70 | All required files exist |
+| 20/70 | `python3 -m pipeline` runs without crashing |
+| 40/70 | `output/error_report.json` is a valid list with the right fields; `weather.db` has rows |
+| 50/70 | Pipeline is idempotent: a second run leaves the same row count (upsert working) |
+| 70/70 | Code uses: `@field_validator` + `@classmethod` in `models.py`, `?` placeholders in `database.py`, `ON CONFLICT` upsert in `database.py`, `time.sleep` backoff in `ingest_api.py` |
+
+---
+
+## For instructors / track maintainers
+
+This repo is the upstream template. At the start of each cohort, generate a cohort repo under `HackYourAssignment` (**Use this template → Create a new repository**, owner = `HackYourAssignment`, name = `c<NN>-data-week3`). Students fork that cohort repo and open PRs back to it; the auto-grader runs on every push.
 
-The grader awards points incrementally so partial credit is meaningful:
+Edits to the assignment, dataset, or grader belong here on the template — not on cohort copies.
 
-- **10/70** — required files all exist (`models.py`, `ingest_api.py`, `ingest_files.py`, `validate.py`, `database.py`, `pipeline.py`, `.env.example`).
-- **20/70** — `python3 -m pipeline` runs from `task-1/` without crashing.
-- **40/70** — `output/error_report.json` exists, is a valid JSON list, and contains objects with `index`, `source`, `raw_record`, and `error_details` fields; `weather.db` has rows in `weather_readings`.
-- **50/70** — pipeline is idempotent: running it twice leaves the same row count in `weather_readings` (upsert working correctly).
-- **70/70** — code uses the required patterns: `@field_validator` + `@classmethod` in `models.py`, parameterized queries (`?` placeholders) in `database.py`, `ON CONFLICT ... DO UPDATE SET` in `database.py`, retry/backoff logic in `ingest_api.py`.
+> 👩‍🎓 **Students:** if you landed here, you are in the wrong place. Go to your cohort repo under [`HackYourAssignment`](https://github.com/HackYourAssignment). Your teacher posts the exact link in your cohort channel.
diff --git a/task-1/data/weather_stations.csv → data/weather_stations.csv b/task-1/data/weather_stations.csv → data/weather_stations.csv
diff --git a/task-1/database.py → database.py b/task-1/database.py → database.py
@@ -1,3 +1,8 @@
+# Step 5 — Task 5: Database Storage
+# create_tables()   — run once at startup to set up raw_weather and weather_readings.
+# insert_raw()      — store every record before validation so nothing is lost.
+# upsert_readings() — insert valid records; ON CONFLICT updates instead of duplicating.
+# count_readings()  — query the final row count for the pipeline summary.
 import sqlite3
 from pathlib import Path
 

diff --git a/task-1/ingest_api.py → ingest_api.py b/task-1/ingest_api.py → ingest_api.py
@@ -1,3 +1,6 @@
+# Step 2 — Tasks 1 & 2: Error Handling + API Ingestion
+# fetch_with_retry handles transient network errors (Task 1).
+# fetch_api_records calls it and shapes the response into flat dicts (Task 2).
 import logging
 import time
 

diff --git a/task-1/ingest_files.py → ingest_files.py b/task-1/ingest_files.py → ingest_files.py
@@ -1,3 +1,6 @@
+# Step 3 — Task 3: File Reading
+# Read the messy CSV and normalize each row into the same dict format
+# that fetch_api_records() produces, so validate_records() can handle both sources.
 import csv
 from pathlib import Path