diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..5f7cd57 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,13 @@ +{ + "name": "HYF Week 2 Assignment", + "image": "mcr.microsoft.com/devcontainers/python:3.11", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ] + } + }, + "postCreateCommand": "python3 -m pip install -r task-1/requirements.txt && echo '✅ HYF Week 2 Assignment Codespace ready. Run the auto-grader locally with: bash .hyf/test.sh && cat .hyf/score.json'" +} diff --git a/.gitignore b/.gitignore index 2b76d7c..339a448 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,9 @@ dist vite.config.js.timestamp-* vite.config.ts.timestamp-* + +# Python virtual environments +venv/ +.venv/ +task-*/venv/ +task-*/.venv/ diff --git a/.hyf/test.sh b/.hyf/test.sh old mode 100644 new mode 100755 index ee037fc..bacc5ca --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -1,13 +1,211 @@ #!/usr/bin/env bash +# Auto-grade Week 2 assignment. Writes score.json next to this script. +# Total = 100, passing = 60. +# +# The auto-grade workflow runs this from the .hyf working directory; we +# resolve the repo root so the script is robust to either invocation +# (cd .hyf && bash test.sh, or bash .hyf/test.sh from the repo root). set -euo pipefail -# Run your test scripts here. -# Auto grade tool will execute this file within the .hyf working directory. -# The result should be stored in score.json file with the format shown below. -cat << EOF > score.json +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +cd "$REPO_ROOT" + +PASSING=60 + +# --- Task 1: Cleaner Pipeline (60 points) --- +# +# Scoring ladder (each level depends on the previous): +# 0 nothing committed +# 10 required files all present (config.py, models.py, transforms.py, +# pipeline.py, tests/test_transforms.py, .env.example) +# 20 pipeline runs against messy_sales.csv without crashing (the +# grader injects INPUT_PATH/OUTPUT_PATH inline; no .env touched) +# 40 output/clean_sales.csv passes structural checks (12 rows, cleaned +# fields, revenue/vat correctly calculated) +# 60 the *code* also looks engineered: models.py defines a @dataclass +# with __post_init__; transforms.py uses the {**row, ...} spread +# pattern; pytest tests/ reports all tests passing. +# +# Why the introspection cap at 40: a script that hardcodes the expected +# JSON literal could pass the structural checks without doing any real +# transformation. The 60-point tier requires the chapter's actual patterns +# (dataclass, spread, tests) to be present in the source. +task1=0 +task1_msg="missing required files in task-1/" + +required_files=( + "task-1/src/config.py" + "task-1/src/models.py" + "task-1/src/transforms.py" + "task-1/src/pipeline.py" + "task-1/tests/test_transforms.py" + "task-1/.env.example" +) + +all_present=true +for f in "${required_files[@]}"; do + if [ ! -f "$f" ]; then + all_present=false + break + fi +done + +if [ "$all_present" = true ]; then + task1=10 + task1_msg="files exist but pipeline failed to run" + + # Make sure the python-dotenv + pytest deps are available; if a + # requirements.txt exists, install it quietly. 
+ if [ -f task-1/requirements.txt ]; then + python3 -m pip install -q -r task-1/requirements.txt || \ + echo "WARN: pip install failed; pipeline may fail with ModuleNotFoundError" >&2 + fi + + # Force the canonical paths inline so the grader is deterministic + # regardless of the student's local .env (which may point INPUT_PATH / + # OUTPUT_PATH at /tmp or some other location during their own debugging). + # The student's .env is NOT read or modified by the grader. + PIPELINE_ERR=$(mktemp) + if ( cd task-1 && env INPUT_PATH=data/messy_sales.csv OUTPUT_PATH=output/clean_sales.csv python3 -m src.pipeline ) >/dev/null 2>"$PIPELINE_ERR"; then + task1=20 + task1_msg="pipeline ran but output/clean_sales.csv failed structural checks" + STRUCT_ERR=$(mktemp) + if python3 - <<'PY' 2>"$STRUCT_ERR" +import csv +from pathlib import Path + +p = Path("task-1/output/clean_sales.csv") +assert p.exists(), "output/clean_sales.csv was not created" + +with p.open() as f: + rows = list(csv.DictReader(f)) + +# 15 input rows - 3 invalid (empty name #6, negative price #7, zero qty #8) = 12 +assert len(rows) == 12, f"expected 12 cleaned rows, got {len(rows)}" + +# Required columns +required = {"transaction_id", "product_name", "category", "price", + "quantity", "customer_email", "date", "revenue", "vat"} +missing = required - set(rows[0].keys()) +assert not missing, f"output missing columns: {missing}" + +# Field-level checks +for row in rows: + name = row["product_name"] + assert name == name.strip() and name == name.title(), \ + f"product_name not cleaned: {name!r}" + email = row["customer_email"] + assert email == email.strip().lower(), \ + f"customer_email not cleaned: {email!r}" + cat = row["category"] + assert cat, f"category empty (should default to 'Unknown') in row {row['transaction_id']}" + +# Spot-check the math: row id=1 was 999.99 * 2 = 1999.98 revenue, then +# * 0.21 = 419.9958 vat (rounded to 420.00 at 2 decimals; the 0.01 +# tolerance below absorbs either rounding precision 
the student picks). +row_1 = next(r for r in rows if r["transaction_id"] == "1") +revenue_1 = float(row_1["revenue"]) +vat_1 = float(row_1["vat"]) +assert abs(revenue_1 - 1999.98) < 0.01, f"row 1 revenue wrong: {revenue_1}" +assert abs(vat_1 - 419.9958) < 0.01, f"row 1 vat wrong: {vat_1}" + +# At least one row should have category="Unknown" (row 15 had empty category) +assert any(r["category"].lower() == "unknown" for r in rows), \ + "no row has category='Unknown' (row 15's empty category should default)" +PY + then + rm -f "$STRUCT_ERR" + task1=40 + task1_msg="output passes structural checks but code is missing required engineering patterns (see below)" + + # Introspection caps. The full 60 requires: + # - models.py imports `dataclass` AND defines a __post_init__ method + # - transforms.py uses the {**row, ...} spread pattern (no mutation) + # - pytest tests/test_transforms.py passes (all student tests green) + models_has_dataclass=$(grep -cE "^[[:space:]]*from dataclasses\b|^[[:space:]]*import dataclasses\b" task-1/src/models.py || true) + models_has_post_init=$(grep -cE "^[[:space:]]*def __post_init__" task-1/src/models.py || true) + transforms_has_spread=$(grep -cE '\{\*\*' task-1/src/transforms.py || true) + + tests_pass=false + if ( cd task-1 && python3 -m pytest tests/ -q ) >/dev/null 2>&1; then + tests_pass=true + fi + + if [ "$models_has_dataclass" -gt 0 ] && \ + [ "$models_has_post_init" -gt 0 ] && \ + [ "$transforms_has_spread" -gt 0 ] && \ + [ "$tests_pass" = true ]; then + task1=60 + task1_msg="output and code structure both pass; tests green" + else + missing=() + [ "$models_has_dataclass" -eq 0 ] && missing+=("from dataclasses import ... 
in models.py") + [ "$models_has_post_init" -eq 0 ] && missing+=("__post_init__ in models.py") + [ "$transforms_has_spread" -eq 0 ] && missing+=("{**row, ...} spread pattern in transforms.py") + [ "$tests_pass" = false ] && missing+=("pytest tests/ all green") + task1_msg="output passes but code missing: $(IFS=, ; echo "${missing[*]}")" + fi + else + # Structural checks failed: surface the assertion message. + err=$(tail -3 "$STRUCT_ERR" | tr '\n' ' ' | sed 's/ */ /g' | sed 's/^ //;s/ $//') + [ -n "$err" ] && task1_msg="structural check failed: $err" + rm -f "$STRUCT_ERR" + fi + else + # Pipeline crashed: surface the last few stderr lines. + err=$(tail -3 "$PIPELINE_ERR" | tr '\n' ' ' | sed 's/ */ /g' | sed 's/^ //;s/ $//') + [ -n "$err" ] && task1_msg="pipeline failed to run: $err" + fi + rm -f "$PIPELINE_ERR" +fi + +# --- Task 2: AI Debug Report (20 points) --- +task2=0 +task2_msg="missing task-2/AI_DEBUG.md" +if [ -s task-2/AI_DEBUG.md ]; then + task2=5 + task2_msg="AI_DEBUG.md exists but missing required sections" + if grep -q "^## The Error" task-2/AI_DEBUG.md && \ + grep -q "^## The Prompt" task-2/AI_DEBUG.md && \ + grep -q "^## The Solution" task-2/AI_DEBUG.md && \ + grep -q "^## Reflection" task-2/AI_DEBUG.md; then + task2=10 + task2_msg="all sections present but file looks too short to be filled in" + # Empty template ships at ~1500 chars. Filled-in report should have + # meaningfully more content; threshold = template + ~600 chars + # of student writing (4 sections * ~150 chars each). 
+ if [ "$(wc -c < task-2/AI_DEBUG.md)" -gt 2100 ]; then + task2=20 + task2_msg="AI_DEBUG.md is filled in" + fi + fi +fi + +# --- Task 3: HYF Azure Proof (20 points) --- +task3=0 +task3_msg="missing task-3/azure_proof.png|jpg|jpeg" +for ext in png jpg jpeg; do + if [ -s "task-3/azure_proof.$ext" ]; then + task3=20 + task3_msg="azure_proof.$ext present" + break + fi +done + +score=$((task1 + task2 + task3)) +if [ "$score" -ge "$PASSING" ]; then pass=true; else pass=false; fi + +cat > "$SCRIPT_DIR/score.json" < week X assignment -The Week X assignment for the HackYourFuture can be found at the following link: [TODO: Assignment url in the learning platform] +# Data Track — Week 2 Assignment (Template) +The HackYourFuture Data Track Week 2 assignment: **Refactoring to a Clean Pipeline**. -## Implementation Instructions +> 👩‍🎓 **Students:** you are in the wrong place. Do **not** fork or use this template. +> Go to your cohort's assignment repo under +> [`HackYourAssignment`](https://github.com/HackYourAssignment) (e.g. `c55-data-week2`, +> `c56-data-week2`, …). Your teacher posts the exact link in your cohort channel. +> Fork the cohort repo, branch, and open a PR back to it. Full instructions live in the +> [Week 2 Assignment on Notion](https://www.notion.so/hackyourfuture/Week-2-Assignment-Refactoring-to-a-Clean-Pipeline-f8c27aa88d144cb18f54c49d02f50b73). -Provide clear instructions on how trainees should implement the tasks. +## For instructors / track maintainers -### Task 1 -Instructions for Task 1 +This repo is the **upstream template** for the Week 2 assignment. At the start of each +cohort, generate a cohort-specific repo under the `HackYourAssignment` org from this +template (GitHub: **Use this template → Create a new repository**, owner = +`HackYourAssignment`, name = `c-data-week2`). Students then fork *that* cohort repo +and open PRs back to it; the auto-grader runs on every push. 
-### Task 2 -Instructions for Task 2 +Edits to the assignment, dataset, or grader belong here on the template, not on the +cohort copies. -... +## Tasks at a glance +| Task | Folder | Points | What you build | +|---|---|---|---| +| **Task 1** — Cleaner Pipeline | `task-1/` | 60 | A modular Python pipeline with `config.py` (env-var loading), `models.py` (`Transaction` dataclass with `__post_init__` validation), `transforms.py` (4+ pure composable functions, no mutation), `pipeline.py` (orchestrator), and `tests/test_transforms.py` (4+ pytest tests). Reads `data/messy_sales.csv`, writes `output/clean_sales.csv`. | +| **Task 2** — AI Debug Report | `task-2/` | 20 | Document one debugging session where you used an LLM to fix a bug. Fill in the four sections of `AI_DEBUG.md`. | +| **Task 3** — HYF Azure proof | `task-3/` | 20 | Confirm your HYF Azure tenant access still works. Screenshot proof at `task-3/azure_proof.png` (or `.jpg` / `.jpeg`) showing resource group + region + €0 cost. | + +Total: 100 · Passing: 60. + +## Repository layout + +```text +. 
+├── task-1/ +│ ├── data/ +│ │ └── messy_sales.csv # the dataset (committed; do not edit) +│ ├── src/ +│ │ ├── config.py # env-var loader — fill in TODOs +│ │ ├── models.py # Transaction dataclass — fill in TODOs +│ │ ├── transforms.py # 4 pure transform functions — fill in TODOs +│ │ └── pipeline.py # orchestrator — fill in TODOs +│ ├── tests/ +│ │ └── test_transforms.py # 4 pytest tests — fill in TODOs +│ ├── output/ # your pipeline writes clean_sales.csv here (gitignored) +│ ├── .env.example # copy to .env (gitignored) before running +│ └── requirements.txt # python3 -m pip install -r requirements.txt +├── task-2/ +│ └── AI_DEBUG.md # fill in the four sections +├── task-3/ +│ └── azure_proof.png # add your screenshot here +├── .hyf/ +│ └── test.sh # auto-grader (read it to see exactly what it checks) +└── .github/workflows/ + └── grade-assignment.yml # runs .hyf/test.sh on every PR +``` + +## Run the grader locally + +Before opening a PR, run the same checks the auto-grader runs: + +```bash +cd task-1 +python3 -m pip install -r requirements.txt +cp .env.example .env +cd .. +bash .hyf/test.sh +cat .hyf/score.json +``` + +The grader prints a per-task breakdown so you can see exactly which check failed and +why. The PR-time grader does the same — your local run and the CI run are identical. + +## Scoring ladder (Task 1) + +The grader awards points incrementally so partial credit is meaningful: + +- **10/60** — required files exist (`config.py`, `models.py`, `transforms.py`, `pipeline.py`, `tests/test_transforms.py`, `.env.example`). +- **20/60** — `python -m src.pipeline` runs from `task-1/` without crashing (the grader injects `INPUT_PATH` and `OUTPUT_PATH` inline; your local `.env` is not used during grading). 
+- **40/60** — `output/clean_sales.csv` passes structural checks: 12 rows (15 input − 3 invalid/zero-quantity), lowercased emails, title-cased product names, "Unknown" filled in for missing categories, `revenue` and `vat` columns present and correctly calculated. +- **60/60** — code looks engineered: `models.py` defines a `@dataclass` with `__post_init__`; `transforms.py` uses the `{**row, ...}` spread pattern (no mutation); `pytest tests/` reports all tests passing. + +The 40-point cap exists to stop a 5-line script that hardcodes the expected CSV output from getting full marks. Real engineering patterns (dataclass + spread + tests) are required for the top 20 points. diff --git a/task-1/.env.example b/task-1/.env.example new file mode 100644 index 0000000..c76c0be --- /dev/null +++ b/task-1/.env.example @@ -0,0 +1,2 @@ +INPUT_PATH=data/messy_sales.csv +OUTPUT_PATH=output/clean_sales.csv diff --git a/task-1/data/messy_sales.csv b/task-1/data/messy_sales.csv new file mode 100644 index 0000000..e29656b --- /dev/null +++ b/task-1/data/messy_sales.csv @@ -0,0 +1,16 @@ +transaction_id,product_name,category,price,quantity,customer_email,date +1, laptop PRO ,Electronics,999.99,2,alice@example.com,2024-03-15 +2,WIRELESS MOUSE,Electronics,29.99,5, BOB@Company.COM ,2024-03-15 +3, usb cable,Electronics,4.99,10,,2024-03-16 +4, Office Chair ,Furniture,349.50,1,charlie@work.org,2024-03-16 +5,standing DESK,Furniture,599.00,1,charlie@work.org,not_a_date +6,,Electronics,19.99,3,dave@email.com,2024-03-17 +7, Mechanical Keyboard ,Electronics,-89.99,2,eve@startup.io,2024-03-17 +8,monitor ARM,Furniture,79.99,0,frank@corp.com,2024-03-18 +9, WEBCAM hd ,Electronics,54.99,1, ,2024-03-18 +10,desk lamp,Furniture,34.99,4,grace@university.edu,2024-03-19 +11, NOISE CANCELLING headphones,Electronics,199.99,1,alice@example.com,2024-03-19 +12,cable management KIT,Furniture,15.99,6,henry@business.com,2024-03-20 +13, ergonomic MOUSE PAD ,Furniture,24.99,3,ivan@email.com,2024-03-20 +14,laptop
STAND,Furniture,45.99,2,jenny@work.org,2024-03-21 +15, BLUETOOTH speaker,,39.99,1,karl@startup.io,2024-03-21 diff --git a/task-1/task 1 files b/task-1/output/.gitkeep similarity index 100% rename from task-1/task 1 files rename to task-1/output/.gitkeep diff --git a/task-1/requirements.txt b/task-1/requirements.txt new file mode 100644 index 0000000..c0d3e0b --- /dev/null +++ b/task-1/requirements.txt @@ -0,0 +1,2 @@ +python-dotenv>=1.0.0 +pytest>=7.0 diff --git a/task-2/task 2 files b/task-1/src/__init__.py similarity index 100% rename from task-2/task 2 files rename to task-1/src/__init__.py diff --git a/task-1/src/config.py b/task-1/src/config.py new file mode 100644 index 0000000..e59f7c1 --- /dev/null +++ b/task-1/src/config.py @@ -0,0 +1,35 @@ +"""Configuration loader. + +Read INPUT_PATH and OUTPUT_PATH from a .env file (see .env.example for the +expected variable names) and expose them as named imports. + +Tasks (see chapter Task 1): + 1. Use python-dotenv's load_dotenv() to read the .env file. + 2. Read INPUT_PATH and OUTPUT_PATH from os.environ. + 3. Raise ValueError if either is missing — do NOT let None silently propagate. +""" +import os + +from dotenv import load_dotenv + + +# Load .env values into os.environ before they're read by _required(). +# (Step 1 from the docstring above; already wired up so the rest of the +# module can rely on os.environ being populated.) +load_dotenv() + + +def _required(name: str) -> str: + """Read an env var; fail loudly if missing.""" + # TODO 2: Read os.environ[name]; if not set, raise ValueError with a + # message that names the missing variable AND points at .env.example. + raise NotImplementedError("Implement _required: see TODO 2 in config.py") + + +# TODO 3: Replace the placeholder lines below by calling _required(...) for +# each variable. 
INPUT_PATH and OUTPUT_PATH must be importable from this +# module by the rest of the pipeline as a relative import +# (`from .config import INPUT_PATH, ...`), since the pipeline runs as +# `python -m src.pipeline`. +INPUT_PATH: str = "" # TODO: _required("INPUT_PATH") +OUTPUT_PATH: str = "" # TODO: _required("OUTPUT_PATH") diff --git a/task-1/src/models.py b/task-1/src/models.py new file mode 100644 index 0000000..865b2b1 --- /dev/null +++ b/task-1/src/models.py @@ -0,0 +1,33 @@ +"""Data model for a sales transaction. + +Tasks (see chapter Task 2): + 1. Define a Transaction dataclass with the fields listed below. + 2. Use __post_init__ to enforce: price >= 0 and product_name non-empty. + +The pipeline converts cleaned dicts into Transaction instances at the +boundary, so the dataclass is the schema-of-record for everything that +gets written to the output CSV. +""" +from dataclasses import dataclass + + +# TODO 1: Define a @dataclass called Transaction with these fields: +# transaction_id: int +# product_name: str +# category: str +# price: float +# quantity: int +# customer_email: str +# date: str +# revenue: float = 0.0 +# vat: float = 0.0 +# +# TODO 2: Add __post_init__ that raises ValueError when: +# - self.price < 0 (with a message naming the bad value) +# - not self.product_name.strip() (empty / whitespace-only product name) + + +# Replace this stub with your dataclass: +@dataclass +class Transaction: + transaction_id: int # TODO: replace this stub with the full field list above diff --git a/task-1/src/pipeline.py b/task-1/src/pipeline.py new file mode 100644 index 0000000..adce136 --- /dev/null +++ b/task-1/src/pipeline.py @@ -0,0 +1,70 @@ +"""Pipeline entry point. + +Tasks (see chapter Task 4): + 1. Load configuration (INPUT_PATH, OUTPUT_PATH) from config.py. + 2. Read the messy CSV into a list of dicts. + 3. Run the transform chain (remove_invalid -> clean_fields -> + filter_zero_quantity -> calculate_revenue). + 4. 
Convert each cleaned dict into a Transaction instance (this exercises + the dataclass __post_init__ validation as a final guard). + 5. Save the result as a CSV at OUTPUT_PATH. + 6. Print a summary: total transactions, total revenue, total VAT. + +The transform layer never opens files or prints. All I/O lives here. + +Run from the task-1/ directory: + python -m src.pipeline +""" +import csv +from dataclasses import asdict +from pathlib import Path + +from .config import INPUT_PATH, OUTPUT_PATH +from .models import Transaction +from .transforms import ( + calculate_revenue, + clean_fields, + filter_zero_quantity, + remove_invalid, +) + + +def read_csv(path: str) -> list[dict]: + """Read a CSV file into a list of dicts. I/O only — no business rules.""" + # TODO: implement using csv.DictReader. + raise NotImplementedError + + +def write_csv(rows: list[dict], path: str) -> None: + """Write a list of dicts to CSV. I/O only — no business rules.""" + # TODO: implement using csv.DictWriter. + raise NotImplementedError + + +def run() -> None: + raw = read_csv(INPUT_PATH) + data = remove_invalid(raw) + data = clean_fields(data) + data = filter_zero_quantity(data) + data = calculate_revenue(data) + + # Materialise as Transaction instances so the dataclass __post_init__ + # acts as a final guard before serialisation. + # TODO: cast price / quantity / revenue / vat to the right types here + # if your transforms left them as strings, then iterate over `data` + # to build Transaction(**row) for each cleaned row. + transactions = [Transaction(**row) for row in data] + + # Output dir must exist. Use pathlib for cross-platform safety. 
+ Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True) + write_csv([asdict(t) for t in transactions], OUTPUT_PATH) + + total_revenue = sum(t.revenue for t in transactions) + total_vat = sum(t.vat for t in transactions) + print(f"Processed {len(transactions)} transactions") + print(f"Total revenue: €{total_revenue:.2f}") + print(f"Total VAT: €{total_vat:.2f}") + + +if __name__ == "__main__": + run() diff --git a/task-1/src/transforms.py b/task-1/src/transforms.py new file mode 100644 index 0000000..e6bdd76 --- /dev/null +++ b/task-1/src/transforms.py @@ -0,0 +1,50 @@ +"""Pure transform functions over a list of dicts. + +Tasks (see chapter Task 3): + Build at least these four pure, composable transforms. Each MUST: + - take a list of dicts and return a NEW list (never mutate the input) + - be free of I/O (no file reads, no prints) + - be testable with hand-rolled data (see tests/test_transforms.py) + +Each function must return a new list of new dicts, not mutate the input +rows in place. The chapter (Functional Composition) shows the canonical +pattern; the auto-grader checks that you used it inside the function +bodies, not just in prose. +""" + + +def remove_invalid(rows: list[dict]) -> list[dict]: + """Remove rows with empty product_name OR negative price. + + Empty here means missing, "", or whitespace-only. + """ + # TODO: implement. Return a new list, do not mutate `rows`. + raise NotImplementedError + + +def clean_fields(rows: list[dict]) -> list[dict]: + """Clean string fields: + + - product_name: strip + title-case + - customer_email: strip + lowercase + - category: default to "Unknown" if missing or empty + + Return a new list. Do not mutate the input rows. + """ + # TODO: implement. + raise NotImplementedError + + +def calculate_revenue(rows: list[dict], vat_rate: float = 0.21) -> list[dict]: + """Add 'revenue' (price * quantity) and 'vat' (revenue * vat_rate) fields. + + Round both to 2 decimal places. Coerce price/quantity from string if needed. 
+ """ + # TODO: implement. + raise NotImplementedError + + +def filter_zero_quantity(rows: list[dict]) -> list[dict]: + """Remove rows where quantity is 0.""" + # TODO: implement. + raise NotImplementedError diff --git a/task-1/tests/__init__.py b/task-1/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/task-1/tests/test_transforms.py b/task-1/tests/test_transforms.py new file mode 100644 index 0000000..8d6bd80 --- /dev/null +++ b/task-1/tests/test_transforms.py @@ -0,0 +1,40 @@ +"""Tests for the pure transform functions (chapter Task 5). + +Write at least 4 tests: + - test_remove_invalid_drops_empty_names + - test_clean_fields_normalizes_names + - test_calculate_revenue_adds_fields + - test_no_mutation +""" +import pytest + +from src.transforms import ( + calculate_revenue, + clean_fields, + filter_zero_quantity, + remove_invalid, +) + + +def test_remove_invalid_drops_empty_names(): + # TODO: feed in 3 rows (one valid, one empty product_name, one + # whitespace-only product_name); assert only the valid one survives. + raise NotImplementedError + + +def test_clean_fields_normalizes_names(): + # TODO: feed a row with messy product_name and uppercase email; assert + # the output has stripped + title-cased name and lowercase email. + raise NotImplementedError + + +def test_calculate_revenue_adds_fields(): + # TODO: feed a row with price=100, quantity=3; assert output has + # revenue=300.0 and vat=63.0 (default VAT rate is 0.21). + raise NotImplementedError + + +def test_no_mutation(): + # TODO: feed in a list, run any transform on it, assert the original + # list is unchanged. This is the most important test in the file. + raise NotImplementedError diff --git a/task-2/AI_DEBUG.md b/task-2/AI_DEBUG.md new file mode 100644 index 0000000..83cad22 --- /dev/null +++ b/task-2/AI_DEBUG.md @@ -0,0 +1,37 @@ +# AI Debug Report — Task 2 + +While building Task 1 (the Cleaner Pipeline), you will encounter at least one bug. 
(If not, introduce one intentionally — pick the most surprising thing about Python you noticed this week and break it.) + +Use an LLM (ChatGPT, Claude, etc.) to help you debug it, then fill in the four sections below. The goal is not "the AI fixed it"; the goal is showing you understood what was broken, what the AI suggested, and whether you accepted or pushed back. + +Aim for 100-200 words per section. Bullet points are fine. + +## The Error + +What went wrong? Paste the traceback or the wrong-output sample. Include the file and the line you were running when it broke. + +``` +(paste here) +``` + +## The Prompt + +What did you ask the AI? Paste the actual prompt verbatim. (Include the code or stack trace you pasted alongside it; do NOT include any real `.env` values, API keys, or PII — replace those with `<REDACTED>`.) + +``` +(paste here) +``` + +## The Solution + +What did the AI suggest? Did it work on the first try? Did you have to follow up? Paste the final code change (a small diff is best). + +``` +(paste here) +``` + +## Reflection + +Did you understand *why* the original code was broken before the AI told you? If not, what was the gap in your mental model? If you understood it before asking, why did you still ask the AI — speed, second opinion, or something else? + +(write here, ~100 words) diff --git a/task-3/.gitkeep b/task-3/.gitkeep new file mode 100644 index 0000000..e69de29