Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
a52c00c
embeddings: init subproject
b-fein Apr 18, 2026
0fa20ac
embeddings: outline ggnn model api
b-fein Apr 18, 2026
0cd4286
fix property names for max upload sizes
b-fein Apr 19, 2026
1aaaf35
sessions: add example solution
b-fein Apr 19, 2026
fc87025
embeddings: prototype connector connection
b-fein Apr 25, 2026
4c1680a
embedding: prototype API round-trip
b-fein Apr 25, 2026
099ebff
dev: add Spring Boot Actuator dependency
b-fein Apr 25, 2026
aa3e8e0
embeddings: dummy data renderable in UI chart
b-fein Apr 25, 2026
5f2ab7b
embeddings: add debug duration logging
b-fein Apr 25, 2026
1bb808e
embeddings: progress-variance projection over time
b-fein Apr 27, 2026
00fd5aa
embeddings: call actual GGNN embedding model
b-fein Apr 28, 2026
1587614
embeddings: cache GGNN results
b-fein Apr 28, 2026
86aaae3
embeddings: only show link to embedding dashboard if profile active
b-fein Apr 28, 2026
e92d09c
embeddings: allow selecting users for timeline
b-fein Apr 28, 2026
afda3cd
embeddings: make charts zoomable
b-fein Apr 28, 2026
573838c
embeddings: fix project order in projection over time
b-fein Apr 29, 2026
98009b4
embeddings: allow selection of timeline step interval
b-fein Apr 29, 2026
0944fab
embeddings: compute embedding distance
b-fein Apr 30, 2026
6ee87a8
whisker: database model
b-fein Apr 29, 2026
e22cb52
whisker: api call
b-fein Apr 29, 2026
ed8290f
whisker: queue test execution
b-fein Apr 29, 2026
ab6e655
whisker(ui): test suite upload
b-fein Apr 29, 2026
4a63642
style: disable method name lint for repositories
b-fein Apr 29, 2026
cdf6be5
whisker: configure maximum number of parallel test executions
b-fein Apr 30, 2026
32134cf
whisker: test execution queue
b-fein Apr 29, 2026
b0e8aa5
whisker: compute test fitness
b-fein Apr 30, 2026
350c78a
whisker: show test results
b-fein May 5, 2026
1fbec07
whisker: embedding/test distance plot
b-fein May 5, 2026
4d90ff8
embeddings: explanation texts
b-fein May 5, 2026
0956826
embeddings: rename to exercise progress dashboard
b-fein May 5, 2026
867cd90
whisker: show only show if feature enabled
b-fein May 5, 2026
d597117
embeddings: only show active participants
b-fein May 5, 2026
84e553e
whisker: show test results only for selected users
b-fein May 5, 2026
0c222c2
embeddings: add template and solution to p-v-projection plot
b-fein May 5, 2026
2aec68f
embeddings: nicer layout for user selection
b-fein May 5, 2026
2084598
embeddings: add llm-embedding integration
b-fein May 6, 2026
ca75633
embeddings: add direct link to code embedding dashboard
b-fein May 6, 2026
da64c0d
setup: add embedding and Whisker connectors to docker-compose
b-fein May 6, 2026
881bbbe
embeddings(test): plumbing for integration tests
b-fein May 6, 2026
b1dd594
embeddings(ui): larger font sizes
b-fein May 9, 2026
580feef
add scratch-gui to docker-compose
b-fein May 11, 2026
4428fdb
fix: submodules
b-fein May 11, 2026
9c5efef
embeddings: add diagonal to embedding/test fitness chart
b-fein May 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[submodule "whisker-connector"]
path = whisker-connector
url = git@github.com:se2p/whisker.git
[submodule "scratch-gui"]
path = scratch-gui
url = git@github.com:se2p/NuzzleBug.git
13 changes: 13 additions & 0 deletions checkstyle-suppressions.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0"?>

<!DOCTYPE suppressions PUBLIC
"-//Puppy Crawl//DTD Suppressions 1.1//EN"
"http://checkstyle.sourceforge.net/dtds/suppressions_1_1.dtd">

<suppressions>
<!-- JPA repository name magic methods may use underscores -->
<suppress
checks="MethodName"
files=".*Repository.java"
/>
</suppressions>
5 changes: 5 additions & 0 deletions checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@
<property name="fileNamePattern" value="module\-info\.java$"/>
</module>

<module name="SuppressionFilter">
<property name="file" value="checkstyle-suppressions.xml"/>
<property name="optional" value="false"/>
</module>

<!-- Checks whether files end with a new line. -->
<!-- See https://checkstyle.org/config_misc.html#NewlineAtEndOfFile -->
<module name="NewlineAtEndOfFile"/>
Expand Down
40 changes: 39 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@ services:
dockerfile: Containerfile
restart: on-failure
environment:
spring.profiles.active: prod
spring.profiles.active: embeddings,whisker,prod
spring.datasource.url: "jdbc:mariadb://database:3306/scratchlog"
spring.datasource.user: scratchlog
spring.datasource.password: scratchlog
app.gui: http://localhost:8601
app.gui.base: http://localhost:8601
# TODO: any other options from the properties that should be overridden?
code-embeddings.model: llm
code-embeddings.embedding-connector-url: http://embedding-connector:8080
whisker.base-url: http://whisker:8091
whisker.max-parallel: 1
networks:
- scratchlog
ports:
Expand All @@ -29,12 +36,43 @@ services:
MYSQL_PASSWORD: scratchlog
MYSQL_ROOT_PASSWORD: scratchlog
volumes:
# seed database with some initial data
# - scratchlog_demo_db.sql.zst:/docker-entrypoint-initdb.d/init.sql.zst
- scratchlog-db:/var/lib/mysql
networks:
- scratchlog
ports:
- "127.0.0.1:3306:3306/tcp"

scratch-gui:
build:
context: ./scratch-gui
dockerfile: Dockerfile
restart: on-failure
ports:
- "127.0.0.1:8601:80/tcp"
networks:
- scratchlog

embedding-connector:
build:
context: ./embedding-connector
dockerfile: Containerfile
restart: on-failure
env_file:
- .env
networks:
- scratchlog

whisker:
build:
context: ./whisker-connector
dockerfile: build-for-apptainer.Dockerfile
target: api
restart: on-failure
networks:
- scratchlog

volumes:
scratchlog-db:
networks:
Expand Down
1 change: 1 addition & 0 deletions embedding-connector/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.venv/
5 changes: 5 additions & 0 deletions embedding-connector/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.env*
.idea/
.vscode/
__pycache__/
*.pyc
1 change: 1 addition & 0 deletions embedding-connector/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13
27 changes: 27 additions & 0 deletions embedding-connector/Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Two-stage build: the "builder" stage creates the virtual environment with uv,
# the final stage only receives the finished /app/.venv.
FROM docker.io/library/python:3.13-slim AS builder
# Take the uv and uvx binaries from the ghcr.io/astral-sh/uv image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Per the uv documentation: UV_PYTHON_DOWNLOADS=0 stops uv from downloading its
# own Python (the base image's interpreter is used), and UV_LINK_MODE=copy makes
# uv copy packages out of the cache mount instead of linking to them.
ENV UV_PYTHON_DOWNLOADS=0 UV_LINK_MODE=copy

WORKDIR /app

# First sync: dependencies only (--no-install-project). Only uv.lock and
# pyproject.toml are bind-mounted here, the project sources are not needed yet.
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --locked --no-install-project --no-editable

# Add the project sources, then sync again to install the project itself
# (non-editable, so the environment does not point back at /app).
COPY . /app

RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked --no-editable

# Runtime stage: same base image, only the virtual environment is copied over.
FROM docker.io/library/python:3.13-slim

COPY --from=builder /app/.venv /app/.venv

EXPOSE 8080

# NOTE(review): the README documents `GGNN_MODEL_CONFIG` as the variable holding
# the GGNN configuration path — confirm which name the application actually reads.
ENV MODEL_CONFIG_FILE=/ggnn-model-config.yaml

# The uvicorn invocation is fixed in ENTRYPOINT; host and port live in CMD so
# they can be replaced by passing different arguments when starting the container.
ENTRYPOINT ["/app/.venv/bin/uvicorn", "embedding_connector.main:app"]
CMD ["--host", "0.0.0.0", "--port", "8080"]
1 change: 1 addition & 0 deletions embedding-connector/Dockerfile
23 changes: 23 additions & 0 deletions embedding-connector/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Code Embedding Connector

Uses [uv](https://docs.astral.sh/uv/) for dependency management.


## Starting

```bash
# install dependencies
uv sync
# run the application on port 8080 (add `--port=OTHER` to change the port)
uv run embedding-connector
```


## Environment variables

- GGNN variables: only required when using the GGNN model
- `GGNN_MODEL_CONFIG`: path to a GGNN model configuration YAML
- LLM variables: only required when using an LLM API endpoint
- `LLM_API_ENDPOINT`: base URL for the OpenAI-compatible LLM provider API, e.g. `http://localhost:11434/v1/` for a local Ollama instance
- `LLM_API_KEY`: the access key for the LLM provider API
- `LLM_MODEL`: the model that should be used
52 changes: 52 additions & 0 deletions embedding-connector/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
[project]
name = "embedding-connector"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Benedikt Fein", email = "fein@fim.uni-passau.de" }
]
requires-python = ">=3.13"
dependencies = [
"fastapi[standard]>=0.136.0",
"ggnn>=0.1.0",
"numpy>=2.4.4",
"openai>=2.34.0",
"pydantic>=2.13.2",
"torch>=2.10.0,<2.11.0",
"uvicorn>=0.44.0",
]

[dependency-groups]
dev = [
"mypy>=1.20.1",
"ruff>=0.15.11",
]


[tool.uv.extra-build-dependencies]
torch-scatter = ["torch"]
torch-sparse = ["torch"]

[tool.uv.sources]
ggnn = [
{ index = "ggnn" }
]

[[tool.uv.index]]
name = "ggnn"
url = "https://gitlab.infosun.fim.uni-passau.de/api/v4/projects/1040/packages/pypi/simple"
explicit = true


[tool.fastapi]
entrypoint = "embedding_connector.main:app"


[project.scripts]
embedding-connector = "embedding_connector.main:main"


[build-system]
requires = ["uv_build>=0.11.6,<0.12.0"]
build-backend = "uv_build"
Empty file.
32 changes: 32 additions & 0 deletions embedding-connector/src/embedding_connector/ggnn_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import asyncio
import functools
import logging
from pathlib import Path
from typing import Final

import numpy
from ggnn.config import load_config
from ggnn.program_embedding.train import ProgramEmbeddingTool

log: Final[logging.Logger] = logging.getLogger(__name__)


class GgnnApiModel:
_lock: asyncio.Lock
_embedding_tool: ProgramEmbeddingTool

def __init__(self, config_file: Path) -> None:
self._lock = asyncio.Lock()

config = load_config(config_file, None, eval_only=True)
self._embedding_tool = ProgramEmbeddingTool(config, embedding_per_sprite=True)

async def embed(self, program: str) -> numpy.typing.NDArray[numpy.float64]:
async with self._lock:
per_sprite_embeddings = self._compute(program)

return numpy.mean(per_sprite_embeddings, axis=0)

@functools.lru_cache(maxsize=5_000)
def _compute(self, program: str) -> list[float]:
return self._embedding_tool.embedding(program)
38 changes: 38 additions & 0 deletions embedding-connector/src/embedding_connector/llm_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import datetime
import functools
import logging
from timeit import default_timer as timer
from typing import Final

import numpy
import openai

_log: Final[logging.Logger] = logging.getLogger(__name__)


class LlmApiModel:
    """Compute program embeddings through an OpenAI-compatible embeddings API."""

    def __init__(self, api_endpoint: str, api_key: str, model: str) -> None:
        """Create the API client.

        Args:
            api_endpoint: Base URL passed to the OpenAI client.
            api_key: Access key passed to the OpenAI client. It is not stored
                on this object.
            model: Name of the embedding model to request.
        """
        self._client: openai.OpenAI = openai.OpenAI(
            base_url=api_endpoint, api_key=api_key
        )
        self._model: str = model

    def embed(self, programs: list[str]) -> numpy.typing.NDArray[numpy.float64]:
        """Embed a batch of programs with a single API request.

        Args:
            programs: The programs to embed.

        Returns:
            An array with one row per entry of the API response, in response
            order. For an empty ``programs`` list an empty array of shape
            ``(0, 0)`` is returned without contacting the API.
        """
        if not programs:
            # Nothing to embed: skip the request; numpy.stack would reject an
            # empty list of rows anyway.
            return numpy.empty((0, 0), dtype=numpy.float64)

        start = timer()
        batch = self._client.embeddings.create(
            model=self._model,
            input=programs,
        )
        end = timer()
        _log.info(
            "Computed an %s embedding in %s.",
            self._model,
            datetime.timedelta(seconds=end - start),
        )
        return numpy.stack([e.embedding for e in batch.data])

    def embed_single(self, program: str) -> numpy.typing.NDArray[numpy.float64]:
        """Embed one program, reusing a cached result when available.

        A copy is returned so that callers modifying the array in place cannot
        corrupt the cached value seen by later calls.

        Args:
            program: The program to embed.

        Returns:
            The embedding vector of ``program``.
        """
        return self._embed_single_cached(program).copy()

    # The cache lives on the class and keys on (self, program); it therefore
    # keeps every instance that was used for a lookup alive.
    @functools.lru_cache(maxsize=5_000)
    def _embed_single_cached(
        self, program: str
    ) -> numpy.typing.NDArray[numpy.float64]:
        """Request the embedding of one program, memoising up to 5000 results."""
        return self.embed([program])[0]
Loading