From 2749d5d7730ae27708e5012dff26748a04d1df44 Mon Sep 17 00:00:00 2001 From: David Culbreth Date: Tue, 17 Feb 2026 07:03:47 -0600 Subject: [PATCH] wip --- AGENTS.md | 22 +++- docker-compose.yaml | 5 +- docker/Dockerfile.sensor.optimized | 172 +++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+), 7 deletions(-) create mode 100644 docker/Dockerfile.sensor.optimized diff --git a/AGENTS.md b/AGENTS.md index c47e6ae..6eedc55 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -84,7 +84,7 @@ docker compose logs -f # View logs **Key environment overrides**: `JWT_SECRET`, `ENCRYPTION_KEY` (required for production) ### Docker Build Optimization -- **Optimized Dockerfiles**: `docker/Dockerfile.optimized` and `docker/Dockerfile.worker.optimized` +- **Optimized Dockerfiles**: `docker/Dockerfile.optimized`, `docker/Dockerfile.worker.optimized`, and `docker/Dockerfile.sensor.optimized` - **Strategy**: Selective crate copying - only copy crates needed for each service (not entire workspace) - **Performance**: 90% faster incremental builds (~30 sec vs ~5 min for code changes) - **BuildKit cache mounts**: Persist cargo registry and compilation artifacts between builds @@ -92,6 +92,16 @@ docker compose logs -f # View logs - **Parallel builds**: 4x faster than old `sharing=locked` strategy - no serialization overhead - **Documentation**: See `docs/docker-layer-optimization.md`, `docs/QUICKREF-docker-optimization.md`, `docs/QUICKREF-buildkit-cache-strategy.md` +### Docker Runtime Standardization +- **Base image**: All worker and sensor runtime stages use `debian:bookworm-slim` (or `debian:bookworm` for worker-full) +- **Python**: Always installed via `apt-get install python3 python3-pip python3-venv` → binary at `/usr/bin/python3` +- **Node.js**: Always installed via NodeSource apt repo (`setup_${NODE_VERSION}.x`) → binary at `/usr/bin/node` +- **NEVER** use `python:` or `node:` Docker images as base — they install binaries at `/usr/local/bin/` which causes broken venv symlinks when 
multiple containers share the `runtime_envs` volume +- **UID**: All containers use UID 1000 for the `attune` user +- **Venv creation**: Uses `--copies` flag (`python3 -m venv --copies`) to avoid cross-container broken symlinks +- **Worker targets**: `worker-base` (shell), `worker-python` (shell+python), `worker-node` (shell+node), `worker-full` (all) +- **Sensor targets**: `sensor-base` (native only), `sensor-full` (native+python+node) + ### Packs Volume Architecture - **Key Principle**: Packs are NOT copied into Docker images - they are mounted as volumes - **Volume Flow**: Host `./packs/` → `init-packs` service → `packs_data` volume → mounted in all services @@ -102,9 +112,10 @@ docker compose logs -f # View logs ### Runtime Environments Volume - **Key Principle**: Runtime environments (virtualenvs, node_modules) are stored OUTSIDE pack directories -- **Volume**: `runtime_envs` named volume mounted at `/opt/attune/runtime_envs` in worker and API containers +- **Volume**: `runtime_envs` named volume mounted at `/opt/attune/runtime_envs` in worker, sensor, and API containers - **Path Pattern**: `{runtime_envs_dir}/{pack_ref}/{runtime_name}` (e.g., `/opt/attune/runtime_envs/python_example/python`) -- **Creation**: Worker creates environments on-demand before first action execution (idempotent) +- **Creation**: Worker creates environments proactively at startup and via `pack.registered` MQ events; lightweight existence check at execution time +- **Broken venv auto-repair**: Worker detects broken interpreter symlinks (e.g., from mismatched container python paths) and automatically recreates the environment - **API best-effort**: API attempts environment setup during pack registration but logs and defers to worker on failure (Docker API containers lack interpreters) - **Pack directories remain read-only**: Packs mounted `:ro` in workers; all generated env files go to `runtime_envs` volume - **Config**: `runtime_envs_dir` setting in config YAML (default: 
`/opt/attune/runtime_envs`) @@ -362,8 +373,9 @@ When reporting, ask: "Should I fix this first or continue with [original task]?" - `config.development.yaml` - Dev configuration - `Cargo.toml` - Workspace dependencies - `Makefile` - Development commands -- `docker/Dockerfile.optimized` - Optimized service builds -- `docker/Dockerfile.worker.optimized` - Optimized worker builds +- `docker/Dockerfile.optimized` - Optimized service builds (api, executor, notifier) +- `docker/Dockerfile.worker.optimized` - Optimized worker builds (shell, python, node, full) +- `docker/Dockerfile.sensor.optimized` - Optimized sensor builds (base, full) - `docker/Dockerfile.pack-binaries` - Separate pack binary builder - `scripts/build-pack-binaries.sh` - Build pack binaries script diff --git a/docker-compose.yaml b/docker-compose.yaml index 301ce1b..75b3b3b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -446,9 +446,9 @@ services: sensor: build: context: . - dockerfile: docker/Dockerfile.optimized + dockerfile: docker/Dockerfile.sensor.optimized + target: sensor-full args: - SERVICE: sensor BUILDKIT_INLINE_CACHE: 1 container_name: attune-sensor stop_grace_period: 45s @@ -467,6 +467,7 @@ services: volumes: - packs_data:/opt/attune/packs:rw - ./packs.dev:/opt/attune/packs.dev:rw + - runtime_envs:/opt/attune/runtime_envs - sensor_logs:/opt/attune/logs depends_on: init-packs: diff --git a/docker/Dockerfile.sensor.optimized b/docker/Dockerfile.sensor.optimized new file mode 100644 index 0000000..0f7f3f3 --- /dev/null +++ b/docker/Dockerfile.sensor.optimized @@ -0,0 +1,172 @@ +# Multi-stage Dockerfile for Attune sensor service +# +# Simple and robust: build the entire workspace, then copy the sensor binary +# into different runtime base images depending on language support needed. 
+#
+# Targets:
+#   sensor-base - Native sensors only (lightweight)
+#   sensor-full - Native + Python + Node.js sensors
+#
+# Usage:
+#   DOCKER_BUILDKIT=1 docker build --target sensor-base -t attune-sensor:base -f docker/Dockerfile.sensor.optimized .
+#   DOCKER_BUILDKIT=1 docker build --target sensor-full -t attune-sensor:full -f docker/Dockerfile.sensor.optimized .
+#
+# Note: Packs are NOT copied into the image — they are mounted as volumes at runtime.
+
+# Global build arguments. They are declared before the first FROM, so they are
+# in scope for FROM lines only; a stage that needs one of them in RUN/ENV must
+# re-declare it inside the stage with a bare `ARG <name>`.
+ARG RUST_VERSION=1.92
+ARG DEBIAN_VERSION=bookworm
+ARG NODE_VERSION=20
+
+# ============================================================================
+# Stage 1: Builder - Compile the entire workspace
+# ============================================================================
+FROM rust:${RUST_VERSION}-${DEBIAN_VERSION} AS builder
+
+# Build-time system packages: pkg-config, OpenSSL headers and CA certificates.
+# NOTE(review): presumably required by crates that link against OpenSSL
+# (the runtime stages install libssl3) — confirm against the workspace deps.
+RUN apt-get update && apt-get install -y \
+    pkg-config \
+    libssl-dev \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /build
+
+# Copy dependency metadata first so the `cargo fetch` layer is cached
+# when only source code changes (Cargo.toml/Cargo.lock stay the same).
+# One COPY per workspace member manifest listed here.
+COPY Cargo.toml Cargo.lock ./
+COPY crates/common/Cargo.toml ./crates/common/Cargo.toml
+COPY crates/api/Cargo.toml ./crates/api/Cargo.toml
+COPY crates/executor/Cargo.toml ./crates/executor/Cargo.toml
+COPY crates/sensor/Cargo.toml ./crates/sensor/Cargo.toml
+COPY crates/core-timer-sensor/Cargo.toml ./crates/core-timer-sensor/Cargo.toml
+COPY crates/worker/Cargo.toml ./crates/worker/Cargo.toml
+COPY crates/notifier/Cargo.toml ./crates/notifier/Cargo.toml
+COPY crates/cli/Cargo.toml ./crates/cli/Cargo.toml
+
+# Create minimal stub sources so cargo can resolve the workspace and fetch deps.
+# These are ONLY used for `cargo fetch` — never compiled: the later
+# `COPY crates/ ./crates/` brings in the real sources before `cargo build`.
+# NOTE(review): the executor `benches/context_clone.rs` stub presumably exists
+# because the executor manifest declares that bench target — confirm.
+RUN mkdir -p crates/common/src && echo "" > crates/common/src/lib.rs && \
+    mkdir -p crates/api/src && echo "fn main(){}" > crates/api/src/main.rs && \
+    mkdir -p crates/executor/src && echo "fn main(){}" > crates/executor/src/main.rs && \
+    mkdir -p crates/executor/benches && echo "fn main(){}" > crates/executor/benches/context_clone.rs && \
+    mkdir -p crates/sensor/src && echo "fn main(){}" > crates/sensor/src/main.rs && \
+    mkdir -p crates/core-timer-sensor/src && echo "fn main(){}" > crates/core-timer-sensor/src/main.rs && \
+    mkdir -p crates/worker/src && echo "fn main(){}" > crates/worker/src/main.rs && \
+    mkdir -p crates/notifier/src && echo "fn main(){}" > crates/notifier/src/main.rs && \
+    mkdir -p crates/cli/src && echo "fn main(){}" > crates/cli/src/main.rs
+
+# Download all dependencies (cached unless Cargo.toml/Cargo.lock change).
+# The cargo registry and git checkouts live in BuildKit cache mounts so they
+# persist between builds; sharing=shared allows concurrent builds to use them.
+RUN --mount=type=cache,target=/usr/local/cargo/registry,sharing=shared \
+    --mount=type=cache,target=/usr/local/cargo/git,sharing=shared \
+    cargo fetch
+
+# Now copy the real source code, SQLx metadata, and migrations
+COPY .sqlx/ ./.sqlx/
+COPY migrations/ ./migrations/
+COPY crates/ ./crates/
+
+# Build the entire workspace in release mode.
+# All binaries are compiled together, sharing dependency compilation.
+# target cache uses sharing=locked so concurrent service builds serialize
+# writes to the shared compilation cache instead of corrupting it.
+# `-j 4` caps cargo at four parallel jobs.
+# /build/target is a cache mount and is not part of the resulting layer, so
+# the sensor binary is copied out to /build within the same RUN.
+RUN --mount=type=cache,target=/usr/local/cargo/registry,sharing=shared \
+    --mount=type=cache,target=/usr/local/cargo/git,sharing=shared \
+    --mount=type=cache,target=/build/target,sharing=locked \
+    cargo build --release --workspace --bins -j 4 && \
+    cp /build/target/release/attune-sensor /build/attune-sensor
+
+# Verify the binary was built: `ls` fails the build if the file is missing,
+# `file` prints the binary's type into the build log.
+RUN ls -lh /build/attune-sensor && \
+    file /build/attune-sensor
+
+# ============================================================================
+# Stage 2a: Base Sensor (Native sensors only)
+# Runtime capabilities: native binary sensors
+# ============================================================================
+FROM debian:${DEBIAN_VERSION}-slim AS sensor-base
+
+# Runtime packages: CA certificates, the OpenSSL 3 shared library, curl, bash
+# and procps. No language interpreters are installed in this target.
+RUN apt-get update && apt-get install -y \
+    ca-certificates \
+    libssl3 \
+    curl \
+    bash \
+    procps \
+    && rm -rf /var/lib/apt/lists/*
+
+# Unprivileged `attune` user (UID 1000) that owns /opt/attune, including the
+# packs, logs and runtime_envs directories.
+RUN useradd -m -u 1000 attune && \
+    mkdir -p /opt/attune/packs /opt/attune/logs /opt/attune/runtime_envs && \
+    chown -R attune:attune /opt/attune
+
+WORKDIR /opt/attune
+
+# Sensor binary from the builder stage, the Docker config (installed at the
+# path ATTUNE_CONFIG points to below), and the migrations directory.
+COPY --from=builder /build/attune-sensor /usr/local/bin/attune-sensor
+COPY config.docker.yaml ./config.yaml
+COPY migrations/ ./migrations/
+
+USER attune
+
+ENV RUST_LOG=info
+ENV ATTUNE_CONFIG=/opt/attune/config.yaml
+
+# `kill -0 1` sends no signal; it only tests that PID 1 exists and may be
+# signalled by this user. This is a liveness check on the container's main
+# process, not a functional check of the sensor service.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=20s --retries=3 \
+    CMD kill -0 1 || exit 1
+
+CMD ["/usr/local/bin/attune-sensor"]
+
+# ============================================================================
+# Stage 2b: Full Sensor (Native + Python + Node.js sensors)
+# Runtime capabilities: native, python, node
+#
+# Uses debian-slim + apt python3 + NodeSource node so that interpreter
+# paths (/usr/bin/python3, /usr/bin/node) are identical to the worker
+# containers. This avoids broken symlinks and path mismatches when
+# sensors and workers share the runtime_envs volume.
+# ============================================================================
+FROM debian:${DEBIAN_VERSION}-slim AS sensor-full
+
+# Runtime packages (same set as sensor-base) plus the Python toolchain:
+# python3, pip, venv support, and build-essential.
+RUN apt-get update && apt-get install -y \
+    ca-certificates \
+    libssl3 \
+    curl \
+    bash \
+    build-essential \
+    python3 \
+    python3-pip \
+    python3-venv \
+    procps \
+    && rm -rf /var/lib/apt/lists/*
+
+# Re-declare the global build arg inside this stage. An ARG declared before
+# the first FROM is only visible to FROM lines; without this line
+# ${NODE_VERSION} expands to an empty string in the RUN below and the
+# NodeSource URL becomes `setup_.x`. The default value is inherited from the
+# global declaration at the top of the file.
+ARG NODE_VERSION
+
+# Install Node.js from NodeSource (same method and version as workers).
+# The setup script is downloaded to a file and then executed instead of being
+# piped into bash: in a `curl | bash` pipeline under /bin/sh the exit status
+# is bash's, so a failed download would not stop the build at that point.
+RUN curl -fsSL "https://deb.nodesource.com/setup_${NODE_VERSION}.x" -o /tmp/nodesource_setup.sh && \
+    bash /tmp/nodesource_setup.sh && \
+    rm -f /tmp/nodesource_setup.sh && \
+    apt-get install -y nodejs && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create python symlink for convenience
+RUN ln -sf /usr/bin/python3 /usr/bin/python
+
+# Install common Python packages used by sensor scripts.
+# Use --break-system-packages for Debian 12+ pip-in-system-python restrictions.
+# The requirement specifiers MUST be quoted: unquoted, the shell parses
+# `>=2.31.0` as a redirection of stdout to a file named `=2.31.0`, which
+# silently drops the version constraint and litters the working directory.
+RUN pip3 install --no-cache-dir --break-system-packages \
+    "requests>=2.31.0" \
+    "pyyaml>=6.0" \
+    "jinja2>=3.1.0" \
+    "python-dateutil>=2.8.0"
+
+# Unprivileged `attune` user (UID 1000) that owns /opt/attune, including the
+# packs, logs and runtime_envs directories.
+RUN useradd -m -u 1000 attune && \
+    mkdir -p /opt/attune/packs /opt/attune/logs /opt/attune/runtime_envs && \
+    chown -R attune:attune /opt/attune
+
+WORKDIR /opt/attune
+
+# Sensor binary from the builder stage, the Docker config (installed at the
+# path ATTUNE_CONFIG points to below), and the migrations directory.
+COPY --from=builder /build/attune-sensor /usr/local/bin/attune-sensor
+COPY config.docker.yaml ./config.yaml
+COPY migrations/ ./migrations/
+
+USER attune
+
+ENV RUST_LOG=info
+ENV ATTUNE_CONFIG=/opt/attune/config.yaml
+
+# `kill -0 1` sends no signal; it only tests that PID 1 exists and may be
+# signalled by this user. This is a liveness check on the container's main
+# process, not a functional check of the sensor service.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=20s --retries=3 \
+    CMD kill -0 1 || exit 1
+
+CMD ["/usr/local/bin/attune-sensor"]