[wip] universal workers

2026-03-21 07:32:11 -05:00
parent 0782675a2b
commit 8ba7e3bb84
59 changed files with 4971 additions and 34 deletions
--- a/charts/attune/templates/NOTES.txt
+++ b/charts/attune/templates/NOTES.txt
@@ -1,3 +1,23 @@
 1. Set `global.imageRegistry`, `global.imageNamespace`, and `global.imageTag` so the chart pulls the images published by the Gitea workflow.
 2. Set `web.config.apiUrl` and `web.config.wsUrl` to browser-reachable endpoints before exposing the web UI.
 3. The shared `packs`, `runtime_envs`, and `artifacts` PVCs default to `ReadWriteMany`; your cluster storage class must support RWX or you need to override those claims.
+{{- if .Values.agentWorkers }}
+
+Agent-based workers enabled:
+{{- /* hasKey (not `default`) so an explicit `replicas: 0` is reported as 0, not 1. */ -}}
+{{- range .Values.agentWorkers }}
+  - {{ .name }}: image={{ .image }}, replicas={{ ternary .replicas 1 (hasKey . "replicas") }}, runtimes={{ if .runtimes }}{{ join "," .runtimes }}{{ else }}auto-detect{{ end }}
+{{- end }}
+
+Each agent worker uses an init container to copy the statically-linked
+attune-agent binary into the worker pod via an emptyDir volume. The agent
+auto-detects available runtimes in the container and registers with Attune.
+
+To add more agent workers, append entries to `agentWorkers` in your values:
+
+  agentWorkers:
+    - name: my-runtime
+      image: my-org/my-image:latest
+      replicas: 1
+      runtimes: []  # auto-detect
+{{- end }}
--- a/charts/attune/templates/agent-workers.yaml
+++ b/charts/attune/templates/agent-workers.yaml
@@ -0,0 +1,135 @@
+{{- range .Values.agentWorkers }}
+---
+apiVersion: apps/v1
+kind: Deployment  # one Deployment is rendered per agentWorkers entry
+metadata:
+  name: {{ include "attune.fullname" $ }}-agent-worker-{{ .name }}
+  labels:
+    {{- include "attune.labels" $ | nindent 4 }}
+    app.kubernetes.io/component: agent-worker-{{ .name }}
+spec:
+  replicas: {{ ternary .replicas 1 (hasKey . "replicas") }}  # hasKey keeps an explicit 0 (scale to zero); `default` would coerce it to 1
+  selector:
+    matchLabels:
+      {{- include "attune.selectorLabels" $ | nindent 6 }}
+      app.kubernetes.io/component: agent-worker-{{ .name }}  # per-worker component label keeps selectors of different entries disjoint
+  template:
+    metadata:
+      labels:
+        {{- include "attune.selectorLabels" $ | nindent 8 }}
+        app.kubernetes.io/component: agent-worker-{{ .name }}
+    spec:
+      {{- if $.Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml $.Values.global.imagePullSecrets | nindent 8 }}
+      {{- end }}
+      {{- if .runtimeClassName }}
+      runtimeClassName: {{ .runtimeClassName | quote }}
+      {{- end }}
+      {{- if .nodeSelector }}
+      nodeSelector:
+        {{- toYaml .nodeSelector | nindent 8 }}
+      {{- end }}
+      {{- if .tolerations }}
+      tolerations:
+        {{- toYaml .tolerations | nindent 8 }}
+      {{- end }}
+      {{- /* Test key presence rather than truthiness: an explicit
+             stopGracePeriod of 0 (kill immediately) must not silently
+             fall back to the 45-second default. */}}
+      {{- $grace := ternary .stopGracePeriod 45 (hasKey . "stopGracePeriod") }}
+      terminationGracePeriodSeconds: {{ $grace }}
+      initContainers:  # run sequentially; each must exit successfully before the worker container starts
+        - name: agent-loader  # copies the attune-agent binary into the shared agent-bin volume
+          image: {{ include "attune.image" (dict "root" $ "image" $.Values.images.agent) }}
+          imagePullPolicy: {{ $.Values.images.agent.pullPolicy }}
+          command: ["cp", "/usr/local/bin/attune-agent", "/opt/attune/agent/attune-agent"]
+          volumeMounts:
+            - name: agent-bin
+              mountPath: /opt/attune/agent
+        - name: wait-for-schema  # polls every 2s until the <DB_SCHEMA>.identity relation exists
+          image: postgres:16-alpine
+          command: ["/bin/sh", "-ec"]
+          args:
+            - |
+              until PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -tAc "SELECT to_regclass('${DB_SCHEMA}.identity')" | grep -q identity; do
+                echo "waiting for schema";
+                sleep 2;
+              done
+          envFrom:  # NOTE(review): the DB_* variables read by the loop are assumed to be keys of this Secret — confirm
+            - secretRef:
+                name: {{ include "attune.secretName" $ }}
+        - name: wait-for-packs  # polls every 2s until the core pack's pack.yaml is present on the packs volume
+          image: busybox:1.36
+          command: ["/bin/sh", "-ec"]
+          args:
+            - |
+              until [ -f /opt/attune/packs/core/pack.yaml ]; do
+                echo "waiting for packs";
+                sleep 2;
+              done
+          volumeMounts:
+            - name: packs
+              mountPath: /opt/attune/packs
+      containers:
+        - name: worker  # user-supplied image; runs the agent binary staged in the agent-bin volume
+          image: {{ .image | quote }}
+          {{- if .imagePullPolicy }}
+          imagePullPolicy: {{ .imagePullPolicy }}
+          {{- end }}
+          command: ["/opt/attune/agent/attune-agent"]
+          envFrom:
+            - secretRef:
+                name: {{ include "attune.secretName" $ }}
+          env:
+            - name: ATTUNE_CONFIG
+              value: /opt/attune/config.yaml
+            - name: ATTUNE__DATABASE__SCHEMA
+              value: {{ $.Values.database.schema | quote }}
+            - name: ATTUNE_WORKER_TYPE
+              value: container
+            - name: ATTUNE_WORKER_NAME
+              value: agent-worker-{{ .name }}-01  # NOTE(review): identical on every replica of this Deployment — confirm the platform tolerates duplicate worker names
+            - name: RUST_LOG
+              value: {{ .logLevel | default "info" | quote }}  # quoted so a level such as "off" stays a string instead of a YAML boolean
+            - name: ATTUNE_API_URL
+              value: http://{{ include "attune.apiServiceName" $ }}:{{ $.Values.api.service.port }}
+            {{- if .runtimes }}
+            - name: ATTUNE_WORKER_RUNTIMES
+              value: {{ join "," .runtimes | quote }}
+            {{- end }}
+            {{- if .env }}
+            {{- toYaml .env | nindent 12 }}
+            {{- end }}
+          resources:
+            {{- toYaml (.resources | default dict) | nindent 12 }}
+          volumeMounts:
+            - name: agent-bin
+              mountPath: /opt/attune/agent
+              readOnly: true
+            - name: config
+              mountPath: /opt/attune/config.yaml
+              subPath: config.yaml
+            - name: packs
+              mountPath: /opt/attune/packs
+              readOnly: true
+            - name: runtime-envs
+              mountPath: /opt/attune/runtime_envs
+            - name: artifacts
+              mountPath: /opt/attune/artifacts
+      volumes:
+        - name: agent-bin  # per-pod scratch volume: written by the agent-loader init container, mounted read-only by the worker
+          emptyDir: {}
+        - name: config  # only the config.yaml key is mounted into the worker (via subPath)
+          configMap:
+            name: {{ include "attune.fullname" $ }}-config
+        - name: packs  # NOTE(review): this and the two claims below are referenced by name only; assumed to be created elsewhere in the chart
+          persistentVolumeClaim:
+            claimName: {{ include "attune.fullname" $ }}-packs
+        - name: runtime-envs
+          persistentVolumeClaim:
+            claimName: {{ include "attune.fullname" $ }}-runtime-envs
+        - name: artifacts
+          persistentVolumeClaim:
+            claimName: {{ include "attune.fullname" $ }}-artifacts
+{{- end }}
--- a/charts/attune/values.yaml
+++ b/charts/attune/values.yaml
@@ -131,6 +131,10 @@ images:
    repository: attune-init-packs
    tag: ""
    pullPolicy: IfNotPresent
+  agent:  # image shipping /usr/local/bin/attune-agent; the agent-loader init container copies the binary out of it
+    repository: attune-agent
+    tag: ""  # NOTE(review): presumably resolved by the attune.image helper (e.g. from global.imageTag) — confirm
+    pullPolicy: IfNotPresent

 jobs:
  migrations:
@@ -191,3 +195,57 @@ web:
          - path: /
            pathType: Prefix
    tls: []
+
+# Agent-based workers
+# These deploy the universal worker agent into any container image.
+# The agent auto-detects available runtimes (python, ruby, node, etc.)
+# and registers with the Attune platform.
+#
+# Each entry creates a separate Deployment with an init container that copies
+# the statically-linked agent binary into an emptyDir volume shared with the worker container.
+#
+# Supported fields per worker:
+#   name             (required) - Unique name for this worker; used in the Deployment name and component label, so keep it DNS-label safe
+#   image            (required) - Container image with your desired runtime(s)
+#   replicas         (optional) - Number of pod replicas (default: 1)
+#   runtimes         (optional) - List of runtimes to expose; omitted or [] = auto-detect
+#   resources        (optional) - Kubernetes resource requests/limits
+#   env              (optional) - Extra environment variables, appended as-is to the container env list (e.g. {name, value})
+#   imagePullPolicy  (optional) - Pull policy for the worker image
+#   logLevel         (optional) - RUST_LOG level (default: "info")
+#   runtimeClassName (optional) - Kubernetes RuntimeClass (e.g., "nvidia" for GPU)
+#   nodeSelector     (optional) - Node selector map for pod scheduling
+#   tolerations      (optional) - Tolerations list for pod scheduling
+#   stopGracePeriod  (optional) - Termination grace period in seconds (default: 45)
+#
+# Examples:
+#   agentWorkers:
+#     - name: ruby
+#       image: ruby:3.3
+#       replicas: 2
+#       runtimes: []  # auto-detect
+#       resources: {}
+#
+#     - name: python-gpu
+#       image: nvidia/cuda:12.3.1-runtime-ubuntu22.04
+#       replicas: 1
+#       runtimes: [python, shell]
+#       runtimeClassName: nvidia
+#       nodeSelector:
+#         gpu: "true"
+#       tolerations:
+#         - key: nvidia.com/gpu
+#           operator: Exists
+#           effect: NoSchedule
+#       resources:
+#         limits:
+#           nvidia.com/gpu: 1
+#
+#     - name: custom
+#       image: my-org/my-custom-image:latest
+#       replicas: 1
+#       runtimes: []
+#       env:
+#         - name: MY_CUSTOM_VAR
+#           value: my-value
+agentWorkers: []