[wip] universal workers

This commit is contained in:
2026-03-21 07:32:11 -05:00
parent 0782675a2b
commit 8ba7e3bb84
59 changed files with 4971 additions and 34 deletions

View File

@@ -1,3 +1,23 @@
1. Set `global.imageRegistry`, `global.imageNamespace`, and `global.imageTag` so the chart pulls the images published by the Gitea workflow.
2. Set `web.config.apiUrl` and `web.config.wsUrl` to browser-reachable endpoints before exposing the web UI.
3. The shared `packs`, `runtime_envs`, and `artifacts` PVCs default to `ReadWriteMany`; your cluster storage class must support RWX or you need to override those claims.
{{- /*
  Post-install notes for agent-based workers; rendered only when
  .Values.agentWorkers is non-empty. One summary line is printed per worker.
  `default` treats 0 as empty, so an unset replica count is detected with
  kindIs "invalid" instead; an explicit `replicas: 0` is then reported as 0.
*/}}
{{- if .Values.agentWorkers }}
Agent-based workers enabled:
{{- range .Values.agentWorkers }}
  - {{ .name }}: image={{ .image }}, replicas={{ if kindIs "invalid" .replicas }}1{{ else }}{{ .replicas }}{{ end }}
    {{- if .runtimes }} runtimes={{ join "," .runtimes }}{{ else }} runtimes=auto-detect{{ end }}
{{- end }}
Each agent worker uses an init container to copy the statically-linked
attune-agent binary into the worker pod via an emptyDir volume. The agent
auto-detects available runtimes in the container and registers with Attune.
To add more agent workers, append entries to `agentWorkers` in your values:
  agentWorkers:
    - name: my-runtime
      image: my-org/my-image:latest
      replicas: 1
      runtimes: []  # auto-detect
{{- end }}

View File

@@ -0,0 +1,135 @@
{{- /*
  Agent-based workers: renders one Deployment per entry in .Values.agentWorkers.

  Init containers run in order before the worker starts:
    1. agent-loader    - copies /usr/local/bin/attune-agent from the agent image
                         into the shared `agent-bin` emptyDir (the agent image
                         must therefore provide a `cp` executable).
    2. wait-for-schema - polls PostgreSQL until `${DB_SCHEMA}.identity` resolves.
                         The DB_* variables are presumably keys of the chart
                         secret loaded via envFrom - confirm.
    3. wait-for-packs  - polls the packs volume until core/pack.yaml exists.
  The worker container then executes the copied binary inside the user-supplied
  image (`.image`).

  Inside the range `.` is the worker entry and `$` is the chart root.
*/ -}}
{{- range .Values.agentWorkers }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "attune.fullname" $ }}-agent-worker-{{ .name }}
  labels:
    {{- include "attune.labels" $ | nindent 4 }}
    app.kubernetes.io/component: agent-worker-{{ .name }}
spec:
  {{- /* `default` treats 0 as empty; test for an unset value so `replicas: 0` can scale a worker down. */}}
  replicas: {{ if kindIs "invalid" .replicas }}1{{ else }}{{ .replicas }}{{ end }}
  selector:
    matchLabels:
      {{- include "attune.selectorLabels" $ | nindent 6 }}
      app.kubernetes.io/component: agent-worker-{{ .name }}
  template:
    metadata:
      labels:
        {{- include "attune.selectorLabels" $ | nindent 8 }}
        app.kubernetes.io/component: agent-worker-{{ .name }}
    spec:
      {{- if $.Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml $.Values.global.imagePullSecrets | nindent 8 }}
      {{- end }}
      {{- if .runtimeClassName }}
      runtimeClassName: {{ .runtimeClassName | quote }}
      {{- end }}
      {{- if .nodeSelector }}
      nodeSelector:
        {{- toYaml .nodeSelector | nindent 8 }}
      {{- end }}
      {{- if .tolerations }}
      tolerations:
        {{- toYaml .tolerations | nindent 8 }}
      {{- end }}
      terminationGracePeriodSeconds: {{ .stopGracePeriod | default 45 }}
      initContainers:
        - name: agent-loader
          image: {{ include "attune.image" (dict "root" $ "image" $.Values.images.agent) }}
          imagePullPolicy: {{ $.Values.images.agent.pullPolicy }}
          command: ["cp", "/usr/local/bin/attune-agent", "/opt/attune/agent/attune-agent"]
          volumeMounts:
            - name: agent-bin
              mountPath: /opt/attune/agent
        - name: wait-for-schema
          image: postgres:16-alpine
          command: ["/bin/sh", "-ec"]
          args:
            - |
              until PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -tAc "SELECT to_regclass('${DB_SCHEMA}.identity')" | grep -q identity; do
                echo "waiting for schema";
                sleep 2;
              done
          envFrom:
            - secretRef:
                name: {{ include "attune.secretName" $ }}
        - name: wait-for-packs
          image: busybox:1.36
          command: ["/bin/sh", "-ec"]
          args:
            - |
              until [ -f /opt/attune/packs/core/pack.yaml ]; do
                echo "waiting for packs";
                sleep 2;
              done
          volumeMounts:
            - name: packs
              mountPath: /opt/attune/packs
      containers:
        - name: worker
          {{- /* Quoted so an unusual image reference can never be re-typed by the YAML parser. */}}
          image: {{ .image | quote }}
          {{- if .imagePullPolicy }}
          imagePullPolicy: {{ .imagePullPolicy }}
          {{- end }}
          {{- /* The binary placed in the shared emptyDir by the agent-loader init container. */}}
          command: ["/opt/attune/agent/attune-agent"]
          envFrom:
            - secretRef:
                name: {{ include "attune.secretName" $ }}
          env:
            - name: ATTUNE_CONFIG
              value: /opt/attune/config.yaml
            - name: ATTUNE__DATABASE__SCHEMA
              value: {{ $.Values.database.schema | quote }}
            - name: ATTUNE_WORKER_TYPE
              value: container
            {{- /* NOTE(review): every replica of this Deployment gets the same worker name (suffix "-01"). If Attune requires unique worker names, derive it from metadata.name via the downward API - confirm. */}}
            - name: ATTUNE_WORKER_NAME
              value: agent-worker-{{ .name }}-01
            - name: ATTUNE_API_URL
              value: http://{{ include "attune.apiServiceName" $ }}:{{ $.Values.api.service.port }}
            {{- /* Quoted: a level such as `off` would otherwise render as a YAML boolean, which is not a valid env value. */}}
            - name: RUST_LOG
              value: {{ .logLevel | default "info" | quote }}
            {{- if .runtimes }}
            - name: ATTUNE_WORKER_RUNTIMES
              value: {{ join "," .runtimes | quote }}
            {{- end }}
            {{- if .env }}
            {{- toYaml .env | nindent 12 }}
            {{- end }}
          resources:
            {{- toYaml (.resources | default dict) | nindent 12 }}
          volumeMounts:
            - name: agent-bin
              mountPath: /opt/attune/agent
              readOnly: true
            - name: config
              mountPath: /opt/attune/config.yaml
              subPath: config.yaml
            - name: packs
              mountPath: /opt/attune/packs
              readOnly: true
            - name: runtime-envs
              mountPath: /opt/attune/runtime_envs
            - name: artifacts
              mountPath: /opt/attune/artifacts
      volumes:
        - name: agent-bin
          emptyDir: {}
        - name: config
          configMap:
            name: {{ include "attune.fullname" $ }}-config
        - name: packs
          persistentVolumeClaim:
            claimName: {{ include "attune.fullname" $ }}-packs
        - name: runtime-envs
          persistentVolumeClaim:
            claimName: {{ include "attune.fullname" $ }}-runtime-envs
        - name: artifacts
          persistentVolumeClaim:
            claimName: {{ include "attune.fullname" $ }}-artifacts
{{- end }}

View File

@@ -131,6 +131,10 @@ images:
repository: attune-init-packs
tag: ""
pullPolicy: IfNotPresent
# Image that ships the attune-agent binary. The `agent-loader` init container
# of every agent-worker Deployment runs this image and copies
# /usr/local/bin/attune-agent out of it into the worker pod.
# NOTE(review): an empty tag presumably falls back to global.imageTag -
# confirm against the attune.image helper.
agent:
repository: attune-agent
tag: ""
pullPolicy: IfNotPresent
jobs:
migrations:
@@ -191,3 +195,57 @@ web:
- path: /
pathType: Prefix
tls: []
# Agent-based workers
# These deploy the universal worker agent into any container image.
# The agent auto-detects available runtimes (python, ruby, node, etc.)
# and registers with the Attune platform.
#
# Each entry creates a separate Deployment with an init container that
# copies the statically-linked agent binary into the worker container.
#
# Supported fields per worker:
#   name             (required) - Unique name for this worker. It is embedded in
#                                 the Deployment name, the component label and
#                                 the worker name, so keep it DNS-label safe
#                                 (lowercase alphanumerics and '-').
#   image            (required) - Container image with your desired runtime(s)
#   replicas         (optional) - Number of pod replicas (default: 1)
#   runtimes         (optional) - List of runtimes to expose, passed to the agent
#                                 as ATTUNE_WORKER_RUNTIMES; [] = auto-detect
#   resources        (optional) - Kubernetes resource requests/limits
#   env              (optional) - Extra environment variables (list of
#                                 {name, value}), appended as-is to the worker
#                                 container's env list
#   imagePullPolicy  (optional) - Pull policy for the worker image
#   logLevel         (optional) - RUST_LOG level (default: "info")
#   runtimeClassName (optional) - Kubernetes RuntimeClass (e.g., "nvidia" for GPU)
#   nodeSelector     (optional) - Node selector map for pod scheduling
#   tolerations      (optional) - Tolerations list for pod scheduling
#   stopGracePeriod  (optional) - Termination grace period in seconds, rendered
#                                 as terminationGracePeriodSeconds (default: 45)
#
# Examples:
#   agentWorkers:
#     - name: ruby
#       image: ruby:3.3
#       replicas: 2
#       runtimes: []  # auto-detect
#       resources: {}
#
#     - name: python-gpu
#       image: nvidia/cuda:12.3.1-runtime-ubuntu22.04
#       replicas: 1
#       runtimes: [python, shell]
#       runtimeClassName: nvidia
#       nodeSelector:
#         gpu: "true"
#       tolerations:
#         - key: nvidia.com/gpu
#           operator: Exists
#           effect: NoSchedule
#       resources:
#         limits:
#           nvidia.com/gpu: 1
#
#     - name: custom
#       image: my-org/my-custom-image:latest
#       replicas: 1
#       runtimes: []
#       env:
#         - name: MY_CUSTOM_VAR
#           value: my-value
agentWorkers: []