agent workers

This commit is contained in:
2026-03-21 10:05:02 -05:00
parent 4d5a3b1bf5
commit d4c6240485
10 changed files with 280 additions and 152 deletions

View File

@@ -89,6 +89,7 @@ hmac = "0.12"
sha1 = "0.10"
sha2 = { workspace = true }
hex = "0.4"
subtle = "2.6"
# OpenAPI/Swagger
utoipa = { workspace = true, features = ["axum_extras"] }

View File

@@ -14,6 +14,7 @@ use axum::{
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use subtle::ConstantTimeEq;
use tokio::fs;
use tokio_util::io::ReaderStream;
use utoipa::{IntoParams, ToSchema};
@@ -83,7 +84,18 @@ fn validate_token(
let expected_token = match expected_token {
Some(t) => t,
None => return Ok(()), // No token configured, allow access
None => {
use std::sync::Once;
static WARN_ONCE: Once = Once::new();
WARN_ONCE.call_once(|| {
tracing::warn!(
"Agent binary download endpoint has no bootstrap_token configured. \
Anyone with network access to the API can download the agent binary. \
Set agent.bootstrap_token in config to restrict access."
);
});
return Ok(());
}
};
// Check X-Agent-Token header first, then query param
@@ -94,7 +106,7 @@ fn validate_token(
.or_else(|| query_token.clone());
match provided_token {
Some(ref t) if t == expected_token => Ok(()),
Some(ref t) if bool::from(t.as_bytes().ct_eq(expected_token.as_bytes())) => Ok(()),
Some(_) => Err((
StatusCode::UNAUTHORIZED,
Json(serde_json::json!({
@@ -152,15 +164,19 @@ pub async fn download_agent_binary(
let binary_dir = std::path::Path::new(&agent_config.binary_dir);
// Try arch-specific binary first, then fall back to generic name
// Try arch-specific binary first, then fall back to generic name.
// IMPORTANT: The generic `attune-agent` binary is only safe to serve for
// x86_64 requests, because the current build pipeline produces an
// x86_64-unknown-linux-musl binary. Serving it for aarch64/arm64 would
// give the caller an incompatible executable (exec format error).
let arch_specific = binary_dir.join(format!("attune-agent-{}", arch));
let generic = binary_dir.join("attune-agent");
let binary_path = if arch_specific.exists() {
arch_specific
} else if generic.exists() {
} else if arch == "x86_64" && generic.exists() {
tracing::debug!(
"Arch-specific binary not found at {:?}, falling back to {:?}",
"Arch-specific binary not found at {:?}, falling back to generic {:?} (safe for x86_64)",
arch_specific,
generic
);
@@ -269,12 +285,14 @@ pub async fn agent_info(
let arch_specific = binary_dir.join(format!("attune-agent-{}", arch));
let generic = binary_dir.join("attune-agent");
// Only fall back to the generic binary for x86_64, since the build
// pipeline currently produces x86_64-only generic binaries.
let (available, size_bytes) = if arch_specific.exists() {
match fs::metadata(&arch_specific).await {
Ok(m) => (true, m.len()),
Err(_) => (false, 0),
}
} else if generic.exists() {
} else if *arch == "x86_64" && generic.exists() {
match fs::metadata(&generic).await {
Ok(m) => (true, m.len()),
Err(_) => (false, 0),

View File

@@ -34,21 +34,22 @@ use tracing::{debug, info, warn};
/// use attune_common::runtime_detection::normalize_runtime_name;
/// assert_eq!(normalize_runtime_name("node.js"), "node");
/// assert_eq!(normalize_runtime_name("nodejs"), "node");
/// assert_eq!(normalize_runtime_name("python3"), "python");
/// assert_eq!(normalize_runtime_name("shell"), "shell");
/// assert_eq!(normalize_runtime_name("Python3"), "python");
/// assert_eq!(normalize_runtime_name("Shell"), "shell");
/// ```
pub fn normalize_runtime_name(name: &str) -> &str {
match name {
"node" | "nodejs" | "node.js" => "node",
"python" | "python3" => "python",
"bash" | "sh" | "shell" => "shell",
"native" | "builtin" | "standalone" => "native",
"ruby" | "rb" => "ruby",
"go" | "golang" => "go",
"java" | "jdk" | "openjdk" => "java",
"perl" | "perl5" => "perl",
"r" | "rscript" => "r",
other => other,
pub fn normalize_runtime_name(name: &str) -> String {
    // Alias matching is case-insensitive: fold the input to ASCII lowercase
    // once and match against the lowercase alias table below.
    let folded = name.to_ascii_lowercase();

    let canonical = match folded.as_str() {
        "node" | "nodejs" | "node.js" => "node",
        "python" | "python3" => "python",
        "bash" | "sh" | "shell" => "shell",
        "native" | "builtin" | "standalone" => "native",
        "ruby" | "rb" => "ruby",
        "go" | "golang" => "go",
        "java" | "jdk" | "openjdk" => "java",
        "perl" | "perl5" => "perl",
        "r" | "rscript" => "r",
        // Unknown names are passed through unchanged apart from lowercasing.
        _ => return folded,
    };

    canonical.to_string()
}
@@ -57,9 +58,7 @@ pub fn normalize_runtime_name(name: &str) -> &str {
/// Both sides are lowercased and then normalized before comparison so that,
/// e.g., a filter value of `"node"` matches a database runtime name `"Node.js"`.
pub fn runtime_matches_filter(rt_name: &str, filter_entry: &str) -> bool {
let rt_lower = rt_name.to_ascii_lowercase();
let filter_lower = filter_entry.to_ascii_lowercase();
normalize_runtime_name(&rt_lower) == normalize_runtime_name(&filter_lower)
normalize_runtime_name(rt_name) == normalize_runtime_name(filter_entry)
}
/// Check if a runtime name matches any entry in a filter list.
@@ -398,6 +397,25 @@ mod tests {
assert_eq!(normalize_runtime_name("custom_runtime"), "custom_runtime");
}
#[test]
fn test_normalize_runtime_name_case_insensitive() {
    // Each pair is (mixed-case input, expected canonical lowercase name).
    let cases: [(&str, &str); 15] = [
        ("Node", "node"),
        ("NodeJS", "node"),
        ("Node.js", "node"),
        ("Python", "python"),
        ("Python3", "python"),
        ("Shell", "shell"),
        ("BASH", "shell"),
        ("Ruby", "ruby"),
        ("Go", "go"),
        ("GoLang", "go"),
        ("Java", "java"),
        ("JDK", "java"),
        ("Perl", "perl"),
        ("R", "r"),
        // Names with no alias entry are still lowercased.
        ("Custom_Runtime", "custom_runtime"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(normalize_runtime_name(input), expected);
    }
}
#[test]
fn test_runtime_matches_filter() {
// Node.js DB name lowercased vs worker filter "node"

View File

@@ -60,8 +60,7 @@ struct Args {
detect_only: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
fn main() -> Result<()> {
// Install HMAC-only JWT crypto provider (must be before any token operations)
attune_common::auth::install_crypto_provider();
@@ -75,7 +74,11 @@ async fn main() -> Result<()> {
info!("Starting Attune Universal Worker Agent");
// --- Phase 1: Runtime auto-detection ---
// --- Phase 1: Runtime auto-detection (synchronous, before tokio runtime) ---
//
// All std::env::set_var calls MUST happen here, before we create the tokio
// runtime, to avoid undefined behavior from mutating the process environment
// while other threads are running.
//
// Check if the user has explicitly set ATTUNE_WORKER_RUNTIMES. If so, skip
// auto-detection and respect their override. Otherwise, probe the system for
@@ -83,15 +86,57 @@ async fn main() -> Result<()> {
let runtimes_override = std::env::var("ATTUNE_WORKER_RUNTIMES").ok();
// Holds the detected runtimes so we can pass them to WorkerService later.
// Populated only when auto-detection actually runs (no env var override).
// Populated in both branches: auto-detection and override (filtered to
// match the override list).
let mut agent_detected_runtimes: Option<Vec<attune_worker::runtime_detect::DetectedRuntime>> =
None;
if let Some(ref override_value) = runtimes_override {
info!(
"ATTUNE_WORKER_RUNTIMES already set (override), skipping auto-detection: {}",
"ATTUNE_WORKER_RUNTIMES already set (override): {}",
override_value
);
// Even with an explicit override, run detection so we can register
// the overridden runtimes in the database and advertise accurate
// capability metadata (binary paths, versions). Without this, the
// worker would accept work for runtimes that were never registered
// locally — e.g. ruby/go on a fresh deployment.
info!("Running auto-detection for override-specified runtimes...");
let detected = detect_runtimes();
// Filter detected runtimes to only those matching the override list,
// so we don't register runtimes the user explicitly excluded.
let override_names: Vec<&str> = override_value.split(',').map(|s| s.trim()).collect();
let filtered: Vec<_> = detected
.into_iter()
.filter(|rt| {
let normalized = attune_common::runtime_detection::normalize_runtime_name(&rt.name);
override_names.iter().any(|ov| {
attune_common::runtime_detection::normalize_runtime_name(ov) == normalized
})
})
.collect();
if filtered.is_empty() {
warn!(
"None of the override runtimes ({}) were found on this system! \
The agent may not be able to execute any actions.",
override_value
);
} else {
info!(
"Matched {} override runtime(s) to detected interpreters:",
filtered.len()
);
for rt in &filtered {
match &rt.version {
Some(ver) => info!(" ✓ {} — {} ({})", rt.name, rt.path, ver),
None => info!(" ✓ {} — {}", rt.name, rt.path),
}
}
agent_detected_runtimes = Some(filtered);
}
} else {
info!("No ATTUNE_WORKER_RUNTIMES override — running auto-detection...");
@@ -113,10 +158,7 @@ async fn main() -> Result<()> {
let runtime_list: Vec<&str> = detected.iter().map(|r| r.name.as_str()).collect();
let runtime_csv = runtime_list.join(",");
info!("Setting ATTUNE_WORKER_RUNTIMES={}", runtime_csv);
// SAFETY: std::env::set_var is safe in Rust 2021 edition. If upgrading
// to edition 2024+, this call will need to be wrapped in `unsafe {}`.
// It's sound here because detection runs single-threaded before tokio
// starts any worker tasks.
// Safe: no other threads are running yet (tokio runtime not started).
std::env::set_var("ATTUNE_WORKER_RUNTIMES", &runtime_csv);
// Stash for Phase 2: pass to WorkerService for rich capability registration
@@ -124,7 +166,7 @@ async fn main() -> Result<()> {
}
}
// --- Handle --detect-only ---
// --- Handle --detect-only (synchronous, no async runtime needed) ---
if args.detect_only {
if runtimes_override.is_some() {
// User set an override, but --detect-only should show what's actually
@@ -147,12 +189,24 @@ async fn main() -> Result<()> {
return Ok(());
}
// --- Phase 2: Load configuration ---
if let Some(config_path) = args.config {
// SAFETY: std::env::set_var is safe in Rust 2021 edition. See note above.
// --- Set config path env var (synchronous, before tokio runtime) ---
if let Some(ref config_path) = args.config {
// Safe: no other threads are running yet (tokio runtime not started).
std::env::set_var("ATTUNE_CONFIG", config_path);
}
// --- Build the tokio runtime and run the async portion ---
let runtime = tokio::runtime::Runtime::new()?;
runtime.block_on(async_main(args, agent_detected_runtimes))
}
/// The async portion of the agent entrypoint. Called from `main()` via
/// `runtime.block_on()` after all environment variable mutations are complete.
async fn async_main(
args: Args,
agent_detected_runtimes: Option<Vec<attune_worker::runtime_detect::DetectedRuntime>>,
) -> Result<()> {
// --- Phase 2: Load configuration ---
let mut config = Config::load()?;
config.validate()?;

View File

@@ -84,10 +84,10 @@ pub async fn auto_register_detected_runtimes(
// Check if a runtime with a matching name already exists in the DB.
// We normalize both sides for alias-aware comparison.
let already_exists = existing_runtimes.iter().any(|r| {
let db_name = r.name.to_ascii_lowercase();
normalize_runtime_name(&db_name) == canonical_name
});
// normalize_runtime_name lowercases internally, so no need to pre-lowercase.
let already_exists = existing_runtimes
.iter()
.any(|r| normalize_runtime_name(&r.name) == canonical_name);
if already_exists {
debug!(
@@ -194,7 +194,7 @@ pub async fn auto_register_detected_runtimes(
"Auto-detected {} runtime at {}",
detected_rt.name, detected_rt.path
)),
name: capitalize_runtime_name(canonical_name),
name: capitalize_runtime_name(&canonical_name),
distributions: build_minimal_distributions(detected_rt),
installation: None,
execution_config,
@@ -286,7 +286,7 @@ fn build_execution_config_from_template(
/// interpreter directly, without environment or dependency management.
fn build_minimal_execution_config(detected: &DetectedRuntime) -> serde_json::Value {
let canonical = normalize_runtime_name(&detected.name);
let file_ext = default_file_extension(canonical);
let file_ext = default_file_extension(&canonical);
let mut config = json!({
"interpreter": {

View File

@@ -24,9 +24,27 @@ use attune_common::models::runtime::{
};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex as StdMutex, OnceLock};
use tokio::process::Command;
use tracing::{debug, error, info, warn};
/// Registry of per-directory locks that serialize lazy environment setup.
///
/// When two executions for the same pack arrive concurrently (e.g. in agent
/// mode), both may see `!env_dir.exists()` and race to run
/// `setup_pack_environment`, which can corrupt the environment. Each
/// `env_dir` path maps to its own async mutex so that only one setup runs at
/// a time for a given directory.
///
/// The outer `std::sync::Mutex` guards only the map itself; the inner
/// `tokio::sync::Mutex` is the lock callers hold while the setup is awaited.
/// The registry is created lazily on first use via `OnceLock`.
///
/// NOTE(review): nothing visible here removes entries, so the map appears to
/// keep one entry per distinct `env_dir` for the life of the process — confirm
/// that this is acceptable for the expected number of packs.
static ENV_SETUP_LOCKS: OnceLock<StdMutex<HashMap<PathBuf, Arc<tokio::sync::Mutex<()>>>>> =
OnceLock::new();
/// Returns the async mutex that serializes environment setup for `env_dir`,
/// creating and registering it on first request.
///
/// Every call with the same path yields a clone of the same `Arc`, so callers
/// that lock the returned mutex exclude one another. The registry's own
/// `std::sync::Mutex` guard is dropped before this function returns, so it is
/// never held across an `.await`.
fn get_env_setup_lock(env_dir: &Path) -> Arc<tokio::sync::Mutex<()>> {
    let registry = ENV_SETUP_LOCKS.get_or_init(|| StdMutex::new(HashMap::new()));

    // A poisoned registry only means some thread panicked while holding the
    // guard. The map is insert-only, so its contents are still valid; recover
    // the guard rather than panicking, which would otherwise make every
    // subsequent execution fail at this point.
    let mut map = registry
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    map.entry(env_dir.to_path_buf())
        .or_insert_with(|| Arc::new(tokio::sync::Mutex::new(())))
        .clone()
}
/// Escapes `s` for embedding inside a single-quoted bash string.
///
/// Every `'` in the input is replaced by the four-character sequence `'\''`;
/// all other characters are copied through unchanged. The caller is expected
/// to supply the surrounding quotes.
fn bash_single_quote_escape(s: &str) -> String {
    // Replacement for one single quote: close the quoted section, emit a
    // backslash-escaped quote, then reopen the quoted section.
    const ESCAPED_QUOTE: &str = "'\\''";

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '\'' {
            escaped.push_str(ESCAPED_QUOTE);
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
@@ -620,111 +638,122 @@ impl Runtime for ProcessRuntime {
// create it on-demand. This is the primary code path for agent mode where
// proactive startup setup is skipped, but it also serves as a safety net
// for standard workers if the environment was somehow missed.
if effective_config.environment.is_some() && pack_dir.exists() && !env_dir.exists() {
info!(
"Runtime environment for pack '{}' not found at {}. \
Creating on first use (lazy setup).",
context.action_ref,
env_dir.display(),
);
// Acquire a per-directory async lock to serialize environment setup.
// This prevents concurrent executions for the same pack from racing
// to create or repair the environment simultaneously.
if effective_config.environment.is_some() && pack_dir.exists() {
let env_lock = get_env_setup_lock(&env_dir);
let _guard = env_lock.lock().await;
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully created environment for pack '{}' at {} (lazy setup)",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to create environment for pack '{}' at {}: {}. \
Proceeding with system interpreter as fallback.",
context.action_ref,
env_dir.display(),
e,
);
// --- Lazy environment creation (double-checked after lock) ---
if !env_dir.exists() {
info!(
"Runtime environment for pack '{}' not found at {}. \
Creating on first use (lazy setup).",
context.action_ref,
env_dir.display(),
);
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully created environment for pack '{}' at {} (lazy setup)",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to create environment for pack '{}' at {}: {}. \
Proceeding with system interpreter as fallback.",
context.action_ref,
env_dir.display(),
e,
);
}
}
}
}
// If the environment directory exists but contains a broken interpreter
// (e.g. broken symlinks from a venv created in a different container),
// attempt to recreate it before resolving the interpreter.
if effective_config.environment.is_some() && env_dir.exists() && pack_dir.exists() {
if let Some(ref env_cfg) = effective_config.environment {
if let Some(ref interp_template) = env_cfg.interpreter_path {
let mut vars = std::collections::HashMap::new();
vars.insert("env_dir", env_dir.to_string_lossy().to_string());
vars.insert("pack_dir", pack_dir.to_string_lossy().to_string());
let resolved = RuntimeExecutionConfig::resolve_template(interp_template, &vars);
let resolved_path = std::path::PathBuf::from(&resolved);
// --- Broken-symlink repair (also under the per-directory lock) ---
// If the environment directory exists but contains a broken interpreter
// (e.g. broken symlinks from a venv created in a different container),
// attempt to recreate it before resolving the interpreter.
if env_dir.exists() {
if let Some(ref env_cfg) = effective_config.environment {
if let Some(ref interp_template) = env_cfg.interpreter_path {
let mut vars = std::collections::HashMap::new();
vars.insert("env_dir", env_dir.to_string_lossy().to_string());
vars.insert("pack_dir", pack_dir.to_string_lossy().to_string());
let resolved =
RuntimeExecutionConfig::resolve_template(interp_template, &vars);
let resolved_path = std::path::PathBuf::from(&resolved);
// Check for a broken symlink: symlink_metadata succeeds for
// the link itself even when its target is missing, while
// exists() (which follows symlinks) returns false.
let is_broken_symlink = !resolved_path.exists()
&& std::fs::symlink_metadata(&resolved_path)
.map(|m| m.file_type().is_symlink())
.unwrap_or(false);
// Check for a broken symlink: symlink_metadata succeeds for
// the link itself even when its target is missing, while
// exists() (which follows symlinks) returns false.
let is_broken_symlink = !resolved_path.exists()
&& std::fs::symlink_metadata(&resolved_path)
.map(|m| m.file_type().is_symlink())
.unwrap_or(false);
if is_broken_symlink {
let target = std::fs::read_link(&resolved_path)
.map(|t| t.display().to_string())
.unwrap_or_else(|_| "<unreadable>".to_string());
warn!(
"Detected broken symlink at '{}' -> '{}' in venv for pack '{}'. \
Removing broken environment and recreating...",
resolved_path.display(),
target,
context.action_ref,
);
// Remove the broken environment directory
if let Err(e) = std::fs::remove_dir_all(&env_dir) {
if is_broken_symlink {
let target = std::fs::read_link(&resolved_path)
.map(|t| t.display().to_string())
.unwrap_or_else(|_| "<unreadable>".to_string());
warn!(
"Failed to remove broken environment at {}: {}. \
Will proceed with system interpreter.",
env_dir.display(),
e,
"Detected broken symlink at '{}' -> '{}' in venv for pack '{}'. \
Removing broken environment and recreating...",
resolved_path.display(),
target,
context.action_ref,
);
} else {
// Recreate the environment using a temporary ProcessRuntime
// with the effective (possibly version-specific) config.
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully recreated environment for pack '{}' at {}",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to recreate environment for pack '{}' at {}: {}. \
Will proceed with system interpreter.",
context.action_ref,
env_dir.display(),
e,
);
// Remove the broken environment directory
if let Err(e) = std::fs::remove_dir_all(&env_dir) {
warn!(
"Failed to remove broken environment at {}: {}. \
Will proceed with system interpreter.",
env_dir.display(),
e,
);
} else {
// Recreate the environment using a temporary ProcessRuntime
// with the effective (possibly version-specific) config.
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully recreated environment for pack '{}' at {}",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to recreate environment for pack '{}' at {}: {}. \
Will proceed with system interpreter.",
context.action_ref,
env_dir.display(),
e,
);
}
}
}
}