agent workers

This commit is contained in:
2026-03-21 10:05:02 -05:00
parent 4d5a3b1bf5
commit d4c6240485
10 changed files with 280 additions and 152 deletions

View File

@@ -60,8 +60,7 @@ struct Args {
detect_only: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
fn main() -> Result<()> {
// Install HMAC-only JWT crypto provider (must be before any token operations)
attune_common::auth::install_crypto_provider();
@@ -75,7 +74,11 @@ async fn main() -> Result<()> {
info!("Starting Attune Universal Worker Agent");
// --- Phase 1: Runtime auto-detection ---
// --- Phase 1: Runtime auto-detection (synchronous, before tokio runtime) ---
//
// All std::env::set_var calls MUST happen here, before we create the tokio
// runtime, to avoid undefined behavior from mutating the process environment
// while other threads are running.
//
// Check if the user has explicitly set ATTUNE_WORKER_RUNTIMES. If so, skip
// auto-detection and respect their override. Otherwise, probe the system for
@@ -83,15 +86,57 @@ async fn main() -> Result<()> {
let runtimes_override = std::env::var("ATTUNE_WORKER_RUNTIMES").ok();
// Holds the detected runtimes so we can pass them to WorkerService later.
// Populated only when auto-detection actually runs (no env var override).
// Populated in both branches: auto-detection and override (filtered to
// match the override list).
let mut agent_detected_runtimes: Option<Vec<attune_worker::runtime_detect::DetectedRuntime>> =
None;
if let Some(ref override_value) = runtimes_override {
info!(
"ATTUNE_WORKER_RUNTIMES already set (override), skipping auto-detection: {}",
"ATTUNE_WORKER_RUNTIMES already set (override): {}",
override_value
);
// Even with an explicit override, run detection so we can register
// the overridden runtimes in the database and advertise accurate
// capability metadata (binary paths, versions). Without this, the
// worker would accept work for runtimes that were never registered
// locally — e.g. ruby/go on a fresh deployment.
info!("Running auto-detection for override-specified runtimes...");
let detected = detect_runtimes();
// Filter detected runtimes to only those matching the override list,
// so we don't register runtimes the user explicitly excluded.
let override_names: Vec<&str> = override_value.split(',').map(|s| s.trim()).collect();
let filtered: Vec<_> = detected
.into_iter()
.filter(|rt| {
let normalized = attune_common::runtime_detection::normalize_runtime_name(&rt.name);
override_names.iter().any(|ov| {
attune_common::runtime_detection::normalize_runtime_name(ov) == normalized
})
})
.collect();
if filtered.is_empty() {
warn!(
"None of the override runtimes ({}) were found on this system! \
The agent may not be able to execute any actions.",
override_value
);
} else {
info!(
"Matched {} override runtime(s) to detected interpreters:",
filtered.len()
);
for rt in &filtered {
match &rt.version {
Some(ver) => info!(" ✓ {} — {} ({})", rt.name, rt.path, ver),
None => info!(" ✓ {} — {}", rt.name, rt.path),
}
}
agent_detected_runtimes = Some(filtered);
}
} else {
info!("No ATTUNE_WORKER_RUNTIMES override — running auto-detection...");
@@ -113,10 +158,7 @@ async fn main() -> Result<()> {
let runtime_list: Vec<&str> = detected.iter().map(|r| r.name.as_str()).collect();
let runtime_csv = runtime_list.join(",");
info!("Setting ATTUNE_WORKER_RUNTIMES={}", runtime_csv);
// SAFETY: std::env::set_var is safe in Rust 2021 edition. If upgrading
// to edition 2024+, this call will need to be wrapped in `unsafe {}`.
// It's sound here because detection runs single-threaded before tokio
// starts any worker tasks.
// Safe: no other threads are running yet (tokio runtime not started).
std::env::set_var("ATTUNE_WORKER_RUNTIMES", &runtime_csv);
// Stash for Phase 2: pass to WorkerService for rich capability registration
@@ -124,7 +166,7 @@ async fn main() -> Result<()> {
}
}
// --- Handle --detect-only ---
// --- Handle --detect-only (synchronous, no async runtime needed) ---
if args.detect_only {
if runtimes_override.is_some() {
// User set an override, but --detect-only should show what's actually
@@ -147,12 +189,24 @@ async fn main() -> Result<()> {
return Ok(());
}
// --- Phase 2: Load configuration ---
if let Some(config_path) = args.config {
// SAFETY: std::env::set_var is safe in Rust 2021 edition. See note above.
// --- Set config path env var (synchronous, before tokio runtime) ---
if let Some(ref config_path) = args.config {
// Safe: no other threads are running yet (tokio runtime not started).
std::env::set_var("ATTUNE_CONFIG", config_path);
}
// --- Build the tokio runtime and run the async portion ---
let runtime = tokio::runtime::Runtime::new()?;
runtime.block_on(async_main(args, agent_detected_runtimes))
}
/// The async portion of the agent entrypoint. Called from `main()` via
/// `runtime.block_on()` after all environment variable mutations are complete.
async fn async_main(
args: Args,
agent_detected_runtimes: Option<Vec<attune_worker::runtime_detect::DetectedRuntime>>,
) -> Result<()> {
// --- Phase 2: Load configuration ---
let mut config = Config::load()?;
config.validate()?;

View File

@@ -84,10 +84,10 @@ pub async fn auto_register_detected_runtimes(
// Check if a runtime with a matching name already exists in the DB.
// We normalize both sides for alias-aware comparison.
let already_exists = existing_runtimes.iter().any(|r| {
let db_name = r.name.to_ascii_lowercase();
normalize_runtime_name(&db_name) == canonical_name
});
// normalize_runtime_name lowercases internally, so no need to pre-lowercase.
let already_exists = existing_runtimes
.iter()
.any(|r| normalize_runtime_name(&r.name) == canonical_name);
if already_exists {
debug!(
@@ -194,7 +194,7 @@ pub async fn auto_register_detected_runtimes(
"Auto-detected {} runtime at {}",
detected_rt.name, detected_rt.path
)),
name: capitalize_runtime_name(canonical_name),
name: capitalize_runtime_name(&canonical_name),
distributions: build_minimal_distributions(detected_rt),
installation: None,
execution_config,
@@ -286,7 +286,7 @@ fn build_execution_config_from_template(
/// interpreter directly, without environment or dependency management.
fn build_minimal_execution_config(detected: &DetectedRuntime) -> serde_json::Value {
let canonical = normalize_runtime_name(&detected.name);
let file_ext = default_file_extension(canonical);
let file_ext = default_file_extension(&canonical);
let mut config = json!({
"interpreter": {

View File

@@ -24,9 +24,27 @@ use attune_common::models::runtime::{
};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex as StdMutex, OnceLock};
use tokio::process::Command;
use tracing::{debug, error, info, warn};
/// Registry of per-directory locks that serialize lazy environment setup.
///
/// When two executions for the same pack arrive concurrently (e.g. in agent
/// mode), both may see `!env_dir.exists()` and race to run
/// `setup_pack_environment`, corrupting the environment. This map hands out
/// one async mutex per `env_dir` path so that only one setup runs at a time
/// for each directory.
///
/// The outer `std::sync::Mutex` only guards the map itself; the inner
/// `tokio::sync::Mutex` values are what callers hold across `.await` while
/// the setup work runs.
static ENV_SETUP_LOCKS: OnceLock<StdMutex<HashMap<PathBuf, Arc<tokio::sync::Mutex<()>>>>> =
OnceLock::new();
/// Returns the async lock that serializes environment setup for `env_dir`.
///
/// The lock is created the first time a path is requested and the same
/// `Arc` is handed to every later caller that passes an equal path. Paths are
/// used as map keys exactly as given (no canonicalization), so callers must
/// pass a consistent spelling of the directory to share a lock.
///
/// The registry mutex is held only for the duration of the map lookup; the
/// returned `tokio::sync::Mutex` is the one to hold across `.await`.
fn get_env_setup_lock(env_dir: &Path) -> Arc<tokio::sync::Mutex<()>> {
    let registry = ENV_SETUP_LOCKS.get_or_init(|| StdMutex::new(HashMap::new()));

    // A poisoned registry only means some thread panicked while holding the
    // guard. The map is mutated by a single `entry` call and therefore never
    // holds partially-updated state, so recover the guard rather than turning
    // one panic into a panic on every subsequent execution.
    let mut map = registry
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    Arc::clone(
        map.entry(env_dir.to_path_buf())
            .or_insert_with(|| Arc::new(tokio::sync::Mutex::new(()))),
    )
}
/// Escapes `s` so it can be placed between single quotes in a bash command.
///
/// Every `'` in the input is rewritten to the four-character sequence `'\''`
/// (close the quoted string, emit an escaped quote, reopen the quoted string).
/// All other characters are copied through unchanged. The surrounding quotes
/// themselves are not added here.
fn bash_single_quote_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '\'' {
            escaped.push_str("'\\''");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
@@ -620,111 +638,122 @@ impl Runtime for ProcessRuntime {
// create it on-demand. This is the primary code path for agent mode where
// proactive startup setup is skipped, but it also serves as a safety net
// for standard workers if the environment was somehow missed.
if effective_config.environment.is_some() && pack_dir.exists() && !env_dir.exists() {
info!(
"Runtime environment for pack '{}' not found at {}. \
Creating on first use (lazy setup).",
context.action_ref,
env_dir.display(),
);
// Acquire a per-directory async lock to serialize environment setup.
// This prevents concurrent executions for the same pack from racing
// to create or repair the environment simultaneously.
if effective_config.environment.is_some() && pack_dir.exists() {
let env_lock = get_env_setup_lock(&env_dir);
let _guard = env_lock.lock().await;
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully created environment for pack '{}' at {} (lazy setup)",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to create environment for pack '{}' at {}: {}. \
Proceeding with system interpreter as fallback.",
context.action_ref,
env_dir.display(),
e,
);
// --- Lazy environment creation (double-checked after lock) ---
if !env_dir.exists() {
info!(
"Runtime environment for pack '{}' not found at {}. \
Creating on first use (lazy setup).",
context.action_ref,
env_dir.display(),
);
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully created environment for pack '{}' at {} (lazy setup)",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to create environment for pack '{}' at {}: {}. \
Proceeding with system interpreter as fallback.",
context.action_ref,
env_dir.display(),
e,
);
}
}
}
}
// If the environment directory exists but contains a broken interpreter
// (e.g. broken symlinks from a venv created in a different container),
// attempt to recreate it before resolving the interpreter.
if effective_config.environment.is_some() && env_dir.exists() && pack_dir.exists() {
if let Some(ref env_cfg) = effective_config.environment {
if let Some(ref interp_template) = env_cfg.interpreter_path {
let mut vars = std::collections::HashMap::new();
vars.insert("env_dir", env_dir.to_string_lossy().to_string());
vars.insert("pack_dir", pack_dir.to_string_lossy().to_string());
let resolved = RuntimeExecutionConfig::resolve_template(interp_template, &vars);
let resolved_path = std::path::PathBuf::from(&resolved);
// --- Broken-symlink repair (also under the per-directory lock) ---
// If the environment directory exists but contains a broken interpreter
// (e.g. broken symlinks from a venv created in a different container),
// attempt to recreate it before resolving the interpreter.
if env_dir.exists() {
if let Some(ref env_cfg) = effective_config.environment {
if let Some(ref interp_template) = env_cfg.interpreter_path {
let mut vars = std::collections::HashMap::new();
vars.insert("env_dir", env_dir.to_string_lossy().to_string());
vars.insert("pack_dir", pack_dir.to_string_lossy().to_string());
let resolved =
RuntimeExecutionConfig::resolve_template(interp_template, &vars);
let resolved_path = std::path::PathBuf::from(&resolved);
// Check for a broken symlink: symlink_metadata succeeds for
// the link itself even when its target is missing, while
// exists() (which follows symlinks) returns false.
let is_broken_symlink = !resolved_path.exists()
&& std::fs::symlink_metadata(&resolved_path)
.map(|m| m.file_type().is_symlink())
.unwrap_or(false);
// Check for a broken symlink: symlink_metadata succeeds for
// the link itself even when its target is missing, while
// exists() (which follows symlinks) returns false.
let is_broken_symlink = !resolved_path.exists()
&& std::fs::symlink_metadata(&resolved_path)
.map(|m| m.file_type().is_symlink())
.unwrap_or(false);
if is_broken_symlink {
let target = std::fs::read_link(&resolved_path)
.map(|t| t.display().to_string())
.unwrap_or_else(|_| "<unreadable>".to_string());
warn!(
"Detected broken symlink at '{}' -> '{}' in venv for pack '{}'. \
Removing broken environment and recreating...",
resolved_path.display(),
target,
context.action_ref,
);
// Remove the broken environment directory
if let Err(e) = std::fs::remove_dir_all(&env_dir) {
if is_broken_symlink {
let target = std::fs::read_link(&resolved_path)
.map(|t| t.display().to_string())
.unwrap_or_else(|_| "<unreadable>".to_string());
warn!(
"Failed to remove broken environment at {}: {}. \
Will proceed with system interpreter.",
env_dir.display(),
e,
"Detected broken symlink at '{}' -> '{}' in venv for pack '{}'. \
Removing broken environment and recreating...",
resolved_path.display(),
target,
context.action_ref,
);
} else {
// Recreate the environment using a temporary ProcessRuntime
// with the effective (possibly version-specific) config.
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully recreated environment for pack '{}' at {}",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to recreate environment for pack '{}' at {}: {}. \
Will proceed with system interpreter.",
context.action_ref,
env_dir.display(),
e,
);
// Remove the broken environment directory
if let Err(e) = std::fs::remove_dir_all(&env_dir) {
warn!(
"Failed to remove broken environment at {}: {}. \
Will proceed with system interpreter.",
env_dir.display(),
e,
);
} else {
// Recreate the environment using a temporary ProcessRuntime
// with the effective (possibly version-specific) config.
let setup_runtime = ProcessRuntime::new(
self.runtime_name.clone(),
effective_config.clone(),
self.packs_base_dir.clone(),
self.runtime_envs_dir.clone(),
);
match setup_runtime
.setup_pack_environment(&pack_dir, &env_dir)
.await
{
Ok(()) => {
info!(
"Successfully recreated environment for pack '{}' at {}",
context.action_ref,
env_dir.display(),
);
}
Err(e) => {
warn!(
"Failed to recreate environment for pack '{}' at {}: {}. \
Will proceed with system interpreter.",
context.action_ref,
env_dir.display(),
e,
);
}
}
}
}