agent workers
This commit is contained in:
@@ -60,8 +60,7 @@ struct Args {
|
||||
detect_only: bool,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
fn main() -> Result<()> {
|
||||
// Install HMAC-only JWT crypto provider (must be before any token operations)
|
||||
attune_common::auth::install_crypto_provider();
|
||||
|
||||
@@ -75,7 +74,11 @@ async fn main() -> Result<()> {
|
||||
|
||||
info!("Starting Attune Universal Worker Agent");
|
||||
|
||||
// --- Phase 1: Runtime auto-detection ---
|
||||
// --- Phase 1: Runtime auto-detection (synchronous, before tokio runtime) ---
|
||||
//
|
||||
// All std::env::set_var calls MUST happen here, before we create the tokio
|
||||
// runtime, to avoid undefined behavior from mutating the process environment
|
||||
// while other threads are running.
|
||||
//
|
||||
// Check if the user has explicitly set ATTUNE_WORKER_RUNTIMES. If so, skip
|
||||
// auto-detection and respect their override. Otherwise, probe the system for
|
||||
@@ -83,15 +86,57 @@ async fn main() -> Result<()> {
|
||||
let runtimes_override = std::env::var("ATTUNE_WORKER_RUNTIMES").ok();
|
||||
|
||||
// Holds the detected runtimes so we can pass them to WorkerService later.
|
||||
// Populated only when auto-detection actually runs (no env var override).
|
||||
// Populated in both branches: auto-detection and override (filtered to
|
||||
// match the override list).
|
||||
let mut agent_detected_runtimes: Option<Vec<attune_worker::runtime_detect::DetectedRuntime>> =
|
||||
None;
|
||||
|
||||
if let Some(ref override_value) = runtimes_override {
|
||||
info!(
|
||||
"ATTUNE_WORKER_RUNTIMES already set (override), skipping auto-detection: {}",
|
||||
"ATTUNE_WORKER_RUNTIMES already set (override): {}",
|
||||
override_value
|
||||
);
|
||||
|
||||
// Even with an explicit override, run detection so we can register
|
||||
// the overridden runtimes in the database and advertise accurate
|
||||
// capability metadata (binary paths, versions). Without this, the
|
||||
// worker would accept work for runtimes that were never registered
|
||||
// locally — e.g. ruby/go on a fresh deployment.
|
||||
info!("Running auto-detection for override-specified runtimes...");
|
||||
let detected = detect_runtimes();
|
||||
|
||||
// Filter detected runtimes to only those matching the override list,
|
||||
// so we don't register runtimes the user explicitly excluded.
|
||||
let override_names: Vec<&str> = override_value.split(',').map(|s| s.trim()).collect();
|
||||
let filtered: Vec<_> = detected
|
||||
.into_iter()
|
||||
.filter(|rt| {
|
||||
let normalized = attune_common::runtime_detection::normalize_runtime_name(&rt.name);
|
||||
override_names.iter().any(|ov| {
|
||||
attune_common::runtime_detection::normalize_runtime_name(ov) == normalized
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
if filtered.is_empty() {
|
||||
warn!(
|
||||
"None of the override runtimes ({}) were found on this system! \
|
||||
The agent may not be able to execute any actions.",
|
||||
override_value
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"Matched {} override runtime(s) to detected interpreters:",
|
||||
filtered.len()
|
||||
);
|
||||
for rt in &filtered {
|
||||
match &rt.version {
|
||||
Some(ver) => info!(" ✓ {} — {} ({})", rt.name, rt.path, ver),
|
||||
None => info!(" ✓ {} — {}", rt.name, rt.path),
|
||||
}
|
||||
}
|
||||
agent_detected_runtimes = Some(filtered);
|
||||
}
|
||||
} else {
|
||||
info!("No ATTUNE_WORKER_RUNTIMES override — running auto-detection...");
|
||||
|
||||
@@ -113,10 +158,7 @@ async fn main() -> Result<()> {
|
||||
let runtime_list: Vec<&str> = detected.iter().map(|r| r.name.as_str()).collect();
|
||||
let runtime_csv = runtime_list.join(",");
|
||||
info!("Setting ATTUNE_WORKER_RUNTIMES={}", runtime_csv);
|
||||
// SAFETY: std::env::set_var is safe in Rust 2021 edition. If upgrading
|
||||
// to edition 2024+, this call will need to be wrapped in `unsafe {}`.
|
||||
// It's sound here because detection runs single-threaded before tokio
|
||||
// starts any worker tasks.
|
||||
// Safe: no other threads are running yet (tokio runtime not started).
|
||||
std::env::set_var("ATTUNE_WORKER_RUNTIMES", &runtime_csv);
|
||||
|
||||
// Stash for Phase 2: pass to WorkerService for rich capability registration
|
||||
@@ -124,7 +166,7 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
// --- Handle --detect-only ---
|
||||
// --- Handle --detect-only (synchronous, no async runtime needed) ---
|
||||
if args.detect_only {
|
||||
if runtimes_override.is_some() {
|
||||
// User set an override, but --detect-only should show what's actually
|
||||
@@ -147,12 +189,24 @@ async fn main() -> Result<()> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// --- Phase 2: Load configuration ---
|
||||
if let Some(config_path) = args.config {
|
||||
// SAFETY: std::env::set_var is safe in Rust 2021 edition. See note above.
|
||||
// --- Set config path env var (synchronous, before tokio runtime) ---
|
||||
if let Some(ref config_path) = args.config {
|
||||
// Safe: no other threads are running yet (tokio runtime not started).
|
||||
std::env::set_var("ATTUNE_CONFIG", config_path);
|
||||
}
|
||||
|
||||
// --- Build the tokio runtime and run the async portion ---
|
||||
let runtime = tokio::runtime::Runtime::new()?;
|
||||
runtime.block_on(async_main(args, agent_detected_runtimes))
|
||||
}
|
||||
|
||||
/// The async portion of the agent entrypoint. Called from `main()` via
|
||||
/// `runtime.block_on()` after all environment variable mutations are complete.
|
||||
async fn async_main(
|
||||
args: Args,
|
||||
agent_detected_runtimes: Option<Vec<attune_worker::runtime_detect::DetectedRuntime>>,
|
||||
) -> Result<()> {
|
||||
// --- Phase 2: Load configuration ---
|
||||
let mut config = Config::load()?;
|
||||
config.validate()?;
|
||||
|
||||
|
||||
@@ -84,10 +84,10 @@ pub async fn auto_register_detected_runtimes(
|
||||
|
||||
// Check if a runtime with a matching name already exists in the DB.
|
||||
// We normalize both sides for alias-aware comparison.
|
||||
let already_exists = existing_runtimes.iter().any(|r| {
|
||||
let db_name = r.name.to_ascii_lowercase();
|
||||
normalize_runtime_name(&db_name) == canonical_name
|
||||
});
|
||||
// normalize_runtime_name lowercases internally, so no need to pre-lowercase.
|
||||
let already_exists = existing_runtimes
|
||||
.iter()
|
||||
.any(|r| normalize_runtime_name(&r.name) == canonical_name);
|
||||
|
||||
if already_exists {
|
||||
debug!(
|
||||
@@ -194,7 +194,7 @@ pub async fn auto_register_detected_runtimes(
|
||||
"Auto-detected {} runtime at {}",
|
||||
detected_rt.name, detected_rt.path
|
||||
)),
|
||||
name: capitalize_runtime_name(canonical_name),
|
||||
name: capitalize_runtime_name(&canonical_name),
|
||||
distributions: build_minimal_distributions(detected_rt),
|
||||
installation: None,
|
||||
execution_config,
|
||||
@@ -286,7 +286,7 @@ fn build_execution_config_from_template(
|
||||
/// interpreter directly, without environment or dependency management.
|
||||
fn build_minimal_execution_config(detected: &DetectedRuntime) -> serde_json::Value {
|
||||
let canonical = normalize_runtime_name(&detected.name);
|
||||
let file_ext = default_file_extension(canonical);
|
||||
let file_ext = default_file_extension(&canonical);
|
||||
|
||||
let mut config = json!({
|
||||
"interpreter": {
|
||||
|
||||
@@ -24,9 +24,27 @@ use attune_common::models::runtime::{
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, Mutex as StdMutex, OnceLock};
|
||||
use tokio::process::Command;
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
/// Per-directory locks for lazy environment setup to prevent concurrent
|
||||
/// setup of the same environment from corrupting it. When two executions
|
||||
/// for the same pack arrive concurrently (e.g. in agent mode), both may
|
||||
/// see `!env_dir.exists()` and race to run `setup_pack_environment`.
|
||||
/// This map provides a per-directory async mutex so that only one setup
|
||||
/// runs at a time for each env_dir path.
|
||||
static ENV_SETUP_LOCKS: OnceLock<StdMutex<HashMap<PathBuf, Arc<tokio::sync::Mutex<()>>>>> =
|
||||
OnceLock::new();
|
||||
|
||||
fn get_env_setup_lock(env_dir: &Path) -> Arc<tokio::sync::Mutex<()>> {
|
||||
let locks = ENV_SETUP_LOCKS.get_or_init(|| StdMutex::new(HashMap::new()));
|
||||
let mut map = locks.lock().unwrap();
|
||||
map.entry(env_dir.to_path_buf())
|
||||
.or_insert_with(|| Arc::new(tokio::sync::Mutex::new(())))
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Escapes single quotes in `s` so the result can be placed between single
/// quotes in a bash command line.
///
/// Each `'` is rewritten as `'\''` (close the quoted string, emit a
/// backslash-escaped quote, reopen the quoted string). All other characters
/// are copied through unchanged, and no surrounding quotes are added.
fn bash_single_quote_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '\'' {
            escaped.push_str("'\\''");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
|
||||
@@ -620,111 +638,122 @@ impl Runtime for ProcessRuntime {
|
||||
// create it on-demand. This is the primary code path for agent mode where
|
||||
// proactive startup setup is skipped, but it also serves as a safety net
|
||||
// for standard workers if the environment was somehow missed.
|
||||
if effective_config.environment.is_some() && pack_dir.exists() && !env_dir.exists() {
|
||||
info!(
|
||||
"Runtime environment for pack '{}' not found at {}. \
|
||||
Creating on first use (lazy setup).",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
);
|
||||
// Acquire a per-directory async lock to serialize environment setup.
|
||||
// This prevents concurrent executions for the same pack from racing
|
||||
// to create or repair the environment simultaneously.
|
||||
if effective_config.environment.is_some() && pack_dir.exists() {
|
||||
let env_lock = get_env_setup_lock(&env_dir);
|
||||
let _guard = env_lock.lock().await;
|
||||
|
||||
let setup_runtime = ProcessRuntime::new(
|
||||
self.runtime_name.clone(),
|
||||
effective_config.clone(),
|
||||
self.packs_base_dir.clone(),
|
||||
self.runtime_envs_dir.clone(),
|
||||
);
|
||||
match setup_runtime
|
||||
.setup_pack_environment(&pack_dir, &env_dir)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
info!(
|
||||
"Successfully created environment for pack '{}' at {} (lazy setup)",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to create environment for pack '{}' at {}: {}. \
|
||||
Proceeding with system interpreter as fallback.",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
e,
|
||||
);
|
||||
// --- Lazy environment creation (double-checked after lock) ---
|
||||
if !env_dir.exists() {
|
||||
info!(
|
||||
"Runtime environment for pack '{}' not found at {}. \
|
||||
Creating on first use (lazy setup).",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
);
|
||||
|
||||
let setup_runtime = ProcessRuntime::new(
|
||||
self.runtime_name.clone(),
|
||||
effective_config.clone(),
|
||||
self.packs_base_dir.clone(),
|
||||
self.runtime_envs_dir.clone(),
|
||||
);
|
||||
match setup_runtime
|
||||
.setup_pack_environment(&pack_dir, &env_dir)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
info!(
|
||||
"Successfully created environment for pack '{}' at {} (lazy setup)",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to create environment for pack '{}' at {}: {}. \
|
||||
Proceeding with system interpreter as fallback.",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
e,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the environment directory exists but contains a broken interpreter
|
||||
// (e.g. broken symlinks from a venv created in a different container),
|
||||
// attempt to recreate it before resolving the interpreter.
|
||||
if effective_config.environment.is_some() && env_dir.exists() && pack_dir.exists() {
|
||||
if let Some(ref env_cfg) = effective_config.environment {
|
||||
if let Some(ref interp_template) = env_cfg.interpreter_path {
|
||||
let mut vars = std::collections::HashMap::new();
|
||||
vars.insert("env_dir", env_dir.to_string_lossy().to_string());
|
||||
vars.insert("pack_dir", pack_dir.to_string_lossy().to_string());
|
||||
let resolved = RuntimeExecutionConfig::resolve_template(interp_template, &vars);
|
||||
let resolved_path = std::path::PathBuf::from(&resolved);
|
||||
// --- Broken-symlink repair (also under the per-directory lock) ---
|
||||
// If the environment directory exists but contains a broken interpreter
|
||||
// (e.g. broken symlinks from a venv created in a different container),
|
||||
// attempt to recreate it before resolving the interpreter.
|
||||
if env_dir.exists() {
|
||||
if let Some(ref env_cfg) = effective_config.environment {
|
||||
if let Some(ref interp_template) = env_cfg.interpreter_path {
|
||||
let mut vars = std::collections::HashMap::new();
|
||||
vars.insert("env_dir", env_dir.to_string_lossy().to_string());
|
||||
vars.insert("pack_dir", pack_dir.to_string_lossy().to_string());
|
||||
let resolved =
|
||||
RuntimeExecutionConfig::resolve_template(interp_template, &vars);
|
||||
let resolved_path = std::path::PathBuf::from(&resolved);
|
||||
|
||||
// Check for a broken symlink: symlink_metadata succeeds for
|
||||
// the link itself even when its target is missing, while
|
||||
// exists() (which follows symlinks) returns false.
|
||||
let is_broken_symlink = !resolved_path.exists()
|
||||
&& std::fs::symlink_metadata(&resolved_path)
|
||||
.map(|m| m.file_type().is_symlink())
|
||||
.unwrap_or(false);
|
||||
// Check for a broken symlink: symlink_metadata succeeds for
|
||||
// the link itself even when its target is missing, while
|
||||
// exists() (which follows symlinks) returns false.
|
||||
let is_broken_symlink = !resolved_path.exists()
|
||||
&& std::fs::symlink_metadata(&resolved_path)
|
||||
.map(|m| m.file_type().is_symlink())
|
||||
.unwrap_or(false);
|
||||
|
||||
if is_broken_symlink {
|
||||
let target = std::fs::read_link(&resolved_path)
|
||||
.map(|t| t.display().to_string())
|
||||
.unwrap_or_else(|_| "<unreadable>".to_string());
|
||||
warn!(
|
||||
"Detected broken symlink at '{}' -> '{}' in venv for pack '{}'. \
|
||||
Removing broken environment and recreating...",
|
||||
resolved_path.display(),
|
||||
target,
|
||||
context.action_ref,
|
||||
);
|
||||
|
||||
// Remove the broken environment directory
|
||||
if let Err(e) = std::fs::remove_dir_all(&env_dir) {
|
||||
if is_broken_symlink {
|
||||
let target = std::fs::read_link(&resolved_path)
|
||||
.map(|t| t.display().to_string())
|
||||
.unwrap_or_else(|_| "<unreadable>".to_string());
|
||||
warn!(
|
||||
"Failed to remove broken environment at {}: {}. \
|
||||
Will proceed with system interpreter.",
|
||||
env_dir.display(),
|
||||
e,
|
||||
"Detected broken symlink at '{}' -> '{}' in venv for pack '{}'. \
|
||||
Removing broken environment and recreating...",
|
||||
resolved_path.display(),
|
||||
target,
|
||||
context.action_ref,
|
||||
);
|
||||
} else {
|
||||
// Recreate the environment using a temporary ProcessRuntime
|
||||
// with the effective (possibly version-specific) config.
|
||||
let setup_runtime = ProcessRuntime::new(
|
||||
self.runtime_name.clone(),
|
||||
effective_config.clone(),
|
||||
self.packs_base_dir.clone(),
|
||||
self.runtime_envs_dir.clone(),
|
||||
);
|
||||
match setup_runtime
|
||||
.setup_pack_environment(&pack_dir, &env_dir)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
info!(
|
||||
"Successfully recreated environment for pack '{}' at {}",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to recreate environment for pack '{}' at {}: {}. \
|
||||
Will proceed with system interpreter.",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
e,
|
||||
);
|
||||
|
||||
// Remove the broken environment directory
|
||||
if let Err(e) = std::fs::remove_dir_all(&env_dir) {
|
||||
warn!(
|
||||
"Failed to remove broken environment at {}: {}. \
|
||||
Will proceed with system interpreter.",
|
||||
env_dir.display(),
|
||||
e,
|
||||
);
|
||||
} else {
|
||||
// Recreate the environment using a temporary ProcessRuntime
|
||||
// with the effective (possibly version-specific) config.
|
||||
let setup_runtime = ProcessRuntime::new(
|
||||
self.runtime_name.clone(),
|
||||
effective_config.clone(),
|
||||
self.packs_base_dir.clone(),
|
||||
self.runtime_envs_dir.clone(),
|
||||
);
|
||||
match setup_runtime
|
||||
.setup_pack_environment(&pack_dir, &env_dir)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
info!(
|
||||
"Successfully recreated environment for pack '{}' at {}",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to recreate environment for pack '{}' at {}: {}. \
|
||||
Will proceed with system interpreter.",
|
||||
context.action_ref,
|
||||
env_dir.display(),
|
||||
e,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user