working on runtime executions

This commit is contained in:
2026-02-16 22:04:20 -06:00
parent f52320f889
commit 904ede04be
99 changed files with 6778 additions and 5929 deletions

View File

@@ -0,0 +1,497 @@
//! Proactive Runtime Environment Setup
//!
//! This module provides functions for setting up runtime environments (Python
//! virtualenvs, Node.js node_modules, etc.) proactively — either at worker
//! startup (scanning all registered packs) or in response to a `pack.registered`
//! MQ event.
//!
//! The goal is to ensure environments are ready *before* the first execution,
//! eliminating the first-run penalty and potential permission errors that occur
//! when setup is deferred to execution time.
use std::collections::{HashMap, HashSet};
use std::path::Path;
use sqlx::PgPool;
use tracing::{debug, error, info, warn};
use attune_common::mq::PackRegisteredPayload;
use attune_common::repositories::action::ActionRepository;
use attune_common::repositories::pack::PackRepository;
use attune_common::repositories::runtime::RuntimeRepository;
use attune_common::repositories::{FindById, List};
// Re-export the utility that the API also uses so callers can reach it from
// either crate without adding a direct common dependency for this one function.
pub use attune_common::pack_environment::collect_runtime_names_for_pack;
use crate::runtime::process::ProcessRuntime;
/// Result of setting up environments for a single pack.
#[derive(Debug)]
pub struct PackEnvSetupResult {
    // Human-readable pack reference (also the pack's on-disk directory name).
    pub pack_ref: String,
    // Runtime names whose environments were set up (setup itself is idempotent,
    // so this may include environments that already existed).
    pub environments_created: Vec<String>,
    // Runtime names skipped because they declared no environment/dependencies.
    pub environments_skipped: Vec<String>,
    // Human-readable error messages; non-fatal, setup continues past failures.
    pub errors: Vec<String>,
}
/// Result of the full startup scan across all packs.
#[derive(Debug)]
pub struct StartupScanResult {
    // Number of registered packs examined (whether or not anything was created).
    pub packs_scanned: usize,
    // Total environments set up across all packs.
    pub environments_created: usize,
    // Total environments skipped (no environment config / dependencies).
    pub environments_skipped: usize,
    // Aggregated error messages from all packs; the scan does not abort on
    // per-pack failures.
    pub errors: Vec<String>,
}
/// Scan all registered packs and create missing runtime environments.
///
/// This is called at worker startup, before the worker begins consuming
/// execution messages. It ensures that environments for all known packs
/// are ready to go.
///
/// # Arguments
/// * `db_pool` - Database connection pool
/// * `runtime_filter` - Optional list of runtime names this worker supports
///   (from `ATTUNE_WORKER_RUNTIMES`). If `None`, all runtimes are considered.
/// * `packs_base_dir` - Base directory where pack files are stored
/// * `runtime_envs_dir` - Base directory for isolated runtime environments
pub async fn scan_and_setup_all_environments(
    db_pool: &PgPool,
    runtime_filter: Option<&[String]>,
    packs_base_dir: &Path,
    runtime_envs_dir: &Path,
) -> StartupScanResult {
    info!("Starting runtime environment scan for all registered packs");

    let mut summary = StartupScanResult {
        packs_scanned: 0,
        environments_created: 0,
        environments_skipped: 0,
        errors: Vec::new(),
    };

    // Fetch every runtime once and index by primary key, so per-pack lookups
    // below don't have to hit the database.
    let runtime_map: HashMap<i64, _> = match RuntimeRepository::list(db_pool).await {
        Ok(rts) => rts.into_iter().map(|r| (r.id, r)).collect(),
        Err(e) => {
            let msg = format!("Failed to load runtimes from database: {}", e);
            error!("{}", msg);
            summary.errors.push(msg);
            return summary;
        }
    };

    // Without the pack list there is nothing to scan — bail out with the error
    // recorded so the caller can surface it.
    let packs = match PackRepository::list(db_pool).await {
        Ok(p) => p,
        Err(e) => {
            let msg = format!("Failed to load packs from database: {}", e);
            error!("{}", msg);
            summary.errors.push(msg);
            return summary;
        }
    };
    info!("Found {} registered pack(s) to scan", packs.len());

    // Per-pack failures are accumulated, never fatal: one broken pack must not
    // prevent environments for the rest.
    for pack in &packs {
        summary.packs_scanned += 1;
        let per_pack = setup_environments_for_pack(
            db_pool,
            &pack.r#ref,
            pack.id,
            runtime_filter,
            packs_base_dir,
            runtime_envs_dir,
            &runtime_map,
        )
        .await;
        summary.environments_created += per_pack.environments_created.len();
        summary.environments_skipped += per_pack.environments_skipped.len();
        summary.errors.extend(per_pack.errors);
    }

    info!(
        "Environment scan complete: {} pack(s) scanned, {} environment(s) created, \
{} skipped, {} error(s)",
        summary.packs_scanned,
        summary.environments_created,
        summary.environments_skipped,
        summary.errors.len(),
    );
    summary
}
/// Set up environments for a single pack, triggered by a `pack.registered` MQ event.
///
/// This is called when the worker receives a `PackRegistered` message. It only
/// sets up environments for the runtimes listed in the event payload (intersection
/// with this worker's supported runtimes).
///
/// Runtime names from the event are normalized to lowercase before matching,
/// so that the worker-filter comparison, the database lookup (which compares
/// against the lowercased runtime name), and the environment directory layout
/// all agree with the startup-scan path (`setup_environments_for_pack`), which
/// keys everything on the lowercased name. Previously a mixed-case name in the
/// event payload would silently fail to match and could produce a differently
/// cased env directory.
pub async fn setup_environments_for_registered_pack(
    db_pool: &PgPool,
    event: &PackRegisteredPayload,
    runtime_filter: Option<&[String]>,
    packs_base_dir: &Path,
    runtime_envs_dir: &Path,
) -> PackEnvSetupResult {
    info!(
        "Setting up environments for newly registered pack '{}' (version {})",
        event.pack_ref, event.version
    );

    let mut pack_result = PackEnvSetupResult {
        pack_ref: event.pack_ref.clone(),
        environments_created: Vec::new(),
        environments_skipped: Vec::new(),
        errors: Vec::new(),
    };

    // The pack's files must already be on disk; without them there is nothing
    // to install dependencies from.
    let pack_dir = packs_base_dir.join(&event.pack_ref);
    if !pack_dir.exists() {
        let msg = format!(
            "Pack directory does not exist: {}. Skipping environment setup.",
            pack_dir.display()
        );
        warn!("{}", msg);
        pack_result.errors.push(msg);
        return pack_result;
    }

    // Filter to runtimes this worker supports. Names are lowercased first:
    // the worker filter (from `runtime_filter_from_env`) is lowercase, and the
    // DB lookup below compares against the lowercased runtime name.
    let target_runtimes: Vec<String> = event
        .runtime_names
        .iter()
        .map(|name| name.to_lowercase())
        .filter(|name| {
            runtime_filter.map_or(true, |filter| {
                filter.iter().any(|allowed| allowed.eq_ignore_ascii_case(name))
            })
        })
        .collect();

    if target_runtimes.is_empty() {
        debug!(
            "No matching runtimes for pack '{}' on this worker (event runtimes: {:?}, worker filter: {:?})",
            event.pack_ref, event.runtime_names, runtime_filter,
        );
        return pack_result;
    }

    // Load runtime configs from DB by name
    let all_runtimes = match RuntimeRepository::list(db_pool).await {
        Ok(rts) => rts,
        Err(e) => {
            let msg = format!("Failed to load runtimes from database: {}", e);
            error!("{}", msg);
            pack_result.errors.push(msg);
            return pack_result;
        }
    };

    for rt_name in &target_runtimes {
        // Find the runtime in DB (match by lowercase name)
        let rt = match all_runtimes
            .iter()
            .find(|r| r.name.to_lowercase() == *rt_name)
        {
            Some(r) => r,
            None => {
                debug!("Runtime '{}' not found in database, skipping", rt_name);
                continue;
            }
        };

        let exec_config = rt.parsed_execution_config();
        if exec_config.environment.is_none() && !exec_config.has_dependencies(&pack_dir) {
            debug!(
                "Runtime '{}' has no environment config, skipping for pack '{}'",
                rt_name, event.pack_ref,
            );
            pack_result.environments_skipped.push(rt_name.clone());
            continue;
        }

        // Lowercased name keeps the env directory layout identical to the one
        // created by the startup scan for the same pack/runtime pair.
        let env_dir = runtime_envs_dir.join(&event.pack_ref).join(rt_name);
        let process_runtime = ProcessRuntime::new(
            rt_name.clone(),
            exec_config,
            packs_base_dir.to_path_buf(),
            runtime_envs_dir.to_path_buf(),
        );

        match process_runtime
            .setup_pack_environment(&pack_dir, &env_dir)
            .await
        {
            Ok(()) => {
                info!(
                    "Environment for runtime '{}' ready for pack '{}'",
                    rt_name, event.pack_ref,
                );
                pack_result.environments_created.push(rt_name.clone());
            }
            Err(e) => {
                let msg = format!(
                    "Failed to set up '{}' environment for pack '{}': {}",
                    rt_name, event.pack_ref, e,
                );
                warn!("{}", msg);
                pack_result.errors.push(msg);
            }
        }
    }

    pack_result
}
/// Internal helper: set up environments for a single pack during the startup scan.
///
/// Discovers which runtimes the pack's actions use, filters by this worker's
/// capabilities, and creates any missing environments.
///
/// # Arguments
/// * `db_pool` - Database connection pool (loads the pack's actions, and
///   resolves runtime IDs missing from `runtime_map`)
/// * `pack_ref` - Pack reference; also the pack's directory name on disk
/// * `pack_id` - Database primary key of the pack
/// * `runtime_filter` - Optional list of runtime names this worker supports
/// * `packs_base_dir` - Base directory where pack files are stored
/// * `runtime_envs_dir` - Base directory for isolated runtime environments
/// * `runtime_map` - Runtimes pre-loaded by the caller, indexed by ID, so the
///   scan doesn't query the DB once per action runtime
#[allow(clippy::too_many_arguments)]
async fn setup_environments_for_pack(
    db_pool: &PgPool,
    pack_ref: &str,
    pack_id: i64,
    runtime_filter: Option<&[String]>,
    packs_base_dir: &Path,
    runtime_envs_dir: &Path,
    runtime_map: &HashMap<i64, attune_common::models::Runtime>,
) -> PackEnvSetupResult {
    let mut pack_result = PackEnvSetupResult {
        pack_ref: pack_ref.to_string(),
        environments_created: Vec::new(),
        environments_skipped: Vec::new(),
        errors: Vec::new(),
    };

    // Missing directory is not recorded as an error: the pack's files may
    // simply not be present on this particular worker's filesystem.
    let pack_dir = packs_base_dir.join(pack_ref);
    if !pack_dir.exists() {
        debug!(
            "Pack directory '{}' does not exist on disk, skipping",
            pack_dir.display()
        );
        return pack_result;
    }
    // Get all actions for this pack
    let actions = match ActionRepository::find_by_pack(db_pool, pack_id).await {
        Ok(a) => a,
        Err(e) => {
            let msg = format!("Failed to load actions for pack '{}': {}", pack_ref, e);
            warn!("{}", msg);
            pack_result.errors.push(msg);
            return pack_result;
        }
    };
    // Collect unique runtime IDs referenced by actions in this pack, so each
    // environment is set up at most once even if many actions share a runtime.
    let mut seen_runtime_ids = HashSet::new();
    for action in &actions {
        if let Some(runtime_id) = action.runtime {
            seen_runtime_ids.insert(runtime_id);
        }
    }
    if seen_runtime_ids.is_empty() {
        debug!("Pack '{}' has no actions with runtimes, skipping", pack_ref);
        return pack_result;
    }
    for runtime_id in seen_runtime_ids {
        let rt = match runtime_map.get(&runtime_id) {
            Some(r) => r,
            None => {
                // Try fetching from DB directly (might be a newly added runtime)
                match RuntimeRepository::find_by_id(db_pool, runtime_id).await {
                    Ok(Some(r)) => {
                        // Can't insert into the borrowed map, so just use it inline
                        // (process the runtime here and skip the shared path below).
                        let rt_name = r.name.to_lowercase();
                        process_runtime_for_pack(
                            &r,
                            &rt_name,
                            pack_ref,
                            runtime_filter,
                            &pack_dir,
                            packs_base_dir,
                            runtime_envs_dir,
                            &mut pack_result,
                        )
                        .await;
                        continue;
                    }
                    Ok(None) => {
                        debug!("Runtime ID {} not found in database, skipping", runtime_id);
                        continue;
                    }
                    Err(e) => {
                        // Lookup failure for one runtime shouldn't abort the
                        // whole pack; log and move on.
                        warn!("Failed to load runtime {}: {}", runtime_id, e);
                        continue;
                    }
                }
            }
        };
        // Lowercase name is the canonical key for filters and env directories.
        let rt_name = rt.name.to_lowercase();
        process_runtime_for_pack(
            rt,
            &rt_name,
            pack_ref,
            runtime_filter,
            &pack_dir,
            packs_base_dir,
            runtime_envs_dir,
            &mut pack_result,
        )
        .await;
    }
    if !pack_result.environments_created.is_empty() {
        info!(
            "Pack '{}': created environments for {:?}",
            pack_ref, pack_result.environments_created,
        );
    }
    pack_result
}
/// Process a single runtime for a pack: check filters, check if env exists, create if needed.
///
/// Outcomes are recorded in `pack_result` (created / skipped / errors); this
/// helper never aborts the caller's loop.
#[allow(clippy::too_many_arguments)]
async fn process_runtime_for_pack(
    rt: &attune_common::models::Runtime,
    rt_name: &str,
    pack_ref: &str,
    runtime_filter: Option<&[String]>,
    pack_dir: &Path,
    packs_base_dir: &Path,
    runtime_envs_dir: &Path,
    pack_result: &mut PackEnvSetupResult,
) {
    // Apply worker runtime filter: a runtime outside the filter is simply not
    // this worker's responsibility.
    let filtered_out = runtime_filter
        .map(|filter| !filter.iter().any(|candidate| candidate == rt_name))
        .unwrap_or(false);
    if filtered_out {
        debug!(
            "Runtime '{}' not in worker filter, skipping for pack '{}'",
            rt_name, pack_ref,
        );
        return;
    }

    // A runtime with neither an environment config nor pack dependencies has
    // nothing to set up.
    let exec_config = rt.parsed_execution_config();
    if exec_config.environment.is_none() && !exec_config.has_dependencies(pack_dir) {
        debug!(
            "Runtime '{}' has no environment config, skipping for pack '{}'",
            rt_name, pack_ref,
        );
        pack_result.environments_skipped.push(rt_name.to_string());
        return;
    }

    let env_dir = runtime_envs_dir.join(pack_ref).join(rt_name);

    // A throwaway ProcessRuntime performs the actual setup work.
    let process_runtime = ProcessRuntime::new(
        rt_name.to_string(),
        exec_config,
        packs_base_dir.to_path_buf(),
        runtime_envs_dir.to_path_buf(),
    );

    let setup = process_runtime
        .setup_pack_environment(pack_dir, &env_dir)
        .await;

    if let Err(e) = setup {
        let msg = format!(
            "Failed to set up '{}' environment for pack '{}': {}",
            rt_name, pack_ref, e,
        );
        warn!("{}", msg);
        pack_result.errors.push(msg);
    } else {
        // setup_pack_environment is idempotent — it logs whether it created
        // the env or found it already existing.
        pack_result.environments_created.push(rt_name.to_string());
    }
}
/// Determine the runtime filter from the `ATTUNE_WORKER_RUNTIMES` environment variable.
///
/// The variable is a comma-separated list; entries are trimmed, lowercased,
/// and empty entries are dropped.
///
/// Returns `None` if the variable is not set (meaning all runtimes are accepted).
pub fn runtime_filter_from_env() -> Option<Vec<String>> {
    let raw = std::env::var("ATTUNE_WORKER_RUNTIMES").ok()?;
    let names = raw
        .split(',')
        .filter_map(|piece| {
            let normalized = piece.trim().to_lowercase();
            (!normalized.is_empty()).then_some(normalized)
        })
        .collect();
    Some(names)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// Serializes access to the process-global `ATTUNE_WORKER_RUNTIMES`
    /// variable. `cargo test` runs tests on multiple threads, and without this
    /// lock the three env-var tests below race on shared process state (e.g.
    /// `not_set` could observe a value set by another test mid-flight).
    /// `into_inner` recovers from poisoning so one failing test doesn't
    /// cascade into the others.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    #[test]
    fn test_runtime_filter_from_env_not_set() {
        let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        // When ATTUNE_WORKER_RUNTIMES is not set, filter should be None
        std::env::remove_var("ATTUNE_WORKER_RUNTIMES");
        assert!(runtime_filter_from_env().is_none());
    }

    #[test]
    fn test_runtime_filter_from_env_set() {
        let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        // Entries are trimmed and lowercased.
        std::env::set_var("ATTUNE_WORKER_RUNTIMES", "shell,Python, Node");
        let filter = runtime_filter_from_env().unwrap();
        assert_eq!(filter, vec!["shell", "python", "node"]);
        std::env::remove_var("ATTUNE_WORKER_RUNTIMES");
    }

    #[test]
    fn test_runtime_filter_from_env_empty() {
        let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        // Set-but-empty yields Some(empty) — distinct from unset (None).
        std::env::set_var("ATTUNE_WORKER_RUNTIMES", "");
        let filter = runtime_filter_from_env().unwrap();
        assert!(filter.is_empty());
        std::env::remove_var("ATTUNE_WORKER_RUNTIMES");
    }

    #[test]
    fn test_pack_env_setup_result_defaults() {
        let result = PackEnvSetupResult {
            pack_ref: "test".to_string(),
            environments_created: vec![],
            environments_skipped: vec![],
            errors: vec![],
        };
        assert_eq!(result.pack_ref, "test");
        assert!(result.environments_created.is_empty());
        assert!(result.errors.is_empty());
    }

    #[test]
    fn test_startup_scan_result_defaults() {
        let result = StartupScanResult {
            packs_scanned: 0,
            environments_created: 0,
            environments_skipped: 0,
            errors: vec![],
        };
        assert_eq!(result.packs_scanned, 0);
        assert_eq!(result.environments_created, 0);
        assert!(result.errors.is_empty());
    }
}

View File

@@ -7,6 +7,7 @@ use attune_common::error::{Error, Result};
use attune_common::models::{runtime::Runtime as RuntimeModel, Action, Execution, ExecutionStatus};
use attune_common::repositories::execution::{ExecutionRepository, UpdateExecutionInput};
use attune_common::repositories::{FindById, Update};
use std::path::PathBuf as StdPathBuf;
use serde_json::Value as JsonValue;
use sqlx::PgPool;
@@ -78,7 +79,12 @@ impl ActionExecutor {
Ok(ctx) => ctx,
Err(e) => {
error!("Failed to prepare execution context: {}", e);
self.handle_execution_failure(execution_id, None).await?;
self.handle_execution_failure(
execution_id,
None,
Some(&format!("Failed to prepare execution context: {}", e)),
)
.await?;
return Err(e);
}
};
@@ -91,7 +97,12 @@ impl ActionExecutor {
Err(e) => {
error!("Action execution failed catastrophically: {}", e);
// This should only happen for unrecoverable errors like runtime not found
self.handle_execution_failure(execution_id, None).await?;
self.handle_execution_failure(
execution_id,
None,
Some(&format!("Action execution failed: {}", e)),
)
.await?;
return Err(e);
}
};
@@ -112,7 +123,7 @@ impl ActionExecutor {
if is_success {
self.handle_execution_success(execution_id, &result).await?;
} else {
self.handle_execution_failure(execution_id, Some(&result))
self.handle_execution_failure(execution_id, Some(&result), None)
.await?;
}
@@ -306,18 +317,23 @@ impl ActionExecutor {
let timeout = Some(300_u64);
// Load runtime information if specified
let runtime_name = if let Some(runtime_id) = action.runtime {
match sqlx::query_as::<_, RuntimeModel>("SELECT * FROM runtime WHERE id = $1")
.bind(runtime_id)
.fetch_optional(&self.pool)
.await
let runtime_record = if let Some(runtime_id) = action.runtime {
match sqlx::query_as::<_, RuntimeModel>(
r#"SELECT id, ref, pack, pack_ref, description, name,
distributions, installation, installers, execution_config,
created, updated
FROM runtime WHERE id = $1"#,
)
.bind(runtime_id)
.fetch_optional(&self.pool)
.await
{
Ok(Some(runtime)) => {
debug!(
"Loaded runtime '{}' for action '{}'",
runtime.name, action.r#ref
"Loaded runtime '{}' (ref: {}) for action '{}'",
runtime.name, runtime.r#ref, action.r#ref
);
Some(runtime.name.to_lowercase())
Some(runtime)
}
Ok(None) => {
warn!(
@@ -338,15 +354,16 @@ impl ActionExecutor {
None
};
let runtime_name = runtime_record.as_ref().map(|r| r.name.to_lowercase());
// Determine the pack directory for this action
let pack_dir = self.packs_base_dir.join(&action.pack_ref);
// Construct code_path for pack actions
// Pack actions have their script files in packs/{pack_ref}/actions/{entrypoint}
let code_path = if action.pack_ref.starts_with("core") || !action.is_adhoc {
// This is a pack action, construct the file path
let action_file_path = self
.packs_base_dir
.join(&action.pack_ref)
.join("actions")
.join(&entry_point);
let action_file_path = pack_dir.join("actions").join(&entry_point);
if action_file_path.exists() {
Some(action_file_path)
@@ -368,6 +385,15 @@ impl ActionExecutor {
None
};
// Resolve the working directory from the runtime's execution_config.
// The ProcessRuntime also does this internally, but setting it in the
// context allows the executor to override if needed.
let working_dir: Option<StdPathBuf> = if pack_dir.exists() {
Some(pack_dir)
} else {
None
};
let context = ExecutionContext {
execution_id: execution.id,
action_ref: execution.action_ref.clone(),
@@ -375,7 +401,7 @@ impl ActionExecutor {
env,
secrets, // Passed securely via stdin
timeout,
working_dir: None, // Could be configured per action
working_dir,
entry_point,
code,
code_path,
@@ -482,6 +508,7 @@ impl ActionExecutor {
&self,
execution_id: i64,
result: Option<&ExecutionResult>,
error_message: Option<&str>,
) -> Result<()> {
if let Some(r) = result {
error!(
@@ -489,7 +516,11 @@ impl ActionExecutor {
execution_id, r.exit_code, r.error, r.duration_ms
);
} else {
error!("Execution {} failed during preparation", execution_id);
error!(
"Execution {} failed during preparation: {}",
execution_id,
error_message.unwrap_or("unknown error")
);
}
let exec_dir = self.artifact_manager.get_execution_dir(execution_id);
@@ -531,9 +562,15 @@ impl ActionExecutor {
} else {
// No execution result available (early failure during setup/preparation)
// This should be rare - most errors should be captured in ExecutionResult
result_data["error"] = serde_json::json!("Execution failed during preparation");
let err_msg = error_message.unwrap_or("Execution failed during preparation");
result_data["error"] = serde_json::json!(err_msg);
warn!("Execution {} failed without ExecutionResult - this indicates an early/catastrophic failure", execution_id);
warn!(
"Execution {} failed without ExecutionResult - {}: {}",
execution_id,
"early/catastrophic failure",
err_msg
);
// Check if stderr log exists and is non-empty from artifact storage
let stderr_path = exec_dir.join("stderr.log");

View File

@@ -4,6 +4,7 @@
//! which executes actions in various runtime environments.
pub mod artifacts;
pub mod env_setup;
pub mod executor;
pub mod heartbeat;
pub mod registration;
@@ -16,7 +17,7 @@ pub use executor::ActionExecutor;
pub use heartbeat::HeartbeatManager;
pub use registration::WorkerRegistration;
pub use runtime::{
ExecutionContext, ExecutionResult, LocalRuntime, NativeRuntime, PythonRuntime, Runtime,
ExecutionContext, ExecutionResult, LocalRuntime, NativeRuntime, ProcessRuntime, Runtime,
RuntimeError, RuntimeResult, ShellRuntime,
};
pub use secrets::SecretManager;

View File

@@ -1,28 +1,51 @@
//! Local Runtime Module
//!
//! Provides local execution capabilities by combining Python and Shell runtimes.
//! Provides local execution capabilities by combining Process and Shell runtimes.
//! This module serves as a facade for all local process-based execution.
//!
//! The `ProcessRuntime` is used for Python (and other interpreted languages),
//! driven by `RuntimeExecutionConfig` rather than language-specific Rust code.
use super::native::NativeRuntime;
use super::python::PythonRuntime;
use super::process::ProcessRuntime;
use super::shell::ShellRuntime;
use super::{ExecutionContext, ExecutionResult, Runtime, RuntimeError, RuntimeResult};
use async_trait::async_trait;
use attune_common::models::runtime::{InterpreterConfig, RuntimeExecutionConfig};
use std::path::PathBuf;
use tracing::{debug, info};
/// Local runtime that delegates to Python, Shell, or Native based on action type
/// Local runtime that delegates to Process, Shell, or Native based on action type
pub struct LocalRuntime {
native: NativeRuntime,
python: PythonRuntime,
python: ProcessRuntime,
shell: ShellRuntime,
}
impl LocalRuntime {
/// Create a new local runtime with default settings
/// Create a new local runtime with default settings.
///
/// Uses a default Python `RuntimeExecutionConfig` for the process runtime,
/// since this is a fallback when runtimes haven't been loaded from the database.
pub fn new() -> Self {
let python_config = RuntimeExecutionConfig {
interpreter: InterpreterConfig {
binary: "python3".to_string(),
args: vec![],
file_extension: Some(".py".to_string()),
},
environment: None,
dependencies: None,
};
Self {
native: NativeRuntime::new(),
python: PythonRuntime::new(),
python: ProcessRuntime::new(
"python".to_string(),
python_config,
PathBuf::from("/opt/attune/packs"),
PathBuf::from("/opt/attune/runtime_envs"),
),
shell: ShellRuntime::new(),
}
}
@@ -30,7 +53,7 @@ impl LocalRuntime {
/// Create a local runtime with custom runtimes
pub fn with_runtimes(
native: NativeRuntime,
python: PythonRuntime,
python: ProcessRuntime,
shell: ShellRuntime,
) -> Self {
Self {
@@ -46,7 +69,10 @@ impl LocalRuntime {
debug!("Selected Native runtime for action: {}", context.action_ref);
Ok(&self.native)
} else if self.python.can_execute(context) {
debug!("Selected Python runtime for action: {}", context.action_ref);
debug!(
"Selected Python (ProcessRuntime) for action: {}",
context.action_ref
);
Ok(&self.python)
} else if self.shell.can_execute(context) {
debug!("Selected Shell runtime for action: {}", context.action_ref);
@@ -126,40 +152,6 @@ mod tests {
use crate::runtime::{OutputFormat, ParameterDelivery, ParameterFormat};
use std::collections::HashMap;
#[tokio::test]
async fn test_local_runtime_python() {
let runtime = LocalRuntime::new();
let context = ExecutionContext {
execution_id: 1,
action_ref: "test.python_action".to_string(),
parameters: HashMap::new(),
env: HashMap::new(),
secrets: HashMap::new(),
timeout: Some(10),
working_dir: None,
entry_point: "run".to_string(),
code: Some(
r#"
def run():
return "hello from python"
"#
.to_string(),
),
code_path: None,
runtime_name: Some("python".to_string()),
max_stdout_bytes: 10 * 1024 * 1024,
max_stderr_bytes: 10 * 1024 * 1024,
parameter_delivery: ParameterDelivery::default(),
parameter_format: ParameterFormat::default(),
output_format: OutputFormat::default(),
};
assert!(runtime.can_execute(&context));
let result = runtime.execute(context).await.unwrap();
assert!(result.is_success());
}
#[tokio::test]
async fn test_local_runtime_shell() {
let runtime = LocalRuntime::new();

View File

@@ -1,21 +1,28 @@
//! Runtime Module
//!
//! Provides runtime abstraction and implementations for executing actions
//! in different environments (Python, Shell, Node.js, Containers).
//! in different environments. The primary runtime is `ProcessRuntime`, a
//! generic, configuration-driven runtime that reads its behavior from the
//! database `runtime.execution_config` JSONB column.
//!
//! Language-specific runtimes (Python, Node.js, etc.) are NOT implemented
//! as separate Rust types. Instead, the `ProcessRuntime` handles all
//! languages by using the interpreter, environment, and dependency
//! configuration stored in the database.
pub mod dependency;
pub mod local;
pub mod log_writer;
pub mod native;
pub mod parameter_passing;
pub mod python;
pub mod python_venv;
pub mod process;
pub mod process_executor;
pub mod shell;
// Re-export runtime implementations
pub use local::LocalRuntime;
pub use native::NativeRuntime;
pub use python::PythonRuntime;
pub use process::ProcessRuntime;
pub use shell::ShellRuntime;
use async_trait::async_trait;
@@ -31,7 +38,6 @@ pub use dependency::{
};
pub use log_writer::{BoundedLogResult, BoundedLogWriter};
pub use parameter_passing::{ParameterDeliveryConfig, PreparedParameters};
pub use python_venv::PythonVenvManager;
// Re-export parameter types from common
pub use attune_common::models::{OutputFormat, ParameterDelivery, ParameterFormat};

View File

@@ -92,9 +92,13 @@ fn format_dotenv(parameters: &HashMap<String, JsonValue>) -> Result<String, Runt
Ok(lines.join("\n"))
}
/// Format parameters as JSON
/// Format parameters as JSON (compact, single-line)
///
/// Uses compact format so that actions reading stdin line-by-line
/// (e.g., `json.loads(sys.stdin.readline())`) receive the entire
/// JSON object on a single line.
fn format_json(parameters: &HashMap<String, JsonValue>) -> Result<String, RuntimeError> {
serde_json::to_string_pretty(parameters).map_err(|e| {
serde_json::to_string(parameters).map_err(|e| {
RuntimeError::ExecutionFailed(format!("Failed to serialize parameters to JSON: {}", e))
})
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,495 @@
//! Shared Process Executor
//!
//! Provides common subprocess execution infrastructure used by all runtime
//! implementations. Handles streaming stdout/stderr capture, bounded log
//! collection, timeout management, stdin parameter/secret delivery, and
//! output format parsing.
use super::{BoundedLogWriter, ExecutionResult, OutputFormat, RuntimeResult};
use std::collections::HashMap;
use std::path::Path;
use std::time::Instant;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::process::Command;
use tokio::time::timeout;
use tracing::{debug, warn};
/// Execute a subprocess command with streaming output capture.
///
/// This is the core execution function used by all runtime implementations.
/// It handles:
/// - Spawning the process with piped I/O
/// - Writing parameters and secrets to stdin
/// - Streaming stdout/stderr with bounded log collection
/// - Timeout management
/// - Output format parsing (JSON, YAML, JSONL, text)
///
/// # Arguments
/// * `cmd` - Pre-configured `Command` (interpreter, args, env vars, working dir already set)
/// * `secrets` - Secrets to pass via stdin (as JSON)
/// * `parameters_stdin` - Optional parameter data to write to stdin before secrets
/// * `timeout_secs` - Optional execution timeout in seconds
/// * `max_stdout_bytes` - Maximum stdout size before truncation
/// * `max_stderr_bytes` - Maximum stderr size before truncation
/// * `output_format` - How to parse stdout (Text, Json, Yaml, Jsonl)
///
/// # Errors
/// Returns `Err` only if the process fails to spawn; all later failures
/// (stdin write errors, non-zero exit, timeout) are reported inside the
/// returned `ExecutionResult`.
pub async fn execute_streaming(
    mut cmd: Command,
    secrets: &HashMap<String, String>,
    parameters_stdin: Option<&str>,
    timeout_secs: Option<u64>,
    max_stdout_bytes: usize,
    max_stderr_bytes: usize,
    output_format: OutputFormat,
) -> RuntimeResult<ExecutionResult> {
    let start = Instant::now();
    // Spawn process with piped I/O
    let mut child = cmd
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()?;
    // Write to stdin - parameters (if using stdin delivery) and/or secrets.
    // If this fails, the process has already started, so we continue and capture output.
    // The first error encountered short-circuits further stdin writes and is
    // saved for error classification after the process exits.
    let stdin_write_error = if let Some(mut stdin) = child.stdin.take() {
        let mut error = None;
        // Write parameters first if using stdin delivery; a fixed delimiter
        // line separates the parameter payload from the secrets that follow.
        if let Some(params_data) = parameters_stdin {
            if let Err(e) = stdin.write_all(params_data.as_bytes()).await {
                error = Some(format!("Failed to write parameters to stdin: {}", e));
            } else if let Err(e) = stdin.write_all(b"\n---ATTUNE_PARAMS_END---\n").await {
                error = Some(format!("Failed to write parameter delimiter: {}", e));
            }
        }
        // Write secrets as JSON (always, for backward compatibility)
        if error.is_none() && !secrets.is_empty() {
            match serde_json::to_string(secrets) {
                Ok(secrets_json) => {
                    if let Err(e) = stdin.write_all(secrets_json.as_bytes()).await {
                        error = Some(format!("Failed to write secrets to stdin: {}", e));
                    } else if let Err(e) = stdin.write_all(b"\n").await {
                        error = Some(format!("Failed to write newline to stdin: {}", e));
                    }
                }
                Err(e) => error = Some(format!("Failed to serialize secrets: {}", e)),
            }
        }
        // Explicit drop closes the child's stdin so it sees EOF.
        drop(stdin);
        error
    } else {
        None
    };
    // Create bounded writers (enforce max_stdout_bytes / max_stderr_bytes)
    let mut stdout_writer = BoundedLogWriter::new_stdout(max_stdout_bytes);
    let mut stderr_writer = BoundedLogWriter::new_stderr(max_stderr_bytes);
    // Take stdout and stderr streams
    let stdout = child.stdout.take().expect("stdout not captured");
    let stderr = child.stderr.take().expect("stderr not captured");
    // Create buffered readers
    let mut stdout_reader = BufReader::new(stdout);
    let mut stderr_reader = BufReader::new(stderr);
    // Stream both outputs concurrently. Each task reads line-by-line until EOF
    // (or a writer error) and then yields its writer back so the results can
    // be extracted after the join.
    let stdout_task = async {
        let mut line = Vec::new();
        loop {
            line.clear();
            match stdout_reader.read_until(b'\n', &mut line).await {
                Ok(0) => break, // EOF
                Ok(_) => {
                    if stdout_writer.write_all(&line).await.is_err() {
                        break;
                    }
                }
                Err(_) => break,
            }
        }
        stdout_writer
    };
    let stderr_task = async {
        let mut line = Vec::new();
        loop {
            line.clear();
            match stderr_reader.read_until(b'\n', &mut line).await {
                Ok(0) => break, // EOF
                Ok(_) => {
                    if stderr_writer.write_all(&line).await.is_err() {
                        break;
                    }
                }
                Err(_) => break,
            }
        }
        stderr_writer
    };
    // Wait for both streams and the process. Draining stdout/stderr while
    // waiting prevents the child from blocking on a full pipe buffer.
    let (stdout_writer, stderr_writer, wait_result) =
        tokio::join!(stdout_task, stderr_task, async {
            if let Some(timeout_secs) = timeout_secs {
                timeout(std::time::Duration::from_secs(timeout_secs), child.wait()).await
            } else {
                Ok(child.wait().await)
            }
        });
    let duration_ms = start.elapsed().as_millis() as u64;
    // Get results from bounded writers
    let stdout_result = stdout_writer.into_result();
    let stderr_result = stderr_writer.into_result();
    // Handle process wait result
    let (exit_code, process_error) = match wait_result {
        Ok(Ok(status)) => (status.code().unwrap_or(-1), None),
        Ok(Err(e)) => {
            warn!("Process wait failed but captured output: {}", e);
            (-1, Some(format!("Process wait failed: {}", e)))
        }
        Err(_) => {
            // Timeout occurred — return early with whatever output was
            // captured before the deadline. `timeout_secs.unwrap()` cannot
            // panic here: this branch is only reachable when a timeout was set.
            return Ok(ExecutionResult {
                exit_code: -1,
                stdout: stdout_result.content.clone(),
                stderr: stderr_result.content.clone(),
                result: None,
                duration_ms,
                error: Some(format!(
                    "Execution timed out after {} seconds",
                    timeout_secs.unwrap()
                )),
                stdout_truncated: stdout_result.truncated,
                stderr_truncated: stderr_result.truncated,
                stdout_bytes_truncated: stdout_result.bytes_truncated,
                stderr_bytes_truncated: stderr_result.bytes_truncated,
            });
        }
    };
    debug!(
        "Process execution completed: exit_code={}, duration={}ms, stdout_truncated={}, stderr_truncated={}",
        exit_code, duration_ms, stdout_result.truncated, stderr_result.truncated
    );
    // Parse result from stdout based on output_format (only for successful,
    // non-empty output)
    let result = if exit_code == 0 && !stdout_result.content.trim().is_empty() {
        parse_output(&stdout_result.content, output_format)
    } else {
        None
    };
    // Determine error message. Priority: process wait error, then stdin write
    // error, then a message synthesized from a non-zero exit code.
    let error = if let Some(proc_err) = process_error {
        Some(proc_err)
    } else if let Some(stdin_err) = stdin_write_error {
        // Ignore broken pipe errors for fast-exiting successful actions.
        // These occur when the process exits before we finish writing secrets to stdin.
        // "os error 32" is EPIPE's raw errno form on Unix.
        let is_broken_pipe = stdin_err.contains("Broken pipe") || stdin_err.contains("os error 32");
        let is_fast_exit = duration_ms < 500;
        let is_success = exit_code == 0;
        if is_broken_pipe && is_fast_exit && is_success {
            debug!(
                "Ignoring broken pipe error for fast-exiting successful action ({}ms)",
                duration_ms
            );
            None
        } else {
            Some(stdin_err)
        }
    } else if exit_code != 0 {
        Some(if stderr_result.content.is_empty() {
            format!("Command exited with code {}", exit_code)
        } else {
            // Use last line of stderr as error, or full stderr if short
            if stderr_result.content.lines().count() > 5 {
                stderr_result
                    .content
                    .lines()
                    .last()
                    .unwrap_or("")
                    .to_string()
            } else {
                stderr_result.content.clone()
            }
        })
    } else {
        None
    };
    Ok(ExecutionResult {
        exit_code,
        // Only populate stdout if result wasn't parsed (avoid duplication)
        stdout: if result.is_some() {
            String::new()
        } else {
            stdout_result.content.clone()
        },
        stderr: stderr_result.content.clone(),
        result,
        duration_ms,
        error,
        stdout_truncated: stdout_result.truncated,
        stderr_truncated: stderr_result.truncated,
        stdout_bytes_truncated: stdout_result.bytes_truncated,
        stderr_bytes_truncated: stderr_result.bytes_truncated,
    })
}
/// Parse stdout content according to the specified output format.
///
/// Returns `None` for empty/whitespace-only output, for `Text` (which is
/// never parsed), and when parsing fails.
fn parse_output(stdout: &str, format: OutputFormat) -> Option<serde_json::Value> {
    let trimmed = stdout.trim();
    if trimmed.is_empty() {
        return None;
    }
    match format {
        // Text output is never parsed — it stays in the stdout field.
        OutputFormat::Text => None,
        // Try the whole output as one JSON document first (handles multi-line
        // JSON), then fall back to the final line only, for scripts that log
        // before printing their result.
        OutputFormat::Json => serde_json::from_str(trimmed).ok().or_else(|| {
            let last_line = trimmed.lines().last()?;
            serde_json::from_str(last_line).ok()
        }),
        // Whole output as a single YAML document.
        OutputFormat::Yaml => serde_yaml_ng::from_str(trimmed).ok(),
        // One JSON value per line, collected into an array; unparseable lines
        // are dropped rather than failing the whole parse.
        OutputFormat::Jsonl => {
            let values: Vec<serde_json::Value> = trimmed
                .lines()
                .filter_map(|line| serde_json::from_str(line).ok())
                .collect();
            if values.is_empty() {
                None
            } else {
                Some(serde_json::Value::Array(values))
            }
        }
    }
}
/// Build a `Command` for executing an action script with the given interpreter.
///
/// This configures the command with:
/// - The interpreter binary and any additional args
/// - The action file path as the final argument
/// - Environment variables from the execution context
/// - Working directory (pack directory)
///
/// # Arguments
/// * `interpreter` - Path to the interpreter binary
/// * `interpreter_args` - Additional args before the action file
/// * `action_file` - Path to the action script file
/// * `working_dir` - Working directory for the process (typically the pack dir)
/// * `env_vars` - Environment variables to set
pub fn build_action_command(
    interpreter: &Path,
    interpreter_args: &[String],
    action_file: &Path,
    working_dir: Option<&Path>,
    env_vars: &HashMap<String, String>,
) -> Command {
    let mut command = Command::new(interpreter);
    // Interpreter flags (e.g. "-u" for unbuffered Python) come first,
    // followed by the script path as the final positional argument.
    command.args(interpreter_args).arg(action_file);
    // Only switch the working directory when it actually exists; spawning
    // with a missing cwd would fail outright.
    if let Some(dir) = working_dir.filter(|d| d.exists()) {
        command.current_dir(dir);
    }
    // Inject the execution context's environment variables.
    command.envs(env_vars);
    command
}
/// Build a `Command` for executing inline code with the given interpreter.
///
/// This is used for ad-hoc/inline actions where code is passed as a string
/// rather than a file path.
///
/// # Arguments
/// * `interpreter` - Path to the interpreter binary
/// * `code` - The inline code to execute
/// * `env_vars` - Environment variables to set
pub fn build_inline_command(
    interpreter: &Path,
    code: &str,
    env_vars: &HashMap<String, String>,
) -> Command {
    let mut command = Command::new(interpreter);
    // "-c" is the conventional "run this string" switch shared by bash,
    // python, and most other interpreters.
    command.arg("-c").arg(code);
    // Inject the execution context's environment variables.
    command.envs(env_vars);
    command
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_output_text() {
        // Text format never produces a parsed result.
        assert!(parse_output("hello world", OutputFormat::Text).is_none());
    }

    #[test]
    fn test_parse_output_json() {
        let parsed = parse_output(r#"{"key": "value"}"#, OutputFormat::Json)
            .expect("valid JSON should parse");
        assert_eq!(parsed["key"], "value");
    }

    #[test]
    fn test_parse_output_json_with_log_prefix() {
        // Log lines ahead of the JSON payload must not break parsing:
        // the parser falls back to the last line of output.
        let input = "some log line\nanother log\n{\"key\": \"value\"}";
        let parsed =
            parse_output(input, OutputFormat::Json).expect("last line should parse as JSON");
        assert_eq!(parsed["key"], "value");
    }

    #[test]
    fn test_parse_output_jsonl() {
        let parsed = parse_output("{\"a\": 1}\n{\"b\": 2}\n{\"c\": 3}", OutputFormat::Jsonl)
            .expect("JSONL lines should parse");
        assert_eq!(parsed.as_array().expect("should be an array").len(), 3);
    }

    #[test]
    fn test_parse_output_yaml() {
        let parsed = parse_output("key: value\nother: 42", OutputFormat::Yaml)
            .expect("valid YAML should parse");
        assert_eq!(parsed["key"], "value");
        assert_eq!(parsed["other"], 42);
    }

    #[test]
    fn test_parse_output_empty() {
        // Empty / whitespace-only input yields no result for any format.
        assert!(parse_output("", OutputFormat::Json).is_none());
        assert!(parse_output("   ", OutputFormat::Yaml).is_none());
        assert!(parse_output("\n", OutputFormat::Jsonl).is_none());
    }

    #[tokio::test]
    async fn test_execute_streaming_simple() {
        let mut echo = Command::new("/bin/echo");
        echo.arg("hello world");
        let outcome = execute_streaming(
            echo,
            &HashMap::new(),
            None,
            Some(10),
            1024 * 1024,
            1024 * 1024,
            OutputFormat::Text,
        )
        .await
        .expect("execution should succeed");
        assert_eq!(outcome.exit_code, 0);
        assert!(outcome.error.is_none());
        assert!(outcome.stdout.contains("hello world"));
    }

    #[tokio::test]
    async fn test_execute_streaming_json_output() {
        let mut script = Command::new("/bin/bash");
        script
            .arg("-c")
            .arg(r#"echo '{"status": "ok", "count": 42}'"#);
        let outcome = execute_streaming(
            script,
            &HashMap::new(),
            None,
            Some(10),
            1024 * 1024,
            1024 * 1024,
            OutputFormat::Json,
        )
        .await
        .expect("execution should succeed");
        assert_eq!(outcome.exit_code, 0);
        let parsed = outcome.result.expect("JSON output should be parsed");
        assert_eq!(parsed["status"], "ok");
        assert_eq!(parsed["count"], 42);
    }

    #[tokio::test]
    async fn test_execute_streaming_failure() {
        let mut script = Command::new("/bin/bash");
        script.arg("-c").arg("echo 'error msg' >&2; exit 1");
        let outcome = execute_streaming(
            script,
            &HashMap::new(),
            None,
            Some(10),
            1024 * 1024,
            1024 * 1024,
            OutputFormat::Text,
        )
        .await
        .expect("execute_streaming itself should not error");
        assert_eq!(outcome.exit_code, 1);
        assert!(outcome.error.is_some());
        assert!(outcome.stderr.contains("error msg"));
    }

    #[tokio::test]
    async fn test_build_action_command() {
        let mut env = HashMap::new();
        env.insert("ATTUNE_EXEC_ID".to_string(), "123".to_string());
        let cmd = build_action_command(
            Path::new("/usr/bin/python3"),
            &["-u".to_string()],
            Path::new("/opt/attune/packs/mypack/actions/hello.py"),
            None,
            &env,
        );
        // Command internals aren't easily inspectable; building without
        // panicking is the assertion here.
        drop(cmd);
    }
}

View File

@@ -10,29 +10,34 @@ use attune_common::models::ExecutionStatus;
use attune_common::mq::{
config::MessageQueueConfig as MqConfig, Connection, Consumer, ConsumerConfig,
ExecutionCompletedPayload, ExecutionStatusChangedPayload, MessageEnvelope, MessageType,
Publisher, PublisherConfig,
PackRegisteredPayload, Publisher, PublisherConfig,
};
use attune_common::repositories::{execution::ExecutionRepository, FindById};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use tokio::task::JoinHandle;
use tracing::{error, info, warn};
use tracing::{debug, error, info, warn};
use crate::artifacts::ArtifactManager;
use crate::env_setup;
use crate::executor::ActionExecutor;
use crate::heartbeat::HeartbeatManager;
use crate::registration::WorkerRegistration;
use crate::runtime::local::LocalRuntime;
use crate::runtime::native::NativeRuntime;
use crate::runtime::python::PythonRuntime;
use crate::runtime::process::ProcessRuntime;
use crate::runtime::shell::ShellRuntime;
use crate::runtime::{DependencyManagerRegistry, PythonVenvManager, RuntimeRegistry};
use crate::runtime::RuntimeRegistry;
use crate::secrets::SecretManager;
use attune_common::repositories::runtime::RuntimeRepository;
use attune_common::repositories::List;
/// Message payload for execution.scheduled events
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionScheduledPayload {
@@ -53,7 +58,15 @@ pub struct WorkerService {
publisher: Arc<Publisher>,
consumer: Option<Arc<Consumer>>,
consumer_handle: Option<JoinHandle<()>>,
pack_consumer: Option<Arc<Consumer>>,
pack_consumer_handle: Option<JoinHandle<()>>,
worker_id: Option<i64>,
/// Runtime filter derived from ATTUNE_WORKER_RUNTIMES
runtime_filter: Option<Vec<String>>,
/// Base directory for pack files
packs_base_dir: PathBuf,
/// Base directory for isolated runtime environments
runtime_envs_dir: PathBuf,
}
impl WorkerService {
@@ -119,86 +132,104 @@ impl WorkerService {
let artifact_manager = ArtifactManager::new(artifact_base_dir);
artifact_manager.initialize().await?;
let packs_base_dir = std::path::PathBuf::from(&config.packs_base_dir);
let runtime_envs_dir = std::path::PathBuf::from(&config.runtime_envs_dir);
// Determine which runtimes to register based on configuration
// This reads from ATTUNE_WORKER_RUNTIMES env var (highest priority)
let configured_runtimes = if let Ok(runtimes_env) = std::env::var("ATTUNE_WORKER_RUNTIMES")
{
info!(
"Registering runtimes from ATTUNE_WORKER_RUNTIMES: {}",
runtimes_env
);
runtimes_env
.split(',')
.map(|s| s.trim().to_lowercase())
.filter(|s| !s.is_empty())
.collect::<Vec<String>>()
} else {
// Fallback to auto-detection if not configured
info!("No ATTUNE_WORKER_RUNTIMES found, registering all available runtimes");
vec![
"shell".to_string(),
"python".to_string(),
"native".to_string(),
]
};
info!("Configured runtimes: {:?}", configured_runtimes);
// Initialize dependency manager registry for isolated environments
let mut dependency_manager_registry = DependencyManagerRegistry::new();
// Only setup Python virtual environment manager if Python runtime is needed
if configured_runtimes.contains(&"python".to_string()) {
let venv_base_dir = std::path::PathBuf::from(
config
.worker
.as_ref()
.and_then(|w| w.name.clone())
.map(|name| format!("/tmp/attune/venvs/{}", name))
.unwrap_or_else(|| "/tmp/attune/venvs".to_string()),
);
let python_venv_manager = PythonVenvManager::new(venv_base_dir);
dependency_manager_registry.register(Box::new(python_venv_manager));
info!("Dependency manager initialized with Python venv support");
}
let dependency_manager_arc = Arc::new(dependency_manager_registry);
// ATTUNE_WORKER_RUNTIMES env var filters which runtimes this worker handles.
// If not set, all action runtimes from the database are loaded.
let runtime_filter: Option<Vec<String>> =
std::env::var("ATTUNE_WORKER_RUNTIMES").ok().map(|env_val| {
info!(
"Filtering runtimes from ATTUNE_WORKER_RUNTIMES: {}",
env_val
);
env_val
.split(',')
.map(|s| s.trim().to_lowercase())
.filter(|s| !s.is_empty())
.collect()
});
// Initialize runtime registry
let mut runtime_registry = RuntimeRegistry::new();
// Register runtimes based on configuration
for runtime_name in &configured_runtimes {
match runtime_name.as_str() {
"python" => {
let python_runtime = PythonRuntime::with_dependency_manager(
std::path::PathBuf::from("python3"),
std::path::PathBuf::from("/tmp/attune/actions"),
dependency_manager_arc.clone(),
// Load runtimes from the database and create ProcessRuntime instances.
// Each runtime row's `execution_config` JSONB drives how the ProcessRuntime
// invokes interpreters, manages environments, and installs dependencies.
// We skip runtimes with empty execution_config (e.g., the built-in sensor
// runtime) since they have no interpreter and cannot execute as a process.
match RuntimeRepository::list(&pool).await {
Ok(db_runtimes) => {
let executable_runtimes: Vec<_> = db_runtimes
.into_iter()
.filter(|r| {
let config = r.parsed_execution_config();
// A runtime is executable if it has a non-default interpreter
// (the default is "/bin/sh" from InterpreterConfig::default,
// but runtimes with no execution_config at all will have an
// empty JSON object that deserializes to defaults with no
// file_extension — those are not real process runtimes).
config.interpreter.file_extension.is_some()
|| r.execution_config != serde_json::json!({})
})
.collect();
info!(
"Found {} executable runtime(s) in database",
executable_runtimes.len()
);
for rt in executable_runtimes {
let rt_name = rt.name.to_lowercase();
// Apply filter if ATTUNE_WORKER_RUNTIMES is set
if let Some(ref filter) = runtime_filter {
if !filter.contains(&rt_name) {
debug!(
"Skipping runtime '{}' (not in ATTUNE_WORKER_RUNTIMES filter)",
rt_name
);
continue;
}
}
let exec_config = rt.parsed_execution_config();
let process_runtime = ProcessRuntime::new(
rt_name.clone(),
exec_config,
packs_base_dir.clone(),
runtime_envs_dir.clone(),
);
runtime_registry.register(Box::new(process_runtime));
info!(
"Registered ProcessRuntime '{}' from database (ref: {})",
rt_name, rt.r#ref
);
runtime_registry.register(Box::new(python_runtime));
info!("Registered Python runtime");
}
"shell" => {
runtime_registry.register(Box::new(ShellRuntime::new()));
info!("Registered Shell runtime");
}
"native" => {
runtime_registry.register(Box::new(NativeRuntime::new()));
info!("Registered Native runtime");
}
"node" => {
warn!("Node.js runtime requested but not yet implemented, skipping");
}
_ => {
warn!("Unknown runtime type '{}', skipping", runtime_name);
}
}
Err(e) => {
warn!(
"Failed to load runtimes from database: {}. \
Falling back to built-in defaults.",
e
);
}
}
// Only register local runtime as fallback if no specific runtimes configured
// (LocalRuntime contains Python/Shell/Native and tries to validate all)
if configured_runtimes.is_empty() {
// If no runtimes were loaded from the DB, register built-in defaults
if runtime_registry.list_runtimes().is_empty() {
info!("No runtimes loaded from database, registering built-in defaults");
// Shell runtime (always available)
runtime_registry.register(Box::new(ShellRuntime::new()));
info!("Registered built-in Shell runtime");
// Native runtime (for compiled binaries)
runtime_registry.register(Box::new(NativeRuntime::new()));
info!("Registered built-in Native runtime");
// Local runtime as catch-all fallback
let local_runtime = LocalRuntime::new();
runtime_registry.register(Box::new(local_runtime));
info!("Registered Local runtime (fallback)");
@@ -231,7 +262,6 @@ impl WorkerService {
.as_ref()
.map(|w| w.max_stderr_bytes)
.unwrap_or(10 * 1024 * 1024);
let packs_base_dir = std::path::PathBuf::from(&config.packs_base_dir);
// Get API URL from environment or construct from server config
let api_url = std::env::var("ATTUNE_API_URL")
@@ -244,7 +274,7 @@ impl WorkerService {
secret_manager,
max_stdout_bytes,
max_stderr_bytes,
packs_base_dir,
packs_base_dir.clone(),
api_url,
));
@@ -259,6 +289,9 @@ impl WorkerService {
heartbeat_interval,
));
// Capture the runtime filter for use in env setup
let runtime_filter_for_service = runtime_filter.clone();
Ok(Self {
config,
db_pool: pool,
@@ -269,7 +302,12 @@ impl WorkerService {
publisher: Arc::new(publisher),
consumer: None,
consumer_handle: None,
pack_consumer: None,
pack_consumer_handle: None,
worker_id: None,
runtime_filter: runtime_filter_for_service,
packs_base_dir,
runtime_envs_dir,
})
}
@@ -288,6 +326,7 @@ impl WorkerService {
info!("Worker registered with ID: {}", worker_id);
// Setup worker-specific message queue infrastructure
// (includes per-worker execution queue AND pack registration queue)
let mq_config = MqConfig::default();
self.mq_connection
.setup_worker_infrastructure(worker_id, &mq_config)
@@ -297,12 +336,20 @@ impl WorkerService {
})?;
info!("Worker-specific message queue infrastructure setup completed");
// Proactively set up runtime environments for all registered packs.
// This runs before we start consuming execution messages so that
// environments are ready by the time the first execution arrives.
self.scan_and_setup_environments().await;
// Start heartbeat
self.heartbeat.start().await?;
// Start consuming execution messages
self.start_execution_consumer().await?;
// Start consuming pack registration events
self.start_pack_consumer().await?;
info!("Worker Service started successfully");
Ok(())
@@ -316,6 +363,137 @@ impl WorkerService {
/// 3. Wait for in-flight tasks with timeout
/// 4. Close MQ connection
/// 5. Close DB connection
/// Scan all registered packs and create missing runtime environments.
async fn scan_and_setup_environments(&self) {
let filter_refs: Option<Vec<String>> = self.runtime_filter.clone();
let filter_slice: Option<&[String]> = filter_refs.as_deref();
let result = env_setup::scan_and_setup_all_environments(
&self.db_pool,
filter_slice,
&self.packs_base_dir,
&self.runtime_envs_dir,
)
.await;
if !result.errors.is_empty() {
warn!(
"Environment startup scan completed with {} error(s): {:?}",
result.errors.len(),
result.errors,
);
} else {
info!(
"Environment startup scan completed: {} pack(s) scanned, \
{} environment(s) ensured, {} skipped",
result.packs_scanned, result.environments_created, result.environments_skipped,
);
}
}
/// Start consuming pack.registered events from the per-worker packs queue.
async fn start_pack_consumer(&mut self) -> Result<()> {
let worker_id = self
.worker_id
.ok_or_else(|| Error::Internal("Worker not registered".to_string()))?;
let queue_name = format!("worker.{}.packs", worker_id);
info!(
"Starting pack registration consumer for queue: {}",
queue_name
);
let consumer = Arc::new(
Consumer::new(
&self.mq_connection,
ConsumerConfig {
queue: queue_name.clone(),
tag: format!("worker-{}-packs", worker_id),
prefetch_count: 5,
auto_ack: false,
exclusive: false,
},
)
.await
.map_err(|e| Error::Internal(format!("Failed to create pack consumer: {}", e)))?,
);
let db_pool = self.db_pool.clone();
let consumer_for_task = consumer.clone();
let queue_name_for_log = queue_name.clone();
let runtime_filter = self.runtime_filter.clone();
let packs_base_dir = self.packs_base_dir.clone();
let runtime_envs_dir = self.runtime_envs_dir.clone();
let handle = tokio::spawn(async move {
info!(
"Pack consumer loop started for queue '{}'",
queue_name_for_log
);
let result = consumer_for_task
.consume_with_handler(move |envelope: MessageEnvelope<PackRegisteredPayload>| {
let db_pool = db_pool.clone();
let runtime_filter = runtime_filter.clone();
let packs_base_dir = packs_base_dir.clone();
let runtime_envs_dir = runtime_envs_dir.clone();
async move {
info!(
"Received pack.registered event for pack '{}' (version {})",
envelope.payload.pack_ref, envelope.payload.version,
);
let filter_slice: Option<Vec<String>> = runtime_filter;
let filter_ref: Option<&[String]> = filter_slice.as_deref();
let pack_result = env_setup::setup_environments_for_registered_pack(
&db_pool,
&envelope.payload,
filter_ref,
&packs_base_dir,
&runtime_envs_dir,
)
.await;
if !pack_result.errors.is_empty() {
warn!(
"Pack '{}' environment setup had {} error(s): {:?}",
pack_result.pack_ref,
pack_result.errors.len(),
pack_result.errors,
);
} else if !pack_result.environments_created.is_empty() {
info!(
"Pack '{}' environments set up: {:?}",
pack_result.pack_ref, pack_result.environments_created,
);
}
Ok(())
}
})
.await;
match result {
Ok(()) => info!(
"Pack consumer loop for queue '{}' ended",
queue_name_for_log
),
Err(e) => error!(
"Pack consumer loop for queue '{}' failed: {}",
queue_name_for_log, e
),
}
});
self.pack_consumer = Some(consumer);
self.pack_consumer_handle = Some(handle);
info!("Pack registration consumer initialized");
Ok(())
}
pub async fn stop(&mut self) -> Result<()> {
info!("Stopping Worker Service - initiating graceful shutdown");
@@ -355,14 +533,20 @@ impl WorkerService {
Err(_) => warn!("Shutdown timeout reached - some tasks may have been interrupted"),
}
// 4. Abort consumer task and close message queue connection
// 4. Abort consumer tasks and close message queue connection
if let Some(handle) = self.consumer_handle.take() {
info!("Stopping consumer task...");
info!("Stopping execution consumer task...");
handle.abort();
// Wait briefly for the task to finish
let _ = handle.await;
}
if let Some(handle) = self.pack_consumer_handle.take() {
info!("Stopping pack consumer task...");
handle.abort();
let _ = handle.await;
}
info!("Closing message queue connection...");
if let Err(e) = self.mq_connection.close().await {
warn!("Error closing message queue: {}", e);