// attune/crates/executor/src/execution_manager.rs

//! Execution Manager - Handles execution orchestration and lifecycle events
//!
//! This module is responsible for:
//! - Listening for ExecutionStatusChanged messages from workers
//! - Orchestrating workflow executions (parent-child relationships)
//! - Triggering child executions when parent completes
//! - Handling execution failures and retries
//!
//! ## Ownership Model
//!
//! The Executor owns execution state until it is scheduled to a worker.
//! After scheduling, the Worker owns the state and updates the database directly.
//!
//! - **Executor owns**: Requested → Scheduling → Scheduled
//! - **Worker owns**: Running → Completed/Failed/Cancelled/Timeout
//!
//! The ExecutionManager receives status change notifications for orchestration
//! purposes (e.g., triggering child executions) but does NOT update the database.
use anyhow::Result;
use attune_common::{
models::{enums::ExecutionStatus, Execution},
mq::{
Consumer, ExecutionRequestedPayload, ExecutionStatusChangedPayload, MessageEnvelope,
MessageType, Publisher,
},
repositories::{
execution::{CreateExecutionInput, ExecutionRepository},
Create, FindById,
},
};
use sqlx::PgPool;
use std::sync::Arc;
use tracing::{debug, error, info, warn};
/// Execution manager that handles lifecycle and status updates
pub struct ExecutionManager {
// Postgres pool used only for read-side orchestration lookups (never
// for status writes — the worker owns those; see module docs).
pool: PgPool,
// Publisher for emitting ExecutionRequested messages for child executions.
publisher: Arc<Publisher>,
// Consumer delivering ExecutionStatusChanged notifications from workers.
consumer: Arc<Consumer>,
}
impl ExecutionManager {
/// Create a new execution manager
pub fn new(pool: PgPool, publisher: Arc<Publisher>, consumer: Arc<Consumer>) -> Self {
Self {
pool,
publisher,
consumer,
}
}
/// Start processing execution status messages
pub async fn start(&self) -> Result<()> {
info!("Starting execution manager");
let pool = self.pool.clone();
let publisher = self.publisher.clone();
// Use the handler pattern to consume messages
self.consumer
.consume_with_handler(
move |envelope: MessageEnvelope<ExecutionStatusChangedPayload>| {
let pool = pool.clone();
let publisher = publisher.clone();
async move {
if let Err(e) =
Self::process_status_change(&pool, &publisher, &envelope).await
{
error!("Error processing status change: {}", e);
// Return error to trigger nack with requeue
return Err(format!("Failed to process status change: {}", e).into());
}
Ok(())
}
},
)
.await?;
Ok(())
}
/// Process an execution status change message
///
/// NOTE: This method does NOT update the database. The worker is responsible
/// for updating execution state after the execution is scheduled. The executor
/// only handles orchestration logic (e.g., triggering workflow children).
async fn process_status_change(
pool: &PgPool,
publisher: &Publisher,
envelope: &MessageEnvelope<ExecutionStatusChangedPayload>,
) -> Result<()> {
debug!("Processing execution status change: {:?}", envelope);
let execution_id = envelope.payload.execution_id;
let status_str = &envelope.payload.new_status;
let status = Self::parse_execution_status(status_str)?;
debug!(
"Received status change notification for execution {}: {}",
execution_id, status_str
);
// Fetch execution from database (for orchestration logic)
let execution = ExecutionRepository::find_by_id(pool, execution_id)
.await?
.ok_or_else(|| anyhow::anyhow!("Execution not found: {}", execution_id))?;
// Handle orchestration logic based on status
// Note: Worker has already updated the database directly
match status {
ExecutionStatus::Completed | ExecutionStatus::Failed | ExecutionStatus::Cancelled => {
info!(
"Execution {} reached terminal state: {:?}, handling orchestration",
execution_id, status
);
Self::handle_completion(pool, publisher, &execution).await?;
}
ExecutionStatus::Running => {
debug!(
"Execution {} now running (worker has updated DB)",
execution_id
);
}
_ => {
debug!(
"Execution {} status changed to {:?} (no orchestration needed)",
execution_id, status
);
}
}
Ok(())
}
/// Parse execution status from string
fn parse_execution_status(status: &str) -> Result<ExecutionStatus> {
match status.to_lowercase().as_str() {
"requested" => Ok(ExecutionStatus::Requested),
"scheduling" => Ok(ExecutionStatus::Scheduling),
"scheduled" => Ok(ExecutionStatus::Scheduled),
"running" => Ok(ExecutionStatus::Running),
"completed" => Ok(ExecutionStatus::Completed),
"failed" => Ok(ExecutionStatus::Failed),
"cancelled" | "canceled" => Ok(ExecutionStatus::Cancelled),
"canceling" => Ok(ExecutionStatus::Canceling),
"abandoned" => Ok(ExecutionStatus::Abandoned),
"timeout" => Ok(ExecutionStatus::Timeout),
_ => Err(anyhow::anyhow!("Invalid execution status: {}", status)),
}
}
/// Handle execution completion (success, failure, or cancellation)
async fn handle_completion(
pool: &PgPool,
publisher: &Publisher,
execution: &Execution,
) -> Result<()> {
info!("Handling completion for execution: {}", execution.id);
// Check if this execution has child executions to trigger
if let Some(child_actions) = Self::get_child_actions(execution).await? {
// Only trigger children on completion
if execution.status == ExecutionStatus::Completed {
Self::trigger_child_executions(pool, publisher, execution, &child_actions).await?;
} else {
warn!(
"Execution {} failed/canceled, skipping child executions",
execution.id
);
}
}
// NOTE: Completion notification is published by the worker, not here.
// This prevents duplicate execution.completed messages that would cause
// the queue manager to decrement active_count twice.
Ok(())
}
/// Get child actions from execution result (for workflow orchestration)
async fn get_child_actions(_execution: &Execution) -> Result<Option<Vec<String>>> {
// TODO: Implement workflow logic
// - Check if action has defined workflow
// - Extract next actions from execution result
// - Parse workflow definition
// For now, return None (no child executions)
Ok(None)
}
/// Trigger child executions for a completed parent
async fn trigger_child_executions(
pool: &PgPool,
publisher: &Publisher,
parent: &Execution,
child_actions: &[String],
) -> Result<()> {
info!(
"Triggering {} child executions for parent: {}",
child_actions.len(),
parent.id
);
for action_ref in child_actions {
let child_input = CreateExecutionInput {
action: None,
action_ref: action_ref.clone(),
config: parent.config.clone(), // Pass parent config to child
env_vars: parent.env_vars.clone(), // Pass parent env vars to child
parent: Some(parent.id), // Link to parent execution
enforcement: parent.enforcement,
executor: None, // Will be assigned during scheduling
status: ExecutionStatus::Requested,
result: None,
workflow_task: None, // Non-workflow execution
};
let child_execution = ExecutionRepository::create(pool, child_input).await?;
info!(
"Created child execution {} for parent {}",
child_execution.id, parent.id
);
// Publish ExecutionRequested message for child
let payload = ExecutionRequestedPayload {
execution_id: child_execution.id,
action_id: None, // Child executions typically don't have action_id set yet
action_ref: action_ref.clone(),
parent_id: Some(parent.id),
enforcement_id: None,
config: None,
};
let envelope = MessageEnvelope::new(MessageType::ExecutionRequested, payload)
.with_source("executor");
publisher.publish_envelope(&envelope).await?;
}
Ok(())
}
// REMOVED: publish_completion_notification
// This method was causing duplicate execution.completed messages.
// The worker is responsible for publishing completion notifications,
// not the executor. Removing this prevents double-decrementing the
// queue manager's active_count.
}
#[cfg(test)]
mod tests {
    use super::ExecutionManager;
    use attune_common::models::enums::ExecutionStatus;

    /// Constructing an ExecutionManager requires a live PgPool, Publisher,
    /// and Consumer, which need external infrastructure; construction is
    /// covered by integration tests instead.
    #[test]
    fn test_execution_manager_creation() {
        // Intentionally empty: `new` only moves its arguments into the struct
        // and has no failure modes to unit-test in isolation.
    }

    /// `parse_execution_status` is a pure function, so it is tested directly:
    /// known statuses round-trip (case-insensitively, including the alternate
    /// "canceled" spelling) and unknown strings are rejected.
    #[test]
    fn test_parse_execution_status() {
        assert_eq!(
            ExecutionManager::parse_execution_status("requested").unwrap(),
            ExecutionStatus::Requested
        );
        assert_eq!(
            ExecutionManager::parse_execution_status("running").unwrap(),
            ExecutionStatus::Running
        );
        // Case-insensitive parsing.
        assert_eq!(
            ExecutionManager::parse_execution_status("COMPLETED").unwrap(),
            ExecutionStatus::Completed
        );
        // Both spellings map to Cancelled.
        assert_eq!(
            ExecutionManager::parse_execution_status("cancelled").unwrap(),
            ExecutionStatus::Cancelled
        );
        assert_eq!(
            ExecutionManager::parse_execution_status("canceled").unwrap(),
            ExecutionStatus::Cancelled
        );
        assert_eq!(
            ExecutionManager::parse_execution_status("timeout").unwrap(),
            ExecutionStatus::Timeout
        );
        // Unknown statuses are errors.
        assert!(ExecutionManager::parse_execution_status("bogus").is_err());
    }
}