// attune/crates/executor/src/execution_manager.rs

//! Execution Manager - Handles execution orchestration and lifecycle events
//!
//! This module is responsible for:
//! - Listening for ExecutionStatusChanged messages from workers
//! - Orchestrating workflow executions (parent-child relationships)
//! - Triggering child executions when parent completes
//! - Handling execution failures and retries
//!
//! ## Ownership Model
//!
//! The Executor owns execution state until it is scheduled to a worker.
//! After scheduling, the Worker owns the state and updates the database directly.
//!
//! - **Executor owns**: Requested → Scheduling → Scheduled
//! - **Worker owns**: Running → Completed/Failed/Cancelled/Timeout
//!
//! The ExecutionManager receives status change notifications for orchestration
//! purposes (e.g., triggering child executions) but does NOT update the database.
use anyhow::Result;
use attune_common::{
models::{enums::ExecutionStatus, Execution},
mq::{
Consumer, ExecutionRequestedPayload, ExecutionStatusChangedPayload, MessageEnvelope,
MessageType, Publisher,
},
repositories::{
execution::{CreateExecutionInput, ExecutionRepository},
Create, FindById,
},
};
use sqlx::PgPool;
use std::sync::Arc;
use tracing::{debug, error, info, warn};
/// Execution manager that handles lifecycle and status updates
pub struct ExecutionManager {
// Postgres pool used only for read-side orchestration lookups (never
// for status writes — the worker owns those; see module docs).
pool: PgPool,
// Publisher for emitting ExecutionRequested messages for child executions.
publisher: Arc<Publisher>,
// Consumer delivering ExecutionStatusChanged notifications from workers.
consumer: Arc<Consumer>,
}
impl ExecutionManager {
/// Create a new execution manager
pub fn new(pool: PgPool, publisher: Arc<Publisher>, consumer: Arc<Consumer>) -> Self {
Self {
pool,
publisher,
consumer,
}
}
/// Start processing execution status messages
pub async fn start(&self) -> Result<()> {
info!("Starting execution manager");
let pool = self.pool.clone();
let publisher = self.publisher.clone();
// Use the handler pattern to consume messages
self.consumer
.consume_with_handler(
move |envelope: MessageEnvelope<ExecutionStatusChangedPayload>| {
let pool = pool.clone();
let publisher = publisher.clone();
async move {
if let Err(e) =
Self::process_status_change(&pool, &publisher, &envelope).await
{
error!("Error processing status change: {}", e);
// Return error to trigger nack with requeue
return Err(format!("Failed to process status change: {}", e).into());
}
Ok(())
}
},
)
.await?;
Ok(())
}
/// Process an execution status change message
///
/// NOTE: This method does NOT update the database. The worker is responsible
/// for updating execution state after the execution is scheduled. The executor
/// only handles orchestration logic (e.g., triggering workflow children).
async fn process_status_change(
pool: &PgPool,
publisher: &Publisher,
envelope: &MessageEnvelope<ExecutionStatusChangedPayload>,
) -> Result<()> {
debug!("Processing execution status change: {:?}", envelope);
let execution_id = envelope.payload.execution_id;
let status_str = &envelope.payload.new_status;
let status = Self::parse_execution_status(status_str)?;
debug!(
"Received status change notification for execution {}: {}",
execution_id, status_str
);
// Fetch execution from database (for orchestration logic)
let execution = ExecutionRepository::find_by_id(pool, execution_id)
.await?
.ok_or_else(|| anyhow::anyhow!("Execution not found: {}", execution_id))?;
// Handle orchestration logic based on status
// Note: Worker has already updated the database directly
match status {
ExecutionStatus::Completed | ExecutionStatus::Failed | ExecutionStatus::Cancelled => {
info!(
"Execution {} reached terminal state: {:?}, handling orchestration",
execution_id, status
);
Self::handle_completion(pool, publisher, &execution).await?;
}
ExecutionStatus::Running => {
debug!(
"Execution {} now running (worker has updated DB)",
execution_id
);
}
_ => {
debug!(
"Execution {} status changed to {:?} (no orchestration needed)",
execution_id, status
);
}
}
Ok(())
}
/// Parse execution status from string
fn parse_execution_status(status: &str) -> Result<ExecutionStatus> {
match status.to_lowercase().as_str() {
"requested" => Ok(ExecutionStatus::Requested),
"scheduling" => Ok(ExecutionStatus::Scheduling),
"scheduled" => Ok(ExecutionStatus::Scheduled),
"running" => Ok(ExecutionStatus::Running),
"completed" => Ok(ExecutionStatus::Completed),
"failed" => Ok(ExecutionStatus::Failed),
"cancelled" | "canceled" => Ok(ExecutionStatus::Cancelled),
"canceling" => Ok(ExecutionStatus::Canceling),
"abandoned" => Ok(ExecutionStatus::Abandoned),
"timeout" => Ok(ExecutionStatus::Timeout),
_ => Err(anyhow::anyhow!("Invalid execution status: {}", status)),
}
}
/// Handle execution completion (success, failure, or cancellation)
async fn handle_completion(
pool: &PgPool,
publisher: &Publisher,
execution: &Execution,
) -> Result<()> {
info!("Handling completion for execution: {}", execution.id);
// Check if this execution has child executions to trigger
if let Some(child_actions) = Self::get_child_actions(execution).await? {
// Only trigger children on completion
if execution.status == ExecutionStatus::Completed {
Self::trigger_child_executions(pool, publisher, execution, &child_actions).await?;
} else {
warn!(
"Execution {} failed/canceled, skipping child executions",
execution.id
);
}
}
// NOTE: Completion notification is published by the worker, not here.
// This prevents duplicate execution.completed messages that would cause
// the queue manager to decrement active_count twice.
Ok(())
}
/// Get child actions from execution result (for workflow orchestration)
async fn get_child_actions(_execution: &Execution) -> Result<Option<Vec<String>>> {
// TODO: Implement workflow logic
// - Check if action has defined workflow
// - Extract next actions from execution result
// - Parse workflow definition
// For now, return None (no child executions)
Ok(None)
}
/// Trigger child executions for a completed parent
async fn trigger_child_executions(
pool: &PgPool,
publisher: &Publisher,
parent: &Execution,
child_actions: &[String],
) -> Result<()> {
info!(
"Triggering {} child executions for parent: {}",
child_actions.len(),
parent.id
);
for action_ref in child_actions {
let child_input = CreateExecutionInput {
action: None,
action_ref: action_ref.clone(),
config: parent.config.clone(), // Pass parent config to child
env_vars: parent.env_vars.clone(), // Pass parent env vars to child
parent: Some(parent.id), // Link to parent execution
enforcement: parent.enforcement,
executor: None, // Will be assigned during scheduling
status: ExecutionStatus::Requested,
result: None,
workflow_task: None, // Non-workflow execution
};
let child_execution = ExecutionRepository::create(pool, child_input).await?;
info!(
"Created child execution {} for parent {}",
child_execution.id, parent.id
);
// Publish ExecutionRequested message for child
let payload = ExecutionRequestedPayload {
execution_id: child_execution.id,
action_id: None, // Child executions typically don't have action_id set yet
action_ref: action_ref.clone(),
parent_id: Some(parent.id),
enforcement_id: None,
config: None,
};
let envelope = MessageEnvelope::new(MessageType::ExecutionRequested, payload)
.with_source("executor");
publisher.publish_envelope(&envelope).await?;
}
Ok(())
}
// REMOVED: publish_completion_notification
// This method was causing duplicate execution.completed messages.
// The worker is responsible for publishing completion notifications,
// not the executor. Removing this prevents double-decrementing the
// queue manager's active_count.
}
#[cfg(test)]
mod tests {
    use super::ExecutionManager;
    use attune_common::models::enums::ExecutionStatus;

    /// Constructing an ExecutionManager requires a live PgPool, Publisher,
    /// and Consumer, which need external infrastructure; construction is
    /// covered by integration tests instead.
    #[test]
    fn test_execution_manager_creation() {
        // Intentionally empty: `new` only moves its arguments into the struct
        // and has no failure modes to unit-test in isolation.
    }

    /// `parse_execution_status` is a pure function, so it is tested directly:
    /// known statuses round-trip (case-insensitively, including the alternate
    /// "canceled" spelling) and unknown strings are rejected.
    #[test]
    fn test_parse_execution_status() {
        assert_eq!(
            ExecutionManager::parse_execution_status("requested").unwrap(),
            ExecutionStatus::Requested
        );
        assert_eq!(
            ExecutionManager::parse_execution_status("running").unwrap(),
            ExecutionStatus::Running
        );
        // Case-insensitive parsing.
        assert_eq!(
            ExecutionManager::parse_execution_status("COMPLETED").unwrap(),
            ExecutionStatus::Completed
        );
        // Both spellings map to Cancelled.
        assert_eq!(
            ExecutionManager::parse_execution_status("cancelled").unwrap(),
            ExecutionStatus::Cancelled
        );
        assert_eq!(
            ExecutionManager::parse_execution_status("canceled").unwrap(),
            ExecutionStatus::Cancelled
        );
        assert_eq!(
            ExecutionManager::parse_execution_status("timeout").unwrap(),
            ExecutionStatus::Timeout
        );
        // Unknown statuses are errors.
        assert!(ExecutionManager::parse_execution_status("bogus").is_err());
    }
}