re-uploading work

2026-02-04 17:46:30 -06:00
commit 3b14c65998
1388 changed files with 381262 additions and 0 deletions
--- a/crates/executor/src/scheduler.rs
+++ b/crates/executor/src/scheduler.rs
@@ -0,0 +1,303 @@
+//! Execution Scheduler - Routes executions to available workers
+//!
+//! This module is responsible for:
+//! - Listening for ExecutionRequested messages
+//! - Selecting appropriate workers for executions
+//! - Queuing executions to worker-specific queues
+//! - Updating execution status to Scheduled
+//! - Handling worker unavailability and retries
+
+use anyhow::Result;
+use attune_common::{
+    models::{enums::ExecutionStatus, Action, Execution},
+    mq::{Consumer, ExecutionRequestedPayload, MessageEnvelope, MessageType, Publisher},
+    repositories::{
+        action::ActionRepository,
+        execution::ExecutionRepository,
+        runtime::{RuntimeRepository, WorkerRepository},
+        FindById, FindByRef, Update,
+    },
+};
+use serde::{Deserialize, Serialize};
+use serde_json::Value as JsonValue;
+use sqlx::PgPool;
+use std::sync::Arc;
+use tracing::{debug, error, info};
+
+/// Payload for execution scheduled messages
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct ExecutionScheduledPayload {
+    execution_id: i64,
+    worker_id: i64,
+    action_ref: String,
+    config: Option<JsonValue>,
+}
+
+/// Execution scheduler that routes executions to workers
+pub struct ExecutionScheduler {
+    pool: PgPool,
+    publisher: Arc<Publisher>,
+    consumer: Arc<Consumer>,
+}
+
+impl ExecutionScheduler {
+    /// Create a new execution scheduler
+    pub fn new(pool: PgPool, publisher: Arc<Publisher>, consumer: Arc<Consumer>) -> Self {
+        Self {
+            pool,
+            publisher,
+            consumer,
+        }
+    }
+
+    /// Start processing execution requested messages
+    pub async fn start(&self) -> Result<()> {
+        info!("Starting execution scheduler");
+
+        let pool = self.pool.clone();
+        let publisher = self.publisher.clone();
+
+        // Use the handler pattern to consume messages
+        self.consumer
+            .consume_with_handler(
+                move |envelope: MessageEnvelope<ExecutionRequestedPayload>| {
+                    let pool = pool.clone();
+                    let publisher = publisher.clone();
+
+                    async move {
+                        if let Err(e) =
+                            Self::process_execution_requested(&pool, &publisher, &envelope).await
+                        {
+                            error!("Error scheduling execution: {}", e);
+                            // Return error to trigger nack with requeue
+                            return Err(format!("Failed to schedule execution: {}", e).into());
+                        }
+                        Ok(())
+                    }
+                },
+            )
+            .await?;
+
+        Ok(())
+    }
+
+    /// Process an execution requested message
+    async fn process_execution_requested(
+        pool: &PgPool,
+        publisher: &Publisher,
+        envelope: &MessageEnvelope<ExecutionRequestedPayload>,
+    ) -> Result<()> {
+        debug!("Processing execution requested message: {:?}", envelope);
+
+        let execution_id = envelope.payload.execution_id;
+
+        info!("Scheduling execution: {}", execution_id);
+
+        // Fetch execution from database
+        let mut execution = ExecutionRepository::find_by_id(pool, execution_id)
+            .await?
+            .ok_or_else(|| anyhow::anyhow!("Execution not found: {}", execution_id))?;
+
+        // Fetch action to determine runtime requirements
+        let action = Self::get_action_for_execution(pool, &execution).await?;
+
+        // Select appropriate worker
+        let worker = Self::select_worker(pool, &action).await?;
+
+        info!(
+            "Selected worker {} for execution {}",
+            worker.id, execution_id
+        );
+
+        // Update execution status to scheduled
+        let execution_config = execution.config.clone();
+        execution.status = ExecutionStatus::Scheduled;
+        ExecutionRepository::update(pool, execution.id, execution.into()).await?;
+
+        // Publish message to worker-specific queue
+        Self::queue_to_worker(
+            publisher,
+            &execution_id,
+            &worker.id,
+            &envelope.payload.action_ref,
+            &execution_config,
+            &action,
+        )
+        .await?;
+
+        info!(
+            "Execution {} scheduled to worker {}",
+            execution_id, worker.id
+        );
+
+        Ok(())
+    }
+
+    /// Get the action associated with an execution
+    async fn get_action_for_execution(pool: &PgPool, execution: &Execution) -> Result<Action> {
+        // Try to get action by ID first
+        if let Some(action_id) = execution.action {
+            if let Some(action) = ActionRepository::find_by_id(pool, action_id).await? {
+                return Ok(action);
+            }
+        }
+
+        // Fall back to action_ref
+        ActionRepository::find_by_ref(pool, &execution.action_ref)
+            .await?
+            .ok_or_else(|| anyhow::anyhow!("Action not found for execution: {}", execution.id))
+    }
+
+    /// Select an appropriate worker for the execution
+    async fn select_worker(
+        pool: &PgPool,
+        action: &Action,
+    ) -> Result<attune_common::models::Worker> {
+        // Get runtime requirements for the action
+        let runtime = if let Some(runtime_id) = action.runtime {
+            RuntimeRepository::find_by_id(pool, runtime_id).await?
+        } else {
+            None
+        };
+
+        // Find available action workers (role = 'action')
+        let workers = WorkerRepository::find_action_workers(pool).await?;
+
+        if workers.is_empty() {
+            return Err(anyhow::anyhow!("No action workers available"));
+        }
+
+        // Filter workers by runtime compatibility if runtime is specified
+        let compatible_workers: Vec<_> = if let Some(ref runtime) = runtime {
+            workers
+                .into_iter()
+                .filter(|w| Self::worker_supports_runtime(w, &runtime.name))
+                .collect()
+        } else {
+            workers
+        };
+
+        if compatible_workers.is_empty() {
+            let runtime_name = runtime.as_ref().map(|r| r.name.as_str()).unwrap_or("any");
+            return Err(anyhow::anyhow!(
+                "No compatible workers found for action: {} (requires runtime: {})",
+                action.r#ref,
+                runtime_name
+            ));
+        }
+
+        // Filter by worker status (only active workers)
+        let active_workers: Vec<_> = compatible_workers
+            .into_iter()
+            .filter(|w| w.status == Some(attune_common::models::enums::WorkerStatus::Active))
+            .collect();
+
+        if active_workers.is_empty() {
+            return Err(anyhow::anyhow!("No active workers available"));
+        }
+
+        // TODO: Implement intelligent worker selection:
+        // - Consider worker load/capacity
+        // - Consider worker affinity (same pack, same runtime)
+        // - Consider geographic locality
+        // - Round-robin or least-connections strategy
+
+        // For now, just select the first available worker
+        Ok(active_workers
+            .into_iter()
+            .next()
+            .expect("Worker list should not be empty"))
+    }
+
+    /// Check if a worker supports a given runtime
+    ///
+    /// This checks the worker's capabilities.runtimes array for the runtime name.
+    /// Falls back to checking the deprecated runtime column if capabilities are not set.
+    fn worker_supports_runtime(worker: &attune_common::models::Worker, runtime_name: &str) -> bool {
+        // First, try to parse capabilities and check runtimes array
+        if let Some(ref capabilities) = worker.capabilities {
+            if let Some(runtimes) = capabilities.get("runtimes") {
+                if let Some(runtime_array) = runtimes.as_array() {
+                    // Check if any runtime in the array matches (case-insensitive)
+                    for runtime_value in runtime_array {
+                        if let Some(runtime_str) = runtime_value.as_str() {
+                            if runtime_str.eq_ignore_ascii_case(runtime_name) {
+                                debug!(
+                                    "Worker {} supports runtime '{}' via capabilities",
+                                    worker.name, runtime_name
+                                );
+                                return true;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Fallback: check deprecated runtime column
+        // This is kept for backward compatibility but should be removed in the future
+        if worker.runtime.is_some() {
+            debug!(
+                "Worker {} using deprecated runtime column for matching",
+                worker.name
+            );
+            // Note: This fallback is incomplete because we'd need to look up the runtime name
+            // from the ID, which would require an async call. Since we're moving to capabilities,
+            // we'll just return false here and require workers to set capabilities properly.
+        }
+
+        debug!(
+            "Worker {} does not support runtime '{}'",
+            worker.name, runtime_name
+        );
+        false
+    }
+
+    /// Queue execution to a specific worker
+    async fn queue_to_worker(
+        publisher: &Publisher,
+        execution_id: &i64,
+        worker_id: &i64,
+        action_ref: &str,
+        config: &Option<JsonValue>,
+        _action: &Action,
+    ) -> Result<()> {
+        debug!("Queuing execution {} to worker {}", execution_id, worker_id);
+
+        // Create payload for worker
+        let payload = ExecutionScheduledPayload {
+            execution_id: *execution_id,
+            worker_id: *worker_id,
+            action_ref: action_ref.to_string(),
+            config: config.clone(),
+        };
+
+        let envelope =
+            MessageEnvelope::new(MessageType::ExecutionRequested, payload).with_source("executor");
+
+        // Publish to worker-specific queue with routing key
+        let routing_key = format!("worker.{}", worker_id);
+        let exchange = "attune.executions";
+
+        publisher
+            .publish_envelope_with_routing(&envelope, exchange, &routing_key)
+            .await?;
+
+        info!(
+            "Published execution.scheduled message to worker {} (routing key: {})",
+            worker_id, routing_key
+        );
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_scheduler_creation() {
+        // This is a placeholder test
+        // Real tests will require database and message queue setup
+        assert!(true);
+    }
+}