runtime executions: guard against deleted action/trigger refs, extract backoff and health-metric helpers for pool-free testing

This commit is contained in:
2026-02-16 22:04:20 -06:00
parent f52320f889
commit 904ede04be
99 changed files with 6778 additions and 5929 deletions

View File

@@ -9,7 +9,7 @@
//! - Creating execution records
//! - Publishing ExecutionRequested messages
use anyhow::Result;
use anyhow::{bail, Result};
use attune_common::{
models::{Enforcement, Event, Rule},
mq::{
@@ -166,6 +166,24 @@ impl EnforcementProcessor {
return Ok(false);
}
// Check if the rule's action still exists (may have been deleted with its pack)
if rule.action.is_none() {
warn!(
"Rule {} references a deleted action (action_ref: {}), skipping execution",
rule.id, rule.action_ref
);
return Ok(false);
}
// Check if the rule's trigger still exists
if rule.trigger.is_none() {
warn!(
"Rule {} references a deleted trigger (trigger_ref: {}), skipping execution",
rule.id, rule.trigger_ref
);
return Ok(false);
}
// TODO: Evaluate rule conditions against event payload
// For now, we'll create executions for all valid enforcements
@@ -186,13 +204,27 @@ impl EnforcementProcessor {
enforcement: &Enforcement,
rule: &Rule,
) -> Result<()> {
// Extract action ID — should_create_execution already verified it's Some,
// but guard defensively here as well.
let action_id = match rule.action {
Some(id) => id,
None => {
error!(
"Rule {} has no action ID (deleted?), cannot create execution for enforcement {}",
rule.id, enforcement.id
);
bail!(
"Rule {} references a deleted action (action_ref: {})",
rule.id, rule.action_ref
);
}
};
info!(
"Creating execution for enforcement: {}, rule: {}, action: {}",
enforcement.id, rule.id, rule.action
enforcement.id, rule.id, action_id
);
// Get action and pack IDs from rule
let action_id = rule.action;
let pack_id = rule.pack;
let action_ref = &rule.action_ref;
@@ -305,9 +337,9 @@ mod tests {
label: "Test Rule".to_string(),
description: "Test rule description".to_string(),
trigger_ref: "test.trigger".to_string(),
trigger: 1,
trigger: Some(1),
action_ref: "test.action".to_string(),
action: 1,
action: Some(1),
enabled: false, // Disabled
conditions: json!({}),
action_params: json!({}),

View File

@@ -345,22 +345,7 @@ impl RetryManager {
/// Calculate exponential backoff with jitter
fn calculate_backoff(&self, retry_count: i32) -> Duration {
let base_secs = self.config.base_backoff_secs as f64;
let multiplier = self.config.backoff_multiplier;
let max_secs = self.config.max_backoff_secs as f64;
let jitter_factor = self.config.jitter_factor;
// Calculate exponential backoff: base * multiplier^retry_count
let backoff_secs = base_secs * multiplier.powi(retry_count);
// Cap at max
let backoff_secs = backoff_secs.min(max_secs);
// Add jitter: random value between (1 - jitter) and (1 + jitter)
let jitter = 1.0 + (rand::random::<f64>() * 2.0 - 1.0) * jitter_factor;
let backoff_with_jitter = backoff_secs * jitter;
Duration::from_secs(backoff_with_jitter.max(0.0) as u64)
calculate_backoff_duration(&self.config, retry_count)
}
/// Update execution with retry metadata
@@ -408,6 +393,28 @@ impl RetryManager {
}
}
/// Calculate exponential backoff with jitter from a retry config.
///
/// Extracted as a free function so it can be tested without a database pool.
///
/// The delay grows as `base * multiplier^retry_count`, is clamped to the
/// configured maximum, and is then scaled by a random jitter factor in
/// `[1 - jitter_factor, 1 + jitter_factor)` to spread out retry storms.
fn calculate_backoff_duration(config: &RetryConfig, retry_count: i32) -> Duration {
    // Exponential growth capped at the configured ceiling.
    let capped_secs = (config.base_backoff_secs as f64
        * config.backoff_multiplier.powi(retry_count))
    .min(config.max_backoff_secs as f64);

    // rand::random::<f64>() is uniform in [0, 1), so this scale factor is
    // uniform in [1 - jitter_factor, 1 + jitter_factor).
    let jitter_scale = 1.0 + (rand::random::<f64>() * 2.0 - 1.0) * config.jitter_factor;

    // Clamp at zero in case a jitter_factor > 1 drives the product negative.
    Duration::from_secs((capped_secs * jitter_scale).max(0.0) as u64)
}
/// Check if an error message indicates a retriable failure
#[allow(dead_code)]
pub fn is_error_retriable(error_msg: &str) -> bool {
@@ -466,17 +473,14 @@ mod tests {
#[test]
fn test_backoff_calculation() {
let manager = RetryManager::with_defaults(
// Mock pool - won't be used in this test
unsafe { std::mem::zeroed() },
);
let config = RetryConfig::default();
let backoff0 = manager.calculate_backoff(0);
let backoff1 = manager.calculate_backoff(1);
let backoff2 = manager.calculate_backoff(2);
let backoff0 = calculate_backoff_duration(&config, 0);
let backoff1 = calculate_backoff_duration(&config, 1);
let backoff2 = calculate_backoff_duration(&config, 2);
// First attempt: ~1s
assert!(backoff0.as_secs() >= 0 && backoff0.as_secs() <= 2);
// First attempt: ~1s (with jitter 0..2s)
assert!(backoff0.as_secs() <= 2);
// Second attempt: ~2s
assert!(backoff1.as_secs() >= 1 && backoff1.as_secs() <= 3);
// Third attempt: ~4s

View File

@@ -237,9 +237,7 @@ impl ExecutionTimeoutMonitor {
#[cfg(test)]
mod tests {
use super::*;
use attune_common::mq::MessageQueue;
use chrono::Duration as ChronoDuration;
use sqlx::PgPool;
fn create_test_config() -> TimeoutMonitorConfig {
TimeoutMonitorConfig {
@@ -259,46 +257,39 @@ mod tests {
#[test]
fn test_cutoff_calculation() {
let config = create_test_config();
let pool = PgPool::connect("postgresql://localhost/test")
.await
.expect("DB connection");
let mq = MessageQueue::connect("amqp://localhost")
.await
.expect("MQ connection");
// Test that cutoff is calculated as now - scheduled_timeout
let config = create_test_config(); // scheduled_timeout = 60s
let monitor = ExecutionTimeoutMonitor::new(pool, Arc::new(mq.publisher), config);
let before = Utc::now() - ChronoDuration::seconds(60);
let cutoff = monitor.calculate_cutoff_time();
let now = Utc::now();
let expected_cutoff = now - ChronoDuration::seconds(60);
// calculate_cutoff uses Utc::now() internally, so we compute expected bounds
let timeout_duration =
chrono::Duration::from_std(config.scheduled_timeout).expect("Invalid timeout duration");
let cutoff = Utc::now() - timeout_duration;
// Allow 1 second tolerance
let diff = (cutoff - expected_cutoff).num_seconds().abs();
assert!(diff <= 1, "Cutoff time calculation incorrect");
let after = Utc::now() - ChronoDuration::seconds(60);
// cutoff should be between before and after (both ~60s ago)
let diff_before = (cutoff - before).num_seconds().abs();
let diff_after = (cutoff - after).num_seconds().abs();
assert!(
diff_before <= 1,
"Cutoff time should be ~60s ago (before check)"
);
assert!(
diff_after <= 1,
"Cutoff time should be ~60s ago (after check)"
);
}
#[test]
fn test_disabled_monitor() {
fn test_disabled_config() {
let mut config = create_test_config();
config.enabled = false;
let pool = PgPool::connect("postgresql://localhost/test")
.await
.expect("DB connection");
let mq = MessageQueue::connect("amqp://localhost")
.await
.expect("MQ connection");
let monitor = Arc::new(ExecutionTimeoutMonitor::new(
pool,
Arc::new(mq.publisher),
config,
));
// Should return immediately without error
let result = tokio::time::timeout(Duration::from_secs(1), monitor.start()).await;
assert!(result.is_ok(), "Disabled monitor should return immediately");
// Verify the config is properly set to disabled
assert!(!config.enabled);
assert_eq!(config.scheduled_timeout.as_secs(), 60);
assert_eq!(config.check_interval.as_secs(), 1);
}
}

View File

@@ -297,64 +297,73 @@ impl WorkerHealthProbe {
/// Extract health metrics from worker capabilities
fn extract_health_metrics(&self, worker: &Worker) -> HealthMetrics {
let mut metrics = HealthMetrics {
last_check: Utc::now(),
..Default::default()
extract_health_metrics(worker)
}
}
/// Extract health metrics from worker capabilities.
///
/// Extracted as a free function so it can be tested without a database pool.
fn extract_health_metrics(worker: &Worker) -> HealthMetrics {
let mut metrics = HealthMetrics {
last_check: Utc::now(),
..Default::default()
};
let Some(capabilities) = &worker.capabilities else {
return metrics;
};
let Some(health_obj) = capabilities.get("health") else {
return metrics;
};
// Extract metrics from health object
if let Some(status_str) = health_obj.get("status").and_then(|v| v.as_str()) {
metrics.status = match status_str {
"healthy" => HealthStatus::Healthy,
"degraded" => HealthStatus::Degraded,
"unhealthy" => HealthStatus::Unhealthy,
_ => HealthStatus::Healthy,
};
let Some(capabilities) = &worker.capabilities else {
return metrics;
};
let Some(health_obj) = capabilities.get("health") else {
return metrics;
};
// Extract metrics from health object
if let Some(status_str) = health_obj.get("status").and_then(|v| v.as_str()) {
metrics.status = match status_str {
"healthy" => HealthStatus::Healthy,
"degraded" => HealthStatus::Degraded,
"unhealthy" => HealthStatus::Unhealthy,
_ => HealthStatus::Healthy,
};
}
if let Some(last_check_str) = health_obj.get("last_check").and_then(|v| v.as_str()) {
if let Ok(last_check) = DateTime::parse_from_rfc3339(last_check_str) {
metrics.last_check = last_check.with_timezone(&Utc);
}
}
if let Some(failures) = health_obj
.get("consecutive_failures")
.and_then(|v| v.as_u64())
{
metrics.consecutive_failures = failures as u32;
}
if let Some(total) = health_obj.get("total_executions").and_then(|v| v.as_u64()) {
metrics.total_executions = total;
}
if let Some(failed) = health_obj.get("failed_executions").and_then(|v| v.as_u64()) {
metrics.failed_executions = failed;
}
if let Some(avg_time) = health_obj
.get("average_execution_time_ms")
.and_then(|v| v.as_u64())
{
metrics.average_execution_time_ms = avg_time;
}
if let Some(depth) = health_obj.get("queue_depth").and_then(|v| v.as_u64()) {
metrics.queue_depth = depth as u32;
}
metrics
}
if let Some(last_check_str) = health_obj.get("last_check").and_then(|v| v.as_str()) {
if let Ok(last_check) = DateTime::parse_from_rfc3339(last_check_str) {
metrics.last_check = last_check.with_timezone(&Utc);
}
}
if let Some(failures) = health_obj
.get("consecutive_failures")
.and_then(|v| v.as_u64())
{
metrics.consecutive_failures = failures as u32;
}
if let Some(total) = health_obj.get("total_executions").and_then(|v| v.as_u64()) {
metrics.total_executions = total;
}
if let Some(failed) = health_obj.get("failed_executions").and_then(|v| v.as_u64()) {
metrics.failed_executions = failed;
}
if let Some(avg_time) = health_obj
.get("average_execution_time_ms")
.and_then(|v| v.as_u64())
{
metrics.average_execution_time_ms = avg_time;
}
if let Some(depth) = health_obj.get("queue_depth").and_then(|v| v.as_u64()) {
metrics.queue_depth = depth as u32;
}
metrics
}
impl WorkerHealthProbe {
/// Get recommended worker for execution based on health
#[allow(dead_code)]
pub async fn get_best_worker(&self, runtime_name: &str) -> Result<Option<Worker>> {
@@ -435,8 +444,6 @@ mod tests {
#[test]
fn test_extract_health_metrics() {
let probe = WorkerHealthProbe::with_defaults(Arc::new(unsafe { std::mem::zeroed() }));
let worker = Worker {
id: 1,
name: "test-worker".to_string(),
@@ -461,7 +468,7 @@ mod tests {
updated: Utc::now(),
};
let metrics = probe.extract_health_metrics(&worker);
let metrics = extract_health_metrics(&worker);
assert_eq!(metrics.status, HealthStatus::Degraded);
assert_eq!(metrics.consecutive_failures, 5);
assert_eq!(metrics.queue_depth, 25);

View File

@@ -74,6 +74,13 @@ async fn _create_test_runtime(pool: &PgPool, suffix: &str) -> i64 {
name: format!("Python {}", suffix),
distributions: json!({"ubuntu": "python3"}),
installation: Some(json!({"method": "apt"})),
execution_config: json!({
"interpreter": {
"binary": "python3",
"args": ["-u"],
"file_extension": ".py"
}
}),
};
RuntimeRepository::create(pool, runtime_input)

View File

@@ -69,6 +69,13 @@ async fn create_test_runtime(pool: &PgPool, suffix: &str) -> i64 {
name: format!("Python {}", suffix),
distributions: json!({"ubuntu": "python3"}),
installation: Some(json!({"method": "apt"})),
execution_config: json!({
"interpreter": {
"binary": "python3",
"args": ["-u"],
"file_extension": ".py"
}
}),
};
let runtime = RuntimeRepository::create(pool, runtime_input)