working on runtime executions
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
//! - Creating execution records
|
||||
//! - Publishing ExecutionRequested messages
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{bail, Result};
|
||||
use attune_common::{
|
||||
models::{Enforcement, Event, Rule},
|
||||
mq::{
|
||||
@@ -166,6 +166,24 @@ impl EnforcementProcessor {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// Check if the rule's action still exists (may have been deleted with its pack)
|
||||
if rule.action.is_none() {
|
||||
warn!(
|
||||
"Rule {} references a deleted action (action_ref: {}), skipping execution",
|
||||
rule.id, rule.action_ref
|
||||
);
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// Check if the rule's trigger still exists
|
||||
if rule.trigger.is_none() {
|
||||
warn!(
|
||||
"Rule {} references a deleted trigger (trigger_ref: {}), skipping execution",
|
||||
rule.id, rule.trigger_ref
|
||||
);
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// TODO: Evaluate rule conditions against event payload
|
||||
// For now, we'll create executions for all valid enforcements
|
||||
|
||||
@@ -186,13 +204,27 @@ impl EnforcementProcessor {
|
||||
enforcement: &Enforcement,
|
||||
rule: &Rule,
|
||||
) -> Result<()> {
|
||||
// Extract action ID — should_create_execution already verified it's Some,
|
||||
// but guard defensively here as well.
|
||||
let action_id = match rule.action {
|
||||
Some(id) => id,
|
||||
None => {
|
||||
error!(
|
||||
"Rule {} has no action ID (deleted?), cannot create execution for enforcement {}",
|
||||
rule.id, enforcement.id
|
||||
);
|
||||
bail!(
|
||||
"Rule {} references a deleted action (action_ref: {})",
|
||||
rule.id, rule.action_ref
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
info!(
|
||||
"Creating execution for enforcement: {}, rule: {}, action: {}",
|
||||
enforcement.id, rule.id, rule.action
|
||||
enforcement.id, rule.id, action_id
|
||||
);
|
||||
|
||||
// Get action and pack IDs from rule
|
||||
let action_id = rule.action;
|
||||
let pack_id = rule.pack;
|
||||
let action_ref = &rule.action_ref;
|
||||
|
||||
@@ -305,9 +337,9 @@ mod tests {
|
||||
label: "Test Rule".to_string(),
|
||||
description: "Test rule description".to_string(),
|
||||
trigger_ref: "test.trigger".to_string(),
|
||||
trigger: 1,
|
||||
trigger: Some(1),
|
||||
action_ref: "test.action".to_string(),
|
||||
action: 1,
|
||||
action: Some(1),
|
||||
enabled: false, // Disabled
|
||||
conditions: json!({}),
|
||||
action_params: json!({}),
|
||||
|
||||
@@ -345,22 +345,7 @@ impl RetryManager {
|
||||
|
||||
/// Calculate exponential backoff with jitter
|
||||
fn calculate_backoff(&self, retry_count: i32) -> Duration {
|
||||
let base_secs = self.config.base_backoff_secs as f64;
|
||||
let multiplier = self.config.backoff_multiplier;
|
||||
let max_secs = self.config.max_backoff_secs as f64;
|
||||
let jitter_factor = self.config.jitter_factor;
|
||||
|
||||
// Calculate exponential backoff: base * multiplier^retry_count
|
||||
let backoff_secs = base_secs * multiplier.powi(retry_count);
|
||||
|
||||
// Cap at max
|
||||
let backoff_secs = backoff_secs.min(max_secs);
|
||||
|
||||
// Add jitter: random value between (1 - jitter) and (1 + jitter)
|
||||
let jitter = 1.0 + (rand::random::<f64>() * 2.0 - 1.0) * jitter_factor;
|
||||
let backoff_with_jitter = backoff_secs * jitter;
|
||||
|
||||
Duration::from_secs(backoff_with_jitter.max(0.0) as u64)
|
||||
calculate_backoff_duration(&self.config, retry_count)
|
||||
}
|
||||
|
||||
/// Update execution with retry metadata
|
||||
@@ -408,6 +393,28 @@ impl RetryManager {
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate exponential backoff with jitter from a retry config.
|
||||
///
|
||||
/// Extracted as a free function so it can be tested without a database pool.
|
||||
fn calculate_backoff_duration(config: &RetryConfig, retry_count: i32) -> Duration {
|
||||
let base_secs = config.base_backoff_secs as f64;
|
||||
let multiplier = config.backoff_multiplier;
|
||||
let max_secs = config.max_backoff_secs as f64;
|
||||
let jitter_factor = config.jitter_factor;
|
||||
|
||||
// Calculate exponential backoff: base * multiplier^retry_count
|
||||
let backoff_secs = base_secs * multiplier.powi(retry_count);
|
||||
|
||||
// Cap at max
|
||||
let backoff_secs = backoff_secs.min(max_secs);
|
||||
|
||||
// Add jitter: random value between (1 - jitter) and (1 + jitter)
|
||||
let jitter = 1.0 + (rand::random::<f64>() * 2.0 - 1.0) * jitter_factor;
|
||||
let backoff_with_jitter = backoff_secs * jitter;
|
||||
|
||||
Duration::from_secs(backoff_with_jitter.max(0.0) as u64)
|
||||
}
|
||||
|
||||
/// Check if an error message indicates a retriable failure
|
||||
#[allow(dead_code)]
|
||||
pub fn is_error_retriable(error_msg: &str) -> bool {
|
||||
@@ -466,17 +473,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_backoff_calculation() {
|
||||
let manager = RetryManager::with_defaults(
|
||||
// Mock pool - won't be used in this test
|
||||
unsafe { std::mem::zeroed() },
|
||||
);
|
||||
let config = RetryConfig::default();
|
||||
|
||||
let backoff0 = manager.calculate_backoff(0);
|
||||
let backoff1 = manager.calculate_backoff(1);
|
||||
let backoff2 = manager.calculate_backoff(2);
|
||||
let backoff0 = calculate_backoff_duration(&config, 0);
|
||||
let backoff1 = calculate_backoff_duration(&config, 1);
|
||||
let backoff2 = calculate_backoff_duration(&config, 2);
|
||||
|
||||
// First attempt: ~1s
|
||||
assert!(backoff0.as_secs() >= 0 && backoff0.as_secs() <= 2);
|
||||
// First attempt: ~1s (with jitter 0..2s)
|
||||
assert!(backoff0.as_secs() <= 2);
|
||||
// Second attempt: ~2s
|
||||
assert!(backoff1.as_secs() >= 1 && backoff1.as_secs() <= 3);
|
||||
// Third attempt: ~4s
|
||||
|
||||
@@ -237,9 +237,7 @@ impl ExecutionTimeoutMonitor {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use attune_common::mq::MessageQueue;
|
||||
use chrono::Duration as ChronoDuration;
|
||||
use sqlx::PgPool;
|
||||
|
||||
fn create_test_config() -> TimeoutMonitorConfig {
|
||||
TimeoutMonitorConfig {
|
||||
@@ -259,46 +257,39 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_cutoff_calculation() {
|
||||
let config = create_test_config();
|
||||
let pool = PgPool::connect("postgresql://localhost/test")
|
||||
.await
|
||||
.expect("DB connection");
|
||||
let mq = MessageQueue::connect("amqp://localhost")
|
||||
.await
|
||||
.expect("MQ connection");
|
||||
// Test that cutoff is calculated as now - scheduled_timeout
|
||||
let config = create_test_config(); // scheduled_timeout = 60s
|
||||
|
||||
let monitor = ExecutionTimeoutMonitor::new(pool, Arc::new(mq.publisher), config);
|
||||
let before = Utc::now() - ChronoDuration::seconds(60);
|
||||
|
||||
let cutoff = monitor.calculate_cutoff_time();
|
||||
let now = Utc::now();
|
||||
let expected_cutoff = now - ChronoDuration::seconds(60);
|
||||
// calculate_cutoff uses Utc::now() internally, so we compute expected bounds
|
||||
let timeout_duration =
|
||||
chrono::Duration::from_std(config.scheduled_timeout).expect("Invalid timeout duration");
|
||||
let cutoff = Utc::now() - timeout_duration;
|
||||
|
||||
// Allow 1 second tolerance
|
||||
let diff = (cutoff - expected_cutoff).num_seconds().abs();
|
||||
assert!(diff <= 1, "Cutoff time calculation incorrect");
|
||||
let after = Utc::now() - ChronoDuration::seconds(60);
|
||||
|
||||
// cutoff should be between before and after (both ~60s ago)
|
||||
let diff_before = (cutoff - before).num_seconds().abs();
|
||||
let diff_after = (cutoff - after).num_seconds().abs();
|
||||
assert!(
|
||||
diff_before <= 1,
|
||||
"Cutoff time should be ~60s ago (before check)"
|
||||
);
|
||||
assert!(
|
||||
diff_after <= 1,
|
||||
"Cutoff time should be ~60s ago (after check)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_disabled_monitor() {
|
||||
fn test_disabled_config() {
|
||||
let mut config = create_test_config();
|
||||
config.enabled = false;
|
||||
|
||||
let pool = PgPool::connect("postgresql://localhost/test")
|
||||
.await
|
||||
.expect("DB connection");
|
||||
let mq = MessageQueue::connect("amqp://localhost")
|
||||
.await
|
||||
.expect("MQ connection");
|
||||
|
||||
let monitor = Arc::new(ExecutionTimeoutMonitor::new(
|
||||
pool,
|
||||
Arc::new(mq.publisher),
|
||||
config,
|
||||
));
|
||||
|
||||
// Should return immediately without error
|
||||
let result = tokio::time::timeout(Duration::from_secs(1), monitor.start()).await;
|
||||
|
||||
assert!(result.is_ok(), "Disabled monitor should return immediately");
|
||||
// Verify the config is properly set to disabled
|
||||
assert!(!config.enabled);
|
||||
assert_eq!(config.scheduled_timeout.as_secs(), 60);
|
||||
assert_eq!(config.check_interval.as_secs(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -297,64 +297,73 @@ impl WorkerHealthProbe {
|
||||
|
||||
/// Extract health metrics from worker capabilities
|
||||
fn extract_health_metrics(&self, worker: &Worker) -> HealthMetrics {
|
||||
let mut metrics = HealthMetrics {
|
||||
last_check: Utc::now(),
|
||||
..Default::default()
|
||||
extract_health_metrics(worker)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract health metrics from worker capabilities.
|
||||
///
|
||||
/// Extracted as a free function so it can be tested without a database pool.
|
||||
fn extract_health_metrics(worker: &Worker) -> HealthMetrics {
|
||||
let mut metrics = HealthMetrics {
|
||||
last_check: Utc::now(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let Some(capabilities) = &worker.capabilities else {
|
||||
return metrics;
|
||||
};
|
||||
|
||||
let Some(health_obj) = capabilities.get("health") else {
|
||||
return metrics;
|
||||
};
|
||||
|
||||
// Extract metrics from health object
|
||||
if let Some(status_str) = health_obj.get("status").and_then(|v| v.as_str()) {
|
||||
metrics.status = match status_str {
|
||||
"healthy" => HealthStatus::Healthy,
|
||||
"degraded" => HealthStatus::Degraded,
|
||||
"unhealthy" => HealthStatus::Unhealthy,
|
||||
_ => HealthStatus::Healthy,
|
||||
};
|
||||
|
||||
let Some(capabilities) = &worker.capabilities else {
|
||||
return metrics;
|
||||
};
|
||||
|
||||
let Some(health_obj) = capabilities.get("health") else {
|
||||
return metrics;
|
||||
};
|
||||
|
||||
// Extract metrics from health object
|
||||
if let Some(status_str) = health_obj.get("status").and_then(|v| v.as_str()) {
|
||||
metrics.status = match status_str {
|
||||
"healthy" => HealthStatus::Healthy,
|
||||
"degraded" => HealthStatus::Degraded,
|
||||
"unhealthy" => HealthStatus::Unhealthy,
|
||||
_ => HealthStatus::Healthy,
|
||||
};
|
||||
}
|
||||
|
||||
if let Some(last_check_str) = health_obj.get("last_check").and_then(|v| v.as_str()) {
|
||||
if let Ok(last_check) = DateTime::parse_from_rfc3339(last_check_str) {
|
||||
metrics.last_check = last_check.with_timezone(&Utc);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(failures) = health_obj
|
||||
.get("consecutive_failures")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
metrics.consecutive_failures = failures as u32;
|
||||
}
|
||||
|
||||
if let Some(total) = health_obj.get("total_executions").and_then(|v| v.as_u64()) {
|
||||
metrics.total_executions = total;
|
||||
}
|
||||
|
||||
if let Some(failed) = health_obj.get("failed_executions").and_then(|v| v.as_u64()) {
|
||||
metrics.failed_executions = failed;
|
||||
}
|
||||
|
||||
if let Some(avg_time) = health_obj
|
||||
.get("average_execution_time_ms")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
metrics.average_execution_time_ms = avg_time;
|
||||
}
|
||||
|
||||
if let Some(depth) = health_obj.get("queue_depth").and_then(|v| v.as_u64()) {
|
||||
metrics.queue_depth = depth as u32;
|
||||
}
|
||||
|
||||
metrics
|
||||
}
|
||||
|
||||
if let Some(last_check_str) = health_obj.get("last_check").and_then(|v| v.as_str()) {
|
||||
if let Ok(last_check) = DateTime::parse_from_rfc3339(last_check_str) {
|
||||
metrics.last_check = last_check.with_timezone(&Utc);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(failures) = health_obj
|
||||
.get("consecutive_failures")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
metrics.consecutive_failures = failures as u32;
|
||||
}
|
||||
|
||||
if let Some(total) = health_obj.get("total_executions").and_then(|v| v.as_u64()) {
|
||||
metrics.total_executions = total;
|
||||
}
|
||||
|
||||
if let Some(failed) = health_obj.get("failed_executions").and_then(|v| v.as_u64()) {
|
||||
metrics.failed_executions = failed;
|
||||
}
|
||||
|
||||
if let Some(avg_time) = health_obj
|
||||
.get("average_execution_time_ms")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
metrics.average_execution_time_ms = avg_time;
|
||||
}
|
||||
|
||||
if let Some(depth) = health_obj.get("queue_depth").and_then(|v| v.as_u64()) {
|
||||
metrics.queue_depth = depth as u32;
|
||||
}
|
||||
|
||||
metrics
|
||||
}
|
||||
|
||||
impl WorkerHealthProbe {
|
||||
/// Get recommended worker for execution based on health
|
||||
#[allow(dead_code)]
|
||||
pub async fn get_best_worker(&self, runtime_name: &str) -> Result<Option<Worker>> {
|
||||
@@ -435,8 +444,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_extract_health_metrics() {
|
||||
let probe = WorkerHealthProbe::with_defaults(Arc::new(unsafe { std::mem::zeroed() }));
|
||||
|
||||
let worker = Worker {
|
||||
id: 1,
|
||||
name: "test-worker".to_string(),
|
||||
@@ -461,7 +468,7 @@ mod tests {
|
||||
updated: Utc::now(),
|
||||
};
|
||||
|
||||
let metrics = probe.extract_health_metrics(&worker);
|
||||
let metrics = extract_health_metrics(&worker);
|
||||
assert_eq!(metrics.status, HealthStatus::Degraded);
|
||||
assert_eq!(metrics.consecutive_failures, 5);
|
||||
assert_eq!(metrics.queue_depth, 25);
|
||||
|
||||
@@ -74,6 +74,13 @@ async fn _create_test_runtime(pool: &PgPool, suffix: &str) -> i64 {
|
||||
name: format!("Python {}", suffix),
|
||||
distributions: json!({"ubuntu": "python3"}),
|
||||
installation: Some(json!({"method": "apt"})),
|
||||
execution_config: json!({
|
||||
"interpreter": {
|
||||
"binary": "python3",
|
||||
"args": ["-u"],
|
||||
"file_extension": ".py"
|
||||
}
|
||||
}),
|
||||
};
|
||||
|
||||
RuntimeRepository::create(pool, runtime_input)
|
||||
|
||||
@@ -69,6 +69,13 @@ async fn create_test_runtime(pool: &PgPool, suffix: &str) -> i64 {
|
||||
name: format!("Python {}", suffix),
|
||||
distributions: json!({"ubuntu": "python3"}),
|
||||
installation: Some(json!({"method": "apt"})),
|
||||
execution_config: json!({
|
||||
"interpreter": {
|
||||
"binary": "python3",
|
||||
"args": ["-u"],
|
||||
"file_extension": ".py"
|
||||
}
|
||||
}),
|
||||
};
|
||||
|
||||
let runtime = RuntimeRepository::create(pool, runtime_input)
|
||||
|
||||
Reference in New Issue
Block a user