[wip] workflow cancellation policy
Some checks failed
CI / Rustfmt (push) Successful in 21s
CI / Cargo Audit & Deny (push) Successful in 32s
CI / Web Blocking Checks (push) Successful in 50s
CI / Security Blocking Checks (push) Successful in 9s
CI / Clippy (push) Failing after 1m58s
CI / Web Advisory Checks (push) Successful in 34s
CI / Security Advisory Checks (push) Successful in 1m26s
CI / Tests (push) Successful in 8m47s

This commit is contained in:
2026-03-09 14:08:01 -05:00
parent 87d830f952
commit 9e7e35cbe3
7 changed files with 451 additions and 32 deletions

View File

@@ -17,7 +17,7 @@ use attune_common::{
mq::{Consumer, ExecutionRequestedPayload, MessageEnvelope, MessageType, Publisher},
repositories::{
action::ActionRepository,
execution::{CreateExecutionInput, ExecutionRepository},
execution::{CreateExecutionInput, ExecutionRepository, UpdateExecutionInput},
runtime::{RuntimeRepository, WorkerRepository},
workflow::{
CreateWorkflowExecutionInput, WorkflowDefinitionRepository, WorkflowExecutionRepository,
@@ -884,10 +884,10 @@ impl ExecutionScheduler {
anyhow::anyhow!("Workflow execution {} not found", workflow_execution_id)
})?;
// Already in a terminal state — nothing to do
// Already fully terminal (Completed / Failed) — nothing to do
if matches!(
workflow_execution.status,
ExecutionStatus::Completed | ExecutionStatus::Failed | ExecutionStatus::Cancelled
ExecutionStatus::Completed | ExecutionStatus::Failed
) {
debug!(
"Workflow execution {} already in terminal state {:?}, skipping advance",
@@ -896,6 +896,41 @@ impl ExecutionScheduler {
return Ok(());
}
// Cancelled workflow: don't dispatch new tasks, but check whether all
// running children have now finished. When none remain, finalize the
// parent execution as Cancelled so it doesn't stay stuck in "Canceling".
if workflow_execution.status == ExecutionStatus::Cancelled {
let running = Self::count_running_workflow_children(
pool,
workflow_execution_id,
&workflow_execution.completed_tasks,
&workflow_execution.failed_tasks,
)
.await?;
if running == 0 {
info!(
"Cancelled workflow_execution {} has no more running children, \
finalizing parent execution {} as Cancelled",
workflow_execution_id, workflow_execution.execution
);
Self::finalize_cancelled_workflow(
pool,
workflow_execution.execution,
workflow_execution_id,
)
.await?;
} else {
debug!(
"Cancelled workflow_execution {} still has {} running children, \
waiting for them to finish",
workflow_execution_id, running
);
}
return Ok(());
}
// Load the workflow definition so we can apply param_schema defaults
let workflow_def =
WorkflowDefinitionRepository::find_by_id(pool, workflow_execution.workflow_def)
@@ -1375,6 +1410,32 @@ impl ExecutionScheduler {
Ok(count)
}
/// Move the parent `execution` record of a cancelled workflow into its final
/// `Cancelled` state.
///
/// Only the parent execution is written here. The `workflow_execution`
/// record is expected to be `Cancelled` already (set by
/// `cancel_workflow_children`), so it is left untouched.
/// `workflow_execution_id` is used solely for the log line.
///
/// # Errors
///
/// Propagates any error returned by `ExecutionRepository::update`.
async fn finalize_cancelled_workflow(
    pool: &PgPool,
    parent_execution_id: i64,
    workflow_execution_id: i64,
) -> Result<()> {
    info!(
        "Finalizing cancelled workflow: parent execution {} (workflow_execution {})",
        parent_execution_id, workflow_execution_id
    );

    // Result payload stored on the parent execution alongside the new status.
    let cancelled_result = serde_json::json!({
        "error": "Workflow cancelled",
        "succeeded": false,
    });

    // Every field other than status/result keeps its `Default` value.
    ExecutionRepository::update(
        pool,
        parent_execution_id,
        UpdateExecutionInput {
            status: Some(ExecutionStatus::Cancelled),
            result: Some(cancelled_result),
            ..Default::default()
        },
    )
    .await?;

    Ok(())
}
/// Mark a workflow as completed (success or failure) and update both the
/// `workflow_execution` and parent `execution` records.
async fn complete_workflow(