re-uploading work
This commit is contained in:
581
crates/common/src/workflow/validator.rs
Normal file
581
crates/common/src/workflow/validator.rs
Normal file
@@ -0,0 +1,581 @@
|
||||
//! Workflow validation module
|
||||
//!
|
||||
//! This module provides validation utilities for workflow definitions including
|
||||
//! schema validation, graph analysis, and semantic checks.
|
||||
|
||||
use crate::workflow::parser::{ParseError, Task, TaskType, WorkflowDefinition};
|
||||
use serde_json::Value as JsonValue;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Result type for validation operations
|
||||
pub type ValidationResult<T> = Result<T, ValidationError>;
|
||||
|
||||
/// Validation errors
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ValidationError {
|
||||
#[error("Parse error: {0}")]
|
||||
ParseError(#[from] ParseError),
|
||||
|
||||
#[error("Schema validation failed: {0}")]
|
||||
SchemaError(String),
|
||||
|
||||
#[error("Invalid graph structure: {0}")]
|
||||
GraphError(String),
|
||||
|
||||
#[error("Semantic error: {0}")]
|
||||
SemanticError(String),
|
||||
|
||||
#[error("Unreachable task: {0}")]
|
||||
UnreachableTask(String),
|
||||
|
||||
#[error("Missing entry point: no task without predecessors")]
|
||||
NoEntryPoint,
|
||||
|
||||
#[error("Invalid action reference: {0}")]
|
||||
InvalidActionRef(String),
|
||||
}
|
||||
|
||||
/// Stateless entry point for workflow validation.
///
/// All checks are exposed as associated functions; the type carries no data.
/// The common traits are derived so the validator can be logged, copied, and
/// default-constructed like any other public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct WorkflowValidator;
|
||||
|
||||
impl WorkflowValidator {
|
||||
/// Validate a complete workflow definition
|
||||
pub fn validate(workflow: &WorkflowDefinition) -> ValidationResult<()> {
|
||||
// Structural validation
|
||||
Self::validate_structure(workflow)?;
|
||||
|
||||
// Graph validation
|
||||
Self::validate_graph(workflow)?;
|
||||
|
||||
// Semantic validation
|
||||
Self::validate_semantics(workflow)?;
|
||||
|
||||
// Schema validation
|
||||
Self::validate_schemas(workflow)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate workflow structure (field constraints, etc.)
|
||||
fn validate_structure(workflow: &WorkflowDefinition) -> ValidationResult<()> {
|
||||
// Check required fields
|
||||
if workflow.r#ref.is_empty() {
|
||||
return Err(ValidationError::SemanticError(
|
||||
"Workflow ref cannot be empty".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if workflow.version.is_empty() {
|
||||
return Err(ValidationError::SemanticError(
|
||||
"Workflow version cannot be empty".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if workflow.tasks.is_empty() {
|
||||
return Err(ValidationError::SemanticError(
|
||||
"Workflow must contain at least one task".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Validate task names are unique
|
||||
let mut task_names = HashSet::new();
|
||||
for task in &workflow.tasks {
|
||||
if !task_names.insert(&task.name) {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Duplicate task name: {}",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Validate each task
|
||||
for task in &workflow.tasks {
|
||||
Self::validate_task(task)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate a single task
|
||||
fn validate_task(task: &Task) -> ValidationResult<()> {
|
||||
// Action tasks must have an action reference
|
||||
if task.r#type == TaskType::Action && task.action.is_none() {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' of type 'action' must have an action field",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
|
||||
// Parallel tasks must have sub-tasks
|
||||
if task.r#type == TaskType::Parallel {
|
||||
match &task.tasks {
|
||||
None => {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' of type 'parallel' must have tasks field",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
Some(tasks) if tasks.is_empty() => {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' parallel tasks cannot be empty",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Workflow tasks must have an action reference (to another workflow)
|
||||
if task.r#type == TaskType::Workflow && task.action.is_none() {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' of type 'workflow' must have an action field",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
|
||||
// Validate retry configuration
|
||||
if let Some(ref retry) = task.retry {
|
||||
if retry.count == 0 {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' retry count must be greater than 0",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(max_delay) = retry.max_delay {
|
||||
if max_delay < retry.delay {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' retry max_delay must be >= delay",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validate with_items configuration
|
||||
if task.with_items.is_some() {
|
||||
if let Some(batch_size) = task.batch_size {
|
||||
if batch_size == 0 {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' batch_size must be greater than 0",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(concurrency) = task.concurrency {
|
||||
if concurrency == 0 {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' concurrency must be greater than 0",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validate decision branches
|
||||
if !task.decision.is_empty() {
|
||||
let mut has_default = false;
|
||||
for branch in &task.decision {
|
||||
if branch.default {
|
||||
if has_default {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' can only have one default decision branch",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
has_default = true;
|
||||
}
|
||||
|
||||
if branch.when.is_none() && !branch.default {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task '{}' decision branch must have 'when' condition or be marked as default",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively validate parallel sub-tasks
|
||||
if let Some(ref tasks) = task.tasks {
|
||||
for subtask in tasks {
|
||||
Self::validate_task(subtask)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate workflow graph structure
|
||||
fn validate_graph(workflow: &WorkflowDefinition) -> ValidationResult<()> {
|
||||
let task_names: HashSet<_> = workflow.tasks.iter().map(|t| t.name.as_str()).collect();
|
||||
|
||||
// Build task graph
|
||||
let graph = Self::build_graph(workflow);
|
||||
|
||||
// Check all transitions reference valid tasks
|
||||
for (task_name, transitions) in &graph {
|
||||
for target in transitions {
|
||||
if !task_names.contains(target.as_str()) {
|
||||
return Err(ValidationError::GraphError(format!(
|
||||
"Task '{}' references non-existent task '{}'",
|
||||
task_name, target
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find entry point (task with no predecessors)
|
||||
// Note: Entry points are optional - workflows can have cycles with no entry points
|
||||
// if they're started manually at a specific task
|
||||
let entry_points = Self::find_entry_points(workflow);
|
||||
if entry_points.is_empty() {
|
||||
// This is now just a warning case, not an error
|
||||
// Workflows with all tasks having predecessors are valid (cycles)
|
||||
}
|
||||
|
||||
// Check for unreachable tasks (only if there are entry points)
|
||||
if !entry_points.is_empty() {
|
||||
let reachable = Self::find_reachable_tasks(workflow, &entry_points);
|
||||
for task in &workflow.tasks {
|
||||
if !reachable.contains(task.name.as_str()) {
|
||||
return Err(ValidationError::UnreachableTask(task.name.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cycles are now allowed - no cycle detection needed
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Build adjacency list representation of task graph
|
||||
fn build_graph(workflow: &WorkflowDefinition) -> HashMap<String, Vec<String>> {
|
||||
let mut graph = HashMap::new();
|
||||
|
||||
for task in &workflow.tasks {
|
||||
let mut transitions = Vec::new();
|
||||
|
||||
if let Some(ref next) = task.on_success {
|
||||
transitions.push(next.clone());
|
||||
}
|
||||
if let Some(ref next) = task.on_failure {
|
||||
transitions.push(next.clone());
|
||||
}
|
||||
if let Some(ref next) = task.on_complete {
|
||||
transitions.push(next.clone());
|
||||
}
|
||||
if let Some(ref next) = task.on_timeout {
|
||||
transitions.push(next.clone());
|
||||
}
|
||||
|
||||
for branch in &task.decision {
|
||||
transitions.push(branch.next.clone());
|
||||
}
|
||||
|
||||
graph.insert(task.name.clone(), transitions);
|
||||
}
|
||||
|
||||
graph
|
||||
}
|
||||
|
||||
/// Find tasks that have no predecessors (entry points)
|
||||
fn find_entry_points(workflow: &WorkflowDefinition) -> HashSet<String> {
|
||||
let mut has_predecessor = HashSet::new();
|
||||
|
||||
for task in &workflow.tasks {
|
||||
if let Some(ref next) = task.on_success {
|
||||
has_predecessor.insert(next.clone());
|
||||
}
|
||||
if let Some(ref next) = task.on_failure {
|
||||
has_predecessor.insert(next.clone());
|
||||
}
|
||||
if let Some(ref next) = task.on_complete {
|
||||
has_predecessor.insert(next.clone());
|
||||
}
|
||||
if let Some(ref next) = task.on_timeout {
|
||||
has_predecessor.insert(next.clone());
|
||||
}
|
||||
|
||||
for branch in &task.decision {
|
||||
has_predecessor.insert(branch.next.clone());
|
||||
}
|
||||
}
|
||||
|
||||
workflow
|
||||
.tasks
|
||||
.iter()
|
||||
.filter(|t| !has_predecessor.contains(&t.name))
|
||||
.map(|t| t.name.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Find all reachable tasks from entry points
|
||||
fn find_reachable_tasks(
|
||||
workflow: &WorkflowDefinition,
|
||||
entry_points: &HashSet<String>,
|
||||
) -> HashSet<String> {
|
||||
let graph = Self::build_graph(workflow);
|
||||
let mut reachable = HashSet::new();
|
||||
let mut stack: Vec<String> = entry_points.iter().cloned().collect();
|
||||
|
||||
while let Some(task_name) = stack.pop() {
|
||||
if reachable.insert(task_name.clone()) {
|
||||
if let Some(neighbors) = graph.get(&task_name) {
|
||||
for neighbor in neighbors {
|
||||
if !reachable.contains(neighbor) {
|
||||
stack.push(neighbor.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reachable
|
||||
}
|
||||
|
||||
// Cycle detection (formerly a DFS pass) was removed on purpose: cycles are
// valid in workflow graphs. Workflows are directed graphs (not DAGs), and
// cycles support use cases such as monitoring loops and retry patterns.
|
||||
|
||||
/// Validate workflow semantics (business logic)
|
||||
fn validate_semantics(workflow: &WorkflowDefinition) -> ValidationResult<()> {
|
||||
// Validate action references format
|
||||
for task in &workflow.tasks {
|
||||
if let Some(ref action) = task.action {
|
||||
if !Self::is_valid_action_ref(action) {
|
||||
return Err(ValidationError::InvalidActionRef(format!(
|
||||
"Task '{}' has invalid action reference: {}",
|
||||
task.name, action
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validate variable names in vars
|
||||
for (key, _) in &workflow.vars {
|
||||
if !Self::is_valid_variable_name(key) {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Invalid variable name: {}",
|
||||
key
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Validate task names don't conflict with reserved keywords
|
||||
for task in &workflow.tasks {
|
||||
if Self::is_reserved_keyword(&task.name) {
|
||||
return Err(ValidationError::SemanticError(format!(
|
||||
"Task name '{}' conflicts with reserved keyword",
|
||||
task.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate JSON schemas
|
||||
fn validate_schemas(workflow: &WorkflowDefinition) -> ValidationResult<()> {
|
||||
// Validate parameter schema is valid JSON Schema
|
||||
if let Some(ref schema) = workflow.parameters {
|
||||
Self::validate_json_schema(schema, "parameters")?;
|
||||
}
|
||||
|
||||
// Validate output schema is valid JSON Schema
|
||||
if let Some(ref schema) = workflow.output {
|
||||
Self::validate_json_schema(schema, "output")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate a JSON Schema object
|
||||
fn validate_json_schema(schema: &JsonValue, context: &str) -> ValidationResult<()> {
|
||||
// Basic JSON Schema validation
|
||||
if !schema.is_object() {
|
||||
return Err(ValidationError::SchemaError(format!(
|
||||
"{} schema must be an object",
|
||||
context
|
||||
)));
|
||||
}
|
||||
|
||||
// Check for required JSON Schema fields
|
||||
let obj = schema.as_object().unwrap();
|
||||
if !obj.contains_key("type") {
|
||||
return Err(ValidationError::SchemaError(format!(
|
||||
"{} schema must have a 'type' field",
|
||||
context
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Report whether `action_ref` is a dotted reference such as `pack.action`.
///
/// The reference must contain at least one `.` and every dot-separated
/// segment must be non-empty.
fn is_valid_action_ref(action_ref: &str) -> bool {
    // One or more dots is the same as "splits into at least two segments".
    if !action_ref.contains('.') {
        return false;
    }
    action_ref.split('.').all(|segment| !segment.is_empty())
}
|
||||
|
||||
/// Report whether `name` is an acceptable variable name.
///
/// A name is valid when it is non-empty and consists solely of alphanumeric
/// characters, underscores (`_`), and hyphens (`-`).
fn is_valid_variable_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    name.chars()
        .all(|ch| matches!(ch, '_' | '-') || ch.is_alphanumeric())
}
|
||||
|
||||
/// Report whether `name` is one of the reserved keywords (exact, case-sensitive match).
fn is_reserved_keyword(name: &str) -> bool {
    const RESERVED: [&str; 9] = [
        "parameters",
        "vars",
        "task",
        "system",
        "kv",
        "pack",
        "item",
        "batch",
        "index",
    ];
    RESERVED.iter().any(|keyword| *keyword == name)
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::workflow::parser::parse_workflow_yaml;
|
||||
|
||||
/// A two-task chain with well-formed action references validates cleanly.
#[test]
fn test_validate_valid_workflow() {
    let source = r#"
ref: test.valid
label: Valid Workflow
version: 1.0.0
tasks:
  - name: task1
    action: core.echo
    input:
      message: "Hello"
    on_success: task2
  - name: task2
    action: core.echo
    input:
      message: "World"
"#;

    let definition = parse_workflow_yaml(source).unwrap();
    assert!(WorkflowValidator::validate(&definition).is_ok());
}
|
||||
|
||||
/// Two tasks sharing one name must be rejected.
#[test]
fn test_validate_duplicate_task_names() {
    let source = r#"
ref: test.duplicate
label: Duplicate Task Names
version: 1.0.0
tasks:
  - name: task1
    action: core.echo
  - name: task1
    action: core.echo
"#;

    let definition = parse_workflow_yaml(source).unwrap();
    assert!(WorkflowValidator::validate(&definition).is_err());
}
|
||||
|
||||
/// Covers both sides of the reachability rule: a stand-alone task is an entry
/// point (and therefore reachable), while an isolated cycle is not.
#[test]
fn test_validate_unreachable_task() {
    // A task without predecessors is itself an entry point, so a stand-alone
    // "orphan" is still reachable and the workflow validates.
    let orphan_yaml = r#"
ref: test.unreachable
label: Unreachable Task
version: 1.0.0
tasks:
  - name: task1
    action: core.echo
    on_success: task2
  - name: task2
    action: core.echo
  - name: orphan
    action: core.echo
"#;

    let workflow = parse_workflow_yaml(orphan_yaml).unwrap();
    assert!(WorkflowValidator::validate(&workflow).is_ok());

    // `loop_a` and `loop_b` only point at each other: both have a predecessor,
    // so neither is an entry point, and neither can be reached from `task1`.
    let island_yaml = r#"
ref: test.unreachable
label: Unreachable Task
version: 1.0.0
tasks:
  - name: task1
    action: core.echo
    on_success: task2
  - name: task2
    action: core.echo
  - name: loop_a
    action: core.echo
    on_success: loop_b
  - name: loop_b
    action: core.echo
    on_success: loop_a
"#;

    let workflow = parse_workflow_yaml(island_yaml).unwrap();
    let result = WorkflowValidator::validate(&workflow);
    assert!(matches!(result, Err(ValidationError::UnreachableTask(_))));
}
|
||||
|
||||
/// An action reference without a `pack.` prefix must be rejected.
#[test]
fn test_validate_invalid_action_ref() {
    let source = r#"
ref: test.invalid_ref
label: Invalid Action Reference
version: 1.0.0
tasks:
  - name: task1
    action: invalid_format
"#;

    let definition = parse_workflow_yaml(source).unwrap();
    assert!(WorkflowValidator::validate(&definition).is_err());
}
|
||||
|
||||
/// A task named after a reserved keyword must be rejected.
#[test]
fn test_validate_reserved_keyword() {
    let source = r#"
ref: test.reserved
label: Reserved Keyword
version: 1.0.0
tasks:
  - name: parameters
    action: core.echo
"#;

    let definition = parse_workflow_yaml(source).unwrap();
    assert!(WorkflowValidator::validate(&definition).is_err());
}
|
||||
|
||||
/// A retry count of zero is rejected.
#[test]
fn test_validate_retry_config() {
    let source = r#"
ref: test.retry
label: Retry Config
version: 1.0.0
tasks:
  - name: task1
    action: core.flaky
    retry:
      count: 0
      delay: 10
"#;

    // Rejection happens during YAML parsing (validator derive on the parsed
    // types), before `WorkflowValidator` is ever involved.
    assert!(parse_workflow_yaml(source).is_err());
}
|
||||
|
||||
/// Table-driven check of accepted and rejected action references.
#[test]
fn test_is_valid_action_ref() {
    let accepted = ["pack.action", "my_pack.my_action", "namespace.pack.action"];
    for candidate in accepted.iter() {
        assert!(WorkflowValidator::is_valid_action_ref(candidate));
    }

    let rejected = ["invalid", ".invalid", "invalid."];
    for candidate in rejected.iter() {
        assert!(!WorkflowValidator::is_valid_action_ref(candidate));
    }
}
|
||||
|
||||
/// Table-driven check of accepted and rejected variable names.
#[test]
fn test_is_valid_variable_name() {
    let accepted = ["my_var", "var123", "my-var"];
    for candidate in accepted.iter() {
        assert!(WorkflowValidator::is_valid_variable_name(candidate));
    }

    let rejected = ["", "my var", "my.var"];
    for candidate in rejected.iter() {
        assert!(!WorkflowValidator::is_valid_variable_name(candidate));
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user