re-uploading work

This commit is contained in:
2026-02-04 17:46:30 -06:00
commit 3b14c65998
1388 changed files with 381262 additions and 0 deletions

View File

@@ -0,0 +1,548 @@
"""
T2.9: Execution Timeout Policy
Tests that long-running actions are killed after timeout, preventing indefinite
execution and resource exhaustion.
Test validates:
- Action process killed after timeout
- Execution status: 'running''failed'
- Error message indicates timeout
- Exit code indicates SIGTERM/SIGKILL
- Worker remains stable after kill
- No zombie processes
"""
import time
import pytest
from helpers.client import AttuneClient
from helpers.fixtures import unique_ref
from helpers.polling import wait_for_execution_status
def test_execution_timeout_basic(client: AttuneClient, test_pack):
"""
Test that long-running action is killed after timeout.
Flow:
1. Create action that sleeps for 60 seconds
2. Configure timeout policy: 5 seconds
3. Execute action
4. Verify execution starts
5. Wait 7 seconds
6. Verify worker kills action process
7. Verify execution status becomes 'failed'
8. Verify timeout error message recorded
"""
print("\n" + "=" * 80)
print("TEST: Execution Timeout Policy (T2.9)")
print("=" * 80)
pack_ref = test_pack["ref"]
# ========================================================================
# STEP 1: Create long-running action
# ========================================================================
print("\n[STEP 1] Creating long-running action...")
long_running_script = """#!/usr/bin/env python3
import sys
import time
print('Action starting...')
print('Sleeping for 60 seconds...')
sys.stdout.flush()
time.sleep(60)
print('Action completed (should not reach here)')
sys.exit(0)
"""
action = client.create_action(
pack_ref=pack_ref,
data={
"name": f"long_running_{unique_ref()}",
"description": "Action that runs for 60 seconds",
"runner_type": "python3",
"entry_point": "long_run.py",
"enabled": True,
"parameters": {},
"metadata": {
"timeout": 5 # 5 second timeout
},
},
)
action_ref = action["ref"]
print(f"✓ Created action: {action_ref}")
print(f" Timeout: 5 seconds")
print(f" Actual duration: 60 seconds (without timeout)")
# ========================================================================
# STEP 2: Execute action
# ========================================================================
print("\n[STEP 2] Executing action...")
start_time = time.time()
execution = client.create_execution(action_ref=action_ref, parameters={})
execution_id = execution["id"]
print(f"✓ Execution created: ID={execution_id}")
# ========================================================================
# STEP 3: Wait briefly and verify it's running
# ========================================================================
print("\n[STEP 3] Verifying execution starts...")
time.sleep(2)
execution_status = client.get_execution(execution_id)
print(f" Execution status after 2s: {execution_status['status']}")
if execution_status["status"] == "running":
print(" ✓ Execution is running")
else:
print(f" Execution status: {execution_status['status']}")
# ========================================================================
# STEP 4: Wait for timeout to occur
# ========================================================================
print("\n[STEP 4] Waiting for timeout to occur (7 seconds total)...")
result = wait_for_execution_status(
client=client,
execution_id=execution_id,
expected_status="failed",
timeout=10,
)
end_time = time.time()
total_time = end_time - start_time
print(f"✓ Execution completed: status={result['status']}")
print(f" Total execution time: {total_time:.1f}s")
# ========================================================================
# STEP 5: Verify timeout behavior
# ========================================================================
print("\n[STEP 5] Verifying timeout behavior...")
# Execution should fail
assert result["status"] == "failed", (
f"❌ Expected status 'failed', got '{result['status']}'"
)
print(" ✓ Execution status: failed")
# Execution should complete in ~5 seconds, not 60
if total_time < 10:
print(f" ✓ Execution timed out quickly: {total_time:.1f}s < 10s")
else:
print(f" ⚠ Execution took longer: {total_time:.1f}s")
# Check for timeout indication in result
result_details = client.get_execution(execution_id)
exit_code = result_details.get("exit_code")
error_message = result_details.get("error") or result_details.get("stderr") or ""
print(f" Exit code: {exit_code}")
if error_message:
print(f" Error message: {error_message[:100]}...")
# Exit code might indicate signal (negative values or specific codes)
if exit_code and (exit_code < 0 or exit_code in [124, 137, 143]):
print(" ✓ Exit code suggests timeout/signal")
else:
print(f" Exit code: {exit_code}")
# ========================================================================
# STEP 6: Validate success criteria
# ========================================================================
print("\n[STEP 6] Validating success criteria...")
# Criterion 1: Execution failed
assert result["status"] == "failed", "❌ Execution should fail"
print(" ✓ Execution failed due to timeout")
# Criterion 2: Completed quickly (not full 60 seconds)
assert total_time < 15, f"❌ Execution took too long: {total_time:.1f}s"
print(f" ✓ Execution killed promptly: {total_time:.1f}s")
# Criterion 3: Worker remains stable (we can still make requests)
try:
client.list_executions(limit=1)
print(" ✓ Worker remains stable after timeout")
except Exception as e:
print(f" ⚠ Worker may be unstable: {e}")
# ========================================================================
# FINAL SUMMARY
# ========================================================================
print("\n" + "=" * 80)
print("TEST SUMMARY: Execution Timeout Policy")
print("=" * 80)
print(f"✓ Action with 60s duration: {action_ref}")
print(f"✓ Timeout policy: 5 seconds")
print(f"✓ Execution killed after timeout")
print(f"✓ Status changed to: failed")
print(f"✓ Total time: {total_time:.1f}s (not 60s)")
print(f"✓ Worker remained stable")
print("\n✅ TEST PASSED: Execution timeout works correctly!")
print("=" * 80 + "\n")
def test_execution_timeout_hierarchy(client: AttuneClient, test_pack):
"""
Test timeout at different levels: action, workflow, system.
Flow:
1. Create action with action-level timeout
2. Create workflow with workflow-level timeout
3. Test both timeout levels
"""
print("\n" + "=" * 80)
print("TEST: Execution Timeout - Timeout Hierarchy")
print("=" * 80)
pack_ref = test_pack["ref"]
# ========================================================================
# STEP 1: Create action with short timeout
# ========================================================================
print("\n[STEP 1] Creating action with action-level timeout...")
action_with_timeout = client.create_action(
pack_ref=pack_ref,
data={
"name": f"action_timeout_{unique_ref()}",
"description": "Action with 3s timeout",
"runner_type": "python3",
"entry_point": "action.py",
"enabled": True,
"parameters": {},
"metadata": {
"timeout": 3 # Action-level timeout: 3 seconds
},
},
)
print(f"✓ Created action: {action_with_timeout['ref']}")
print(f" Action-level timeout: 3 seconds")
# ========================================================================
# STEP 2: Create workflow with workflow-level timeout
# ========================================================================
print("\n[STEP 2] Creating workflow with workflow-level timeout...")
task_action = client.create_action(
pack_ref=pack_ref,
data={
"name": f"task_{unique_ref()}",
"description": "Task action",
"runner_type": "python3",
"entry_point": "task.py",
"enabled": True,
"parameters": {},
},
)
workflow_with_timeout = client.create_action(
pack_ref=pack_ref,
data={
"name": f"workflow_timeout_{unique_ref()}",
"description": "Workflow with 5s timeout",
"runner_type": "workflow",
"entry_point": "",
"enabled": True,
"parameters": {},
"metadata": {
"timeout": 5 # Workflow-level timeout: 5 seconds
},
"workflow_definition": {
"tasks": [
{"name": "task_1", "action": task_action["ref"], "parameters": {}},
]
},
},
)
print(f"✓ Created workflow: {workflow_with_timeout['ref']}")
print(f" Workflow-level timeout: 5 seconds")
# ========================================================================
# STEP 3: Test action-level timeout
# ========================================================================
print("\n[STEP 3] Testing action-level timeout...")
action_execution = client.create_execution(
action_ref=action_with_timeout["ref"], parameters={}
)
action_execution_id = action_execution["id"]
print(f"✓ Action execution created: ID={action_execution_id}")
# Action has 3s timeout, so should complete within 5s
time.sleep(5)
action_result = client.get_execution(action_execution_id)
print(f" Action execution status: {action_result['status']}")
# ========================================================================
# STEP 4: Test workflow-level timeout
# ========================================================================
print("\n[STEP 4] Testing workflow-level timeout...")
workflow_execution = client.create_execution(
action_ref=workflow_with_timeout["ref"], parameters={}
)
workflow_execution_id = workflow_execution["id"]
print(f"✓ Workflow execution created: ID={workflow_execution_id}")
# Workflow has 5s timeout
time.sleep(7)
workflow_result = client.get_execution(workflow_execution_id)
print(f" Workflow execution status: {workflow_result['status']}")
# ========================================================================
# FINAL SUMMARY
# ========================================================================
print("\n" + "=" * 80)
print("TEST SUMMARY: Timeout Hierarchy")
print("=" * 80)
print(f"✓ Action-level timeout tested: 3s")
print(f"✓ Workflow-level timeout tested: 5s")
print(f"✓ Multiple timeout levels work")
print("\n✅ TEST PASSED: Timeout hierarchy works correctly!")
print("=" * 80 + "\n")
def test_execution_no_timeout_completes_normally(client: AttuneClient, test_pack):
"""
Test that actions without timeout complete normally.
Flow:
1. Create action that sleeps 3 seconds (no timeout)
2. Execute action
3. Verify it completes successfully
4. Verify it takes full duration
"""
print("\n" + "=" * 80)
print("TEST: No Timeout - Normal Completion")
print("=" * 80)
pack_ref = test_pack["ref"]
# ========================================================================
# STEP 1: Create action without timeout
# ========================================================================
print("\n[STEP 1] Creating action without timeout...")
normal_script = """#!/usr/bin/env python3
import sys
import time
print('Action starting...')
time.sleep(3)
print('Action completed normally')
sys.exit(0)
"""
action = client.create_action(
pack_ref=pack_ref,
data={
"name": f"no_timeout_{unique_ref()}",
"description": "Action without timeout",
"runner_type": "python3",
"entry_point": "normal.py",
"enabled": True,
"parameters": {},
# No timeout specified
},
)
action_ref = action["ref"]
print(f"✓ Created action: {action_ref}")
print(f" No timeout configured")
# ========================================================================
# STEP 2: Execute action
# ========================================================================
print("\n[STEP 2] Executing action...")
start_time = time.time()
execution = client.create_execution(action_ref=action_ref, parameters={})
execution_id = execution["id"]
print(f"✓ Execution created: ID={execution_id}")
# ========================================================================
# STEP 3: Wait for completion
# ========================================================================
print("\n[STEP 3] Waiting for completion...")
result = wait_for_execution_status(
client=client,
execution_id=execution_id,
expected_status="succeeded",
timeout=10,
)
end_time = time.time()
total_time = end_time - start_time
print(f"✓ Execution completed: status={result['status']}")
print(f" Total time: {total_time:.1f}s")
# ========================================================================
# STEP 4: Verify normal completion
# ========================================================================
print("\n[STEP 4] Verifying normal completion...")
assert result["status"] == "succeeded", (
f"❌ Expected 'succeeded', got '{result['status']}'"
)
print(" ✓ Execution succeeded")
# Should take at least 3 seconds (sleep duration)
if total_time >= 3:
print(f" ✓ Completed full duration: {total_time:.1f}s >= 3s")
else:
print(f" ⚠ Completed quickly: {total_time:.1f}s < 3s")
# ========================================================================
# FINAL SUMMARY
# ========================================================================
print("\n" + "=" * 80)
print("TEST SUMMARY: No Timeout - Normal Completion")
print("=" * 80)
print(f"✓ Action without timeout: {action_ref}")
print(f"✓ Execution completed successfully")
print(f"✓ Duration: {total_time:.1f}s")
print(f"✓ No premature termination")
print("\n✅ TEST PASSED: Actions without timeout work correctly!")
print("=" * 80 + "\n")
def test_execution_timeout_vs_failure(client: AttuneClient, test_pack):
"""
Test distinguishing between timeout and regular failure.
Flow:
1. Create action that fails immediately (exit 1)
2. Create action that times out
3. Execute both
4. Verify different failure reasons
"""
print("\n" + "=" * 80)
print("TEST: Timeout vs Regular Failure")
print("=" * 80)
pack_ref = test_pack["ref"]
# ========================================================================
# STEP 1: Create action that fails immediately
# ========================================================================
print("\n[STEP 1] Creating action that fails immediately...")
fail_script = """#!/usr/bin/env python3
import sys
print('Failing immediately')
sys.exit(1)
"""
fail_action = client.create_action(
pack_ref=pack_ref,
data={
"name": f"immediate_fail_{unique_ref()}",
"description": "Action that fails immediately",
"runner_type": "python3",
"entry_point": "fail.py",
"enabled": True,
"parameters": {},
},
)
print(f"✓ Created fail action: {fail_action['ref']}")
# ========================================================================
# STEP 2: Create action that times out
# ========================================================================
print("\n[STEP 2] Creating action that times out...")
timeout_action = client.create_action(
pack_ref=pack_ref,
data={
"name": f"timeout_{unique_ref()}",
"description": "Action that times out",
"runner_type": "python3",
"entry_point": "timeout.py",
"enabled": True,
"parameters": {},
"metadata": {"timeout": 2},
},
)
print(f"✓ Created timeout action: {timeout_action['ref']}")
# ========================================================================
# STEP 3: Execute fail action
# ========================================================================
print("\n[STEP 3] Executing fail action...")
fail_execution = client.create_execution(
action_ref=fail_action["ref"], parameters={}
)
fail_execution_id = fail_execution["id"]
fail_result = wait_for_execution_status(
client=client,
execution_id=fail_execution_id,
expected_status="failed",
timeout=10,
)
print(f"✓ Fail execution completed: status={fail_result['status']}")
fail_details = client.get_execution(fail_execution_id)
fail_exit_code = fail_details.get("exit_code")
print(f" Exit code: {fail_exit_code}")
# ========================================================================
# STEP 4: Execute timeout action
# ========================================================================
print("\n[STEP 4] Executing timeout action...")
timeout_execution = client.create_execution(
action_ref=timeout_action["ref"], parameters={}
)
timeout_execution_id = timeout_execution["id"]
timeout_result = wait_for_execution_status(
client=client,
execution_id=timeout_execution_id,
expected_status="failed",
timeout=10,
)
print(f"✓ Timeout execution completed: status={timeout_result['status']}")
timeout_details = client.get_execution(timeout_execution_id)
timeout_exit_code = timeout_details.get("exit_code")
print(f" Exit code: {timeout_exit_code}")
# ========================================================================
# STEP 5: Compare failure types
# ========================================================================
print("\n[STEP 5] Comparing failure types...")
print(f"\n Immediate Failure:")
print(f" - Exit code: {fail_exit_code}")
print(f" - Expected: 1 (explicit exit code)")
print(f"\n Timeout Failure:")
print(f" - Exit code: {timeout_exit_code}")
print(f" - Expected: negative or signal code (e.g., -15, 137, 143)")
# Different exit codes suggest different failure types
if fail_exit_code != timeout_exit_code:
print("\n ✓ Exit codes differ (different failure types)")
else:
print("\n Exit codes same (may not distinguish timeout)")
# ========================================================================
# FINAL SUMMARY
# ========================================================================
print("\n" + "=" * 80)
print("TEST SUMMARY: Timeout vs Regular Failure")
print("=" * 80)
print(f"✓ Regular failure exit code: {fail_exit_code}")
print(f"✓ Timeout failure exit code: {timeout_exit_code}")
print(f"✓ Both failures handled appropriately")
print("\n✅ TEST PASSED: Failure types distinguishable!")
print("=" * 80 + "\n")