""" T2.9: Execution Timeout Policy Tests that long-running actions are killed after timeout, preventing indefinite execution and resource exhaustion. Test validates: - Action process killed after timeout - Execution status: 'running' → 'failed' - Error message indicates timeout - Exit code indicates SIGTERM/SIGKILL - Worker remains stable after kill - No zombie processes """ import time import pytest from helpers.client import AttuneClient from helpers.fixtures import unique_ref from helpers.polling import wait_for_execution_status def test_execution_timeout_basic(client: AttuneClient, test_pack): """ Test that long-running action is killed after timeout. Flow: 1. Create action that sleeps for 60 seconds 2. Configure timeout policy: 5 seconds 3. Execute action 4. Verify execution starts 5. Wait 7 seconds 6. Verify worker kills action process 7. Verify execution status becomes 'failed' 8. Verify timeout error message recorded """ print("\n" + "=" * 80) print("TEST: Execution Timeout Policy (T2.9)") print("=" * 80) pack_ref = test_pack["ref"] # ======================================================================== # STEP 1: Create long-running action # ======================================================================== print("\n[STEP 1] Creating long-running action...") long_running_script = """#!/usr/bin/env python3 import sys import time print('Action starting...') print('Sleeping for 60 seconds...') sys.stdout.flush() time.sleep(60) print('Action completed (should not reach here)') sys.exit(0) """ action = client.create_action( pack_ref=pack_ref, data={ "name": f"long_running_{unique_ref()}", "description": "Action that runs for 60 seconds", "runner_type": "python3", "entry_point": "long_run.py", "enabled": True, "parameters": {}, "metadata": { "timeout": 5 # 5 second timeout }, }, ) action_ref = action["ref"] print(f"✓ Created action: {action_ref}") print(f" Timeout: 5 seconds") print(f" Actual duration: 60 seconds (without timeout)") # ======================================================================== # STEP 2: Execute action # ======================================================================== print("\n[STEP 2] Executing action...") start_time = time.time() execution = client.create_execution(action_ref=action_ref, parameters={}) execution_id = execution["id"] print(f"✓ Execution created: ID={execution_id}") # ======================================================================== # STEP 3: Wait briefly and verify it's running # ======================================================================== print("\n[STEP 3] Verifying execution starts...") time.sleep(2) execution_status = client.get_execution(execution_id) print(f" Execution status after 2s: {execution_status['status']}") if execution_status["status"] == "running": print(" ✓ Execution is running") else: print(f" ℹ Execution status: {execution_status['status']}") # ======================================================================== # STEP 4: Wait for timeout to occur # ======================================================================== print("\n[STEP 4] Waiting for timeout to occur (7 seconds total)...") result = wait_for_execution_status( client=client, execution_id=execution_id, expected_status="failed", timeout=10, ) end_time = time.time() total_time = end_time - start_time print(f"✓ Execution completed: status={result['status']}") print(f" Total execution time: {total_time:.1f}s") # ======================================================================== # STEP 5: Verify timeout behavior # ======================================================================== print("\n[STEP 5] Verifying timeout behavior...") # Execution should fail assert result["status"] == "failed", ( f"❌ Expected status 'failed', got '{result['status']}'" ) print(" ✓ Execution status: failed") # Execution should complete in ~5 seconds, not 60 if total_time < 10: print(f" ✓ Execution timed out quickly: {total_time:.1f}s < 10s") else: print(f" ⚠ Execution took longer: {total_time:.1f}s") # Check for timeout indication in result result_details = client.get_execution(execution_id) exit_code = result_details.get("exit_code") error_message = result_details.get("error") or result_details.get("stderr") or "" print(f" Exit code: {exit_code}") if error_message: print(f" Error message: {error_message[:100]}...") # Exit code might indicate signal (negative values or specific codes) if exit_code and (exit_code < 0 or exit_code in [124, 137, 143]): print(" ✓ Exit code suggests timeout/signal") else: print(f" ℹ Exit code: {exit_code}") # ======================================================================== # STEP 6: Validate success criteria # ======================================================================== print("\n[STEP 6] Validating success criteria...") # Criterion 1: Execution failed assert result["status"] == "failed", "❌ Execution should fail" print(" ✓ Execution failed due to timeout") # Criterion 2: Completed quickly (not full 60 seconds) assert total_time < 15, f"❌ Execution took too long: {total_time:.1f}s" print(f" ✓ Execution killed promptly: {total_time:.1f}s") # Criterion 3: Worker remains stable (we can still make requests) try: client.list_executions(limit=1) print(" ✓ Worker remains stable after timeout") except Exception as e: print(f" ⚠ Worker may be unstable: {e}") # ======================================================================== # FINAL SUMMARY # ======================================================================== print("\n" + "=" * 80) print("TEST SUMMARY: Execution Timeout Policy") print("=" * 80) print(f"✓ Action with 60s duration: {action_ref}") print(f"✓ Timeout policy: 5 seconds") print(f"✓ Execution killed after timeout") print(f"✓ Status changed to: failed") print(f"✓ Total time: {total_time:.1f}s (not 60s)") print(f"✓ Worker remained stable") print("\n✅ TEST PASSED: Execution timeout works correctly!") print("=" * 80 + "\n") def test_execution_timeout_hierarchy(client: AttuneClient, test_pack): """ Test timeout at different levels: action, workflow, system. Flow: 1. Create action with action-level timeout 2. Create workflow with workflow-level timeout 3. Test both timeout levels """ print("\n" + "=" * 80) print("TEST: Execution Timeout - Timeout Hierarchy") print("=" * 80) pack_ref = test_pack["ref"] # ======================================================================== # STEP 1: Create action with short timeout # ======================================================================== print("\n[STEP 1] Creating action with action-level timeout...") action_with_timeout = client.create_action( pack_ref=pack_ref, data={ "name": f"action_timeout_{unique_ref()}", "description": "Action with 3s timeout", "runner_type": "python3", "entry_point": "action.py", "enabled": True, "parameters": {}, "metadata": { "timeout": 3 # Action-level timeout: 3 seconds }, }, ) print(f"✓ Created action: {action_with_timeout['ref']}") print(f" Action-level timeout: 3 seconds") # ======================================================================== # STEP 2: Create workflow with workflow-level timeout # ======================================================================== print("\n[STEP 2] Creating workflow with workflow-level timeout...") task_action = client.create_action( pack_ref=pack_ref, data={ "name": f"task_{unique_ref()}", "description": "Task action", "runner_type": "python3", "entry_point": "task.py", "enabled": True, "parameters": {}, }, ) workflow_with_timeout = client.create_action( pack_ref=pack_ref, data={ "name": f"workflow_timeout_{unique_ref()}", "description": "Workflow with 5s timeout", "runner_type": "workflow", "entry_point": "", "enabled": True, "parameters": {}, "metadata": { "timeout": 5 # Workflow-level timeout: 5 seconds }, "workflow_definition": { "tasks": [ {"name": "task_1", "action": task_action["ref"], "parameters": {}}, ] }, }, ) print(f"✓ Created workflow: {workflow_with_timeout['ref']}") print(f" Workflow-level timeout: 5 seconds") # ======================================================================== # STEP 3: Test action-level timeout # ======================================================================== print("\n[STEP 3] Testing action-level timeout...") action_execution = client.create_execution( action_ref=action_with_timeout["ref"], parameters={} ) action_execution_id = action_execution["id"] print(f"✓ Action execution created: ID={action_execution_id}") # Action has 3s timeout, so should complete within 5s time.sleep(5) action_result = client.get_execution(action_execution_id) print(f" Action execution status: {action_result['status']}") # ======================================================================== # STEP 4: Test workflow-level timeout # ======================================================================== print("\n[STEP 4] Testing workflow-level timeout...") workflow_execution = client.create_execution( action_ref=workflow_with_timeout["ref"], parameters={} ) workflow_execution_id = workflow_execution["id"] print(f"✓ Workflow execution created: ID={workflow_execution_id}") # Workflow has 5s timeout time.sleep(7) workflow_result = client.get_execution(workflow_execution_id) print(f" Workflow execution status: {workflow_result['status']}") # ======================================================================== # FINAL SUMMARY # ======================================================================== print("\n" + "=" * 80) print("TEST SUMMARY: Timeout Hierarchy") print("=" * 80) print(f"✓ Action-level timeout tested: 3s") print(f"✓ Workflow-level timeout tested: 5s") print(f"✓ Multiple timeout levels work") print("\n✅ TEST PASSED: Timeout hierarchy works correctly!") print("=" * 80 + "\n") def test_execution_no_timeout_completes_normally(client: AttuneClient, test_pack): """ Test that actions without timeout complete normally. Flow: 1. Create action that sleeps 3 seconds (no timeout) 2. Execute action 3. Verify it completes successfully 4. Verify it takes full duration """ print("\n" + "=" * 80) print("TEST: No Timeout - Normal Completion") print("=" * 80) pack_ref = test_pack["ref"] # ======================================================================== # STEP 1: Create action without timeout # ======================================================================== print("\n[STEP 1] Creating action without timeout...") normal_script = """#!/usr/bin/env python3 import sys import time print('Action starting...') time.sleep(3) print('Action completed normally') sys.exit(0) """ action = client.create_action( pack_ref=pack_ref, data={ "name": f"no_timeout_{unique_ref()}", "description": "Action without timeout", "runner_type": "python3", "entry_point": "normal.py", "enabled": True, "parameters": {}, # No timeout specified }, ) action_ref = action["ref"] print(f"✓ Created action: {action_ref}") print(f" No timeout configured") # ======================================================================== # STEP 2: Execute action # ======================================================================== print("\n[STEP 2] Executing action...") start_time = time.time() execution = client.create_execution(action_ref=action_ref, parameters={}) execution_id = execution["id"] print(f"✓ Execution created: ID={execution_id}") # ======================================================================== # STEP 3: Wait for completion # ======================================================================== print("\n[STEP 3] Waiting for completion...") result = wait_for_execution_status( client=client, execution_id=execution_id, expected_status="succeeded", timeout=10, ) end_time = time.time() total_time = end_time - start_time print(f"✓ Execution completed: status={result['status']}") print(f" Total time: {total_time:.1f}s") # ======================================================================== # STEP 4: Verify normal completion # ======================================================================== print("\n[STEP 4] Verifying normal completion...") assert result["status"] == "succeeded", ( f"❌ Expected 'succeeded', got '{result['status']}'" ) print(" ✓ Execution succeeded") # Should take at least 3 seconds (sleep duration) if total_time >= 3: print(f" ✓ Completed full duration: {total_time:.1f}s >= 3s") else: print(f" ⚠ Completed quickly: {total_time:.1f}s < 3s") # ======================================================================== # FINAL SUMMARY # ======================================================================== print("\n" + "=" * 80) print("TEST SUMMARY: No Timeout - Normal Completion") print("=" * 80) print(f"✓ Action without timeout: {action_ref}") print(f"✓ Execution completed successfully") print(f"✓ Duration: {total_time:.1f}s") print(f"✓ No premature termination") print("\n✅ TEST PASSED: Actions without timeout work correctly!") print("=" * 80 + "\n") def test_execution_timeout_vs_failure(client: AttuneClient, test_pack): """ Test distinguishing between timeout and regular failure. Flow: 1. Create action that fails immediately (exit 1) 2. Create action that times out 3. Execute both 4. Verify different failure reasons """ print("\n" + "=" * 80) print("TEST: Timeout vs Regular Failure") print("=" * 80) pack_ref = test_pack["ref"] # ======================================================================== # STEP 1: Create action that fails immediately # ======================================================================== print("\n[STEP 1] Creating action that fails immediately...") fail_script = """#!/usr/bin/env python3 import sys print('Failing immediately') sys.exit(1) """ fail_action = client.create_action( pack_ref=pack_ref, data={ "name": f"immediate_fail_{unique_ref()}", "description": "Action that fails immediately", "runner_type": "python3", "entry_point": "fail.py", "enabled": True, "parameters": {}, }, ) print(f"✓ Created fail action: {fail_action['ref']}") # ======================================================================== # STEP 2: Create action that times out # ======================================================================== print("\n[STEP 2] Creating action that times out...") timeout_action = client.create_action( pack_ref=pack_ref, data={ "name": f"timeout_{unique_ref()}", "description": "Action that times out", "runner_type": "python3", "entry_point": "timeout.py", "enabled": True, "parameters": {}, "metadata": {"timeout": 2}, }, ) print(f"✓ Created timeout action: {timeout_action['ref']}") # ======================================================================== # STEP 3: Execute fail action # ======================================================================== print("\n[STEP 3] Executing fail action...") fail_execution = client.create_execution( action_ref=fail_action["ref"], parameters={} ) fail_execution_id = fail_execution["id"] fail_result = wait_for_execution_status( client=client, execution_id=fail_execution_id, expected_status="failed", timeout=10, ) print(f"✓ Fail execution completed: status={fail_result['status']}") fail_details = client.get_execution(fail_execution_id) fail_exit_code = fail_details.get("exit_code") print(f" Exit code: {fail_exit_code}") # ======================================================================== # STEP 4: Execute timeout action # ======================================================================== print("\n[STEP 4] Executing timeout action...") timeout_execution = client.create_execution( action_ref=timeout_action["ref"], parameters={} ) timeout_execution_id = timeout_execution["id"] timeout_result = wait_for_execution_status( client=client, execution_id=timeout_execution_id, expected_status="failed", timeout=10, ) print(f"✓ Timeout execution completed: status={timeout_result['status']}") timeout_details = client.get_execution(timeout_execution_id) timeout_exit_code = timeout_details.get("exit_code") print(f" Exit code: {timeout_exit_code}") # ======================================================================== # STEP 5: Compare failure types # ======================================================================== print("\n[STEP 5] Comparing failure types...") print(f"\n Immediate Failure:") print(f" - Exit code: {fail_exit_code}") print(f" - Expected: 1 (explicit exit code)") print(f"\n Timeout Failure:") print(f" - Exit code: {timeout_exit_code}") print(f" - Expected: negative or signal code (e.g., -15, 137, 143)") # Different exit codes suggest different failure types if fail_exit_code != timeout_exit_code: print("\n ✓ Exit codes differ (different failure types)") else: print("\n ℹ Exit codes same (may not distinguish timeout)") # ======================================================================== # FINAL SUMMARY # ======================================================================== print("\n" + "=" * 80) print("TEST SUMMARY: Timeout vs Regular Failure") print("=" * 80) print(f"✓ Regular failure exit code: {fail_exit_code}") print(f"✓ Timeout failure exit code: {timeout_exit_code}") print(f"✓ Both failures handled appropriately") print("\n✅ TEST PASSED: Failure types distinguishable!") print("=" * 80 + "\n")