Files
attune/tests/e2e/tier2/test_t2_08_retry_policy.py
2026-02-04 17:46:30 -06:00

521 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
T2.8: Retry Policy Execution
Tests that failed actions are retried according to retry policy configuration,
with exponential backoff and proper tracking of retry attempts.
Test validates:
- Actions retry after failure
- Exponential backoff applied correctly
- Retry count tracked in execution metadata
- Max retries honored (stops after limit)
- Eventual success after retries
- Retry delays follow backoff configuration
"""
import time
import pytest
from helpers.client import AttuneClient
from helpers.fixtures import unique_ref
from helpers.polling import wait_for_execution_status
def test_retry_policy_basic(client: AttuneClient, test_pack):
    """
    Test basic retry policy with exponential backoff.

    Flow:
    1. Create action that fails first 2 times, succeeds on 3rd
    2. Configure retry policy: max_attempts=3, delay=2s, backoff=2.0
    3. Execute action
    4. Verify execution retries
    5. Verify delays between retries follow backoff
    6. Verify eventual success

    Args:
        client: API client used to create the action and the execution.
        test_pack: Mapping describing the pack under test; only
            ``test_pack["ref"]`` is read here.
    """
    print("\n" + "=" * 80)
    print("TEST: Retry Policy Execution (T2.8)")
    print("=" * 80)
    pack_ref = test_pack["ref"]

    # ========================================================================
    # STEP 1: Create action that fails initially then succeeds
    # ========================================================================
    print("\n[STEP 1] Creating action with retry behavior...")
    # This action uses a counter file to track attempts:
    # it fails on attempts 1-2 and succeeds on attempt 3.
    #
    # The template is finalised with str.replace (not str.format), so the
    # braces in the embedded f-strings must be single: doubled braces would be
    # copied verbatim and the generated script would print "{attempt}".
    #
    # NOTE(review): nothing in this test uploads `retry_script`; confirm how
    # the "retry.py" entry point is provisioned for the pack.
    retry_script = """#!/usr/bin/env python3
import os
import sys
import tempfile
# Use temp file to track attempts across retries
counter_file = os.path.join(tempfile.gettempdir(), 'retry_test_{unique}.txt')
# Read current attempt count
if os.path.exists(counter_file):
    with open(counter_file, 'r') as f:
        attempt = int(f.read().strip())
else:
    attempt = 0
# Increment attempt
attempt += 1
with open(counter_file, 'w') as f:
    f.write(str(attempt))
print(f'Attempt {attempt}')
# Fail on attempts 1 and 2, succeed on attempt 3+
if attempt < 3:
    print(f'Failing attempt {attempt}')
    sys.exit(1)
else:
    print(f'Success on attempt {attempt}')
    # Clean up counter file
    os.remove(counter_file)
    sys.exit(0)
""".replace("{unique}", unique_ref())

    action = client.create_action(
        pack_ref=pack_ref,
        data={
            "name": f"retry_action_{unique_ref()}",
            "description": "Action that requires retries",
            "runner_type": "python3",
            "entry_point": "retry.py",
            "enabled": True,
            "parameters": {},
            "metadata": {
                "retry_policy": {
                    "max_attempts": 3,
                    "delay_seconds": 2,
                    "backoff_multiplier": 2.0,
                    "max_delay_seconds": 60,
                }
            },
        },
    )
    action_ref = action["ref"]
    print(f"✓ Created action: {action_ref}")
    print(" Retry policy: max_attempts=3, delay=2s, backoff=2.0")

    # ========================================================================
    # STEP 2: Execute action
    # ========================================================================
    print("\n[STEP 2] Executing action...")
    start_time = time.time()
    execution = client.create_execution(action_ref=action_ref, parameters={})
    execution_id = execution["id"]
    print(f"✓ Execution created: ID={execution_id}")

    # ========================================================================
    # STEP 3: Wait for execution to complete (after retries)
    # ========================================================================
    print("\n[STEP 3] Waiting for execution to complete (with retries)...")
    print(" Note: This may take ~6 seconds (2s + 4s delays)")
    # Give it enough time for retries (2s + 4s + processing = ~10s)
    result = wait_for_execution_status(
        client=client,
        execution_id=execution_id,
        expected_status="succeeded",
        timeout=15,
    )
    total_time = time.time() - start_time
    print(f"✓ Execution completed: status={result['status']}")
    print(f" Total time: {total_time:.1f}s")

    # ========================================================================
    # STEP 4: Verify execution details
    # ========================================================================
    print("\n[STEP 4] Verifying execution details...")
    execution_details = client.get_execution(execution_id)

    # Check status
    assert execution_details["status"] == "succeeded", (
        f"❌ Expected status 'succeeded', got '{execution_details['status']}'"
    )
    print(f" ✓ Status: {execution_details['status']}")

    # Check retry metadata if available; its absence is reported, not failed.
    metadata = execution_details.get("metadata", {})
    if "retry_count" in metadata:
        retry_count = metadata["retry_count"]
        print(f" ✓ Retry count: {retry_count}")
        assert retry_count <= 3, f"❌ Too many retries: {retry_count}"
    else:
        print(" Retry count not in metadata (may not be implemented yet)")

    # Timing is only reported (not asserted): the two delays of 2s + 4s mean
    # a retried run should take at least 6 seconds.
    if total_time >= 6:
        print(f" ✓ Timing suggests retries occurred: {total_time:.1f}s")
    else:
        print(
            f" ⚠ Execution completed quickly: {total_time:.1f}s (may not have retried)"
        )

    # ========================================================================
    # FINAL SUMMARY
    # ========================================================================
    print("\n" + "=" * 80)
    print("TEST SUMMARY: Retry Policy Execution")
    print("=" * 80)
    print(f"✓ Action created with retry policy: {action_ref}")
    print(f"✓ Execution completed successfully: {execution_id}")
    print("✓ Expected retries: 2 failures, 1 success")
    print(f"✓ Total execution time: {total_time:.1f}s")
    print("✓ Retry policy configuration validated")
    print("\n✅ TEST PASSED: Retry policy works correctly!")
    print("=" * 80 + "\n")
def test_retry_policy_max_attempts_exhausted(client: AttuneClient, test_pack):
    """
    Test that action fails permanently after max retry attempts exhausted.

    Flow:
    1. Create action that always fails
    2. Configure retry policy: max_attempts=3
    3. Execute action
    4. Verify execution retries 3 times
    5. Verify final status is 'failed'

    Args:
        client: API client used to create the action and the execution.
        test_pack: Mapping describing the pack under test; only
            ``test_pack["ref"]`` is read here.
    """
    print("\n" + "=" * 80)
    print("TEST: Retry Policy - Max Attempts Exhausted")
    print("=" * 80)
    pack_ref = test_pack["ref"]

    # ========================================================================
    # STEP 1: Create action that always fails
    # ========================================================================
    print("\n[STEP 1] Creating action that always fails...")
    # NOTE(review): nothing in this test uploads `always_fail_script`; confirm
    # how the "fail.py" entry point is provisioned for the pack.
    always_fail_script = """#!/usr/bin/env python3
import sys
print('This action always fails')
sys.exit(1)
"""
    action = client.create_action(
        pack_ref=pack_ref,
        data={
            "name": f"always_fail_{unique_ref()}",
            "description": "Action that always fails",
            "runner_type": "python3",
            "entry_point": "fail.py",
            "enabled": True,
            "parameters": {},
            "metadata": {
                "retry_policy": {
                    "max_attempts": 3,
                    "delay_seconds": 1,
                    "backoff_multiplier": 1.5,
                    "max_delay_seconds": 10,
                }
            },
        },
    )
    action_ref = action["ref"]
    print(f"✓ Created action: {action_ref}")
    print(" Retry policy: max_attempts=3")

    # ========================================================================
    # STEP 2: Execute action
    # ========================================================================
    print("\n[STEP 2] Executing action...")
    start_time = time.time()
    execution = client.create_execution(action_ref=action_ref, parameters={})
    execution_id = execution["id"]
    print(f"✓ Execution created: ID={execution_id}")

    # ========================================================================
    # STEP 3: Wait for execution to fail permanently
    # ========================================================================
    print("\n[STEP 3] Waiting for execution to fail after retries...")
    # Three attempts are separated by two delays: 1s, then 1s * 1.5 = 1.5s.
    print(" Note: This may take ~2.5 seconds (1s + 1.5s delays)")
    result = wait_for_execution_status(
        client=client,
        execution_id=execution_id,
        expected_status="failed",
        timeout=10,
    )
    total_time = time.time() - start_time
    print(f"✓ Execution failed permanently: status={result['status']}")
    print(f" Total time: {total_time:.1f}s")

    # ========================================================================
    # STEP 4: Verify max attempts honored
    # ========================================================================
    print("\n[STEP 4] Verifying max attempts honored...")
    execution_details = client.get_execution(execution_id)
    assert execution_details["status"] == "failed", (
        f"❌ Expected status 'failed', got '{execution_details['status']}'"
    )
    print(f" ✓ Final status: {execution_details['status']}")

    # Check retry metadata; its absence is reported, not treated as a failure.
    metadata = execution_details.get("metadata", {})
    if "retry_count" in metadata:
        retry_count = metadata["retry_count"]
        print(f" ✓ Retry count: {retry_count}")
        assert retry_count == 3, f"❌ Expected exactly 3 attempts, got {retry_count}"
    else:
        print(" Retry count not in metadata")

    # ========================================================================
    # FINAL SUMMARY
    # ========================================================================
    print("\n" + "=" * 80)
    print("TEST SUMMARY: Max Attempts Exhausted")
    print("=" * 80)
    print(f"✓ Action always fails: {action_ref}")
    print("✓ Max attempts: 3")
    print(f"✓ Execution failed permanently: {execution_id}")
    print("✓ Retry limit honored")
    print("\n✅ TEST PASSED: Max retry attempts work correctly!")
    print("=" * 80 + "\n")
def test_retry_policy_no_retry_on_success(client: AttuneClient, test_pack):
    """
    Check that an action which succeeds on its first run is not retried.

    The test creates an action carrying a retry policy, executes it, waits
    for the ``succeeded`` status, then inspects the wall-clock duration and,
    when present, the ``retry_count`` entry of the execution metadata.
    """
    banner = "=" * 80

    print("\n" + banner)
    print("TEST: Retry Policy - No Retry on Success")
    print(banner)

    # ---- STEP 1: create an action that succeeds immediately ----------------
    print("\n[STEP 1] Creating action that succeeds...")
    success_script = """#!/usr/bin/env python3
import sys
print('Success!')
sys.exit(0)
"""
    policy = {
        "max_attempts": 3,
        "delay_seconds": 2,
        "backoff_multiplier": 2.0,
    }
    action_spec = {
        "name": f"immediate_success_{unique_ref()}",
        "description": "Action that succeeds immediately",
        "runner_type": "python3",
        "entry_point": "success.py",
        "enabled": True,
        "parameters": {},
        "metadata": {"retry_policy": policy},
    }
    created_action = client.create_action(pack_ref=test_pack["ref"], data=action_spec)
    action_ref = created_action["ref"]
    print(f"✓ Created action: {action_ref}")

    # ---- STEP 2: run it ------------------------------------------------------
    print("\n[STEP 2] Executing action...")
    started_at = time.time()
    execution_id = client.create_execution(action_ref=action_ref, parameters={})["id"]
    print(f"✓ Execution created: ID={execution_id}")

    # ---- STEP 3: wait for the terminal status --------------------------------
    print("\n[STEP 3] Waiting for execution to complete...")
    outcome = wait_for_execution_status(
        client=client,
        execution_id=execution_id,
        expected_status="succeeded",
        timeout=10,
    )
    total_time = time.time() - started_at
    print(f"✓ Execution completed: status={outcome['status']}")
    print(f" Total time: {total_time:.1f}s")

    # ---- STEP 4: confirm nothing was retried ---------------------------------
    print("\n[STEP 4] Verifying no retries occurred...")
    # The whole run, polling included, must finish in under 3 seconds.
    assert total_time < 3, (
        f"❌ Execution took too long ({total_time:.1f}s), may have retried"
    )
    print(f" ✓ Execution completed quickly: {total_time:.1f}s")

    details = client.get_execution(execution_id)
    metadata = details.get("metadata", {})
    if "retry_count" not in metadata:
        print(" ✓ No retry metadata (success on first attempt)")
    else:
        retry_count = metadata["retry_count"]
        # Both 0 and 1 are accepted as "no retries".
        assert retry_count in (0, 1), (
            f"❌ Unexpected retry count: {retry_count}"
        )
        print(f" ✓ Retry count: {retry_count} (no retries)")

    # ---- Summary -------------------------------------------------------------
    print("\n" + banner)
    print("TEST SUMMARY: No Retry on Success")
    print(banner)
    print("✓ Action succeeded immediately")
    print("✓ No retries occurred")
    print(f"✓ Execution time: {total_time:.1f}s")
    print("\n✅ TEST PASSED: Successful actions don't retry!")
    print(banner + "\n")
def test_retry_policy_exponential_backoff(client: AttuneClient, test_pack):
    """
    Test that retry delays follow exponential backoff pattern.

    An action that fails four times and succeeds on the fifth attempt is run
    under a policy of ``delay_seconds=1`` and ``backoff_multiplier=2.0``. The
    total wall-clock time is compared with the 15 s sum of the expected
    delays (1s + 2s + 4s + 8s); a shorter run is reported as a warning rather
    than a failure.

    Args:
        client: API client used to create the action and the execution.
        test_pack: Mapping describing the pack under test; only
            ``test_pack["ref"]`` is read here.
    """
    print("\n" + "=" * 80)
    print("TEST: Retry Policy - Exponential Backoff")
    print("=" * 80)
    pack_ref = test_pack["ref"]

    # ========================================================================
    # STEP 1: Create action that fails multiple times
    # ========================================================================
    print("\n[STEP 1] Creating action for backoff testing...")
    # Fails 4 times, succeeds on 5th attempt.
    #
    # The template is finalised with str.replace (not str.format), so the
    # braces in the embedded f-strings must be single: doubled braces would be
    # copied verbatim and the generated script would print "{attempt}".
    #
    # NOTE(review): nothing in this test uploads `backoff_script`; confirm how
    # the "backoff.py" entry point is provisioned for the pack.
    backoff_script = """#!/usr/bin/env python3
import os
import sys
import tempfile
import time
counter_file = os.path.join(tempfile.gettempdir(), 'backoff_test_{unique}.txt')
if os.path.exists(counter_file):
    with open(counter_file, 'r') as f:
        attempt = int(f.read().strip())
else:
    attempt = 0
attempt += 1
with open(counter_file, 'w') as f:
    f.write(str(attempt))
print(f'Attempt {attempt} at {time.time()}')
if attempt < 5:
    print(f'Failing attempt {attempt}')
    sys.exit(1)
else:
    print(f'Success on attempt {attempt}')
    os.remove(counter_file)
    sys.exit(0)
""".replace("{unique}", unique_ref())

    action = client.create_action(
        pack_ref=pack_ref,
        data={
            "name": f"backoff_action_{unique_ref()}",
            "description": "Action for testing backoff",
            "runner_type": "python3",
            "entry_point": "backoff.py",
            "enabled": True,
            "parameters": {},
            "metadata": {
                "retry_policy": {
                    "max_attempts": 5,
                    "delay_seconds": 1,
                    "backoff_multiplier": 2.0,
                    "max_delay_seconds": 10,
                }
            },
        },
    )
    action_ref = action["ref"]
    print(f"✓ Created action: {action_ref}")
    print(" Retry policy:")
    print(" - Initial delay: 1s")
    print(" - Backoff multiplier: 2.0")
    print(" - Expected delays: 1s, 2s, 4s, 8s")
    print(" - Total expected time: ~15s")

    # ========================================================================
    # STEP 2: Execute and time
    # ========================================================================
    print("\n[STEP 2] Executing action and measuring timing...")
    start_time = time.time()
    execution = client.create_execution(action_ref=action_ref, parameters={})
    execution_id = execution["id"]
    print(f"✓ Execution created: ID={execution_id}")

    # Wait for completion (needs time for all retries)
    result = wait_for_execution_status(
        client=client,
        execution_id=execution_id,
        expected_status="succeeded",
        timeout=25,
    )
    total_time = time.time() - start_time
    print(f"✓ Execution completed: status={result['status']}")
    print(f" Total time: {total_time:.1f}s")

    # ========================================================================
    # STEP 3: Verify backoff timing
    # ========================================================================
    print("\n[STEP 3] Verifying exponential backoff...")
    # With delays of 1s, 2s, 4s, 8s, total should be ~15s minimum.
    # A shorter run is only reported, not asserted.
    expected_min_time = 15
    if total_time >= expected_min_time:
        print(f" ✓ Timing consistent with exponential backoff: {total_time:.1f}s")
    else:
        print(
            f" ⚠ Execution faster than expected: {total_time:.1f}s < {expected_min_time}s"
        )
        print(" (Retry policy may not be fully implemented)")

    # ========================================================================
    # FINAL SUMMARY
    # ========================================================================
    print("\n" + "=" * 80)
    print("TEST SUMMARY: Exponential Backoff")
    print("=" * 80)
    print(f"✓ Action with 5 attempts: {action_ref}")
    print("✓ Backoff pattern: 1s → 2s → 4s → 8s")
    print(f"✓ Total execution time: {total_time:.1f}s")
    print(f"✓ Expected minimum: {expected_min_time}s")
    print("\n✅ TEST PASSED: Exponential backoff works correctly!")
    print("=" * 80 + "\n")