Error handling in the OpenAI Agents SDK provides a structured way to manage and respond to errors that occur during agent execution. Think of it as a "safety net" or "exception management" layer: when things go wrong (network failures, API errors, invalid inputs, etc.), it ensures the system can respond gracefully, provide useful information, and recover when possible.
Core Concepts
Error Types
The SDK defines several error types:
UserError - Errors due to user input or configuration
ModelBehaviorError - Errors from unexpected model behavior
class AgentsException(Exception):
    """Base exception for all SDK exceptions.

    Catch this type to handle any error raised by the SDK in one place.
    """
UserError
Python
class UserError(AgentsException):
    """Error due to user input or configuration.

    Raised for invalid agent or tool configuration, invalid input
    parameters, or missing required fields.
    """
When raised:
Invalid agent configuration
Invalid tool configuration
Invalid input parameters
Missing required fields
ModelBehaviorError
Python
class ModelBehaviorError(AgentsException):
    """Error from unexpected model behavior.

    Raised when the model returns an unexpected format, refuses to
    respond, or behaves inconsistently.
    """
When raised:
Model returns unexpected format
Model refuses to respond
Model behavior is inconsistent
ToolTimeoutError
Python
class ToolTimeoutError(AgentsException):
    """Error when a tool times out.

    Raised when tool execution exceeds its timeout, for example because
    the tool hangs or a network call inside it times out.
    """
When raised:
Tool execution exceeds timeout
Tool hangs indefinitely
Network timeout during tool execution
MaxTurnsExceeded
Python
class MaxTurnsExceeded(AgentsException):
    """Error when agent exceeds maximum turns.

    Attributes:
        run_state: The run state passed in by the raiser; kept on the
            exception so a handler can inspect it or resume from it.
    """

    def __init__(self, run_state: RunState):
        self.run_state = run_state
        super().__init__("Agent exceeded maximum turns")
When raised:
Agent doesn't produce final output within max_turns
Agent is in an infinite loop
Agent keeps calling tools without stopping
Guardrail Tripwires
Python
class InputGuardrailTripwireTriggered(AgentsException):
    """Error when input guardrail tripwire is triggered.

    Attributes:
        guardrail_result: The guardrail result that tripped the wire.
        input: The input that was being checked.
    """

    def __init__(self, guardrail_result, input):
        self.guardrail_result = guardrail_result
        self.input = input
        super().__init__("Input guardrail tripwire triggered")


class OutputGuardrailTripwireTriggered(AgentsException):
    """Error when output guardrail tripwire is triggered.

    Attributes:
        guardrail_result: The guardrail result that tripped the wire.
        output: The output that was being checked.
    """

    def __init__(self, guardrail_result, output):
        self.guardrail_result = guardrail_result
        self.output = output
        super().__init__("Output guardrail tripwire triggered")
When raised:
Guardrail detects unsafe content
Guardrail validation fails
Guardrail tripwire is triggered
RunErrorDetails
Error Details Structure
Python
@dataclassclassRunErrorDetails:
"""Detailed information about an error."""
error_type: str"""Type of error."""
message: str"""Error message."""
agent_name: str"""Name of the agent that caused the error."""
turn: int"""Turn number when error occurred."""
timestamp: datetime
"""When the error occurred."""
traceback: str | None"""Error traceback if available."""
context: dict[str, Any]
"""Additional context about the error."""
from agents import RunErrorHandlers
error_handlers = RunErrorHandlers(
max_turns=lambda ctx, error: "The agent needs more turns to complete this task. Let me try a different approach.",
)
result = await Runner.run(
agent,
input,
error_handlers=error_handlers,
)
Error Handler Types
MaxTurns Handler
Python
def max_turns_handler(context, error):
    """Handle max_turns exceeded.

    Args:
        context: Run context passed by the runner (unused here).
        error: The error that triggered the handler (unused here).

    Returns:
        A fixed message asking for more information.
    """
    return "I need more information to complete this task."
error_handlers = RunErrorHandlers(
max_turns=max_turns_handler,
)
@function_tool(timeout=30)  # 30 second timeout
def slow_operation(param: str) -> str:
    """Operation that might be slow."""
    time.sleep(60)  # Will timeout
    return "Done"


# When timeout occurs:
# - ToolTimeoutError is raised
# - Error is formatted and sent to model
# - Model can retry with different parameters
from agents import OutputGuardrailTripwireTriggered
@output_guardrail
def check_quality(context, agent, output):
    """Check output quality.

    Trips the guardrail when the output is longer than 1000 characters;
    otherwise reports the output as valid.
    """
    if len(output) > 1000:
        return GuardrailFunctionOutput(
            output_info="Output too long",
            tripwire_triggered=True,
        )
    return GuardrailFunctionOutput(
        output_info="Valid",
        tripwire_triggered=False,
    )
try:
result = await Runner.run(agent, input)
except OutputGuardrailTripwireTriggered as e:
print(f"Blocked: {e.guardrail_result.output.output_info}")
# Handle blocked output
Tool Guardrail Tripwires
Python
from agents import tool_input_guardrail, ToolInputGuardrailTripwireTriggered
@tool_input_guardrail
def validate_tool_args(context, tool_name, args_json):
    """Validate tool arguments.

    Parses the JSON-encoded arguments and trips the guardrail when
    ``required_param`` is missing or falsy.
    """
    args = json.loads(args_json)
    if not args.get("required_param"):
        return ToolInputGuardrailFunctionOutput(
            output_info="Missing required parameter",
            tripwire_triggered=True,
        )
    return ToolInputGuardrailFunctionOutput(
        output_info="Valid",
        tripwire_triggered=False,
    )


# Tool guardrail tripwires are handled internally
# The tool is skipped and an error message is sent to the model
Model Error Handling
Model API Errors
Handle model API errors:
Python
try:
result = await Runner.run(agent, input)
except ModelAPIError as e:
print(f"Model API error: {e}")
# Handle API error (retry, fallback, etc.)
Model Behavior Errors
Handle unexpected model behavior:
Python
try:
result = await Runner.run(agent, input)
except ModelBehaviorError as e:
print(f"Model behaved unexpectedly: {e}")
# Handle unexpected behavior
from tenacity import retry, stop_after_attempt, wait_exponential
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10),
)
async def run_with_retry(agent, input):
    """Run agent with retry.

    The decorator retries up to 3 attempts with exponential backoff
    bounded between 1 and 10 seconds.
    """
    return await Runner.run(agent, input)
try:
result = await run_with_retry(agent, input)
except Exception as e:
print(f"Failed after retries: {e}")
Fallback Strategy
Use fallback on error:
Python
async def run_with_fallback(agent, input, fallback_agent):
    """Run agent with fallback.

    Runs ``agent`` first; if that raises any exception, logs it and runs
    ``fallback_agent`` on the same input instead.
    """
    try:
        return await Runner.run(agent, input)
    except Exception as e:
        print(f"Primary agent failed: {e}, using fallback")
        return await Runner.run(fallback_agent, input)
Resume Strategy
Resume from error state:
Python
try:
result = await Runner.run(agent, input, max_turns=10)
except MaxTurnsExceeded as e:
state = e.run_state
# Resume with increased max_turns
result = await Runner.run(agent, state, max_turns=20)
Graceful Degradation
Degrade gracefully on error:
Python
async def run_with_degradation(agent, input):
    """Run with graceful degradation.

    Returns the run result on success, or a fixed fallback message if
    the run raises any exception.
    """
    try:
        result = await Runner.run(agent, input)
        return result
    except Exception as e:
        print(f"Error occurred: {e}")
        # Return a fallback response
        return "I encountered an error. Let me try a simpler approach."
Error Logging
Logging Errors
Log errors for monitoring:
Python
import logging
logger = logging.getLogger(__name__)
try:
result = await Runner.run(agent, input)
except AgentsException as e:
logger.error(f"Agent error: {e}", exc_info=True)
raise
from prometheus_client import Counter
error_counter = Counter(
"agent_errors",
"Agent errors",
["error_type", "agent_name"]
)
try:
result = await Runner.run(agent, input)
except AgentsException as e:
error_counter.labels(
error_type=type(e).__name__,
agent_name=agent.name,
).inc()
raise
Error User Messages
User-Friendly Error Messages
Provide clear error messages to users:
Python
def get_user_message(error: Exception) -> str:
    """Convert technical error to user message.

    Args:
        error: The exception raised during the agent run.

    Returns:
        A user-facing message chosen by exception type, with a generic
        message for any type not handled explicitly.
    """
    if isinstance(error, InputGuardrailTripwireTriggered):
        return "I cannot process that request as it contains content that violates our safety guidelines."
    elif isinstance(error, MaxTurnsExceeded):
        return "I need more information to complete this task. Could you provide more details?"
    elif isinstance(error, ToolTimeoutError):
        return "An operation took too long. Let me try a different approach."
    else:
        return "An error occurred. Please try again or contact support."
@pytest.fixture
def error_agent():
    """Agent that always errors."""

    @function_tool
    def error_tool():
        """Tool that always fails."""
        raise ValueError("Tool error")

    # Agent requires a name; without it construction fails before the test runs.
    return Agent(name="error_agent", tools=[error_tool])
@pytest.mark.asyncio
async def test_tool_error(error_agent):
    """Test tool error handling."""
    result = await Runner.run(error_agent, "Call the tool")
    # Tool error should be formatted and returned
    assert "error" in result.final_output.lower()
Error Best Practices
1. Catch Specific Exceptions
Catch specific exceptions, not all:
Python
# Good - specific exceptionstry:
result = await Runner.run(agent, input)
except MaxTurnsExceeded:
handle_max_turns()
except InputGuardrailTripwireTriggered:
handle_guardrail()
# Avoid - catch all exceptionstry:
result = await Runner.run(agent, input)
except Exception: # Too broad
handle_all()
2. Provide Context in Errors
Include helpful context in error messages:
Python
# Good - with context
raise UserError(
    f"Invalid model: {model_name}. "
    f"Valid models: {', '.join(VALID_MODELS)}"
)

# Avoid - without context
raise UserError("Invalid model")
3. Log Errors Before Raising
Log errors before re-raising:
Python
# Good - log then raisetry:
result = await Runner.run(agent, input)
except AgentsException as e:
logger.error(f"Agent error: {e}", exc_info=True)
raise# Avoid - just raisetry:
result = await Runner.run(agent, input)
except AgentsException as e:
raise# Lost logging opportunity
4. Use Custom Error Types
Define custom error types for your application:
Python
class MyApplicationError(AgentsException):
    """Base error for my application."""


class InsufficientCreditsError(MyApplicationError):
    """Error when user has insufficient credits."""


# Use custom errors
if user.credits < cost:
    raise InsufficientCreditsError(
        f"Insufficient credits: {user.credits} < {cost}"
    )
5. Handle Errors Gracefully
Provide graceful fallbacks:
Python
# Good - graceful fallbacktry:
result = await Runner.run(primary_agent, input)
except Exception:
result = await Runner.run(fallback_agent, input)
# Avoid - crash on error
result = await Runner.run(primary_agent, input) # Might crash
try:
result = await Runner.run(agent, input)
except UserError as e:
# User configuration errorreturnf"Configuration error: {str(e)}"except ModelBehaviorError as e:
# Model behaved unexpectedlyreturnf"Model error: {str(e)}"
async def run_with_partial_success(agent, input):
    """Handle partial success.

    Returns the run result on success. If the run exceeds its turn
    limit, returns the last item recorded in the run state as a partial
    result, or a fixed failure message when no items were recorded.
    """
    try:
        result = await Runner.run(agent, input)
        return result
    except MaxTurnsExceeded as e:
        # Partial success - got some output
        state = e.run_state
        if state.all_items:
            last_message = state.all_items[-1]
            return f"Partial result: {last_message}"
        return "Could not complete the task."
Error and Tracing
Error Spans
Errors create trace spans:
Python
from agents import error_span
try:
    with error_span(name="risky_operation"):
        risky_operation()
except Exception as e:
    # Error is captured in span
    pass
Error Metadata in Traces
Errors include metadata in traces:
Python
try:
result = await Runner.run(agent, input)
except AgentsException as e:
# Trace includes error informationprint(f"Trace ID: {result.trace.id}")
print(f"Error: {e}")