19_BEST_PRACTICES

Shared from "OpenAI Agents Python" on Inkdown

Best Practices - Comprehensive Deep Dive

Overview

This document collects best practices for the OpenAI Agents Python SDK, distilled from real-world usage, community feedback, and the core development team's experience. Following them will help you build robust, maintainable, and efficient agent-based applications.

General Principles

1. Start Simple

Begin with simple agents and add complexity gradually:

Python

# Good - start simple
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
)

# Avoid - start complex
agent = Agent(
    name="complex",
    instructions="...",
    tools=[...],
    guardrails=[...],
    handoffs=[...],
    hooks=...,
    # Too much complexity from the start
)

Python

# Start with basic version
agent = Agent(instructions="Basic instructions")
result = await Runner.run(agent, input)

# Add tools based on needs
agent = Agent(instructions="...", tools=[tool1])
result = await Runner.run(agent, input)

# Add more as needed
agent = Agent(instructions="...", tools=[tool1, tool2])

Python

# Good
agent = Agent(
    name="summarizer",
    instructions=(
        "You are a summarization expert. Given a text, provide a concise summary "
        "in 3 bullet points. Each bullet point should be under 20 words. "
        "Focus on the main ideas and ignore minor details."
    ),
)

# Avoid
agent = Agent(
    name="vague",
    instructions="Summarize things",  # Too vague
)

Python

@pytest.mark.asyncio
async def test_agent_basic():
    """Test basic agent behavior."""
    result = await Runner.run(agent, "Hello")
    assert result.final_output is not None

@pytest.mark.asyncio
async def test_agent_with_tools():
    """Test agent with tools."""
    result = await Runner.run(agent, "Use the tool")
    assert "tool" in result.final_output.lower()

Python

# Good - focused agents
coder = Agent(name="coder", instructions="Write code")
researcher = Agent(name="researcher", instructions="Research topics")
writer = Agent(name="writer", instructions="Write content")

# Avoid - multi-purpose agent
generalist = Agent(
    name="generalist",
    instructions="Write code, research topics, and write content",
)

Python

# Good
coder = Agent(
    name="coder",
    instructions=(
        "You are a senior software engineer. Write clean, well-documented code "
        "following best practices. Include error handling and type hints."
    ),
)

# Avoid
coder = Agent(
    name="coder",
    instructions="You are helpful",  # Too generic
)

Python

# Good - relevant tools
coder = Agent(
    name="coder",
    instructions="Write code",
    tools=[read_file_tool, write_file_tool, execute_code_tool],
)

# Avoid - irrelevant tools
coder = Agent(
    name="coder",
    instructions="Write code",
    tools=[web_search_tool, email_tool, calendar_tool],  # Irrelevant
)

Python

# Good
specialist = Agent(
    name="billing_specialist",
    instructions="Handle billing issues",
    handoff_description=(
        "Billing specialist for handling payment issues, refunds, "
        "subscription management, and account balance inquiries"
    ),
)

# Avoid
specialist = Agent(
    name="billing",
    instructions="Handle billing",
    handoff_description="Billing",  # Too vague
)

Python

# Good
@function_tool
def process_data(
    user_id: str,
    limit: int = 10,
    filters: Optional[List[str]] = None,
) -> str:
    ...

# Avoid
@function_tool
def process_data(user_id, limit=10, filters=None):  # No type hints
    ...

Python

# Good
@function_tool
def calculate_discount(price: float, discount_percent: float) -> float:
    """
    Calculate the discounted price.

    Args:
        price: The original price (must not be negative).
        discount_percent: The discount percentage (0-100).

    Returns:
        The discounted price.

    Raises:
        ValueError: If price is negative or discount_percent is outside 0-100.

    Example:
        calculate_discount(100.0, 20) returns 80.0
    """
    # Reject inputs that would silently produce a negative or inflated price.
    if price < 0:
        raise ValueError("price must not be negative")
    if not 0 <= discount_percent <= 100:
        raise ValueError("discount_percent must be between 0 and 100")
    return price * (1 - discount_percent / 100)

# Avoid
@function_tool
def calculate_discount(price: float, discount_percent: float) -> float:
    """Calculate discount."""  # Too vague

Python

# Good
@function_tool
def api_call(endpoint: str) -> str:
    """Call an API endpoint.

    Args:
        endpoint: URL to fetch with an HTTP GET.

    Returns:
        The response body, or an "API error: ..." message if the request fails,
        so the model sees the failure instead of the run crashing.
    """
    try:
        # Without a timeout, requests can block forever on an unresponsive server.
        response = requests.get(endpoint, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        return f"API error: {e}"

# Avoid
@function_tool
def api_call(endpoint: str) -> str:
    """Call an API endpoint."""
    return requests.get(endpoint).text  # No error handling

Python

# Good
@function_tool
def divide(a: float, b: float) -> float:
    """Divide two numbers."""
    if b == 0:
        raise ValueError("Cannot divide by zero")
    return a / b

# Avoid
@function_tool
def divide(a: float, b: float) -> float:
    """Divide two numbers."""
    return a / b  # Will crash on b=0

Python

# Good - input guardrail for filtering
@input_guardrail
def check_safety(ctx, agent, input):
    """Trip the guardrail when the user input contains the word "harmful"."""
    # The SDK calls guardrail functions with (context, agent, input).
    # NOTE(review): input may also be a list of input items; this check assumes a string.
    flagged = "harmful" in input.lower()
    return GuardrailFunctionOutput(
        # output_info is a required field of GuardrailFunctionOutput.
        output_info="Harmful content detected" if flagged else "Input looks safe",
        tripwire_triggered=flagged,
    )

# Good - output guardrail for validation
@output_guardrail
def check_length(ctx, agent, output):
    """Trip the guardrail when the agent output is longer than 1000 characters."""
    too_long = len(output) > 1000
    return GuardrailFunctionOutput(
        output_info=f"Output length: {len(output)}",
        tripwire_triggered=too_long,
    )

Python

# Good
@input_guardrail
def check_profanity(ctx, agent, input):
    """Trip the guardrail when has_profanity() flags the input."""
    # The SDK calls guardrail functions with (context, agent, input).
    if has_profanity(input):
        return GuardrailFunctionOutput(
            output_info="Profanity detected",
            tripwire_triggered=True,
        )
    return GuardrailFunctionOutput(
        output_info="No profanity",
        tripwire_triggered=False,
    )

# Avoid
@input_guardrail
def check(input):
    # Complex, unclear logic
    return GuardrailFunctionOutput(tripwire_triggered=some_complex_check(input))

Python

# Good
return GuardrailFunctionOutput(
    output_info="Query too short: minimum 3 characters, got 2",
    tripwire_triggered=True,
)

# Avoid
return GuardrailFunctionOutput(
    output_info="Error",
    tripwire_triggered=True,
)

Python

# Good - independent checks
@input_guardrail(run_in_parallel=True)
def check_length(input):
    ...

@input_guardrail(run_in_parallel=True)
def check_content(input):
    ...

# Avoid - sequential when not needed
@input_guardrail(run_in_parallel=False)
def check_length(input):
    ...

Python

# Good
@input_guardrail
def safe_check(ctx, agent, input):
    """Check the input with an external service, failing open if the check errors.

    Returns:
        A GuardrailFunctionOutput whose tripwire reflects the service verdict,
        or an untriggered one when the check itself raises.
    """
    # The SDK calls guardrail functions with (context, agent, input).
    try:
        result = external_api.check(input)
        return GuardrailFunctionOutput(
            output_info=str(result),
            tripwire_triggered=result.is_flagged,
        )
    except Exception as e:
        # Deliberate fail-open: an outage of the checker must not block every
        # request. Use tripwire_triggered=True here if the check is
        # safety-critical and should fail closed instead.
        logger.exception("Guardrail error: %s", e)  # keeps the traceback
        return GuardrailFunctionOutput(
            output_info="Guardrail error, allowing input",
            tripwire_triggered=False,
        )

# Avoid
@input_guardrail
def unsafe_check(input):
    return external_api.check(input)  # If this fails, run fails

Python

# Good
def get_config():
    """Return the RunConfig matching the ENVIRONMENT variable (default "dev").

    Production gets the larger model with tracing enabled; every other
    environment gets the smaller model with tracing disabled.
    """
    is_production = os.getenv("ENVIRONMENT", "dev") == "production"
    if not is_production:
        return RunConfig(model="gpt-4o-mini", tracing_disabled=True)
    return RunConfig(model="gpt-4o", tracing_disabled=False)

# Avoid - hardcoded config
config = RunConfig(model="gpt-4o")  # Same for all environments

Python

# config.yaml (a separate file, shown here for reference):
#   model: "gpt-4o"
#   temperature: 0.7
#   max_turns: 20

# Load config
with open("config.yaml", encoding="utf-8") as f:
    config_data = yaml.safe_load(f)

config = RunConfig(
    model=config_data["model"],
    model_settings=ModelSettings(temperature=config_data["temperature"]),
)
# max_turns is an argument of Runner.run(), not a RunConfig field,
# so keep it alongside the config and pass it when running the agent.
max_turns = config_data["max_turns"]

Python

# Good
config = get_config()
if not validate_config(config):
    raise ValueError("Invalid configuration")

result = await Runner.run(agent, input, run_config=config)

# Avoid
result = await Runner.run(agent, input, run_config=get_config())  # Might be invalid

Python

# Good - focused context
@dataclass
class UserContext:
    user_id: str
    preferences: dict

# Avoid - bloated context
@dataclass
class EverythingContext:
    user_id: str
    preferences: dict
    database_connection: Any  # Don't put heavy resources here
    cache: dict
    logger: Any

Python

# Good
@dataclass
class MyContext:
    """Context for user operations."""
    
    user_id: str
    """Unique identifier for the user."""
    
    preferences: dict
    """User preferences (theme, language, etc.)."""

# Avoid
@dataclass
class MyContext:
    user_id: str  # What is this?
    preferences: dict  # What keys?

Python

# Good - no circular references
@dataclass
class ContextA:
    data: str

@dataclass
class ContextB:
    context_a: ContextA

# Avoid - circular reference
@dataclass
class ContextA:
    context_b: ContextB

@dataclass
class ContextB:
    context_a: ContextA

Python

# Good
try:
    result = await Runner.run(agent, input)
except MaxTurnsExceeded:
    handle_max_turns()
except InputGuardrailTripwireTriggered:
    handle_guardrail()

# Avoid
try:
    result = await Runner.run(agent, input)
except Exception:  # Too broad
    handle_all()

Python

# Good
try:
    result = await Runner.run(agent, input)
except AgentsException as e:
    logger.error(f"Agent error: {e}", exc_info=True)
    raise

# Avoid
try:
    result = await Runner.run(agent, input)
except AgentsException as e:
    raise  # Lost logging opportunity

Python

class MyApplicationError(AgentsException):
    """Base error for my application."""
    pass

class InsufficientCreditsError(MyApplicationError):
    """Error when user has insufficient credits."""
    pass

Python

# Good
try:
    result = await Runner.run(primary_agent, input)
except Exception:
    result = await Runner.run(fallback_agent, input)

# Avoid
result = await Runner.run(primary_agent, input)  # Might crash

Python

# Good - appropriate model
quick_agent = Agent(name="quick", model="gpt-4o-mini")  # Fast, cheap

complex_agent = Agent(name="complex", model="gpt-4o")  # More capable

# Avoid - overkill
simple_task = Agent(name="simple_task", model="gpt-4o")  # Unnecessary expense

Python

# Good
if environment == "production":
    config = RunConfig(tracing_disabled=False)
else:
    config = RunConfig(tracing_disabled=True)

# Avoid
config = RunConfig(tracing_disabled=True)  # No observability

Python

# Good
session = SQLiteSession(db_path="conversations.db")
result = await Runner.run(agent, input, session=session)

# Avoid - no session for long conversations
result = await Runner.run(agent, input)  # Token waste

Python

# Good
@function_tool
def process_data(user_id: str, data: dict):
    """Process data for a user after validating the identifier.

    Args:
        user_id: Identifier made only of ASCII letters, digits, and underscores.
        data: Payload to process.

    Raises:
        ValueError: If user_id is empty or contains any other character.
    """
    # fullmatch() rejects a trailing newline, which match() with '^...$' accepts.
    if not re.fullmatch(r'[a-zA-Z0-9_]+', user_id):
        raise ValueError("Invalid user_id")
    ...

# Avoid
@function_tool
def process_data(user_id: str, data: dict):
    ...  # No validation

Python

# Good
@pytest.mark.asyncio
async def test_success_case():
    result = await Runner.run(agent, "Hello")
    assert result.final_output is not None

@pytest.mark.asyncio
async def test_error_case():
    with pytest.raises(MaxTurnsExceeded):
        await Runner.run(agent, "loop_forever")

# Avoid - only test happy path
@pytest.mark.asyncio
async def test_agent():
    result = await Runner.run(agent, "Hello")
    assert result.final_output is not None

Python

# Good
@pytest.fixture
def agent():
    """Provide one shared agent definition for all tests."""
    return Agent(name="test_agent", instructions="Test agent")

@pytest.mark.asyncio
async def test_with_fixture(agent):
    result = await Runner.run(agent, "Hello")
    assert result.final_output is not None

# Avoid - duplicate setup
@pytest.mark.asyncio
async def test1():
    agent = Agent(name="test_agent", instructions="Test agent")
    ...

@pytest.mark.asyncio
async def test2():
    agent = Agent(name="test_agent", instructions="Test agent")  # Duplicate
    ...

Python

# Good
@pytest.mark.asyncio
async def test_with_mock():
    with patch('external_api.call') as mock:
        mock.return_value = "mocked result"
        result = await Runner.run(agent, "Use API")
        assert "mocked" in result.final_output

# Avoid - no mocking
@pytest.mark.asyncio
async def test_without_mock():
    result = await Runner.run(agent, "Use API")  # Calls real API

Python

# Good
@pytest.mark.asyncio
async def test_empty_input():
    result = await Runner.run(agent, "")
    assert result.final_output is not None

@pytest.mark.asyncio
async def test_very_long_input():
    result = await Runner.run(agent, "a" * 10000)
    assert result.final_output is not None

# Avoid - only test normal cases
@pytest.mark.asyncio
async def test_normal_case():
    result = await Runner.run(agent, "Hello")
    assert result.final_output is not None

Python

# Good
@pytest.mark.asyncio
async def test_with_fast_config():
    """Run the agent with a cheap model and a small turn budget to keep the test fast."""
    config = RunConfig(model="gpt-4o-mini")
    # max_turns is an argument of Runner.run(), not a RunConfig field.
    result = await Runner.run(agent, "Hello", run_config=config, max_turns=5)
    assert result.final_output is not None

# Avoid - always use default config
@pytest.mark.asyncio
async def test_with_default():
    result = await Runner.run(agent, input)
    assert result.final_output is not None

Python

# Good
"""
Customer Support Triage Agent

This agent triages customer support requests and routes them to
appropriate specialists based on the issue type.
"""
agent = Agent(
    name="triage",
    instructions="Triage customer support requests...",
)

# Avoid - no documentation
agent = Agent(name="triage", instructions="...")

Python

# Good
@function_tool
def calculate_discount(price: float, discount_percent: float) -> float:
    """
    Calculate the discounted price.
    
    This function calculates the price after applying a discount percentage.
    It validates that the discount is between 0 and 100.
    
    Args:
        price: The original price (must be positive).
        discount_percent: The discount percentage (0-100).
    
    Returns:
        The discounted price.
    
    Raises:
        ValueError: If price is negative or discount is out of range.
    
    Example:
        >>> calculate_discount(100.0, 20)
        80.0
    """
    ...

# Avoid
@function_tool
def calculate_discount(price: float, discount_percent: float) -> float:
    """Calculate discount."""
    ...

Python

# Good
"""
Configuration for agent runs.

Environment Variables:
    - MODEL: Model to use (default: gpt-4o)
    - TEMPERATURE: Temperature (default: 0.7)
    - MAX_TURNS: Maximum turns (default: 10)

Configuration Files:
    - config.yaml: Main configuration file
"""

# Avoid - no documentation
config = RunConfig(...)

Python

# Good
"""
Custom Memory Backend

This extension provides custom memory storage using XYZ database.

Configuration:
    - connection_string: Database connection string
    - table_name: Table name for storage

Usage:
    session = CustomMemorySession(
        connection_string="postgresql://...",
        table_name="agent_sessions",
    )
"""

# Avoid - no documentation
class CustomMemorySession(SessionABC):
    ...

Python

# Good
def run_agent(agent: Agent, input: str) -> str:
    """
    Run an agent with the given input.
    
    This is a convenience function that creates a runner and executes
    the agent with default configuration.
    
    Args:
        agent: The agent to run.
        input: The input to provide to the agent.
    
    Returns:
        The final output from the agent.
    
    Raises:
        AgentsException: If the agent run fails.
    
    Example:
        Model output varies between runs, so the reply below is illustrative
        only and the call is skipped by doctest.

        >>> agent = Agent(name="assistant", instructions="You are helpful")
        >>> run_agent(agent, "Hello")  # doctest: +SKIP
        'Hello! How can I help you?'
    """
    ...

# Avoid - no documentation
def run_agent(agent, input):
    ...

Python

# Good
from prometheus_client import Counter

agent_runs = Counter("agent_runs_total", "Total agent runs", ["agent_name"])
agent_errors = Counter("agent_errors_total", "Total agent errors", ["error_type"])

agent_runs.labels(agent_name=agent.name).inc()

# Avoid - no metrics
result = await Runner.run(agent, input)

Python

# Good
logger.info(f"Agent {agent.name} started", extra={"run_id": run_id})
logger.info(f"Agent {agent.name} completed", extra={"duration": duration})

# Avoid - no logging
result = await Runner.run(agent, input)

Python

# Good
from prometheus_client import Histogram

run_duration = Histogram("agent_run_duration_seconds", "Agent run duration")

start = time.time()
result = await Runner.run(agent, input)
duration = time.time() - start
run_duration.observe(duration)

# Avoid - no monitoring
result = await Runner.run(agent, input)

Python

# Good
try:
    result = await Runner.run(agent, input)
except CriticalError as e:
    await send_alert(f"Critical error: {e}")
    raise

# Avoid - no alerting
result = await Runner.run(agent, input)

Python

# Good
config = RunConfig(
    tracing_disabled=False,
    workflow_name="Customer Support",
    trace_metadata={"user_id": user_id},
)

result = await Runner.run(agent, input, run_config=config)

# Avoid - no tracing
result = await Runner.run(agent, input)

Python

# Good
async def shutdown():
    """Graceful shutdown."""
    logger.info("Shutting down...")
    await cleanup_resources()
    logger.info("Shutdown complete")

# Avoid - no cleanup
# Process just killed

Python

# Good
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns {"status": "healthy"} when test_agent() completes, otherwise
    {"status": "unhealthy", "error": <exception text>}.
    """
    try:
        # Check agent health
        await test_agent()
        return {"status": "healthy"}
    except Exception as e:
        # The broad catch is deliberate: any failure at all means "unhealthy".
        # NOTE(review): this branch returns a plain dict, so the HTTP status is
        # presumably still a success code — confirm that probes inspect the body,
        # and check whether exposing str(e) to callers is acceptable.
        return {"status": "unhealthy", "error": str(e)}

# Avoid - no health checks

Python

# Good
from slowapi import Limiter

limiter = Limiter(key_func=get_user_id)

@app.post("/run")
@limiter.limit("10/minute")
async def run_agent_endpoint(request):
    """Run agent with rate limiting."""
    ...

# Avoid - no rate limiting
@app.post("/run")
async def run_agent_endpoint(request):
    ...