13_LIFECYCLE_HOOKS

Shared from "Study" on Inkdown

Lifecycle Hooks - Comprehensive Deep Dive

Overview

Lifecycle Hooks in the OpenAI Agents SDK allow you to execute custom code at specific points during an agent run. Think of Lifecycle Hooks as "event listeners" or "callbacks" that let you hook into the execution flow - you can run custom logic when an agent starts, when a tool is called, when the LLM generates a response, and more. This is essential for logging, monitoring, custom business logic, and extending agent behavior.

Core Concepts

What are Lifecycle Hooks?

Lifecycle Hooks are functions that are called at specific points during agent execution:

Agent lifecycle - When an agent starts or ends
LLM lifecycle - Before and after LLM calls
Tool lifecycle - Before and after tool execution
Handoff lifecycle - When agents hand off to each other

Why Lifecycle Hooks Matter

Python

from agents import RunHooks

class MyRunHooks(RunHooks[TContext]):
    """Run-level hooks that announce the start and finish of every agent."""

    async def on_agent_start(self, context, agent):
        """Print a notice when any agent starts."""
        announcement = f"Agent {agent.name} starting"
        print(announcement)

    async def on_agent_end(self, context, agent, output):
        """Print a notice when any agent ends."""
        announcement = f"Agent {agent.name} finished"
        print(announcement)

Python

from agents import AgentHooks

class MyAgentHooks(AgentHooks[TContext]):
    """Agent-scoped hooks that announce this agent's start and finish."""

    async def on_start(self, context, agent):
        """Called when this specific agent starts."""
        # Plain literal: the message has no placeholders, so no f-prefix.
        print("My agent starting")

    async def on_end(self, context, agent, output):
        """Called when this specific agent ends."""
        print("My agent finished")

Python

async def on_agent_start(
    self,
    context: AgentHookContext[TContext],
    agent: TAgent,
) -> None:
    """Run-level hook invoked before an agent is invoked.

    Fires each time the current agent changes.

    Args:
        context: Hook context for the run.
        agent: The agent that is about to run; only ``agent.name`` is read.
    """
    message = f"Agent {agent.name} starting"
    print(message)

Python

async def on_agent_end(
    self,
    context: AgentHookContext[TContext],
    agent: TAgent,
    output: Any,
) -> None:
    """Run-level hook invoked when the agent produces a final output.

    Args:
        context: Hook context for the run.
        agent: The agent that finished; only ``agent.name`` is read.
        output: The final output, interpolated into the printed message.
    """
    message = f"Agent {agent.name} finished with output: {output}"
    print(message)

Python

async def on_handoff(
    self,
    context: RunContextWrapper[TContext],
    from_agent: TAgent,
    to_agent: TAgent,
) -> None:
    """Run-level hook invoked when a handoff occurs.

    Args:
        context: Run context wrapper.
        from_agent: The agent handing off.
        to_agent: The agent receiving the handoff.
    """
    message = f"Handoff from {from_agent.name} to {to_agent.name}"
    print(message)

Python

async def on_llm_start(
    self,
    context: RunContextWrapper[TContext],
    agent: Agent[TContext],
    system_prompt: str | None,
    input_items: list[TResponseInputItem],
) -> None:
    """Run-level hook invoked just before the LLM is called for this agent.

    Args:
        context: Run context wrapper.
        agent: The agent issuing the call; only ``agent.name`` is read.
        system_prompt: System prompt for the call, if any (unused here).
        input_items: Input items for the call (unused here).
    """
    message = f"LLM call starting for {agent.name}"
    print(message)

Python

async def on_llm_end(
    self,
    context: RunContextWrapper[TContext],
    agent: Agent[TContext],
    response: ModelResponse,
) -> None:
    """Run-level hook invoked right after the LLM call returns for this agent.

    Args:
        context: Run context wrapper.
        agent: The agent that issued the call; only ``agent.name`` is read.
        response: The model response (unused here).
    """
    message = f"LLM call completed for {agent.name}"
    print(message)

Python

async def on_tool_start(
    self,
    context: RunContextWrapper[TContext],
    agent: TAgent,
    tool: Tool,
) -> None:
    """Run-level hook invoked immediately before a local tool is invoked.

    For function-tool invocations, ``context`` is typically a ToolContext
    instance.

    Args:
        context: Run context wrapper.
        agent: The agent invoking the tool (unused here).
        tool: The tool about to run; only ``tool.name`` is read.
    """
    message = f"Tool {tool.name} starting"
    print(message)

Python

async def on_tool_end(
    self,
    context: RunContextWrapper[TContext],
    agent: TAgent,
    tool: Tool,
    result: str,
) -> None:
    """Run-level hook invoked immediately after a local tool is invoked.

    Args:
        context: Run context wrapper.
        agent: The agent that invoked the tool (unused here).
        tool: The tool that ran; only ``tool.name`` is read.
        result: The tool result as a string (unused here).
    """
    message = f"Tool {tool.name} finished"
    print(message)

Python

async def on_start(
    self,
    context: AgentHookContext[TContext],
    agent: TAgent,
) -> None:
    """Called before the agent is invoked.

    Called each time the running agent is changed to this agent.

    Args:
        context: Hook context for the run (unused here).
        agent: The agent these hooks are attached to (unused here).
    """
    # Plain literal: the message has no placeholders, so no f-prefix.
    print("This agent starting")

Python

async def on_end(
    self,
    context: AgentHookContext[TContext],
    agent: TAgent,
    output: Any,
) -> None:
    """Called when the agent produces a final output.

    Args:
        context: Hook context for the run (unused here).
        agent: The agent these hooks are attached to (unused here).
        output: The final output (unused here).
    """
    # Plain literal: the message has no placeholders, so no f-prefix.
    print("This agent finished")

Python

async def on_handoff(
    self,
    context: RunContextWrapper[TContext],
    agent: TAgent,
    source: TAgent,
) -> None:
    """Agent-level hook invoked when this agent is being handed off to.

    Args:
        context: Run context wrapper.
        agent: The agent receiving the handoff.
        source: The agent that is handing off to this agent.
    """
    message = f"Handed off from {source.name} to {agent.name}"
    print(message)

Python

async def on_tool_start(
    self,
    context: RunContextWrapper[TContext],
    agent: TAgent,
    tool: Tool,
) -> None:
    """Agent-level hook invoked immediately before a local tool is invoked.

    Args:
        context: Run context wrapper.
        agent: The agent invoking the tool (unused here).
        tool: The tool about to run; only ``tool.name`` is read.
    """
    message = f"Tool {tool.name} starting in this agent"
    print(message)

Python

async def on_tool_end(
    self,
    context: RunContextWrapper[TContext],
    agent: TAgent,
    tool: Tool,
    result: str,
) -> None:
    """Agent-level hook invoked immediately after a local tool is invoked.

    Args:
        context: Run context wrapper.
        agent: The agent that invoked the tool (unused here).
        tool: The tool that ran; only ``tool.name`` is read.
        result: The tool result as a string (unused here).
    """
    message = f"Tool {tool.name} finished in this agent"
    print(message)

Python

async def on_llm_start(
    self,
    context: RunContextWrapper[TContext],
    agent: Agent[TContext],
    system_prompt: str | None,
    input_items: list[TResponseInputItem],
) -> None:
    """Called immediately before the agent issues an LLM call.

    Args:
        context: Run context wrapper (unused here).
        agent: The agent issuing the call (unused here).
        system_prompt: System prompt for the call, if any (unused here).
        input_items: Input items for the call (unused here).
    """
    # Plain literal: the message has no placeholders, so no f-prefix.
    print("LLM call starting for this agent")

Python

async def on_llm_end(
    self,
    context: RunContextWrapper[TContext],
    agent: Agent[TContext],
    response: ModelResponse,
) -> None:
    """Called immediately after the agent receives the LLM response.

    Args:
        context: Run context wrapper (unused here).
        agent: The agent that issued the call (unused here).
        response: The model response (unused here).
    """
    # Plain literal: the message has no placeholders, so no f-prefix.
    print("LLM call completed for this agent")

Python

from agents import RunHooks

class MyRunHooks(RunHooks):
    async def on_agent_start(self, context, agent):
        print(f"Agent {agent.name} starting")
    
    async def on_agent_end(self, context, agent, output):
        print(f"Agent {agent.name} finished")

hooks = MyRunHooks()

result = await Runner.run(
    agent,
    input,
    hooks=hooks,
)

Python

class MyRunHooks(RunHooks[UserContext]):
    """Run-level hooks that read the typed user context."""

    async def on_agent_start(self, context, agent):
        """Print which user the starting agent is running for."""
        user_context = context.context
        user_id = user_context.user_id
        message = f"Agent {agent.name} starting for user {user_id}"
        print(message)

Python

from agents import Agent, AgentHooks

class MyAgentHooks(AgentHooks):
    """Agent-scoped hooks that print when the owning agent starts or finishes."""

    async def on_start(self, context, agent):
        """Print a notice when this agent starts."""
        # Plain literal: the message has no placeholders, so no f-prefix.
        print("My agent starting")

    async def on_end(self, context, agent, output):
        """Print a notice when this agent finishes."""
        print("My agent finished")

# Attach the hooks to a single agent through the `hooks` argument.
agent = Agent(
    name="my_agent",
    instructions="...",
    hooks=MyAgentHooks(),
)

Python

@dataclass
class AgentHookContext(Generic[TContext]):
    """Context type annotated on the agent start/end hooks.

    NOTE(review): this field list is illustrative; confirm it against the
    installed SDK version.
    """

    # User-provided context.
    context: TContext

    # Run configuration.
    run_config: RunConfig

    # The agent.
    agent: Agent[TContext]

    # Session if configured.
    session: Session | None

Python

@dataclass
class RunContextWrapper(Generic[TContext]):
    """Context type annotated on the LLM, tool and handoff hooks.

    NOTE(review): this field list is partial and illustrative; confirm it
    against the installed SDK version.
    """

    # User-provided context.
    context: TContext

    # Token usage tracking.
    usage: Usage

    # Tool approval function.
    approve_tool: Callable

    # Tool rejection function.
    reject_tool: Callable

    # ... other fields

Plain text

1. RunHooks.on_agent_start
2. AgentHooks.on_start (for the starting agent)
3. RunHooks.on_llm_start
4. AgentHooks.on_llm_start
5. [LLM call]
6. AgentHooks.on_llm_end
7. RunHooks.on_llm_end
8. [Tool execution - if tools called]
   - RunHooks.on_tool_start
   - AgentHooks.on_tool_start
   - [Tool execution]
   - AgentHooks.on_tool_end
   - RunHooks.on_tool_end
9. [Handoff - if handoff occurs]
   - RunHooks.on_handoff
   - AgentHooks.on_handoff (for target agent)
10. AgentHooks.on_end (for finishing agent)
11. RunHooks.on_agent_end

Python

# Good - fast hook
async def on_agent_start(self, context, agent):
    """Log the agent start with a single local logging call."""
    logger.info(f"Agent {agent.name} starting")  # Fast logging

# Avoid - slow hook
async def on_agent_start(self, context, agent):
    """Anti-pattern: await a slow external call inside the hook."""
    # The hook does not return until the awaited call completes.
    await slow_external_api_call(agent.name)  # Blocks execution

Python

# Good - handle errors
async def on_agent_start(self, context, agent):
    """Call the external logger and contain any failure inside the hook."""
    try:
        log_to_external_system(agent.name)
    except Exception as e:
        # Deliberate catch-all: the error is logged and not re-raised.
        logger.error(f"Hook error: {e}")  # Don't break execution

# Avoid - unhandled errors
async def on_agent_start(self, context, agent):
    """Anti-pattern: any exception from the external call escapes the hook."""
    log_to_external_system(agent.name)  # Might crash

Python

# Good - logging is cross-cutting
class LoggingHooks(RunHooks):
    """Hooks limited to a cross-cutting concern (logging)."""
    async def on_agent_start(self, context, agent):
        """Log that the agent is starting."""
        logger.info(f"Agent {agent.name} starting")

# Avoid - business logic in hooks
class BusinessLogicHooks(RunHooks):
    """Anti-pattern: hooks that carry business decisions."""
    async def on_agent_start(self, context, agent):
        """Apply a business rule on agent start (shown as what not to do)."""
        if complex_business_rule(agent):
            do_something()  # Business logic should be in tools/agents

Python

# Good - read-only access
async def on_agent_start(self, context, agent):
    """Read a value from the user context and log it without changing it."""
    user_id = context.context.user_id
    logger.info(f"User: {user_id}")

# Avoid - modifying state
async def on_agent_start(self, context, agent):
    """Anti-pattern: overwrite a field on the user context from a hook."""
    context.context.user_id = "modified"  # Unexpected behavior

Python

class MyHooks(RunHooks):
    """Run-level hooks used to monitor agent execution."""

    async def on_agent_start(self, context, agent):
        """Emit an info-level log entry when an agent starts."""
        message = f"Agent {agent.name} starting"
        logger.info(message)

Python

class LoggingHooks(RunHooks):
    """Comprehensive logging hooks.

    Each hook emits one info-level record. Arguments are passed to the logger
    as lazy %-style args, so the message is only formatted when the record is
    actually emitted.
    """

    async def on_agent_start(self, context, agent):
        """Log that an agent is starting."""
        logger.info("Agent %s starting", agent.name)

    async def on_agent_end(self, context, agent, output):
        """Log that an agent has finished."""
        logger.info("Agent %s finished", agent.name)

    async def on_llm_start(self, context, agent, system_prompt, input_items):
        """Log that an LLM call is about to be made for the agent."""
        logger.info("LLM call starting for %s", agent.name)

    async def on_llm_end(self, context, agent, response):
        """Log the total token count reported on the LLM response."""
        logger.info("LLM call completed, tokens: %s", response.usage.total_tokens)

    async def on_tool_start(self, context, agent, tool):
        """Log that a tool is about to run."""
        logger.info("Tool %s starting", tool.name)

    async def on_tool_end(self, context, agent, tool, result):
        """Log that a tool has finished."""
        logger.info("Tool %s finished", tool.name)

Python

import time


class MonitoringHooks(RunHooks):
    """Performance monitoring hooks.

    Durations are measured by the hooks themselves: a start timestamp is
    stored in the ``*_start`` hook and subtracted in the matching ``*_end``
    hook. The tool result passed to ``on_tool_end`` is a string, so it has no
    ``duration`` attribute to read.
    NOTE(review): the LLM response is likewise not assumed to carry timing
    data; confirm against the installed SDK version.
    """

    def __init__(self):
        super().__init__()
        # time.perf_counter() readings for calls that have started but not ended.
        self._llm_started = {}
        self._tool_started = {}

    @staticmethod
    def _tool_key(context, agent, tool):
        """Build the lookup key for one tool invocation."""
        # NOTE(review): assumes function-tool contexts expose `tool_call_id`;
        # when absent, calls are distinguished by agent and tool name only.
        return (agent.name, tool.name, getattr(context, "tool_call_id", None))

    @staticmethod
    def _elapsed(started, key):
        """Return seconds since ``key`` started, or None if no start was seen."""
        began = started.pop(key, None)
        if began is None:
            return None
        return time.perf_counter() - began

    async def on_llm_start(self, context, agent, system_prompt, input_items):
        """Remember when the LLM call for this agent began."""
        self._llm_started[agent.name] = time.perf_counter()

    async def on_llm_end(self, context, agent, response):
        """Record token usage and measured duration of the LLM call."""
        metrics.record(
            "llm_call",
            agent=agent.name,
            tokens=response.usage.total_tokens,
            duration=self._elapsed(self._llm_started, agent.name),
        )

    async def on_tool_start(self, context, agent, tool):
        """Remember when this tool invocation began."""
        self._tool_started[self._tool_key(context, agent, tool)] = time.perf_counter()

    async def on_tool_end(self, context, agent, tool, result):
        """Record the measured duration of the tool invocation."""
        metrics.record(
            "tool_call",
            tool=tool.name,
            duration=self._elapsed(
                self._tool_started, self._tool_key(context, agent, tool)
            ),
        )

Python

class AnalyticsHooks(RunHooks):
    """Hooks that forward agent-completion events to analytics."""

    async def on_agent_end(self, context, agent, output):
        """Track one ``agent_completion`` event for the finished agent."""
        event_fields = {
            "agent": agent.name,
            "output_length": len(str(output)),
            "turn_count": context.context.turn_count,
        }
        analytics.track("agent_completion", **event_fields)

Python

class ValidationHooks(RunHooks):
    """Validation hooks."""

    async def on_llm_end(self, context, agent, response):
        """Reject an LLM response that has no output.

        Raises:
            ValueError: If ``response.output`` is empty.
        """
        # `output` is read directly off the response, matching how the other
        # hooks in these notes read `response.usage`.
        if not response.output:
            raise ValueError("Empty response")

    async def on_tool_end(self, context, agent, tool, result):
        """Warn when the tool result text mentions an error."""
        # Coerce to str so a non-string result cannot fail on `.lower()`.
        if "error" in str(result).lower():
            logger.warning("Tool error: %s", tool.name)

Python

class StateTrackingHooks(RunHooks):
    """State tracking hooks.

    Stores ``<agent name>_start``, ``<agent name>_end`` and
    ``<agent name>_duration`` entries in ``self.state``.
    """

    def __init__(self):
        super().__init__()
        self.state = {}

    async def on_agent_start(self, context, agent):
        """Record when the agent started."""
        self.state[f"{agent.name}_start"] = datetime.now()

    async def on_agent_end(self, context, agent, output):
        """Record when the agent ended and, if a start was seen, its duration."""
        ended_at = datetime.now()
        self.state[f"{agent.name}_end"] = ended_at
        # Guard against an end without a recorded start, which would
        # otherwise raise KeyError inside the hook.
        started_at = self.state.get(f"{agent.name}_start")
        if started_at is not None:
            self.state[f"{agent.name}_duration"] = ended_at - started_at

Python

class MyHooks(RunHooks[UserContext]):
    """Hooks that stamp run metadata onto the user context."""

    async def on_agent_start(self, context, agent):
        """Attach start time and agent name to ``context.context.metadata``."""
        # Add metadata to context
        run_metadata = {"start_time": datetime.now(), "agent": agent.name}
        context.context.metadata = run_metadata

Python

class MyHooks(RunHooks):
    """Hooks that contain their own failures."""

    async def on_agent_start(self, context, agent):
        """Run a risky operation; log any failure instead of raising it."""
        try:
            risky_operation()
        except Exception as e:
            # Deliberate catch-all at the hook boundary. `logger.exception`
            # logs at ERROR level and includes the traceback.
            logger.exception("Hook error: %s", e)
            # Execution continues

Python

@pytest.mark.asyncio
async def test_hooks():
    """Call the start and end hooks directly and check their flags.

    Relies on ``MyHooks`` exposing ``start_called`` and ``end_called``
    attributes, and on the ``create_test_context`` / ``create_test_agent``
    helpers being available in the test module.
    """
    hooks_under_test = MyHooks()
    test_context = create_test_context()
    test_agent = create_test_agent()

    await hooks_under_test.on_agent_start(test_context, test_agent)
    assert hooks_under_test.start_called

    await hooks_under_test.on_agent_end(test_context, test_agent, "output")
    assert hooks_under_test.end_called

Python

class MockHooks(RunHooks):
    def __init__(self):
        self.events = []
    
    async def on_agent_start(self, context, agent):
        self.events.append(("start", agent.name))

mock_hooks = MockHooks()
result = await Runner.run(agent, input, hooks=mock_hooks)
assert ("start", agent.name) in mock_hooks.events

Python

# Option 1: Use fast logging
import logging
# Module-level logger named after the current module.
logger = logging.getLogger(__name__)  # Fast

# Option 2: Batch operations
class BatchHooks(RunHooks):
    """Hooks that buffer agent results and flush them in batches.

    Args:
        batch_size: Number of buffered entries that triggers a flush.
            Defaults to 100.
    """

    def __init__(self, batch_size=100):
        super().__init__()
        self.batch_size = batch_size
        self.batch = []

    async def on_agent_end(self, context, agent, output):
        """Buffer the result and flush once the batch is full."""
        self.batch.append((agent.name, output))
        if len(self.batch) >= self.batch_size:
            await self.flush()

    async def flush(self):
        """Send all buffered entries; call once more at shutdown for leftovers."""
        if not self.batch:
            return
        # Swap the buffer out BEFORE awaiting: entries appended by other hook
        # calls while the flush is in flight land in the new list instead of
        # being discarded by a reset after the await.
        pending, self.batch = self.batch, []
        await flush_batch(pending)

# Option 3: Async operations
class AsyncHooks(RunHooks):
    """Hooks that hand slow work to background tasks."""

    def __init__(self):
        super().__init__()
        # Strong references to in-flight tasks. The event loop keeps only
        # weak references, so an unreferenced task can be garbage-collected
        # before it finishes.
        self._background_tasks = set()

    async def on_agent_end(self, context, agent, output):
        """Schedule the logging call without waiting for it to finish."""
        # Fire and forget
        task = asyncio.create_task(async_log(agent.name, output))
        self._background_tasks.add(task)
        # Drop the reference once the task completes.
        task.add_done_callback(self._background_tasks.discard)