08_MODEL_PROVIDERS

Shared from "OpenAI Agents Python" on Inkdown

Model Providers - Comprehensive Deep Dive

Overview

Model Providers are the abstraction layer that connects the OpenAI Agents SDK to various Large Language Model (LLM) APIs. Think of Model Providers as "translators" or "adapters" - they translate the SDK's standardized requests into the specific format required by different LLM providers (OpenAI, Anthropic, Google, etc.).

Core Concepts

What is a Model Provider?

A Model Provider is responsible for:

Resolving model names to concrete Model instances
Managing model connections and resources
Providing a consistent interface across different LLM APIs
Handling provider-specific features and quirks

Why Model Providers Matter

Provider Agnostic - Switch between LLM providers without changing agent code
Flexibility - Use the best model for each task

Python

class Model(abc.ABC):
    """Abstract contract for calling an LLM.

    Concrete subclasses must implement both a one-shot call
    (``get_response``) and a streaming call (``stream_response``). The two
    methods accept the same arguments and differ only in how the result is
    delivered.
    """

    @abc.abstractmethod
    async def get_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        *,
        previous_response_id: str | None,
        conversation_id: str | None,
        prompt: ResponsePromptParam | None,
    ) -> ModelResponse:
        """Request one complete response from the model.

        Args:
            system_instructions: Instruction text for the call, or ``None``.
            input: Either a plain string or a list of input items.
            model_settings: Settings object applied to this call.
            tools: Tools passed along with the request.
            output_schema: Output schema for the response, or ``None``.
            handoffs: Handoffs passed along with the request.
            tracing: Tracing mode for the call.
            previous_response_id: Keyword-only; an earlier response id, or
                ``None``.
            conversation_id: Keyword-only; a conversation id, or ``None``.
            prompt: Keyword-only; a prompt parameter object, or ``None``.

        Returns:
            The model's response as a ``ModelResponse``.
        """
        ...

    @abc.abstractmethod
    def stream_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        *,
        previous_response_id: str | None,
        conversation_id: str | None,
        prompt: ResponsePromptParam | None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        """Stream a response from the model as a sequence of events.

        Takes the same arguments as ``get_response``. It is declared as a
        regular (non-``async``) method whose return annotation is an async
        iterator of stream events.

        Returns:
            An async iterator yielding ``TResponseStreamEvent`` items.
        """
        ...

Python

class ModelProvider(abc.ABC):
    """Abstract factory that maps a model name to a ``Model`` instance."""

    @abc.abstractmethod
    def get_model(self, model_name: str | None) -> Model:
        """Return the ``Model`` for ``model_name``.

        Args:
            model_name: Name of the requested model; ``None`` is allowed by
                the signature, and how it is handled is left to subclasses.

        Returns:
            A ``Model`` instance.
        """
        ...

    async def aclose(self) -> None:
        """Release any resources held by the provider.

        The base implementation does nothing and returns ``None``;
        subclasses that hold resources override it.
        """

Python

import asyncio

from agents import Agent, OpenAIProvider, RunConfig, Runner

provider = OpenAIProvider()

agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model="gpt-4o",
)


async def main() -> None:
    """Run the agent once, resolving its model name through ``provider``."""
    # Run-wide overrides such as the model provider are passed via RunConfig;
    # Runner.run has no `model_provider` keyword of its own.
    result = await Runner.run(
        agent,
        "Hello",
        run_config=RunConfig(model_provider=provider),
    )
    print(result.final_output)


# `await` is only valid inside a coroutine, so the call is wrapped in main()
# and driven by asyncio.run() to make the example runnable as a script.
if __name__ == "__main__":
    asyncio.run(main())

Python

import asyncio

from agents import Agent, OpenAIChatCompletionsModel, Runner

# NOTE(review): the SDK's model constructors may also require an
# `openai_client` argument — confirm against the installed version.
model = OpenAIChatCompletionsModel(model="gpt-4o")

# A concrete Model instance is attached to the agent itself; Runner.run does
# not take a `model` keyword.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model=model,
)


async def main() -> None:
    """Run the agent once using the Chat Completions model."""
    result = await Runner.run(
        agent,
        "Hello",
    )
    print(result.final_output)


# `await` is only valid inside a coroutine, so the call is wrapped in main().
if __name__ == "__main__":
    asyncio.run(main())

Python

import asyncio

from agents import Agent, OpenAIResponsesModel, Runner

# NOTE(review): the SDK's model constructors may also require an
# `openai_client` argument — confirm against the installed version.
model = OpenAIResponsesModel(model="gpt-4o")

# A concrete Model instance is attached to the agent itself; Runner.run does
# not take a `model` keyword.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model=model,
)


async def main() -> None:
    """Run the agent once using the Responses API model."""
    result = await Runner.run(
        agent,
        "Hello",
    )
    print(result.final_output)


# `await` is only valid inside a coroutine, so the call is wrapped in main().
if __name__ == "__main__":
    asyncio.run(main())

Python

import asyncio

from agents import Agent, OpenAIResponsesWSModel, Runner

# NOTE(review): confirm the constructor arguments of OpenAIResponsesWSModel
# against the installed SDK version (an `openai_client` may also be needed).
model = OpenAIResponsesWSModel(model="gpt-4o")

# A concrete Model instance is attached to the agent itself; Runner.run does
# not take a `model` keyword.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model=model,
)


async def main() -> None:
    """Run the agent once using the WebSocket-based Responses model."""
    result = await Runner.run(
        agent,
        "Hello",
    )
    print(result.final_output)


# `await` is only valid inside a coroutine, so the call is wrapped in main().
if __name__ == "__main__":
    asyncio.run(main())

Python

from agents import Agent, ModelSettings

# Configure an agent with an explicit model name plus per-agent model settings.
# NOTE(review): confirm that `reasoning_effort` is a ModelSettings field in the
# installed SDK version (the setting may instead live under a `reasoning`
# field), and that "gpt-5-preview" is a model name available to your account.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model="gpt-5-preview",
    model_settings=ModelSettings(
        # GPT-5 requires specific settings
        reasoning_effort="high",
    ),
)

Python

from agents import Agent, MultiProvider, OpenAIProvider, AnthropicProvider

# Build a provider that wraps two other providers.
# NOTE(review): verify that `AnthropicProvider` is exported by `agents` and
# that MultiProvider accepts a list of providers — TODO confirm against the
# SDK reference.
provider = MultiProvider([
    OpenAIProvider(),
    AnthropicProvider(),
])

# `provider` is not passed to the agent or to a run in this snippet; the agent
# only names the model it wants.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model="gpt-4o",  # Will use OpenAI
)

Python

# Resolve a model name directly through a MultiProvider.
provider = MultiProvider([
    OpenAIProvider(),
    AnthropicProvider(),
])

# NOTE(review): the fallback order described below is not demonstrated by this
# snippet — confirm MultiProvider's actual resolution rules.
# This will try to resolve "gpt-4o" with OpenAI first
# If that fails, it will try with Anthropic (which won't have "gpt-4o")
model = provider.get_model("gpt-4o")

Python

from agents import ModelProvider, Model

class CustomProvider(ModelProvider):
    """Example provider that serves ``CustomModel`` instances from one client.

    A single ``CustomClient`` is created at construction time and shared by
    every model this provider returns.
    """

    def __init__(self, api_key: str):
        """Create the shared client from ``api_key`` and keep both."""
        self.client = CustomClient(api_key)
        self.api_key = api_key

    def get_model(self, model_name: str | None) -> Model:
        """Build a ``CustomModel`` for ``model_name``.

        A falsy name (``None`` or an empty string) falls back to
        ``"default-model"``.
        """
        resolved_name = model_name if model_name else "default-model"
        return CustomModel(resolved_name, self.client)

    async def aclose(self) -> None:
        """Close the shared client."""
        await self.client.close()

Python

from agents import Model, ModelResponse, ModelSettings
from typing import AsyncIterator

class CustomModel(Model):
    """Example ``Model`` implementation that delegates to a custom chat client.

    The ``convert_*`` helpers used below are not defined in this example; a
    real implementation must supply them to translate between the SDK's types
    and the custom API's own format.
    """

    def __init__(self, model_name: str, client: CustomClient):
        """Store the model name and the client used for every request."""
        self.model_name = model_name
        self.client = client

    async def get_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        *,
        previous_response_id: str | None,
        conversation_id: str | None,
        prompt: ResponsePromptParam | None,
    ) -> ModelResponse:
        """Get response from custom API.

        Only ``input``, ``tools`` and ``model_settings`` are forwarded in this
        minimal example; the remaining arguments are accepted to satisfy the
        ``Model`` interface but are not used.

        Returns:
            The custom API's reply converted by ``convert_response``.
        """
        # Convert to custom API format
        custom_input = self.convert_input(input)
        custom_tools = self.convert_tools(tools)

        # Call custom API
        response = await self.client.chat(
            model=self.model_name,
            messages=custom_input,
            tools=custom_tools,
            **self.convert_settings(model_settings),
        )

        # Convert back to SDK format
        return self.convert_response(response)

    async def stream_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        *,
        previous_response_id: str | None,
        conversation_id: str | None,
        prompt: ResponsePromptParam | None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        """Stream response from custom API.

        This must be ``async def``: the body uses ``async for`` and ``yield``,
        which makes it an async generator. Inside a plain ``def`` the
        ``async for`` statement is a syntax error.

        Yields:
            Each chunk from the client, converted by ``convert_chunk``.
        """
        # Build the request the same way get_response does so both paths send
        # the same model, messages, tools and settings.
        custom_input = self.convert_input(input)
        custom_tools = self.convert_tools(tools)

        async for chunk in self.client.chat_stream(
            model=self.model_name,
            messages=custom_input,
            tools=custom_tools,
            **self.convert_settings(model_settings),
        ):
            yield self.convert_chunk(chunk)

Python

import asyncio

from agents import Agent, RunConfig, Runner

provider = CustomProvider(api_key="your-api-key")

# The agent names the model; the provider supplied in RunConfig resolves it.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model="custom-model",
)


async def main() -> None:
    """Run the agent once, resolving "custom-model" through ``provider``."""
    # Runner.run has no `model_provider` or `model` keywords; run-wide
    # overrides are passed via RunConfig.
    result = await Runner.run(
        agent,
        "Hello",
        run_config=RunConfig(model_provider=provider),
    )
    print(result.final_output)


# `await` is only valid inside a coroutine, so the call is wrapped in main().
if __name__ == "__main__":
    asyncio.run(main())

Python

from agents import ModelSettings

# Example ModelSettings covering the common sampling and length controls.
# The ranges in the trailing comments are the documented bounds for each knob.
settings = ModelSettings(
    temperature=0.7,  # 0.0 - 2.0, higher = more creative
    top_p=0.9,  # 0.0 - 1.0, nucleus sampling
    max_tokens=1000,  # Maximum tokens in response
    presence_penalty=0.0,  # -2.0 - 2.0
    frequency_penalty=0.0,  # -2.0 - 2.0
)

Python

from agents import ModelTracing

# The three assignments below show the alternative values side by side; each
# one rebinds `tracing`, so only the last assignment survives if run as-is.

# Tracing disabled
tracing = ModelTracing.DISABLED

# Tracing enabled with data
tracing = ModelTracing.ENABLED

# Tracing enabled without sensitive data
tracing = ModelTracing.ENABLED_WITHOUT_DATA

Python

from agents import ModelRetryAdviceRequest, ModelRetryAdvice

class CustomModel(Model):
    """Example model that advises the caller on retrying failed requests."""

    def get_retry_advice(
        self,
        request: ModelRetryAdviceRequest,
    ) -> ModelRetryAdvice | None:
        """Return retry advice for a failed request, or ``None`` for no advice.

        Only failures whose ``error_type`` is ``"rate_limit"`` get advice:
        retry is allowed after a 60-second wait.
        """
        # Anything other than a rate limit gets no advice.
        if request.error_type != "rate_limit":
            return None

        wait_seconds = 60  # Retry after 60 seconds
        return ModelRetryAdvice(can_retry=True, retry_after=wait_seconds)

Python

from agents import retry_policies, RetryPolicy

# Build a retry policy and attach it to an agent through its model settings.
# NOTE(review): confirm `retry_policies.exponential_backoff(...)` and the
# `retry_policy` ModelSettings field against the installed SDK version.
policy = retry_policies.exponential_backoff(
    max_retries=3,
    initial_delay=1.0,
)

# `Agent` and `ModelSettings` are not imported by this snippet's import line;
# they must already be in scope.
agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model_settings=ModelSettings(
        retry_policy=policy,
    ),
)

Python

from agents import Agent
# The LiteLLM adapter lives in the SDK's extensions package (installed via the
# optional `litellm` extra) and the class is spelled `LitellmModel`.
from agents.extensions.models.litellm_model import LitellmModel

# LiteLLM model names use a "<provider>/<model>" form.
model = LitellmModel(model="anthropic/claude-3-opus")

agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant",
    model=model,
)

Python

# Two agents that differ in name, instructions and the model they request.

# Fast model for simple tasks
quick_agent = Agent(
    name="quick",
    instructions="Quick responses",
    model="gpt-4o-mini",
)

# Smart model for complex tasks
smart_agent = Agent(
    name="smart",
    instructions="Complex reasoning",
    model="gpt-4o",
)

Python

def select_model(context: RunContextWrapper) -> str:
    """Pick a model name from the complexity recorded on the run context.

    Reads ``context.context.complexity``; the value ``"high"`` selects
    ``"gpt-4o"`` and anything else selects ``"gpt-4o-mini"``.
    """
    needs_strong_model = context.context.complexity == "high"
    return "gpt-4o" if needs_strong_model else "gpt-4o-mini"

# Apply via model settings or custom provider

Python

# Contrast between matching the model to the task and over-provisioning it.
# The last agent is a counter-example, not a recommendation.

# Good - appropriate model for task
quick_response = Agent(
    name="quick",
    instructions="Simple responses",
    model="gpt-4o-mini",  # Fast, cheap
)

complex_reasoning = Agent(
    name="complex",
    instructions="Complex reasoning",
    model="gpt-4o",  # More capable
)

# Avoid - overkill for simple tasks
simple_task = Agent(
    name="simple",
    instructions="Simple task",
    model="gpt-4o",  # Unnecessary expense
)

Python

# Contrast between temperatures that suit the task and one that does not.
# The final block rebinds `code_agent`, replacing the low-temperature agent
# defined first; it is shown only as a counter-example.

# Good - factual, low temperature
code_agent = Agent(
    name="coder",
    instructions="Write code",
    model_settings=ModelSettings(temperature=0.1),
)

# Good - creative, high temperature
writer = Agent(
    name="writer",
    instructions="Creative writing",
    model_settings=ModelSettings(temperature=0.9),
)

# Avoid - wrong temperature for task
code_agent = Agent(
    name="coder",
    instructions="Write code",
    model_settings=ModelSettings(temperature=1.5),  # Too creative
)

Python

# Contrast between a token limit sized for a brief summary and an oversized
# one. The second block rebinds `summary`, replacing the first agent; it is
# shown only as a counter-example.

# Good - appropriate limits
summary = Agent(
    name="summarizer",
    instructions="Summarize briefly",
    model_settings=ModelSettings(max_tokens=200),
)

# Avoid - excessive limits
summary = Agent(
    name="summarizer",
    instructions="Summarize briefly",
    model_settings=ModelSettings(max_tokens=4000),  # Too long
)

Python

def get_model_for_task(complexity: str) -> str:
    """Map a task complexity label to a model name.

    ``"high"`` maps to ``"gpt-4o"`` and ``"medium"`` to ``"gpt-4o-mini"``;
    every other label falls back to ``"gpt-3.5-turbo"``. Matching is exact
    and case-sensitive.
    """
    model_by_complexity = {
        "high": "gpt-4o",
        "medium": "gpt-4o-mini",
    }
    return model_by_complexity.get(complexity, "gpt-3.5-turbo")

# The model name is computed once, when the agent is constructed; here the
# "medium" label is passed to get_model_for_task.
agent = Agent(
    name="adaptive",
    instructions="Adaptive responses",
    model=get_model_for_task("medium"),
)

Python

# Split work across two agents so each step can request a different model.

# Use cheap model for classification
classifier = Agent(
    name="classifier",
    instructions="Classify the input",
    model="gpt-4o-mini",
)

# Use expensive model only for generation
generator = Agent(
    name="generator",
    instructions="Generate content",
    model="gpt-4o",
)

Python

def get_instructions_for_model(model: str) -> str:
    """Choose instruction text based on a substring of the model name.

    Markers are checked in order and the first one found in ``model`` wins:
    ``"gpt-4"`` first, then ``"claude"``. If neither occurs, a generic
    instruction is returned. Matching is case-sensitive.
    """
    instructions_by_marker = (
        ("gpt-4", "You are GPT-4, be thorough."),
        ("claude", "You are Claude, be helpful."),
    )
    for marker, instructions in instructions_by_marker:
        if marker in model:
            return instructions
    return "You are a helpful assistant."

# Use one name for both the model and its instructions so they cannot drift
# apart: the instructions are chosen for the model the agent actually runs.
model_name = "gpt-4o"

agent = Agent(
    name="adaptive",
    instructions=get_instructions_for_model(model_name),
    model=model_name,
)