Overview
LangChain provides two main types of language models:
- Chat Models (BaseChatModel): Conversational models that take messages and return messages
- LLMs (BaseLLM): Traditional text-in, text-out language models
BaseChatModel
Base class for chat models that use message-based interfaces.
Source: langchain_core.language_models.chat_models:246
Inherits: BaseLanguageModel[AIMessage], Runnable
Type Parameters
Chat models are generic over their output type (always AIMessage).
Properties
rate_limiter — Optional rate limiter for throttling requests
disable_streaming
bool | Literal['tool_calling']
default: False
Whether to disable streaming for this model.
True: Always bypass streaming
'tool_calling': Bypass streaming only when tools are provided
False: Use streaming if available
output_version — Version of AIMessage output format. Can be 'v0' (provider-specific) or 'v1' (standardized).
profile — Profile detailing model capabilities (context window, supported modalities, etc.). Beta feature.
Core Methods
invoke
def invoke(
self,
input: LanguageModelInput,
config: RunnableConfig | None = None,
*,
stop: list[str] | None = None,
**kwargs: Any
) -> AIMessage
Generate a chat completion for a single input.
input
str | list[dict | tuple | BaseMessage] | PromptValue
required
The input to the model. Can be:
- A string (converted to a HumanMessage)
- A list of messages or message-like tuples
- A PromptValue
Configuration for callbacks, tags, metadata
Stop sequences for generation
Additional model-specific parameters
The model's response as an AIMessage
ainvoke
async def ainvoke(
self,
input: LanguageModelInput,
config: RunnableConfig | None = None,
*,
stop: list[str] | None = None,
**kwargs: Any
) -> AIMessage
Async version of invoke.
stream
def stream(
self,
input: LanguageModelInput,
config: RunnableConfig | None = None,
*,
stop: list[str] | None = None,
**kwargs: Any
) -> Iterator[AIMessageChunk]
Stream chat message chunks from the model.
Iterator of message chunks as they are generated
astream
async def astream(
self,
input: LanguageModelInput,
config: RunnableConfig | None = None,
*,
stop: list[str] | None = None,
**kwargs: Any
) -> AsyncIterator[AIMessageChunk]
Async version of stream.
batch
def batch(
self,
inputs: list[LanguageModelInput],
config: RunnableConfig | list[RunnableConfig] | None = None,
*,
return_exceptions: bool = False,
**kwargs: Any
) -> list[AIMessage]
Batch multiple chat completions.
abatch
async def abatch(
self,
inputs: list[LanguageModelInput],
config: RunnableConfig | list[RunnableConfig] | None = None,
*,
return_exceptions: bool = False,
**kwargs: Any
) -> list[AIMessage]
Async batch multiple chat completions.
def bind_tools(
self,
tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
*,
tool_choice: str | dict[str, Any] | bool | None = None,
**kwargs: Any
) -> Runnable[LanguageModelInput, AIMessage]
Bind tools to the model for tool/function calling.
tools
Sequence[dict | type | Callable | BaseTool]
required
Tools to bind. Can be:
- Tool dictionaries (OpenAI format)
- Pydantic models (converted to JSON schema)
- Python functions (converted to tools)
- BaseTool instances
Which tool to call:
None: Model decides
"auto": Model decides (explicit)
"required": Must call a tool
{"name": "tool_name"}: Call specific tool
True: Same as "required"
return
Runnable[LanguageModelInput, AIMessage]
New runnable with tools bound
with_structured_output
def with_structured_output(
self,
schema: dict | type[BaseModel],
*,
method: Literal["function_calling", "json_mode"] = "function_calling",
include_raw: bool = False,
**kwargs: Any
) -> Runnable[LanguageModelInput, dict | BaseModel]
Create a runnable that returns structured output matching a schema.
schema
dict | type[BaseModel]
required
The schema for output. Can be JSON schema dict or Pydantic model.
method
Literal['function_calling', 'json_mode']
default:"function_calling"
How to extract structured output:
"function_calling": Use tool/function calling
"json_mode": Use JSON mode with parsing
If True, return both parsed output and raw message
return
Runnable[LanguageModelInput, dict | BaseModel]
Runnable that outputs structured data
Generation Methods
generate_prompt
def generate_prompt(
self,
prompts: list[PromptValue],
stop: list[str] | None = None,
callbacks: Callbacks = None,
**kwargs: Any
) -> LLMResult
Generate completions for multiple prompt values.
prompts
list[PromptValue]
required
List of prompt values to generate from
LLM result containing generations and metadata
agenerate_prompt
async def agenerate_prompt(
self,
prompts: list[PromptValue],
stop: list[str] | None = None,
callbacks: Callbacks = None,
**kwargs: Any
) -> LLMResult
Async version of generate_prompt.
Implementation Methods
When subclassing BaseChatModel, implement these methods:
_generate (Required)
def _generate(
self,
messages: list[BaseMessage],
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any
) -> ChatResult
Generate a chat result from messages. Must be implemented by subclasses.
_llm_type (Required)
@property
def _llm_type(self) -> str
Unique identifier for the model type. Must be implemented by subclasses.
_stream (Optional)
def _stream(
self,
messages: list[BaseMessage],
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any
) -> Iterator[ChatGenerationChunk]
Stream message chunks. Override to implement native streaming.
_agenerate (Optional)
async def _agenerate(
self,
messages: list[BaseMessage],
stop: list[str] | None = None,
run_manager: AsyncCallbackManagerForLLMRun | None = None,
**kwargs: Any
) -> ChatResult
Async generation. Override to implement native async.
_astream (Optional)
async def _astream(
self,
messages: list[BaseMessage],
stop: list[str] | None = None,
run_manager: AsyncCallbackManagerForLLMRun | None = None,
**kwargs: Any
) -> AsyncIterator[ChatGenerationChunk]
Async streaming. Override to implement native async streaming.
BaseLLM
Base class for traditional large language models with text-in, text-out interfaces.
Source: langchain_core.language_models.llms:292
Inherits: BaseLanguageModel[str], Runnable
Core Methods
invoke
def invoke(
self,
input: LanguageModelInput,
config: RunnableConfig | None = None,
*,
stop: list[str] | None = None,
**kwargs: Any
) -> str
Generate text completion for an input.
input
str | PromptValue
required
Input text or prompt value
stream
def stream(
self,
input: LanguageModelInput,
config: RunnableConfig | None = None,
*,
stop: list[str] | None = None,
**kwargs: Any
) -> Iterator[str]
Stream text chunks as they are generated.
generate
def generate(
self,
prompts: list[str],
stop: list[str] | None = None,
callbacks: Callbacks = None,
**kwargs: Any
) -> LLMResult
Generate completions for multiple prompts.
Results containing generations and metadata
agenerate
async def agenerate(
self,
prompts: list[str],
stop: list[str] | None = None,
callbacks: Callbacks = None,
**kwargs: Any
) -> LLMResult
Async version of generate.
Implementation Methods
_generate (Required)
def _generate(
self,
prompts: list[str],
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any
) -> LLMResult
Generate completions. Must be implemented by subclasses.
_llm_type (Required)
@property
def _llm_type(self) -> str
Unique identifier for the LLM type. Must be implemented by subclasses.
_stream (Optional)
def _stream(
self,
prompt: str,
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any
) -> Iterator[GenerationChunk]
Stream generation chunks. Override for native streaming support.
BaseLanguageModel
Common base class for both chat models and LLMs.
Source: langchain_core.language_models.base
Inherits: RunnableSerializable[LanguageModelInput, LanguageModelOutput]
Provides common functionality like caching, callbacks, and serialization.
Type Aliases
LanguageModelInput = str | PromptValue | Sequence[BaseMessage | tuple | dict]
LanguageModelOutput = str | AIMessage