Overview
The LLMClient provides a high-level, ergonomic API for interacting with LLM providers. It wraps LLMProvider implementations and provides convenient methods for chat, embeddings, and session management.
Creating a Client
Basic Usage
use mofa_foundation::llm::{LLMClient, OpenAIProvider};
use std::sync::Arc;
let provider = Arc::new(OpenAIProvider::new("sk-xxx"));
let client = LLMClient::new(provider);
With Configuration
use mofa_foundation::llm::{LLMClient, LLMConfig, OpenAIProvider};
use std::sync::Arc;
let provider = Arc::new(OpenAIProvider::new("sk-xxx"));
let config = LLMConfig {
default_model: Some("gpt-4o".to_string()),
default_temperature: Some(0.7),
default_max_tokens: Some(2048),
..Default::default()
};
let client = LLMClient::with_config(provider, config);
Methods
Simple Queries
ask
async fn(question: impl Into<String>) -> LLMResult<String>
Sends a simple question and returns the text response. Example:
let answer = client.ask("What is Rust?").await?;
println!("{}", answer);
ask_with_system
async fn(system: impl Into<String>, question: impl Into<String>) -> LLMResult<String>
Sends a question with a system prompt. Example:
let answer = client.ask_with_system(
"You are a Rust expert.",
"Explain ownership."
).await?;
Chat Builder
chat
fn() -> ChatRequestBuilder
Creates a chat request builder for constructing complex requests. Example:
let response = client.chat()
.system("You are a helpful assistant.")
.user("Hello!")
.temperature(0.8)
.max_tokens(1000)
.send()
.await?;
Embeddings
embed
async fn(input: impl Into<String>) -> LLMResult<Vec<f32>>
Generates an embedding vector for a single text input. Example:
let embedding = client.embed("Hello world").await?;
println!("Embedding dimension: {}", embedding.len());
embed_batch
async fn(inputs: Vec<String>) -> LLMResult<Vec<Vec<f32>>>
Generates embedding vectors for multiple text inputs. Example:
let embeddings = client.embed_batch(vec![
"First text".to_string(),
"Second text".to_string(),
]).await?;
ChatRequestBuilder
The builder pattern for constructing chat requests.
Message Methods
system
fn(content: impl Into<String>) -> Self
Adds a system message to the conversation
user
fn(content: impl Into<String>) -> Self
Adds a user message to the conversation
assistant
fn(content: impl Into<String>) -> Self
Adds an assistant message to the conversation
message
fn(message: ChatMessage) -> Self
Adds a complete message object
messages
fn(messages: Vec<ChatMessage>) -> Self
Adds multiple messages at once
Multi-Modal Support
user_with_content
fn(content: MessageContent) -> Self
Adds a user message with structured content (text, images, audio, video). Example:
use mofa_foundation::llm::{MessageContent, ContentPart, ImageUrl, ImageDetail};
let content = MessageContent::Parts(vec![
ContentPart::Text { text: "What is in this image?".to_string() },
ContentPart::Image {
image_url: ImageUrl {
url: "data:image/png;base64,...".to_string(),
detail: Some(ImageDetail::High),
},
},
]);
let response = client.chat()
.user_with_content(content)
.send()
.await?;
Parameter Configuration
temperature
Sets the sampling temperature (0.0 to 2.0)
max_tokens
Sets the maximum number of tokens to generate
json_mode
Enables JSON response format. Example:
let response = client.chat()
.system("You are a JSON API. Return {\"answer\": string}")
.user("What is 2+2?")
.json_mode()
.send()
.await?;
stop
fn(sequences: Vec<String>) -> Self
Sets stop sequences for generation
tool
Adds a tool/function that the model can call. Example:
use mofa_foundation::llm::function_tool;
use serde_json::json;
let weather_tool = function_tool(
"get_weather",
"Get weather for a location",
json!({
"type": "object",
"properties": {
"location": { "type": "string" }
},
"required": ["location"]
})
);
let response = client.chat()
.user("What's the weather in Tokyo?")
.tool(weather_tool)
.send()
.await?;
with_tool_executor
fn(executor: Arc<dyn ToolExecutor>) -> Self
Sets the tool executor for automatic tool execution
Sets the maximum number of tool calling rounds (default: 10)
Retry Configuration
with_retry
Enables retry with default policy for transient failures. Example:
let response = client.chat()
.json_mode()
.with_retry()
.send()
.await?;
max_retries
Sets maximum retry attempts. Example:
let response = client.chat()
.max_retries(3)
.send()
.await?;
Execution Methods
send
async fn() -> LLMResult<ChatCompletionResponse>
Sends the request and returns the complete response
send_stream
async fn() -> LLMResult<ChatStream>
Sends the request and returns a streaming response. Example:
use futures::StreamExt;
let mut stream = client.chat()
.user("Tell me a story")
.send_stream()
.await?;
while let Some(chunk) = stream.next().await {
let chunk = chunk?;
if let Some(content) = chunk.content() {
print!("{}", content);
}
}
send_with_tools
async fn() -> LLMResult<ChatCompletionResponse>
Sends the request and automatically executes tool calls in a loop. Example:
struct MyExecutor;
#[async_trait]
impl ToolExecutor for MyExecutor {
async fn execute(&self, name: &str, args: &str) -> LLMResult<String> {
match name {
"get_weather" => Ok(r#"{"temp": 22, "condition": "sunny"}"#.to_string()),
_ => Err(LLMError::Other("Unknown tool".to_string()))
}
}
async fn available_tools(&self) -> LLMResult<Vec<Tool>> {
Ok(vec![/* tools */])
}
}
let response = client.chat()
.user("What's the weather like?")
.with_tool_executor(Arc::new(MyExecutor))
.send_with_tools()
.await?;
ChatSession
Manages multi-turn conversations with message history.
Creating a Session
use mofa_foundation::llm::ChatSession;
let mut session = ChatSession::new(client)
.with_system("You are a helpful assistant.");
Session Methods
send
async fn(content: impl Into<String>) -> LLMResult<String>
Sends a message and maintains conversation history. Example:
let r1 = session.send("Hello!").await?;
let r2 = session.send("What did I just say?").await?;
// r2 will reference the previous message
send_with_content
async fn(content: MessageContent) -> LLMResult<String>
Sends structured content (with images, audio, etc.)
with_system
fn(prompt: impl Into<String>) -> Self
Sets the system prompt for the session
with_context_window_size
fn(size: Option<usize>) -> Self
Sets the maximum number of conversation rounds to keep in memory. Example:
let session = ChatSession::new(client)
.with_context_window_size(Some(10)); // Keep last 10 rounds
with_tools
fn(tools: Vec<Tool>, executor: Arc<dyn ToolExecutor>) -> Self
Enables tool calling for the session
Returns the message history
Clears the message history
Returns the unique session identifier
Persistence
save
async fn() -> PersistenceResult<()>
Saves the session and messages to the database. Example:
session.save().await?;
load
async fn(...) -> PersistenceResult<Self>
Loads a session from the database. Example:
let session = ChatSession::load(
session_id,
client,
user_id,
tenant_id,
agent_id,
message_store,
session_store,
Some(10), // context window size
).await?;
delete
async fn() -> PersistenceResult<()>
Deletes the session and its messages from the database
Complete Example
use mofa_foundation::llm::*;
use std::sync::Arc;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Create client
let provider = Arc::new(OpenAIProvider::new("sk-xxx"));
let client = LLMClient::new(provider);
// Simple query
let answer = client.ask("What is Rust?").await?;
println!("Answer: {}", answer);
// Complex chat with builder
let response = client.chat()
.system("You are a coding assistant.")
.user("How do I read a file in Rust?")
.temperature(0.7)
.max_tokens(500)
.send()
.await?;
println!("Response: {}", response.content().unwrap());
// Streaming response
use futures::StreamExt;
let mut stream = client.chat()
.user("Tell me a short story")
.send_stream()
.await?;
while let Some(chunk) = stream.next().await {
if let Some(content) = chunk?.content() {
print!("{}", content);
}
}
// Multi-turn session
let mut session = ChatSession::new(client)
.with_system("You are a helpful assistant.");
let r1 = session.send("My name is Alice").await?;
let r2 = session.send("What's my name?").await?;
println!("Session response: {}", r2);
Ok(())
}