Skip to main content

Overview

The LLMClient provides a high-level, ergonomic API for interacting with LLM providers. It wraps LLMProvider implementations and provides convenient methods for chat, embeddings, and session management.

Creating a Client

Basic Usage

use mofa_foundation::llm::{LLMClient, OpenAIProvider};
use std::sync::Arc;

let provider = Arc::new(OpenAIProvider::new("sk-xxx"));
let client = LLMClient::new(provider);

With Configuration

use mofa_foundation::llm::{LLMClient, LLMConfig, OpenAIProvider};
use std::sync::Arc;

let provider = Arc::new(OpenAIProvider::new("sk-xxx"));

let config = LLMConfig {
    default_model: Some("gpt-4o".to_string()),
    default_temperature: Some(0.7),
    default_max_tokens: Some(2048),
    ..Default::default()
};

let client = LLMClient::with_config(provider, config);

Methods

Simple Queries

ask
async fn(question: impl Into<String>) -> LLMResult<String>
Sends a simple question and returns the text response.

Example:
let answer = client.ask("What is Rust?").await?;
println!("{}", answer);
ask_with_system
async fn(system: impl Into<String>, question: impl Into<String>) -> LLMResult<String>
Sends a question with a system prompt.

Example:
let answer = client.ask_with_system(
    "You are a Rust expert.",
    "Explain ownership."
).await?;

Chat Builder

chat
fn() -> ChatRequestBuilder
Creates a chat request builder for constructing complex requests.

Example:
let response = client.chat()
    .system("You are a helpful assistant.")
    .user("Hello!")
    .temperature(0.8)
    .max_tokens(1000)
    .send()
    .await?;

Embeddings

embed
async fn(input: impl Into<String>) -> LLMResult<Vec<f32>>
Generates an embedding vector for a single text input.

Example:
let embedding = client.embed("Hello world").await?;
println!("Embedding dimension: {}", embedding.len());
embed_batch
async fn(inputs: Vec<String>) -> LLMResult<Vec<Vec<f32>>>
Generates embedding vectors for multiple text inputs.

Example:
let embeddings = client.embed_batch(vec![
    "First text".to_string(),
    "Second text".to_string(),
]).await?;

ChatRequestBuilder

The builder pattern for constructing chat requests.

Message Methods

system
fn(content: impl Into<String>) -> Self
Adds a system message to the conversation
user
fn(content: impl Into<String>) -> Self
Adds a user message to the conversation
assistant
fn(content: impl Into<String>) -> Self
Adds an assistant message to the conversation
message
fn(message: ChatMessage) -> Self
Adds a complete message object
messages
fn(messages: Vec<ChatMessage>) -> Self
Adds multiple messages at once

Multi-Modal Support

user_with_content
fn(content: MessageContent) -> Self
Adds a user message with structured content (text, images, audio, video).

Example:
use mofa_foundation::llm::{MessageContent, ContentPart, ImageUrl, ImageDetail};

let content = MessageContent::Parts(vec![
    ContentPart::Text { text: "What is in this image?".to_string() },
    ContentPart::Image {
        image_url: ImageUrl {
            url: "data:image/png;base64,...".to_string(),
            detail: Some(ImageDetail::High),
        },
    },
]);

let response = client.chat()
    .user_with_content(content)
    .send()
    .await?;

Parameter Configuration

temperature
fn(temp: f32) -> Self
Sets the sampling temperature (0.0 to 2.0)
max_tokens
fn(tokens: u32) -> Self
Sets the maximum number of tokens to generate
json_mode
fn() -> Self
Enables JSON response format.

Example:
let response = client.chat()
    .system("You are a JSON API. Return {\"answer\": string}")
    .user("What is 2+2?")
    .json_mode()
    .send()
    .await?;
stop
fn(sequences: Vec<String>) -> Self
Sets stop sequences for generation

Tool Calling

tool
fn(tool: Tool) -> Self
Adds a tool/function that the model can call.

Example:
use mofa_foundation::llm::function_tool;
use serde_json::json;

let weather_tool = function_tool(
    "get_weather",
    "Get weather for a location",
    json!({
        "type": "object",
        "properties": {
            "location": { "type": "string" }
        },
        "required": ["location"]
    })
);

let response = client.chat()
    .user("What's the weather in Tokyo?")
    .tool(weather_tool)
    .send()
    .await?;
with_tool_executor
fn(executor: Arc<dyn ToolExecutor>) -> Self
Sets the tool executor for automatic tool execution
max_tool_rounds
fn(rounds: u32) -> Self
Sets the maximum number of tool calling rounds (default: 10)

Retry Configuration

with_retry
fn() -> Self
Enables retry with a default policy for transient failures.

Example:
let response = client.chat()
    .json_mode()
    .with_retry()
    .send()
    .await?;
max_retries
fn(max: u32) -> Self
Sets the maximum number of retry attempts.

Example:
let response = client.chat()
    .max_retries(3)
    .send()
    .await?;

Execution Methods

send
async fn() -> LLMResult<ChatCompletionResponse>
Sends the request and returns the complete response
send_stream
async fn() -> LLMResult<ChatStream>
Sends the request and returns a streaming response.

Example:
use futures::StreamExt;

let mut stream = client.chat()
    .user("Tell me a story")
    .send_stream()
    .await?;

while let Some(chunk) = stream.next().await {
    let chunk = chunk?;
    if let Some(content) = chunk.content() {
        print!("{}", content);
    }
}
send_with_tools
async fn() -> LLMResult<ChatCompletionResponse>
Sends the request and automatically executes tool calls in a loop.

Example:
struct MyExecutor;

#[async_trait]
impl ToolExecutor for MyExecutor {
    async fn execute(&self, name: &str, args: &str) -> LLMResult<String> {
        match name {
            "get_weather" => Ok(r#"{"temp": 22, "condition": "sunny"}"#.to_string()),
            _ => Err(LLMError::Other("Unknown tool".to_string()))
        }
    }
    
    async fn available_tools(&self) -> LLMResult<Vec<Tool>> {
        Ok(vec![/* tools */])
    }
}

let response = client.chat()
    .user("What's the weather like?")
    .with_tool_executor(Arc::new(MyExecutor))
    .send_with_tools()
    .await?;

ChatSession

Manages multi-turn conversations with message history.

Creating a Session

use mofa_foundation::llm::ChatSession;

let mut session = ChatSession::new(client)
    .with_system("You are a helpful assistant.");

Session Methods

send
async fn(content: impl Into<String>) -> LLMResult<String>
Sends a message and maintains the conversation history.

Example:
let r1 = session.send("Hello!").await?;
let r2 = session.send("What did I just say?").await?;
// r2 will reference the previous message
send_with_content
async fn(content: MessageContent) -> LLMResult<String>
Sends structured content (with images, audio, etc.)
with_system
fn(prompt: impl Into<String>) -> Self
Sets the system prompt for the session
with_context_window_size
fn(size: Option<usize>) -> Self
Sets the maximum number of conversation rounds to keep in memory.

Example:
let session = ChatSession::new(client)
    .with_context_window_size(Some(10)); // Keep last 10 rounds
with_tools
fn(tools: Vec<Tool>, executor: Arc<dyn ToolExecutor>) -> Self
Enables tool calling for the session
messages
fn() -> &[ChatMessage]
Returns the message history
clear
fn()
Clears the message history
session_id
fn() -> uuid::Uuid
Returns the unique session identifier

Persistence

save
async fn() -> PersistenceResult<()>
Saves the session and its messages to the database.

Example:
session.save().await?;
load
async fn(...) -> PersistenceResult<Self>
Loads a session from the database.

Example:
let session = ChatSession::load(
    session_id,
    client,
    user_id,
    tenant_id,
    agent_id,
    message_store,
    session_store,
    Some(10), // context window size
).await?;
delete
async fn() -> PersistenceResult<()>
Deletes the session and its messages from the database

Complete Example

use mofa_foundation::llm::*;
use std::sync::Arc;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Create client
    let provider = Arc::new(OpenAIProvider::new("sk-xxx"));
    let client = LLMClient::new(provider);

    // Simple query
    let answer = client.ask("What is Rust?").await?;
    println!("Answer: {}", answer);

    // Complex chat with builder
    let response = client.chat()
        .system("You are a coding assistant.")
        .user("How do I read a file in Rust?")
        .temperature(0.7)
        .max_tokens(500)
        .send()
        .await?;
    println!("Response: {}", response.content().unwrap());

    // Streaming response
    use futures::StreamExt;
    let mut stream = client.chat()
        .user("Tell me a short story")
        .send_stream()
        .await?;
    
    while let Some(chunk) = stream.next().await {
        if let Some(content) = chunk?.content() {
            print!("{}", content);
        }
    }

    // Multi-turn session
    let mut session = ChatSession::new(client)
        .with_system("You are a helpful assistant.");
    
    let r1 = session.send("My name is Alice").await?;
    let r2 = session.send("What's my name?").await?;
    println!("Session response: {}", r2);

    Ok(())
}

Build docs developers (and LLMs) love