Skip to main content

LLMRails

The LLMRails class is the primary interface for creating and managing guardrails in NeMo Guardrails. It provides methods for generating responses with guardrails applied.

Constructor

LLMRails(
    config: RailsConfig,
    llm: Optional[Union[BaseLLM, BaseChatModel]] = None,
    verbose: bool = False
)
config
RailsConfig
required
A RailsConfig object containing the guardrails configuration.
llm
Optional[Union[BaseLLM, BaseChatModel]]
An optional LLM engine to use. If provided, this will be used as the main LLM and will take precedence over any main LLM specified in the config.
verbose
bool
default:"False"
Whether verbose logging should be enabled.

Methods

generate_async

Generate a completion or next message asynchronously.
async def generate_async(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
    options: Optional[Union[dict, GenerationOptions]] = None,
    state: Optional[Union[dict, State]] = None,
    streaming_handler: Optional[StreamingHandler] = None
) -> Union[str, dict, GenerationResponse, Tuple[dict, dict]]
prompt
Optional[str]
The prompt to be used for completion.
messages
Optional[List[dict]]
The history of messages to be used to generate the next message. Messages have the format:
[
    {"role": "context", "content": {"user_name": "John"}},
    {"role": "user", "content": "Hello! How are you?"},
    {"role": "assistant", "content": "I am fine, thank you!"},
    {"role": "event", "event": {"type": "UserSilent"}}
]
options
Optional[Union[dict, GenerationOptions]]
Options specific for the generation.
state
Optional[Union[dict, State]]
The state object that should be used as the starting point.
streaming_handler
Optional[StreamingHandler]
If specified, and the config supports streaming, the provided handler will be used for streaming.
response
Union[str, dict, GenerationResponse, Tuple[dict, dict]]
The completion (when a prompt is provided) or the next message.
from nemoguardrails import RailsConfig, LLMRails

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

response = await rails.generate_async(
    messages=[{"role": "user", "content": "Hello!"}]
)

generate

Synchronous version of generate_async.
def generate(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
    options: Optional[Union[dict, GenerationOptions]] = None,
    state: Optional[dict] = None
) -> Union[str, dict, GenerationResponse, Tuple[dict, dict]]

stream_async

Stream the response tokens asynchronously.
def stream_async(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
    options: Optional[Union[dict, GenerationOptions]] = None,
    state: Optional[Union[dict, State]] = None,
    include_metadata: bool = False,
    generator: Optional[AsyncIterator[str]] = None
) -> AsyncIterator[Union[str, dict]]
include_metadata
bool
default:"False"
Whether to include metadata in the streamed chunks.
generator
Optional[AsyncIterator[str]]
If provided, uses this external generator for streaming.
chunks
AsyncIterator[Union[str, dict]]
An async iterator that yields token chunks as strings, or dicts if include_metadata=True.
from nemoguardrails import RailsConfig, LLMRails

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

async for chunk in rails.stream_async(
    messages=[{"role": "user", "content": "Tell me a story"}]
):
    print(chunk, end="")

check_async

Run rails on messages to check for policy violations.
async def check_async(
    messages: List[dict],
    rail_types: Optional[List[RailType]] = None
) -> RailsResult
messages
List[dict]
required
List of message dicts with 'role' and 'content' fields.
rail_types
Optional[List[RailType]]
Optional list of rail types to run (e.g., [RailType.INPUT] or [RailType.OUTPUT]). When not provided, automatically determines which rails to run based on message roles.
result
RailsResult
Contains:
  • status: PASSED, MODIFIED, or BLOCKED
  • content: The final content after rails processing
  • rail: Name of the rail that blocked (if blocked)
from nemoguardrails import RailsConfig, LLMRails, RailStatus

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

result = await rails.check_async([
    {"role": "user", "content": "Harmful message"}
])

if result.status == RailStatus.BLOCKED:
    print(f"Blocked by: {result.rail}")

check

Synchronous version of check_async.
def check(
    messages: List[dict],
    rail_types: Optional[List[RailType]] = None
) -> RailsResult

register_action

Register a custom action for the rails configuration.
def register_action(
    action: Callable,
    name: Optional[str] = None
) -> Self
action
Callable
required
The action function to register.
name
Optional[str]
The name to use for the action. If not provided, uses the function name.
from nemoguardrails import RailsConfig, LLMRails

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

def custom_action(context: dict):
    return "Custom response"

rails.register_action(custom_action, name="my_action")

register_filter

Register a custom filter for the rails configuration.
def register_filter(
    filter_fn: Callable,
    name: Optional[str] = None
) -> Self

register_embedding_provider

Register a custom embedding provider.
def register_embedding_provider(
    cls: Type[EmbeddingModel],
    name: Optional[str] = None
) -> Self
cls
Type[EmbeddingModel]
required
The embedding model class.
name
Optional[str]
The name of the embedding engine.

explain

Returns detailed information about the latest generation.
def explain() -> ExplainInfo
info
ExplainInfo
An object containing detailed explanation information including LLM calls, activated rails, and Colang history.

Attributes

config
RailsConfig
The rails configuration object.
llm
Optional[Union[BaseLLM, BaseChatModel]]
The main LLM engine being used.
runtime
Runtime
The Colang runtime instance.
verbose
bool
Whether verbose logging is enabled.

Build docs developers (and LLMs) love