Skip to main content

LLMRails

The LLMRails class is the primary interface for creating and managing guardrails in NeMo Guardrails. It provides methods for generating responses with guardrails applied.

Constructor

LLMRails(
    config: RailsConfig,
    llm: Optional[Union[BaseLLM, BaseChatModel]] = None,
    verbose: bool = False
)
config
RailsConfig
required
A RailsConfig object containing the guardrails configuration.
llm
Optional[Union[BaseLLM, BaseChatModel]]
An optional LLM engine to use. If provided, this will be used as the main LLM and will take precedence over any main LLM specified in the config.
verbose
bool
default:"False"
Whether verbose logging should be enabled.

Methods

generate_async

Generate a completion or next message asynchronously.
async def generate_async(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
    options: Optional[Union[dict, GenerationOptions]] = None,
    state: Optional[Union[dict, State]] = None,
    streaming_handler: Optional[StreamingHandler] = None
) -> Union[str, dict, GenerationResponse, Tuple[dict, dict]]
prompt
Optional[str]
The prompt to be used for completion.
messages
Optional[List[dict]]
The history of messages to be used to generate the next message. Messages have the format:
[
    {"role": "context", "content": {"user_name": "John"}},
    {"role": "user", "content": "Hello! How are you?"},
    {"role": "assistant", "content": "I am fine, thank you!"},
    {"role": "event", "event": {"type": "UserSilent"}}
]
options
Optional[Union[dict, GenerationOptions]]
Options specific for the generation.
state
Optional[Union[dict, State]]
The state object that should be used as the starting point.
streaming_handler
Optional[StreamingHandler]
If specified, and the config supports streaming, the provided handler will be used for streaming.
response
Union[str, dict, GenerationResponse, Tuple[dict, dict]]
The completion (when a prompt is provided) or the next message.
from nemoguardrails import RailsConfig, LLMRails

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

response = await rails.generate_async(
    messages=[{"role": "user", "content": "Hello!"}]
)

generate

Synchronous version of generate_async.
def generate(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
    options: Optional[Union[dict, GenerationOptions]] = None,
    state: Optional[dict] = None
) -> Union[str, dict, GenerationResponse, Tuple[dict, dict]]

stream_async

Stream the response tokens asynchronously.
def stream_async(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
    options: Optional[Union[dict, GenerationOptions]] = None,
    state: Optional[Union[dict, State]] = None,
    include_metadata: bool = False,
    generator: Optional[AsyncIterator[str]] = None
) -> AsyncIterator[Union[str, dict]]
include_metadata
bool
default:"False"
Whether to include metadata in the streamed chunks.
generator
Optional[AsyncIterator[str]]
If provided, uses this external generator for streaming.
chunks
AsyncIterator[Union[str, dict]]
An async iterator that yields token chunks as strings, or dicts if include_metadata=True.
from nemoguardrails import RailsConfig, LLMRails

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

async for chunk in rails.stream_async(
    messages=[{"role": "user", "content": "Tell me a story"}]
):
    print(chunk, end="")

check_async

Run rails on messages to check for policy violations.
async def check_async(
    messages: List[dict],
    rail_types: Optional[List[RailType]] = None
) -> RailsResult
messages
List[dict]
required
List of message dicts with 'role' and 'content' fields.
rail_types
Optional[List[RailType]]
Optional list of rail types to run (e.g., [RailType.INPUT] or [RailType.OUTPUT]). When not provided, automatically determines which rails to run based on message roles.
result
RailsResult
Contains:
  • status: PASSED, MODIFIED, or BLOCKED
  • content: The final content after rails processing
  • rail: Name of the rail that blocked (if blocked)
from nemoguardrails import RailsConfig, LLMRails, RailStatus

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

result = await rails.check_async([
    {"role": "user", "content": "Harmful message"}
])

if result.status == RailStatus.BLOCKED:
    print(f"Blocked by: {result.rail}")

check

Synchronous version of check_async.
def check(
    messages: List[dict],
    rail_types: Optional[List[RailType]] = None
) -> RailsResult

register_action

Register a custom action for the rails configuration.
def register_action(
    action: Callable,
    name: Optional[str] = None
) -> Self
action
Callable
required
The action function to register.
name
Optional[str]
The name to use for the action. If not provided, uses the function name.
from nemoguardrails import RailsConfig, LLMRails

config = RailsConfig.from_path("path/to/config")
rails = LLMRails(config)

def custom_action(context: dict):
    return "Custom response"

rails.register_action(custom_action, name="my_action")

register_filter

Register a custom filter for the rails configuration.
def register_filter(
    filter_fn: Callable,
    name: Optional[str] = None
) -> Self

register_embedding_provider

Register a custom embedding provider.
def register_embedding_provider(
    cls: Type[EmbeddingModel],
    name: Optional[str] = None
) -> Self
cls
Type[EmbeddingModel]
required
The embedding model class.
name
Optional[str]
The name of the embedding engine.

explain

Returns detailed information about the latest generation.
def explain() -> ExplainInfo
info
ExplainInfo
An object containing detailed explanation information including LLM calls, activated rails, and Colang history.

Attributes

config
RailsConfig
The rails configuration object.
llm
Optional[Union[BaseLLM, BaseChatModel]]
The main LLM engine being used.
runtime
Runtime
The Colang runtime instance.
verbose
bool
Whether verbose logging is enabled.

Build docs developers (and LLMs) love