Skip to main content

load()

Load a model and processor from a local directory or Hugging Face Hub repo. Downloads the model automatically if it is not already cached locally.
from mlx_vlm import load

model, processor = load(path_or_hf_repo)

Signature

def load(
    path_or_hf_repo: str,
    adapter_path: Optional[str] = None,
    lazy: bool = False,
    revision: Optional[str] = None,
    **kwargs,
) -> Tuple[nn.Module, ProcessorMixin]:

Parameters

path_or_hf_repo
str
required
Local directory path or Hugging Face repository ID (e.g. "mlx-community/Qwen2-VL-2B-Instruct-4bit").
adapter_path
str
default:"None"
Path to LoRA adapter weights. When provided, LoRA layers are applied on top of the base model.
lazy
bool
default:"False"
When False, all model parameters are evaluated (loaded into memory) before the function returns. When True, parameters are loaded on first use. Set to True for large models when you want to defer memory allocation.
revision
str
default:"None"
A Hugging Face revision identifier: a branch name, tag, or commit hash. When None, the "main" revision is used.
quantize_activations
bool
default:"False"
Convert QuantizedLinear layers to QQLinear layers for activation quantization. Required when running mxfp8 or nvfp4 quantized models on NVIDIA CUDA. Has no effect on Apple Silicon (Metal).
force_download
bool
default:"False"
Force re-download from Hugging Face Hub even if the model is already cached.
trust_remote_code
bool
default:"False"
Allow execution of custom model code from the repository. Required for some models that ship non-standard architectures.

Returns

A tuple of (model, processor):
model
nn.Module
The loaded MLX model, ready for inference.
processor
ProcessorMixin
The model’s processor (tokenizer + image processor). Includes a detokenizer attribute added by MLX-VLM for streaming decoding.

Raises

  • FileNotFoundError — Config file or .safetensors weight files not found at the given path.
  • ValueError — Model class or model args class cannot be found or instantiated.

Examples

from mlx_vlm import load

model, processor = load("mlx-community/Qwen2-VL-2B-Instruct-4bit")
Note: quantize_activations=True is only needed for models quantized with mxfp8 or nvfp4 modes when running on CUDA. On Apple Silicon (Metal), those models work without the flag.

load_config()

Load the model configuration from a local directory or Hugging Face repo.

Signature

def load_config(
    model_path: Union[str, Path],
    **kwargs,
) -> dict:

Parameters

model_path
str | Path
required
Local path to a model directory or a Hugging Face repository ID. If a string is passed that is not a local path, the model is first downloaded.

Returns

config
dict
The parsed config.json for the model, with any eos_token_id overrides from generation_config.json applied.

Example

from mlx_vlm.utils import load_config

config = load_config("mlx-community/Qwen2-VL-2B-Instruct-4bit")
print(config["model_type"])  # qwen2_vl

prepare_inputs()

Tokenize prompts and preprocess images/audio into model-ready tensors.

Signature

def prepare_inputs(
    processor,
    images=None,
    audio=None,
    prompts=None,
    image_token_index=None,
    resize_shape=None,
    add_special_tokens=False,
    padding=True,
    padding_side="left",
    pad_to_uniform_size=False,
    return_tensors="mlx",
    **kwargs,
) -> dict:

Parameters

processor
ProcessorMixin
required
The model processor returned by load().
images
list | None
default:"None"
A list of image paths, URLs, or PIL.Image.Image objects.
audio
list | None
default:"None"
A list of audio file paths or URLs.
prompts
str | list[str] | None
default:"None"
The formatted prompt string or list of prompt strings.
image_token_index
int | None
default:"None"
Token index used to mark image positions in the input IDs. When None, the value is read from model.config.image_token_index.
resize_shape
tuple[int, int] | None
default:"None"
Resize all images to (height, width) before processing.
add_special_tokens
bool
default:"False"
Pass add_special_tokens to the tokenizer.
padding
bool
default:"True"
Pad tokenized inputs to the same length when processing a batch.
padding_side
str
default:"\"left\""
Side to apply padding ("left" or "right"). Left padding is standard for causal generation.
pad_to_uniform_size
bool
default:"False"
When True, resize all images to a uniform size derived from the image processor configuration. Used for batched image inputs.
return_tensors
str
default:"\"mlx\""
Tensor format to return. Keep the default "mlx" to receive MLX arrays.

Returns

A dictionary containing:
input_ids
mx.array
Token IDs of shape (batch, seq_len).
attention_mask
mx.array
Attention mask of shape (batch, seq_len).
pixel_values
mx.array | None
Preprocessed image pixels (present when images are provided).

process_image()

Load and optionally resize a single image.

Signature

def process_image(
    img: Union[str, PIL.Image.Image],
    resize_shape: Optional[Tuple[int, int]],
    image_processor,
) -> PIL.Image.Image:

Parameters

img
str | PIL.Image.Image
required
An image URL, local file path, or a PIL.Image.Image object.
resize_shape
tuple[int, int] | None
required
Target (max_width, max_height) to resize the image to. Pass None to skip resizing.
image_processor
BaseImageProcessor | None
required
The model’s image processor. When a custom BaseImageProcessor is provided, resizing is handled by that processor instead.

Returns

img
PIL.Image.Image
The loaded, RGB-converted, and optionally resized image.

Example

from mlx_vlm.utils import process_image

img = process_image("http://images.cocodataset.org/val2017/000000039769.jpg", None, None)
print(img.size)  # (640, 480)

Build docs developers (and LLMs) love