
Converting TensorFlow Models to ONNX

TensorFlow models can be converted to ONNX format using the tf2onnx library, which provides robust conversion capabilities for both TensorFlow and Keras models.

Prerequisites

pip install tensorflow onnx tf2onnx onnxruntime

Basic Conversion

Converting a Keras Model

import tensorflow as tf
import tf2onnx
import onnx

# Build a small feed-forward classifier: 10 input features, 5 output classes.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(5, activation='softmax'),
    ]
)
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Describe the expected input: dynamic batch dimension, 10 float32 features.
input_signature = (tf.TensorSpec((None, 10), tf.float32, name="input"),)

# Run the tf2onnx converter against the in-memory Keras model.
onnx_model, _ = tf2onnx.convert.from_keras(
    model, input_signature=input_signature, opset=14
)

# Serialize the resulting ONNX graph to disk.
onnx.save(onnx_model, "keras_model.onnx")

Converting from SavedModel

import tensorflow as tf  # required: tf.saved_model.save is used below
import tf2onnx

# Export the model in TensorFlow SavedModel format.
model = YourTensorFlowModel()
tf.saved_model.save(model, "saved_model_dir")

# Convert the SavedModel to ONNX.  NOTE: this is a shell command, not
# Python — run it from a terminal, not inside this script:
#
#   python -m tf2onnx.convert \
#       --saved-model saved_model_dir \
#       --output model.onnx \
#       --opset 14

Converting HuggingFace Transformers (TensorFlow)

Example workflow for converting BERT models from TensorFlow:
import tensorflow as tf
import tf2onnx
from transformers import TFAutoModel, AutoTokenizer, AutoConfig
import numpy as np

# Load the pre-trained TensorFlow model and its tokenizer.
model_name = "bert-base-uncased"
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name, config=config)

# Prepare example inputs; the tokenizer output determines which named
# inputs (input_ids, attention_mask, ...) the exported model will have.
max_length = 128
example_text = "This is a sample input"
example_inputs = tokenizer(
    example_text,
    return_tensors="tf",
    max_length=max_length,
    padding="max_length",
    truncation=True,
)

# Create input specifications with dynamic axes: every dimension is None,
# so the exported model accepts any batch size and sequence length.
specs = []
for name, value in example_inputs.items():
    dims = [None] * len(value.shape)
    specs.append(tf.TensorSpec(tuple(dims), value.dtype, name=name))

# Convert to ONNX.  The output path is kept in one variable so the code
# and the status message cannot drift apart.
output_path = "bert_tf.onnx"
onnx_model, _ = tf2onnx.convert.from_keras(
    model,
    input_signature=tuple(specs),
    opset=14,
    output_path=output_path,
)

print(f"Model converted successfully to {output_path}")

Handling Encoder-Decoder Models

For sequence-to-sequence models like T5:
import tensorflow as tf
import tf2onnx
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer

model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

# Disable the KV cache for ONNX export when the config supports it.
if hasattr(model.config, 'use_cache'):
    model.config.use_cache = False

max_length = 128


def _tokenize(text):
    # Pad/truncate to a fixed length so encoder and decoder inputs align.
    return tokenizer(
        text,
        return_tensors="tf",
        max_length=max_length,
        padding="max_length",
        truncation=True,
    )


example_inputs = _tokenize("translate English to German: Hello world")

# Seq2seq models also require decoder inputs at export time.
example_inputs["decoder_input_ids"] = _tokenize("Hallo Welt").input_ids

# One fully-dynamic TensorSpec per tokenizer output: every dimension is
# None so batch size and sequence length stay flexible.
specs = tuple(
    tf.TensorSpec((None,) * len(value.shape), value.dtype, name=name)
    for name, value in example_inputs.items()
)

# Run the conversion.
onnx_model, _ = tf2onnx.convert.from_keras(
    model,
    input_signature=specs,
    opset=14,
    output_path="t5_model.onnx",
)

Large Model Conversion

For models larger than 2GB, use the large model format:
import tf2onnx
import zipfile
import os

# Convert with large_model=True so weights are stored as external data;
# tf2onnx then writes a zip archive instead of a single .onnx file.
# `model` and `specs` come from the earlier conversion examples.
onnx_model, _ = tf2onnx.convert.from_keras(
    model,
    input_signature=tuple(specs),
    opset=14,
    large_model=True,  # Enables external data storage
    output_path="large_model.zip",
)

# Unpack the archive: it contains the graph proto plus external weight files.
with zipfile.ZipFile("large_model.zip", "r") as z:
    z.extractall("model_output")

# tf2onnx names the graph proto "__MODEL_PROTO.onnx"; give it a usable name.
# os.replace overwrites an existing destination atomically, so the racy
# exists()/remove()/rename() sequence is unnecessary.
# NOTE(review): the proto references its external weight files by relative
# path — verify they stay reachable from the renamed model's location.
model_path = os.path.join("model_output", "__MODEL_PROTO.onnx")
os.replace(model_path, "large_model.onnx")

Command Line Conversion

From SavedModel

# Convert a SavedModel directory to ONNX with verbose progress output.
python -m tf2onnx.convert \
    --saved-model saved_model_dir \
    --output model.onnx \
    --opset 14 \
    --verbose

From Checkpoint

# Convert from a TF checkpoint.  --inputs/--outputs take graph tensor
# names (note the ":0" output-index suffix).
python -m tf2onnx.convert \
    --checkpoint checkpoint.ckpt \
    --output model.onnx \
    --inputs input:0 \
    --outputs output:0 \
    --opset 14

From Frozen Graph

# Convert a frozen GraphDef (.pb); input and output tensor names must be
# supplied explicitly.
python -m tf2onnx.convert \
    --input frozen_graph.pb \
    --output model.onnx \
    --inputs input:0 \
    --outputs output:0 \
    --opset 14

Validating TensorFlow to ONNX Conversion

import tensorflow as tf
import onnxruntime as ort
import numpy as np

# Prepare a random test input: batch of 1, 128 float32 values.
# assumes the model accepts float inputs of this shape — adjust per model.
test_input = np.random.randn(1, 128).astype(np.float32)

# Run the original TensorFlow model in inference mode.
tf_output = model(test_input, training=False)

# Run the exported model through ONNX Runtime with the same input.
session = ort.InferenceSession("model.onnx")
feed = {session.get_inputs()[0].name: test_input}
onnx_output = session.run(None, feed)

# Transformer-style models return a dict of outputs; compare hidden states.
if isinstance(tf_output, dict):
    tf_output = tf_output['last_hidden_state']

# Element-wise comparison within loose tolerances — conversion introduces
# small numeric drift, so exact equality is not expected.
rtol = 1e-3
atol = 1e-3
tf_array = tf_output.numpy()
is_close = np.allclose(tf_array, onnx_output[0], rtol=rtol, atol=atol)

if is_close:
    print("✓ Conversion validated successfully")
    print(f"Max difference: {np.max(np.abs(tf_array - onnx_output[0]))}")
else:
    print("✗ Validation failed - outputs differ significantly")

Handling Special Cases

Models with Custom Layers

For models with custom layers, you may need to register custom operators:
import tf2onnx
from tf2onnx.handler import tf_op

# Register a converter for the TensorFlow op type "CustomOp".  tf2onnx's
# public registration mechanism is the @tf_op decorator from
# tf2onnx.handler; the `tf2onnx.tfonnx.register_tensorflow_op` name used
# in some older examples is not part of the current API.
@tf_op("CustomOp")
class CustomOpConverter:
    @classmethod
    def version_1(cls, ctx, node, **kwargs):
        # Rewrite `node` into equivalent ONNX ops using the graph context.
        pass

# Then proceed with conversion as usual.
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec)

Fixing Pad Token Issues

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fix the "Using pad_token, but it is not set yet" error: some checkpoints
# ship without a padding token, so register one and grow the embedding
# matrix to cover the enlarged vocabulary.
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    model.resize_token_embeddings(len(tokenizer))

Forcing CPU Execution During Export

When exporting TensorFlow models, you may want to hide GPUs so the conversion runs on CPU:
import tensorflow as tf

# Hide all GPU devices from TensorFlow so the export runs on CPU only.
tf.config.set_visible_devices([], "GPU")

# Load and convert model
model = TFAutoModel.from_pretrained(model_name)
# ... conversion code ...

Best Practices

  1. Disable training mode: Set training=False when running the model
  2. Disable caching: Set use_cache=False for models that support it
  3. Use dynamic shapes: Specify None for batch and sequence dimensions
  4. Validate conversion: Always compare TensorFlow and ONNX outputs
  5. Handle special tokens: Configure tokenizer properly before conversion
  6. Set opset version: Use opset 14 or higher for better compatibility
  7. Test edge cases: Validate with various input sizes

Troubleshooting

Common Errors

“Op type not supported”: Update tf2onnx or use a different opset version
pip install --upgrade tf2onnx
Shape inference issues: Provide explicit input shapes in the spec.

Memory errors: Use large_model=True for models larger than 2GB.

Next Steps