JavaScript Inference API
The ONNX Runtime JavaScript API enables inference in web browsers and Node.js applications. This guide covers both environments with real code examples.
Installation
Node.js
Copy
Ask AI
npm install onnxruntime-node
# GPU (CUDA) support is bundled with onnxruntime-node on supported
# platforms; there is no separate onnxruntime-node-gpu package.
# Select the GPU at runtime with executionProviders: ['cuda'].
Web / Browser
Copy
Ask AI
npm install onnxruntime-web
React Native
Copy
Ask AI
npm install onnxruntime-react-native
Quick Start
Node.js
Copy
Ask AI
const ort = require('onnxruntime-node');

/**
 * Loads model.onnx, runs a single inference on a zero-filled
 * 1x3x224x224 float32 tensor, and logs the output shape and data.
 */
async function main() {
  // Create an inference session from a model file on disk
  const session = await ort.InferenceSession.create('model.onnx');

  // Prepare a zero-filled NCHW input tensor
  const input = new ort.Tensor(
    'float32',
    new Float32Array(1 * 3 * 224 * 224),
    [1, 3, 224, 224]
  );

  // Run inference; keys of `feeds` must match the model's input names
  const feeds = { input: input };
  const results = await session.run(feeds);

  // Read the output tensor by its model-defined name
  const output = results.output;
  console.log('Output shape:', output.dims);
  console.log('Output data:', output.data);
}

// Report failures instead of leaving an unhandled promise rejection
main().catch(console.error);
Web / Browser
Copy
Ask AI
<!DOCTYPE html>
<html>
<head>
  <!-- The prebuilt onnxruntime-web bundle defines a global `ort` object -->
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
</head>
<body>
<script>
  // Create a session, run one inference on a zero-filled
  // 1x3x224x224 float32 tensor, and log the results.
  async function runInference() {
    // Create session
    const session = await ort.InferenceSession.create('model.onnx');
    // Prepare input
    const input = new ort.Tensor(
      'float32',
      new Float32Array(1 * 3 * 224 * 224),
      [1, 3, 224, 224]
    );
    // Run inference
    const feeds = { input: input };
    const results = await session.run(feeds);
    console.log('Results:', results);
  }
  // Report failures instead of leaving an unhandled promise rejection
  runInference().catch(console.error);
</script>
</body>
</html>
ES6 Modules
Copy
Ask AI
import * as ort from 'onnxruntime-web';
// or for Node.js:
// import * as ort from 'onnxruntime-node';

// Create a session, wrap the caller-provided `data`/`shape` in a
// tensor, and run a single inference, returning the output map.
async function runModel() {
  const session = await ort.InferenceSession.create('model.onnx');
  const inputTensor = new ort.Tensor('float32', data, shape);
  return session.run({ input: inputTensor });
}
InferenceSession
Creating a Session
From URL (Web):
Copy
Ask AI
// Load from URL (web)
const session = await ort.InferenceSession.create(
  'https://example.com/model.onnx'
);

// Load from a local file path (Node.js). Note the distinct variable
// name: redeclaring `session` with const in the same scope would be
// a SyntaxError.
const nodeSession = await ort.InferenceSession.create('./model.onnx');
Copy
Ask AI
// Fetch the model as an ArrayBuffer (web)
const response = await fetch('model.onnx');
if (!response.ok) {
  // fetch() only rejects on network failure; HTTP errors (404, 500)
  // resolve normally and must be checked explicitly.
  throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
}
const arrayBuffer = await response.arrayBuffer();
const session = await ort.InferenceSession.create(arrayBuffer);
Copy
Ask AI
// A model can also be loaded from raw bytes already in memory
const modelData = new Uint8Array(/* model bytes */);
const session = await ort.InferenceSession.create(modelData);
Copy
Ask AI
// Session creation with common options: an ordered execution-provider
// preference list plus optimization, threading, memory, and logging
// settings (see the SessionOptions section below for details).
const options = {
executionProviders: ['webgpu', 'wasm'],
graphOptimizationLevel: 'all',
intraOpNumThreads: 4,
enableCpuMemArena: true,
enableMemPattern: true,
logSeverityLevel: 2
};
const session = await ort.InferenceSession.create(
'model.onnx',
options
);
Session Properties
Copy
Ask AI
// Input and output tensor names are exposed directly on the session.
const { inputNames, outputNames } = session;
console.log('Input names:', inputNames);
console.log('Output names:', outputNames);

// Summarize the model's I/O signature in one object
console.log('Session info:', {
  inputs: inputNames,
  outputs: outputNames
});
Running Inference
Basic inference:
Copy
Ask AI
// Build a 1x4 float32 input tensor
const inputTensor = new ort.Tensor(
  'float32',
  Float32Array.from([1.0, 2.0, 3.0, 4.0]),
  [1, 4]
);

// Map the model's input name to its tensor
const feeds = { input: inputTensor };

// Execute the model
const results = await session.run(feeds);

// Outputs are keyed by the model's output names
const output = results.output;
console.log('Output data:', output.data);
console.log('Output shape:', output.dims);
Copy
Ask AI
// Models with multiple inputs: provide one tensor per input name.
const feeds = {
'input1': new ort.Tensor('float32', data1, shape1),
'input2': new ort.Tensor('float32', data2, shape2)
};
const results = await session.run(feeds);
Copy
Ask AI
// Only compute specific outputs: pass an array of output names as
// the second argument to run(feeds, fetches).
const feeds = { 'input': inputTensor };
const fetchesNames = ['output1', 'output2'];
const results = await session.run(feeds, fetchesNames);
const output1 = results.output1;
const output2 = results.output2;
Copy
Ask AI
// Per-call RunOptions: override logging for this run and attach a
// tag that identifies it in logs.
const runOptions = {
logSeverityLevel: 2,
logVerbosityLevel: 0,
tag: 'my-run'
};
const results = await session.run(feeds, runOptions);
SessionOptions
Configure session behavior:
Copy
Ask AI
// Full SessionOptions example. Execution providers are tried in the
// listed order; unavailable ones are skipped.
const sessionOptions = {
// Execution providers (in priority order)
executionProviders: [
'webgpu', // WebGPU (web only)
'webnn', // WebNN (web only)
'wasm' // WebAssembly (web and Node.js)
],
// Graph optimization level
graphOptimizationLevel: 'all',
// Options: 'disabled', 'basic', 'extended', 'all'
// Threading (Node.js only)
intraOpNumThreads: 4,
interOpNumThreads: 2,
// Memory optimization
enableCpuMemArena: true,
enableMemPattern: true,
// Execution mode
executionMode: 'sequential',
// Options: 'sequential', 'parallel'
// Logging
logSeverityLevel: 2, // 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal
logVerbosityLevel: 0,
logId: 'MySession',
// Extra configurations (raw key/value pairs passed to the runtime)
extra: {
session: {
disable_prepacking: '0'
}
}
};
const session = await ort.InferenceSession.create(
'model.onnx',
sessionOptions
);
Tensor
Create and manipulate tensors. Create from typed array:
Copy
Ask AI
// A Tensor pairs a dtype string, a data container, and a dims array.
// Float32 tensor
const data = new Float32Array([1.0, 2.0, 3.0, 4.0]);
const tensor = new ort.Tensor('float32', data, [2, 2]);
// Int32 tensor
const intData = new Int32Array([1, 2, 3, 4]);
const intTensor = new ort.Tensor('int32', intData, [2, 2]);
// String tensor (data is a plain string array, not a typed array)
const strTensor = new ort.Tensor('string', ['hello', 'world'], [2]);
Copy
Ask AI
// Plain JS arrays must be converted to a typed array first
const data = [1.0, 2.0, 3.0, 4.0];
const tensor = new ort.Tensor('float32', Float32Array.from(data), [2, 2]);
Copy
Ask AI
// Tensor instance properties:
console.log('Type:', tensor.type); // 'float32'
console.log('Data:', tensor.data); // TypedArray
console.log('Shape:', tensor.dims); // [2, 2]
console.log('Size:', tensor.size); // 4 (total element count)
Copy
Ask AI
// Tensors of various ranks; dims [] denotes a scalar.
// Scalar
const scalar = new ort.Tensor('float32', Float32Array.from([1.0]), []);
// Vector
const vector = new ort.Tensor('float32', new Float32Array(10), [10]);
// Matrix
const matrix = new ort.Tensor('float32', new Float32Array(100), [10, 10]);
// Image (NCHW format: batch, channels, height, width)
const image = new ort.Tensor(
'float32',
new Float32Array(1 * 3 * 224 * 224),
[1, 3, 224, 224]
);
Execution Providers
Web Execution Providers
WebGPU (GPU acceleration in browser):
Copy
Ask AI
// WebGPU execution provider: GPU acceleration in the browser
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: ['webgpu']
});
Copy
Ask AI
// WebNN execution provider, configured via an options object
// rather than a bare name string.
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: [
{
name: 'webnn',
deviceType: 'gpu',
powerPreference: 'default'
}
]
});
Copy
Ask AI
// WebAssembly execution provider: CPU execution, the web fallback
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: ['wasm']
});
Node.js Execution Providers
CPU:
Copy
Ask AI
// Default CPU execution provider (Node.js)
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: ['cpu']
});
Copy
Ask AI
// CUDA execution provider; deviceId selects which GPU to use.
// NOTE(review): requires CUDA-capable hardware/drivers — confirm
// platform support against the onnxruntime-node docs.
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: [
{
name: 'cuda',
deviceId: 0
}
]
});
Copy
Ask AI
// DirectML execution provider (Windows GPU acceleration)
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: ['dml']
});
Copy
Ask AI
// Core ML execution provider (Apple platforms)
const session = await ort.InferenceSession.create('model.onnx', {
executionProviders: ['coreml']
});
Complete Examples
Node.js Image Classification
Copy
Ask AI
const ort = require('onnxruntime-node');
const fs = require('fs');
const { createCanvas, loadImage } = require('canvas');

/**
 * Image classifier backed by an ONNX model.
 * Construct with a model path, call initialize() once, then call
 * classify() for each image.
 */
class ImageClassifier {
  /**
   * @param {string} modelPath - Path to the .onnx model file.
   */
  constructor(modelPath) {
    this.modelPath = modelPath;
    this.session = null;
  }

  /** Creates the inference session; must be called before classify(). */
  async initialize() {
    const options = {
      executionProviders: ['cpu'],
      graphOptimizationLevel: 'all',
      intraOpNumThreads: 4
    };
    this.session = await ort.InferenceSession.create(
      this.modelPath,
      options
    );
    console.log('Model loaded:', this.modelPath);
    console.log('Input names:', this.session.inputNames);
    console.log('Output names:', this.session.outputNames);
  }

  /**
   * Loads an image, resizes it to 224x224, and converts the RGBA
   * pixels to a normalized planar-CHW float32 tensor (ImageNet
   * mean/std normalization).
   * @param {string} imagePath
   * @returns {Promise<ort.Tensor>} tensor of shape [1, 3, 224, 224]
   */
  async preprocessImage(imagePath) {
    const image = await loadImage(imagePath);
    const canvas = createCanvas(224, 224);
    const ctx = canvas.getContext('2d');

    // Resize to the model's expected 224x224 input
    ctx.drawImage(image, 0, 0, 224, 224);
    const imageData = ctx.getImageData(0, 0, 224, 224);

    // Interleaved RGBA -> planar CHW with per-channel normalization
    const pixels = imageData.data;
    const input = new Float32Array(1 * 3 * 224 * 224);
    const mean = [0.485, 0.456, 0.406];
    const std = [0.229, 0.224, 0.225];
    for (let i = 0; i < 224 * 224; i++) {
      const r = pixels[i * 4] / 255;
      const g = pixels[i * 4 + 1] / 255;
      const b = pixels[i * 4 + 2] / 255;
      input[i] = (r - mean[0]) / std[0];                 // R plane
      input[224 * 224 + i] = (g - mean[1]) / std[1];     // G plane
      input[224 * 224 * 2 + i] = (b - mean[2]) / std[2]; // B plane
    }
    return new ort.Tensor('float32', input, [1, 3, 224, 224]);
  }

  /**
   * Runs inference on one image and returns the top-5 class scores.
   * @param {string} imagePath
   * @returns {Promise<Array<{index: number, score: number}>>}
   * @throws {Error} if initialize() has not been called.
   */
  async classify(imagePath) {
    if (!this.session) {
      // Guard against use before (or after a failed) initialize()
      throw new Error('Model not initialized: call initialize() first');
    }
    const inputTensor = await this.preprocessImage(imagePath);
    const feeds = { [this.session.inputNames[0]]: inputTensor };
    const results = await this.session.run(feeds);
    const output = results[this.session.outputNames[0]];

    // Rank all class scores and keep the top 5
    return Array.from(output.data)
      .map((score, index) => ({ index, score }))
      .sort((a, b) => b.score - a.score)
      .slice(0, 5);
  }
}

// Usage
async function main() {
  const classifier = new ImageClassifier('resnet50.onnx');
  await classifier.initialize();
  const predictions = await classifier.classify('cat.jpg');
  console.log('\nTop 5 predictions:');
  predictions.forEach(pred => {
    console.log(`  Class ${pred.index}: ${pred.score.toFixed(4)}`);
  });
}
main().catch(console.error);
Web Browser Image Classification
Copy
Ask AI
<!DOCTYPE html>
<html>
<head>
  <title>ONNX Runtime Web Demo</title>
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
</head>
<body>
  <h1>Image Classification</h1>
  <input type="file" id="imageInput" accept="image/*">
  <canvas id="canvas" width="224" height="224" style="display:none"></canvas>
  <div id="results"></div>
  <script>
    let session = null;

    // Create the inference session once at page load.
    async function initModel() {
      try {
        session = await ort.InferenceSession.create('resnet50.onnx', {
          executionProviders: ['webgpu', 'wasm']
        });
        console.log('Model loaded successfully');
      } catch (error) {
        console.error('Failed to load model:', error);
      }
    }

    // Convert interleaved RGBA canvas pixels to a normalized
    // planar-CHW float32 tensor (ImageNet mean/std).
    function preprocessImage(imageData) {
      const pixels = imageData.data;
      const input = new Float32Array(1 * 3 * 224 * 224);
      const mean = [0.485, 0.456, 0.406];
      const std = [0.229, 0.224, 0.225];
      for (let i = 0; i < 224 * 224; i++) {
        const r = pixels[i * 4] / 255;
        const g = pixels[i * 4 + 1] / 255;
        const b = pixels[i * 4 + 2] / 255;
        input[i] = (r - mean[0]) / std[0];
        input[224 * 224 + i] = (g - mean[1]) / std[1];
        input[224 * 224 * 2 + i] = (b - mean[2]) / std[2];
      }
      return new ort.Tensor('float32', input, [1, 3, 224, 224]);
    }

    // Resize the image, run inference, and render top-5 predictions.
    async function classify(imageElement) {
      if (!session) {
        // Model is still loading or failed to load; don't crash on null
        console.error('Session not ready; wait for the model to load');
        return;
      }
      const canvas = document.getElementById('canvas');
      const ctx = canvas.getContext('2d');
      // Draw and resize image
      ctx.drawImage(imageElement, 0, 0, 224, 224);
      const imageData = ctx.getImageData(0, 0, 224, 224);

      const inputTensor = preprocessImage(imageData);
      const feeds = { [session.inputNames[0]]: inputTensor };

      const start = Date.now();
      const results = await session.run(feeds);
      const elapsed = Date.now() - start;

      // Rank all class scores and keep the top 5
      const output = results[session.outputNames[0]];
      const top5 = Array.from(output.data)
        .map((score, index) => ({ index, score }))
        .sort((a, b) => b.score - a.score)
        .slice(0, 5);

      // Display results
      const resultsDiv = document.getElementById('results');
      resultsDiv.innerHTML = `<h3>Results (${elapsed}ms):</h3>`;
      top5.forEach(pred => {
        resultsDiv.innerHTML +=
          `<p>Class ${pred.index}: ${pred.score.toFixed(4)}</p>`;
      });
    }

    // Classify the chosen file once it is decoded into an <img>.
    document.getElementById('imageInput').addEventListener('change', (e) => {
      const file = e.target.files[0];
      if (file) {
        const reader = new FileReader();
        reader.onload = (event) => {
          const img = new Image();
          // classify is async: surface failures instead of a
          // silent unhandled rejection
          img.onload = () => classify(img).catch(console.error);
          img.src = event.target.result;
        };
        reader.readAsDataURL(file);
      }
    });

    // Initialize on load
    initModel();
  </script>
</body>
</html>
TypeScript Example
Copy
Ask AI
import * as ort from 'onnxruntime-node';

/** Configuration for an ONNXModel instance. */
interface ModelConfig {
  modelPath: string;
  executionProviders: string[];
  options?: ort.InferenceSession.SessionOptions;
}

/**
 * Typed wrapper around ort.InferenceSession with explicit
 * initialization and guarded accessors.
 */
class ONNXModel {
  private session: ort.InferenceSession | null = null;

  constructor(private config: ModelConfig) {}

  /** Creates the underlying session; must be called before run(). */
  async initialize(): Promise<void> {
    this.session = await ort.InferenceSession.create(
      this.config.modelPath,
      {
        executionProviders: this.config.executionProviders,
        ...this.config.options
      }
    );
  }

  /**
   * Runs inference on the given input map.
   * @throws Error if initialize() has not completed.
   */
  async run(inputs: Record<string, ort.Tensor>): Promise<ort.InferenceSession.ReturnType> {
    if (!this.session) {
      throw new Error('Model not initialized');
    }
    return await this.session.run(inputs);
  }

  /** Model input names. @throws Error if not initialized. */
  getInputNames(): readonly string[] {
    if (!this.session) {
      throw new Error('Model not initialized');
    }
    return this.session.inputNames;
  }

  /** Model output names. @throws Error if not initialized. */
  getOutputNames(): readonly string[] {
    if (!this.session) {
      throw new Error('Model not initialized');
    }
    return this.session.outputNames;
  }
}

// Usage
async function main() {
  const model = new ONNXModel({
    modelPath: 'model.onnx',
    executionProviders: ['cpu'],
    options: {
      graphOptimizationLevel: 'all',
      intraOpNumThreads: 4
    }
  });
  await model.initialize();

  const input = new ort.Tensor(
    'float32',
    new Float32Array(1 * 3 * 224 * 224),
    [1, 3, 224, 224]
  );
  const results = await model.run({
    [model.getInputNames()[0]]: input
  });
  console.log('Results:', results);
}

// Report failures instead of leaving an unhandled promise rejection
main().catch(console.error);
Environment Configuration
Web Environment
Copy
Ask AI
// Set WASM file paths (if not using CDN)
ort.env.wasm.wasmPaths = '/path/to/wasm/files/';
// Enable/disable SIMD
ort.env.wasm.simd = true;
// Enable/disable multi-threading
ort.env.wasm.numThreads = 4;
// Set log level
ort.env.logLevel = 'warning';
Node.js Environment
Copy
Ask AI
// Global onnxruntime environment flags for Node.js.
// Set global log level
ort.env.logLevel = 'warning';
// Enable debug mode (more verbose diagnostics)
ort.env.debug = true;
Error Handling
Copy
Ask AI
// Both session creation and inference can throw (bad model path,
// unsupported model, invalid feeds, runtime failure) — wrap them
// in try/catch.
try {
  const session = await ort.InferenceSession.create('model.onnx');
  const results = await session.run(feeds);
  console.log('Inference succeeded:', Object.keys(results));
} catch (error) {
  // The JS API surfaces failures as plain Error instances; there is
  // no exported ort.OnnxRuntimeError class to test against.
  if (error instanceof Error) {
    console.error('ONNX Runtime error:', error.message);
  } else {
    console.error('Error:', error);
  }
}
Supported Data Types
Copy
Ask AI
// Tensor `type` strings and the JS container each uses for `data`:
'float32' // Float32Array
'int32' // Int32Array
'int64' // BigInt64Array
'uint8' // Uint8Array
'bool' // Uint8Array (0 or 1)
'string' // string[]
Performance Tips
Use WebGPU for GPU Acceleration
Use WebGPU for GPU Acceleration
WebGPU provides the best performance in modern browsers. Always include it as the first execution provider.
Enable WASM SIMD
Enable WASM SIMD
SIMD provides significant speedups. Enable it with
`ort.env.wasm.simd = true`.
Reuse Sessions
Reuse Sessions
Creating sessions is expensive. Create once and reuse for multiple inferences.
Pre-allocate Tensors
Pre-allocate Tensors
Reuse typed arrays for inputs when possible to reduce memory allocation overhead.
Optimize Graph
Optimize Graph
Set
`graphOptimizationLevel: 'all'` for maximum optimization.
Next Steps
Model Optimization
Optimize models for web and Node.js
Execution Providers
Configure hardware acceleration