Recurrent Layers
import { tensor } from "deepbox/ndarray";
import { RNN } from "deepbox/nn";

// RNN(inputSize, hiddenSize, options)
// Input shape (batchFirst=true): (batch, seqLen, inputSize)
const rnn = new RNN(4, 8, { batchFirst: true });
console.log("RNN(inputSize=4, hiddenSize=8, batchFirst=true)");

// Batch of 2 sequences, each with 3 time steps and 4 features
const rnnInput = tensor([
  [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
  ],
  [
    [13, 14, 15, 16],
    [17, 18, 19, 20],
    [21, 22, 23, 24],
  ],
]);
console.log(`Input shape: [${rnnInput.shape.join(", ")}]`);

const rnnResult = rnn.forward(rnnInput);
// Fix: the original checked `rnnResult instanceof GradTensor`, but GradTensor
// is never imported in this file, so the line throws a ReferenceError.
// Unwrap by duck-typing instead: only a grad-tracking result carries `.tensor`,
// a plain tensor falls through via `??`.
// NOTE(review): assumes GradTensor exposes its raw data as `.tensor` — confirm
// against the deepbox autograd docs.
const rnnOut = rnnResult.tensor ?? rnnResult;
console.log(`Output shape: [${rnnOut.shape.join(", ")}]`);
console.log("Output contains hidden states for all time steps");
RNN(inputSize=4, hiddenSize=8, batchFirst=true)
Input shape: [2, 3, 4]
Output shape: [2, 3, 8]
Output contains hidden states for all time steps
import { LSTM } from "deepbox/nn";

// LSTM adds a cell state for better long-range memory
const lstm = new LSTM(4, 8, { batchFirst: true });
console.log("\nLSTM(inputSize=4, hiddenSize=8, batchFirst=true)");
console.log(`Input shape: [${rnnInput.shape.join(", ")}]`);

const lstmResult = lstm.forward(rnnInput);
// Fix: `instanceof GradTensor` referenced an un-imported name (ReferenceError).
// Duck-type instead: a grad-tracking result exposes `.tensor`; a plain tensor
// falls through via `??`.
const lstmOut = lstmResult.tensor ?? lstmResult;
console.log(`Output shape: [${lstmOut.shape.join(", ")}]`);
console.log("LSTM uses forget/input/output gates for selective memory");
LSTM(inputSize=4, hiddenSize=8, batchFirst=true)
Input shape: [2, 3, 4]
Output shape: [2, 3, 8]
LSTM uses forget/input/output gates for selective memory
import { GRU } from "deepbox/nn";

// GRU has fewer parameters than LSTM
const gru = new GRU(4, 8, { batchFirst: true });
console.log("\nGRU(inputSize=4, hiddenSize=8, batchFirst=true)");
console.log(`Input shape: [${rnnInput.shape.join(", ")}]`);

const gruResult = gru.forward(rnnInput);
// Fix: `instanceof GradTensor` referenced an un-imported name (ReferenceError).
// Duck-type instead: a grad-tracking result exposes `.tensor`; a plain tensor
// falls through via `??`.
const gruOut = gruResult.tensor ?? gruResult;
console.log(`Output shape: [${gruOut.shape.join(", ")}]`);
console.log("GRU uses reset/update gates — fewer params than LSTM");
GRU(inputSize=4, hiddenSize=8, batchFirst=true)
Input shape: [2, 3, 4]
Output shape: [2, 3, 8]
GRU uses reset/update gates — fewer params than LSTM
// Stacked (multi-layer) RNN: layer 2 consumes layer 1's hidden states
const deepRnn = new RNN(4, 16, { numLayers: 2, batchFirst: true });
console.log("\nRNN(inputSize=4, hiddenSize=16, numLayers=2)");
console.log(`Input shape: [${rnnInput.shape.join(", ")}]`);

const deepResult = deepRnn.forward(rnnInput);
// Fix: `instanceof GradTensor` referenced an un-imported name (ReferenceError).
// Duck-type instead: a grad-tracking result exposes `.tensor`; a plain tensor
// falls through via `??`.
const deepOut = deepResult.tensor ?? deepResult;
console.log(`Output shape: [${deepOut.shape.join(", ")}]`);
console.log("2-layer RNN extracts higher-level sequential patterns");
RNN(inputSize=4, hiddenSize=16, numLayers=2)
Input shape: [2, 3, 4]
Output shape: [2, 3, 16]
2-layer RNN extracts higher-level sequential patterns
// Count the parameter tensors each layer type registers.
// NOTE(review): this counts tensors yielded by .parameters(), not individual
// scalar weights — presumably what the comparison intends; verify if exact
// weight counts are wanted.
const countParams = (layer) => [...layer.parameters()].length;

const rnnParams = countParams(rnn);
const lstmParams = countParams(lstm);
const gruParams = countParams(gru);

console.log("\nParameter Comparison:");
console.log(`RNN parameters: ${rnnParams}`);
console.log(`LSTM parameters: ${lstmParams} (4x gates)`);
console.log(`GRU parameters: ${gruParams} (3x gates)`);
When to Use Each Type
- RNN: Simple sequences, faster training
- LSTM: Long sequences; mitigates the vanishing gradient problem
- GRU: Balance between RNN and LSTM, fewer parameters
Use Cases
- Time series forecasting
- Natural language processing
- Speech recognition
- Video analysis
- Sequence generation
Next Steps
Attention & Transformers
Learn modern sequence modeling with attention
Dropout
Prevent overfitting in recurrent networks