Using Random Forests
import { loadIris } from "deepbox/datasets";
import { accuracy } from "deepbox/metrics";
import { RandomForestClassifier } from "deepbox/ml";
import { trainTestSplit } from "deepbox/preprocess";

// Load the iris dataset and hold out 20% of it for evaluation.
const iris = loadIris();
const [trainX, testX, trainY, testY] = trainTestSplit(iris.data, iris.target, {
  testSize: 0.2,
  randomState: 42, // fixed seed so the split is reproducible
});

// Fit a 50-tree forest, each tree capped at depth 5 (seeded for reproducibility).
const classifier = new RandomForestClassifier({
  nEstimators: 50,
  maxDepth: 5,
  randomState: 42,
});
classifier.fit(trainX, trainY);

// Evaluate on the held-out split and report accuracy as a percentage.
const predicted = classifier.predict(testX);
const testAccuracy = accuracy(testY, predicted);
console.log("Random Forest Classifier");
console.log(`Accuracy: ${(Number(testAccuracy) * 100).toFixed(2)}%`);
import { mse, r2Score } from "deepbox/metrics";
import { RandomForestRegressor } from "deepbox/ml";
import { tensor } from "deepbox/ndarray";

// Synthetic regression data: every row satisfies y = x1 + 2 * x2
// (20 samples, 2 features), so a good model should fit it closely.
const XReg = tensor([
  [1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
  [6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
  [1, 3], [2, 5], [3, 2], [4, 1], [5, 4],
  [6, 3], [7, 6], [8, 5], [9, 8], [10, 7],
]);
const yReg = tensor([5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 7, 12, 7, 6, 13, 12, 19, 18, 25, 24]);
const [XRegTrain, XRegTest, yRegTrain, yRegTest] = trainTestSplit(
  XReg,
  yReg,
  {
    testSize: 0.2,
    randomState: 42, // fixed seed so the split is reproducible
  }
);

// Train a random forest regressor (same hyperparameters as the classifier above).
const rfr = new RandomForestRegressor({
  nEstimators: 50,
  maxDepth: 5,
  randomState: 42,
});
rfr.fit(XRegTrain, yRegTrain);
const rfrPred = rfr.predict(XRegTest);
console.log("\nRandom Forest Regressor");
// Coerce metric results to a primitive number before .toFixed, mirroring
// the Number(acc) coercion used in the classifier section — calling
// .toFixed directly on a non-primitive metric return value would throw.
console.log(`MSE: ${Number(mse(yRegTest, rfrPred)).toFixed(4)}`);
console.log(`R²: ${Number(r2Score(yRegTest, rfrPred)).toFixed(4)}`);
// A more heavily tuned forest configuration.
const rf = new RandomForestClassifier({
  // More trees generally improve performance, with diminishing returns.
  nEstimators: 100,
  // Deeper trees are more expressive.
  maxDepth: 10,
  // Requiring more samples per split reduces overfitting.
  minSamplesSplit: 5,
  // Fixed seed for reproducible results.
  randomState: 42,
});
Why Random Forests Work
- Bagging: Each tree trains on a bootstrap sample of the data (drawn with replacement)
- Feature randomness: Each split considers a random subset of features
- Voting: Final prediction aggregates all trees (majority vote or average)
- Reduced overfitting: Errors made by individual trees tend to cancel out when their predictions are aggregated
Next Steps
Gradient Boosting
Achieve even higher accuracy with boosting
Clustering
Discover patterns with unsupervised learning