Performance Characteristics
- Processes billions of cells in memory
- Optimized array-based storage
- Zero-copy operations where possible
- Multi-threaded query execution
- Requires 64-bit compilation for datasets > 3GB
Creating Large Datasets
uses
BI.DataItem, System.Diagnostics;
// Create 1 million rows
// Build a sample table with one million rows and three typed columns.
// Returns a TDataItem that owns its child columns (caller frees it).
function CreateLargeDataset: TDataItem;
const
  RowCount = 1000000;
var
  ColID, ColValue, ColCategory: TDataItem;
  Row: Integer;
  Timer: TStopwatch;
begin
  Timer := TStopwatch.StartNew;

  // True -> this item is a table that owns its sub-items
  Result := TDataItem.Create(True);

  // Define the three columns and attach them to the table
  ColID := TDataItem.Create(TDataKind.dkInt32, 'ID');
  ColValue := TDataItem.Create(TDataKind.dkDouble, 'Value');
  ColCategory := TDataItem.Create(TDataKind.dkText, 'Category');
  Result.Items.Add(ColID);
  Result.Items.Add(ColValue);
  Result.Items.Add(ColCategory);

  // Pre-allocate all rows up-front instead of growing incrementally
  Result.Resize(RowCount);

  // Populate every row with sample values
  for Row := 0 to RowCount - 1 do
  begin
    ColID.Int32Data[Row] := Row;
    ColValue.DoubleData[Row] := Random * 1000;
    ColCategory.TextData[Row] := 'Category' + IntToStr(Random(10));
  end;

  ShowMessage(Format('Created %d rows in %d ms',
    [RowCount, Timer.ElapsedMilliseconds]));
end;
Billion Row Example
{$IFNDEF CPUX64}
{$MESSAGE WARN 'This example requires 64-bit platform'}
{$ENDIF}
uses
BI.DataItem, BI.Persist, System.Diagnostics;
// Create 1 billion cells (250 million rows x 4 columns)
// Build a table with 250 million rows x 4 columns = 1 billion cells.
// Requires a 64-bit build: the raw data alone needs several GB of RAM.
function CreateBillionCells: TDataItem;
const
  Rows = 250000000; // 250 million rows
var
  Timer: TStopwatch;
  Row: Integer;
begin
  Timer := TStopwatch.StartNew;

  Result := TDataItem.Create(True);

  // Four columns -> one billion cells in total
  Result.Items.Add('ID', TDataKind.dkInt32);
  Result.Items.Add('Value1', TDataKind.dkDouble);
  Result.Items.Add('Value2', TDataKind.dkDouble);
  Result.Items.Add('Category', TDataKind.dkInt32);

  // One single allocation for every row
  Result.Resize(Rows);

  // Populate the cells (this loop can be parallelized)
  for Row := 0 to Rows - 1 do
  begin
    Result[0].Int32Data[Row] := Row;
    Result[1].DoubleData[Row] := Random * 1000;
    Result[2].DoubleData[Row] := Random * 1000;
    Result[3].Int32Data[Row] := Random(100);
  end;

  ShowMessage(Format('Created %d billion cells in %d sec',
    [Rows * 4 div 1000000000,
     Timer.ElapsedMilliseconds div 1000]));
end;
Save and Load Large Datasets
uses
BI.Persist, System.IOUtils;
// Save to disk (compressed binary format)
// Create the billion-cell dataset and persist it to a compressed
// binary .bi file in the system temp folder.
procedure SaveBigData;
var
  BigData: TDataItem;
  Path: String;
  Bytes: Int64;
begin
  BigData := CreateBillionCells;
  try
    Path := TPath.Combine(TPath.GetTempPath, 'big_data.bi');

    // Persist in TeeBI binary format (~4.5GB for 1 billion cells)
    TDataItemPersistence.Save(BigData, Path);

    Bytes := TBIFileSource.GetFileSize(Path);
    ShowMessage(Format('Saved to: %s (%s)',
      [Path, FormatBytes(Bytes)]));
  finally
    BigData.Free;
  end;
end;
// Load from disk
// Load the previously saved .bi file from the temp folder and
// report how long the load took.
procedure LoadBigData;
var
  Loaded: TDataItem;
  Path: String;
  Timer: TStopwatch;
begin
  Path := TPath.Combine(TPath.GetTempPath, 'big_data.bi');

  // Nothing to load if SaveBigData has not been run yet
  if not TFile.Exists(Path) then
  begin
    ShowMessage('File not found');
    Exit;
  end;

  Timer := TStopwatch.StartNew;
  Loaded := TDataItemPersistence.Load(Path);
  try
    ShowMessage(Format('Loaded %d rows in %d sec',
      [Loaded.Count, Timer.ElapsedMilliseconds div 1000]));
    // Use data...
  finally
    Loaded.Free;
  end;
end;
Query Large Datasets
uses
  BI.DataItem, BI.Persist, BI.SQL,
  System.Diagnostics, System.IOUtils;
// Fast aggregation on 250 million rows
procedure QueryBigData;
var
Data, Result: TDataItem;
Stopwatch: TStopwatch;
begin
Data := LoadBigData; // 250 million rows
try
Stopwatch := TStopwatch.StartNew;
// GROUP BY aggregation
Result := TBISQL.From(Data,
'sum(Value1), avg(Value2), count(*) group by Category');
try
ShowMessage(Format('Grouped 250M rows in %d ms',
[Stopwatch.ElapsedMilliseconds]));
BIGrid1.Data := Result;
finally
// Don't free Result if assigned to grid
end;
finally
Data.Free;
end;
end;
Parallel Processing
uses
System.Threading, System.Diagnostics;
{$IF CompilerVersion > 27} // XE7 and up
// Run four independent aggregations concurrently with TParallel.For.
// Fixes: (1) the original called LoadBigData as a function, but it is
// a procedure returning nothing — the data is loaded here directly;
// (2) the Results array is now nil-initialized so the cleanup loop
// cannot free uninitialized (garbage) references.
// Requires BI.Persist and System.IOUtils in the uses clause.
procedure ParallelQuery;
var
  Data: TDataItem;
  Results: array[0..3] of TDataItem;
  Stopwatch: TStopwatch;
  I: Integer;
begin
  // Local object arrays are NOT zero-initialized in Delphi
  for I := 0 to 3 do
    Results[I] := nil;

  Data := TDataItemPersistence.Load(
    TPath.Combine(TPath.GetTempPath, 'big_data.bi'));
  try
    Stopwatch := TStopwatch.StartNew;

    // Each index runs one aggregation on its own worker thread
    TParallel.For(0, 3,
      procedure(Index: Integer)
      begin
        case Index of
          0: Results[0] := TBISQL.From(Data, 'sum(Value1) group by Category');
          1: Results[1] := TBISQL.From(Data, 'avg(Value2) group by Category');
          2: Results[2] := TBISQL.From(Data, 'count(*) group by Category');
          3: Results[3] := TBISQL.From(Data, 'max(Value1) group by Category');
        end;
      end
    );

    ShowMessage(Format('4 parallel queries in %d ms',
      [Stopwatch.ElapsedMilliseconds]));

    // Use results...

    // Cleanup (Free is a no-op on nil references)
    for I := 0 to 3 do
      Results[I].Free;
  finally
    Data.Free;
  end;
end;
{$ENDIF}
Memory Usage
uses
BI.DataItem;
// Calculate dataset memory usage
// Estimate the raw memory used by a dataset's column arrays, in bytes.
// Fix: the per-column products are computed in Int64 — the original's
// Data.Count * 8 was 32-bit arithmetic, which overflows for tables
// larger than ~268 million rows (this guide's own examples use 250M).
// NOTE(review): EstimateTextSize must be declared before this function
// (or given a forward declaration) for the unit to compile.
function CalculateMemoryUsage(const Data: TDataItem): Int64;
var
  Column: TDataItem;
  Rows: Int64;
begin
  Result := 0;
  Rows := Data.Count; // widen once; all products below are Int64
  for var I := 0 to Data.Items.Count - 1 do
  begin
    Column := Data.Items[I];
    case Column.Kind of
      dkBoolean:  Result := Result + Rows;     // 1 byte per value
      dkInt32:    Result := Result + Rows * 4;
      dkInt64:    Result := Result + Rows * 8;
      dkDouble:   Result := Result + Rows * 8;
      dkDateTime: Result := Result + Rows * 8;
      dkText:     Result := Result + EstimateTextSize(Column);
    end;
  end;
end;
// Rough text-column size estimate: assumes an average of 20 characters
// per string.
// Fix: Count is widened to Int64 before multiplying — the original's
// Count * 20 * SizeOf(Char) was 32-bit arithmetic and overflows for
// columns with more than ~53 million rows (40 bytes per row).
function EstimateTextSize(const TextColumn: TDataItem): Int64;
begin
  Result := Int64(TextColumn.Count) * 20 * SizeOf(Char);
end;
Complete Example: Big Data Demo
uses
BI.DataItem, BI.Persist, BI.SQL,
VCLBI.Grid, VCLBI.DataViewer,
System.IOUtils, System.Diagnostics;
type
// Demo form for the TeeBI big-data workflow:
// create -> save -> load -> query -> view a large dataset.
TBigDataForm = class(TForm)
BtnCreate: TButton;
BtnLoad: TButton;
BtnQuery: TButton;
BtnView: TButton;
BtnClose: TButton;
MemoInfo: TMemo;
LabelFile: TLabel;
LabelLoadTime: TLabel;
procedure FormCreate(Sender: TObject);
procedure FormDestroy(Sender: TObject);
procedure BtnCreateClick(Sender: TObject);
procedure BtnLoadClick(Sender: TObject);
procedure BtnQueryClick(Sender: TObject);
procedure BtnViewClick(Sender: TObject);
procedure BtnCloseClick(Sender: TObject);
private
// Currently loaded dataset; nil (zero-initialized class field) until
// BtnCreateClick or BtnLoadClick assigns it. Freed in FormDestroy.
Data: TDataItem;
// Full path of the on-disk .bi file; set once in FormCreate.
FileName: String;
procedure ShowDataInfo;
end;
// Compute the data file path; if a previously saved dataset file
// exists, enable the Load button and show the file's size.
procedure TBigDataForm.FormCreate(Sender: TObject);
var
  ExistingSize: Int64;
begin
  FileName := TPath.Combine(TPath.GetTempPath, 'big_data.bi');
  LabelFile.Caption := FileName;
  BtnLoad.Enabled := TFile.Exists(FileName);
  if not BtnLoad.Enabled then
    Exit;
  ExistingSize := TBIFileSource.GetFileSize(FileName);
  MemoInfo.Lines.Add('File exists: ' + FormatBytes(ExistingSize));
end;
// Create a 10-million-row dataset in memory, save it to disk, and
// report both timings in the memo.
// Fix: Data is set to nil immediately after Free — otherwise an
// exception during creation would leave a dangling reference that
// FormDestroy double-frees.
procedure TBigDataForm.BtnCreateClick(Sender: TObject);
var
  Stopwatch: TStopwatch;
const
  Rows = 10000000; // 10 million rows
begin
  Screen.Cursor := crHourGlass;
  try
    // Release any previous dataset; clear the reference at once so a
    // failure below cannot leave a stale pointer behind.
    Data.Free;
    Data := nil;

    MemoInfo.Clear;
    MemoInfo.Lines.Add('Creating ' + IntToStr(Rows) + ' rows...');
    Application.ProcessMessages;

    Stopwatch := TStopwatch.StartNew;

    // Create data: three columns, pre-allocated in one Resize call
    Data := TDataItem.Create(True);
    Data.Items.Add('ID', TDataKind.dkInt32);
    Data.Items.Add('Value', TDataKind.dkDouble);
    Data.Items.Add('Category', TDataKind.dkInt32);
    Data.Resize(Rows);
    for var I := 0 to Rows - 1 do
    begin
      Data[0].Int32Data[I] := I;
      Data[1].DoubleData[I] := Random * 1000;
      Data[2].Int32Data[I] := Random(100);
    end;
    MemoInfo.Lines.Add('Created in ' +
      IntToStr(Stopwatch.ElapsedMilliseconds) + ' ms');

    // Save to the .bi file computed in FormCreate
    Stopwatch := TStopwatch.StartNew;
    TDataItemPersistence.Save(Data, FileName);
    MemoInfo.Lines.Add('Saved in ' +
      IntToStr(Stopwatch.ElapsedMilliseconds) + ' ms');

    BtnLoad.Enabled := True;
    ShowDataInfo;
  finally
    Screen.Cursor := crDefault;
  end;
end;
// Load the dataset from disk, replacing any dataset already in memory,
// and show the elapsed load time.
// Fix: Data is set to nil immediately after Free — otherwise a failed
// Load would leave a dangling reference that FormDestroy double-frees.
procedure TBigDataForm.BtnLoadClick(Sender: TObject);
var
  Stopwatch: TStopwatch;
begin
  Screen.Cursor := crHourGlass;
  try
    // Clear the reference before loading so an exception in Load
    // cannot leave a stale pointer behind.
    Data.Free;
    Data := nil;

    Stopwatch := TStopwatch.StartNew;
    Data := TDataItemPersistence.Load(FileName);
    LabelLoadTime.Caption := 'Loaded in ' +
      IntToStr(Stopwatch.ElapsedMilliseconds) + ' ms';
    ShowDataInfo;
  finally
    Screen.Cursor := crDefault;
  end;
end;
// Aggregate the loaded dataset with a GROUP BY query and open the
// result in the data viewer. Requires data to be loaded first.
procedure TBigDataForm.BtnQueryClick(Sender: TObject);
var
  Summary: TDataItem;
  Timer: TStopwatch;
begin
  if Data = nil then
  begin
    ShowMessage('Load data first');
    Exit;
  end;

  Screen.Cursor := crHourGlass;
  try
    Timer := TStopwatch.StartNew;
    Summary := TBISQL.From(Data,
      'sum(Value), avg(Value), count(*) group by Category');
    try
      ShowMessage(Format('Grouped %d rows in %d ms',
        [Data.Count, Timer.ElapsedMilliseconds]));
      TDataViewer.View(Self, Summary);
    finally
      Summary.Free;
    end;
  finally
    Screen.Cursor := crDefault;
  end;
end;
// Open the data viewer on the loaded dataset; no-op when none loaded.
procedure TBigDataForm.BtnViewClick(Sender: TObject);
begin
  if Data = nil then
    Exit;
  TDataViewer.View(Self, Data);
end;
// Close the demo form.
procedure TBigDataForm.BtnCloseClick(Sender: TObject);
begin
Close;
end;
// Refresh the memo with row/column/cell counts and an estimated
// memory footprint, then enable the query and view buttons.
// Does nothing when no dataset is loaded.
procedure TBigDataForm.ShowDataInfo;
var
  CellCount: Int64;
begin
  if Data = nil then
    Exit;

  // Widen to Int64 so rows * columns cannot overflow 32-bit math
  CellCount := Int64(Data.Count) * Data.Items.Count;

  MemoInfo.Clear;
  MemoInfo.Lines.Add('Rows: ' + IntToStr(Data.Count));
  MemoInfo.Lines.Add('Columns: ' + IntToStr(Data.Items.Count));
  MemoInfo.Lines.Add('Cells: ' + IntToStr(CellCount));
  MemoInfo.Lines.Add('Memory: ~' +
    FormatBytes(CalculateMemoryUsage(Data)));

  BtnView.Enabled := True;
  BtnQuery.Enabled := True;
end;
// Release the in-memory dataset when the form is destroyed.
// Free is a safe no-op when Data is nil.
procedure TBigDataForm.FormDestroy(Sender: TObject);
begin
Data.Free;
end;
Performance Tips
64-bit Platform
Always use 64-bit compilation for data > 3GB
Pre-allocate Memory
Call Resize(Count) once instead of growing the dataset incrementally
Use Appropriate Types
Int32 uses less memory than Int64, Float less than Double
Create Indexes
Index frequently queried columns for 10-100x speedup
Parallel Queries
Use TParallel for multi-core speedup
Binary Storage
TeeBI .bi format is 10x faster than CSV/JSON
See Also
- Parallel Processing - Multi-threaded queries
- SQL Queries - Fast aggregation
- Importing Data - Load large files
- Simple Queries - Filter large datasets
