Overview
Read filters allow you to selectively load only the data you need from a spreadsheet file. Instead of loading an entire workbook into memory, you can specify exactly which cells should be read based on coordinates, ranges, or custom logic.
Read filters are applied during the loading process , meaning filtered cells never enter memory. This provides significant performance benefits for large files.
The IReadFilter Interface
All read filters implement the IReadFilter interface:
namespace PhpOffice\PhpSpreadsheet\Reader ;
interface IReadFilter
{
/**
* Should this cell be read?
*
* @param string $columnAddress Column address (e.g., "A", "IV")
* @param int $row Row number
* @param string $worksheetName Optional worksheet name
*/
public function readCell (
string $columnAddress ,
int $row ,
string $worksheetName = ''
) : bool ;
}
Source: IReadFilter.php:6-15
Default Read Filter
By default, PhpSpreadsheet uses DefaultReadFilter which reads all cells:
class DefaultReadFilter implements IReadFilter
{
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
return true ; // Read everything
}
}
Source: DefaultReadFilter.php:14-17
Basic Usage
Simple Range Filter
Read only rows 9-15 and columns A-E:
use PhpOffice\PhpSpreadsheet\ IOFactory ;
use PhpOffice\PhpSpreadsheet\Reader\ IReadFilter ;
class MyReadFilter implements IReadFilter
{
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
// Read rows 9 to 15 and columns A to E only
if ( $row >= 9 && $row <= 15 ) {
if ( in_array ( $columnAddress , range ( 'A' , 'E' ))) {
return true ;
}
}
return false ;
}
}
$reader = IOFactory :: createReader ( 'Xlsx' );
$reader -> setReadFilter ( new MyReadFilter ());
$spreadsheet = $reader -> load ( 'large-file.xlsx' );
Source: 09_Simple_file_reader_using_a_read_filter.php:14-27
Configurable Range Filter
Make your filter configurable for reusability:
class ConfigurableReadFilter implements IReadFilter
{
public function __construct (
private int $startRow ,
private int $endRow ,
private array $columns
) {}
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
if ( $row >= $this -> startRow && $row <= $this -> endRow ) {
if ( in_array ( $columnAddress , $this -> columns )) {
return true ;
}
}
return false ;
}
}
// Read rows 9-15, columns G-K
$filter = new ConfigurableReadFilter ( 9 , 15 , range ( 'G' , 'K' ));
$reader = IOFactory :: createReader ( 'Xlsx' );
$reader -> setReadFilter ( $filter );
$spreadsheet = $reader -> load ( 'file.xlsx' );
Source: 10_Simple_file_reader_using_a_configurable_read_filter.php:14-34
Chunked Reading
For very large files, process data in chunks to keep memory usage constant:
Version 1: Create New Filter Instance
class ChunkReadFilter implements IReadFilter
{
private int $startRow ;
private int $endRow ;
public function __construct ( int $startRow , int $chunkSize )
{
$this -> startRow = $startRow ;
$this -> endRow = $startRow + $chunkSize ;
}
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
// Always read heading row
if ( $row == 1 ) {
return true ;
}
// Read configured row range
if ( $row >= $this -> startRow && $row < $this -> endRow ) {
return true ;
}
return false ;
}
}
$reader = IOFactory :: createReader ( 'Xlsx' );
$chunkSize = 1000 ;
// Process file in chunks
for ( $startRow = 2 ; $startRow <= 10000 ; $startRow += $chunkSize ) {
echo "Reading rows { $startRow } to " . ( $startRow + $chunkSize - 1 ) . " \n " ;
// Create new filter for this chunk
$chunkFilter = new ChunkReadFilter ( $startRow , $chunkSize );
$reader -> setReadFilter ( $chunkFilter );
// Load only this chunk
$spreadsheet = $reader -> load ( 'large-file.xlsx' );
// Process chunk data
foreach ( $spreadsheet -> getActiveSheet () -> getRowIterator () as $row ) {
foreach ( $row -> getCellIterator () as $cell ) {
// Process cell
}
}
// Free memory
$spreadsheet -> disconnectWorksheets ();
unset ( $spreadsheet );
}
Source: 11_Reading_a_workbook_in_chunks_using_a_configurable_read_filter_version_1.php:14-38
Version 2: Reuse Filter Instance
More efficient - reuses the same filter and reader instances:
class ChunkReadFilter implements IReadFilter
{
private int $startRow = 0 ;
private int $endRow = 0 ;
/**
* Set the row range to read
*/
public function setRows ( int $startRow , int $chunkSize ) : void
{
$this -> startRow = $startRow ;
$this -> endRow = $startRow + $chunkSize ;
}
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
// Always read heading row
if ( $row == 1 ) {
return true ;
}
// Read configured row range
if ( $row >= $this -> startRow && $row < $this -> endRow ) {
return true ;
}
return false ;
}
}
$reader = IOFactory :: createReader ( 'Xlsx' );
$chunkSize = 1000 ;
$chunkFilter = new ChunkReadFilter ();
// Attach filter once
$reader -> setReadFilter ( $chunkFilter );
for ( $startRow = 2 ; $startRow <= 10000 ; $startRow += $chunkSize ) {
echo "Reading rows { $startRow } to " . ( $startRow + $chunkSize - 1 ) . " \n " ;
// Update filter range
$chunkFilter -> setRows ( $startRow , $chunkSize );
// Load chunk
$spreadsheet = $reader -> load ( 'large-file.xlsx' );
// Process...
$spreadsheet -> disconnectWorksheets ();
unset ( $spreadsheet );
}
Source: 12_Reading_a_workbook_in_chunks_using_a_configurable_read_filter_version_2.php:14-38
Version 2 is more efficient because it reuses the reader and filter instances. Use this approach when processing multiple chunks.
Advanced Filter Patterns
Reading Specific Rows Only
Read header row and specific data rows:
class SpecificRowsFilter implements IReadFilter
{
public function __construct (
private array $rowsToRead
) {}
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
return in_array ( $row , $this -> rowsToRead );
}
}
// Read rows 1, 20-30
$filter = new SpecificRowsFilter ([ 1 , ... range ( 20 , 30 )]);
$reader -> setReadFilter ( $filter );
Source: 24_Readfilter.php:21-32
Worksheet-Specific Filter
Apply different rules to different worksheets:
class WorksheetSpecificFilter implements IReadFilter
{
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
// Different rules for different sheets
return match ( $worksheetName ) {
'Summary' => $row <= 10 , // Only first 10 rows
'Data' => $row >= 5 && $row <= 100 , // Rows 5-100
'Config' => true , // Read everything
default => false , // Skip other sheets
};
}
}
Column Range Filter
Read specific columns only:
class ColumnRangeFilter implements IReadFilter
{
public function __construct (
private string $startColumn ,
private string $endColumn
) {}
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
return $columnAddress >= $this -> startColumn
&& $columnAddress <= $this -> endColumn ;
}
}
// Read columns C through G only
$filter = new ColumnRangeFilter ( 'C' , 'G' );
Complex Logic Filter
Combine multiple conditions:
class ComplexFilter implements IReadFilter
{
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
// Always include header row
if ( $row === 1 ) {
return true ;
}
// Only specific columns
if ( ! in_array ( $columnAddress , [ 'A' , 'B' , 'E' , 'F' ])) {
return false ;
}
// Skip every 10th row (sampling)
if ( $row % 10 === 0 ) {
return false ;
}
// Only rows 2-1000
return $row >= 2 && $row <= 1000 ;
}
}
Combining with Other Optimizations
Read Filter + Read Data Only
Combine filters with setReadDataOnly() to skip formatting:
$reader = IOFactory :: createReader ( 'Xlsx' );
$reader -> setReadDataOnly ( true ); // Skip formatting
$reader -> setReadFilter ( new MyFilter ()); // Filter cells
$spreadsheet = $reader -> load ( 'file.xlsx' );
Source: 05_Simple_file_reader_using_the_read_data_only_option.php:11-14
Read Filter + Cell Caching
Use both techniques for maximum memory efficiency:
use PhpOffice\PhpSpreadsheet\ Settings ;
// Configure cache
$cache = new SimpleCacheBridge ( new ApcuCachePool ());
Settings :: setCache ( $cache );
// Use read filter
$reader = IOFactory :: createReader ( 'Xlsx' );
$reader -> setReadFilter ( new ChunkReadFilter ( 1 , 1000 ));
$spreadsheet = $reader -> load ( 'file.xlsx' );
Read Filter + Load Specific Worksheets
Filter cells AND sheets:
$reader = IOFactory :: createReader ( 'Xlsx' );
$reader -> setLoadSheetsOnly ([ 'Data' , 'Summary' ]); // Only these sheets
$reader -> setReadFilter ( new MyFilter ()); // Filter cells
$spreadsheet = $reader -> load ( 'file.xlsx' );
Memory Usage Comparison
Scenario Memory Load Time Full file (100K cells) 160 MB 2.5s With read filter (10K cells) 16 MB 0.8s Chunked reading (1K chunks) 2 MB 0.1s per chunk
Results will vary based on your file structure and complexity. Always test with your actual data.
Best Practices
Always Include Header Rows
Free Memory After Processing
When processing chunks, explicitly free memory: $spreadsheet -> disconnectWorksheets ();
unset ( $spreadsheet );
gc_collect_cycles (); // Force garbage collection
Use Range Checks Efficiently
Column range checks with letters are slower than numeric checks. For better performance: use PhpOffice\PhpSpreadsheet\Cell\ Coordinate ;
$colIndex = Coordinate :: columnIndexFromString ( $columnAddress );
return $colIndex >= 3 && $colIndex <= 7 ; // Columns C-G
Test your filters with small files first to ensure correct data is loaded: $reader -> setReadFilter ( $filter );
$spreadsheet = $reader -> load ( 'test-file.xlsx' );
// Verify data range
$sheet = $spreadsheet -> getActiveSheet ();
echo 'Highest row: ' . $sheet -> getHighestRow () . " \n " ;
echo 'Highest column: ' . $sheet -> getHighestColumn () . " \n " ;
Complete Example: ETL Pipeline
Here’s a complete example of using read filters in an ETL pipeline:
use PhpOffice\PhpSpreadsheet\ IOFactory ;
use PhpOffice\PhpSpreadsheet\Reader\ IReadFilter ;
use PhpOffice\PhpSpreadsheet\ Settings ;
class ETLReadFilter implements IReadFilter
{
private int $startRow = 0 ;
private int $endRow = 0 ;
public function setChunk ( int $startRow , int $chunkSize ) : void
{
$this -> startRow = $startRow ;
$this -> endRow = $startRow + $chunkSize ;
}
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
// Always read headers
if ( $row === 1 ) return true ;
// Only columns A-F (we don't need G onwards)
if ( ! in_array ( $columnAddress , range ( 'A' , 'F' ))) {
return false ;
}
// Current chunk range
return $row >= $this -> startRow && $row < $this -> endRow ;
}
}
// Configure caching
$cache = new SimpleCacheBridge ( new ApcuCachePool ());
Settings :: setCache ( $cache );
// Setup reader and filter
$reader = IOFactory :: createReader ( 'Xlsx' );
$reader -> setReadDataOnly ( true );
$filter = new ETLReadFilter ();
$reader -> setReadFilter ( $filter );
// Process in chunks
$chunkSize = 500 ;
$totalRows = 50000 ; // Known from metadata or previous scan
$processedCount = 0 ;
for ( $startRow = 2 ; $startRow <= $totalRows ; $startRow += $chunkSize ) {
// Update filter for this chunk
$filter -> setChunk ( $startRow , $chunkSize );
// Load chunk
$spreadsheet = $reader -> load ( 'large-data.xlsx' );
$sheet = $spreadsheet -> getActiveSheet ();
// Process rows
foreach ( $sheet -> getRowIterator () as $row ) {
if ( $row -> getRowIndex () === 1 ) continue ; // Skip header
$rowData = [];
foreach ( $row -> getCellIterator () as $cell ) {
$rowData [] = $cell -> getValue ();
}
// Your ETL logic here
processRow ( $rowData );
$processedCount ++ ;
}
// Clean up
$spreadsheet -> disconnectWorksheets ();
unset ( $spreadsheet );
// Progress report
$progress = round (( $processedCount / $totalRows ) * 100 , 1 );
echo "Processed { $processedCount } / { $totalRows } rows ({ $progress }%) \n " ;
}
echo "ETL complete! Processed { $processedCount } rows. \n " ;
Troubleshooting
Filter Not Being Applied
Issue : All data is still being loaded
Solution : Ensure setReadFilter() is called before load():
// Correct order
$reader -> setReadFilter ( $filter );
$spreadsheet = $reader -> load ( 'file.xlsx' );
// Wrong - too late
$spreadsheet = $reader -> load ( 'file.xlsx' );
$reader -> setReadFilter ( $filter ); // Has no effect
Empty Worksheet After Filtering
Issue : Worksheet appears empty after loading
Solution : Verify your filter logic returns true for expected cells:
// Debug your filter
class DebugFilter implements IReadFilter
{
public function readCell ( string $columnAddress , int $row , string $worksheetName = '' ) : bool
{
$result = /* your logic */ ;
echo "Cell { $columnAddress }{ $row }: " . ( $result ? 'YES' : 'NO' ) . " \n " ;
return $result ;
}
}
See Also