Skip to main content

Overview

A Table is a sequence of chunked arrays. Tables have a similar interface to record batches, but they can be composed from multiple record batches or chunked arrays. Tables are data-frame-like, and many methods you expect to work on a data.frame are implemented for Table.

Table Class

Factory Method

Table$create()

Create a Table from various inputs.
...
various
A data.frame, a named set of Arrays or vectors, or RecordBatch objects. If given a mixture of data.frames and named vectors, the inputs are automatically spliced together.
schema
Schema
default:"NULL"
A Schema, or NULL (the default) to infer the schema from the data
tbl <- arrow_table(name = rownames(mtcars), mtcars)
dim(tbl)
names(tbl)

Methods

$column()

Extract a ChunkedArray by integer position from the table.
i
integer
Zero-based column index
tbl <- arrow_table(mtcars)
tbl$column(0)  # First column

$ColumnNames()

Get all column names (called by names(tbl)).
tbl <- arrow_table(mtcars)
tbl$ColumnNames()

$nbytes()

Total number of bytes consumed by the elements of the table.
tbl <- arrow_table(mtcars)
tbl$nbytes()

$RenameColumns()

Set all column names (called by names(tbl) <- value).
value
character
New column names
tbl <- arrow_table(mtcars)
tbl$RenameColumns(c("miles_per_gallon", "cylinders", ...))

$GetColumnByName()

Extract a ChunkedArray by string name.
name
character
Column name
tbl <- arrow_table(mtcars)
tbl$GetColumnByName("mpg")

$RemoveColumn()

Remove a column by index.
i
integer
Zero-based column index

$AddColumn()

Add a new column at the specified position.
i
integer
Zero-based position to insert the column
new_field
Field
A Field object defining the column name and type
value
Array | ChunkedArray
The column data

$SetColumn()

Replace a column at the specified position.
i
integer
Zero-based column index
new_field
Field
A Field object defining the column name and type
value
Array | ChunkedArray
The column data

$ReplaceSchemaMetadata()

Replace the schema metadata.
new
list
Named list of metadata

$field()

Extract a Field from the table schema by integer position.
i
integer
Zero-based field index

$SelectColumns()

Return new Table with specified columns.
indices
integer vector
Zero-based column indices to select
tbl <- arrow_table(mtcars)
tbl$SelectColumns(c(0, 1, 2))  # Select first 3 columns

$Slice()

Create a zero-copy view starting at the indicated integer offset.
offset
integer
Starting row position (zero-based)
length
integer
default:"NULL"
Number of rows to include. If NULL, goes to the end of the table
tbl <- arrow_table(mtcars)
tbl$Slice(5, 10)  # 10 rows starting at zero-based offset 5 (zero-based rows 5-14, i.e. R rows 6-15)

$Take()

Return a Table with rows at positions given by integers.
i
integer vector | Array | ChunkedArray
Zero-based row positions to take. If an Arrow Array or ChunkedArray, it will be coerced to an R vector before taking
tbl <- arrow_table(mtcars)
tbl$Take(c(1, 3, 5, 7))

$Filter()

Return a Table containing the rows at the positions where the logical vector i is TRUE.
i
logical vector | Array | ChunkedArray
Logical vector or Arrow boolean-type (Chunked)Array
keep_na
logical
default:"TRUE"
Whether rows for which the filter i is NA are included in the result as NA (TRUE) or dropped (FALSE)
tbl <- arrow_table(mtcars)
tbl$Filter(tbl$column(0) > 20)

$SortIndices()

Return an Array of integer row positions that can be used to rearrange the Table.
names
character vector
Column names to sort by
descending
logical
default:"FALSE"
Whether to sort in descending order. Can be a logical vector of length one or of the same length as names
tbl <- arrow_table(mtcars)
indices <- tbl$SortIndices(c("cyl", "mpg"))
sorted_tbl <- tbl$Take(indices)

$serialize()

Write the table to the given OutputStream.
output_stream
OutputStream
The output stream to write to
...
various
Additional arguments

$to_data_frame()

Convert the table to an R data.frame.
tbl <- arrow_table(mtcars)
df <- tbl$to_data_frame()

$cast()

Alter the schema of the table.
target_schema
Schema
The target schema. Must have the same column names as the current schema
safe
logical
default:"TRUE"
Whether to check for overflows or other unsafe conversions
options
CastOptions
default:"cast_options(safe)"
Casting options
tbl <- arrow_table(mtcars)
new_schema <- schema(mpg = float64(), cyl = int8(), ...)
tbl$cast(new_schema)

$Equals()

Check if this table is equal to another.
other
Table
Another Table to compare with
check_metadata
logical
default:"FALSE"
Whether to compare metadata as well
tbl1 <- arrow_table(mtcars)
tbl2 <- arrow_table(mtcars)
tbl1$Equals(tbl2)  # TRUE

$Validate()

Perform validation checks on the table.

$ValidateFull()

Perform full validation checks on the table.

Active Bindings

$num_columns

Number of columns in the table.
tbl <- arrow_table(mtcars)
tbl$num_columns  # 11

$num_rows

Number of rows in the table.
tbl <- arrow_table(mtcars)
tbl$num_rows  # 32

$schema

The Schema of the table. Can be read or replaced.
tbl <- arrow_table(mtcars)
tbl$schema

# Replace schema
tbl$schema <- new_schema

$columns

Returns a list of ChunkedArrays.
tbl <- arrow_table(mtcars)
tbl$columns

$metadata

Returns the key-value metadata of the Schema as a named list. Modify or replace by assigning in.
tbl <- arrow_table(mtcars)
tbl$metadata

# Set metadata
tbl$metadata <- list(description = "Motor Trend Car Road Tests")

S3 Methods

Tables support many data.frame-like operations:

Subsetting

tbl <- arrow_table(mtcars)

# Column access
tbl$mpg
tbl[["cyl"]]
tbl[, c("gear", "hp", "wt")]

# Row and column access
as.data.frame(tbl[4:8, c("gear", "hp", "wt")])

names(), dim(), nrow(), ncol()

tbl <- arrow_table(mtcars)
names(tbl)
dim(tbl)
nrow(tbl)
ncol(tbl)

head() and tail()

tbl <- arrow_table(mtcars)
head(tbl)
tail(tbl, n = 10)

Helper Functions

arrow_table()

Alias for Table$create().
...
various
A data.frame or a named set of Arrays or vectors
schema
Schema
default:"NULL"
A Schema, or NULL to infer from data
tbl <- arrow_table(name = rownames(mtcars), mtcars)
dim(tbl)

as_arrow_table()

Convert a single object to an Arrow Table.
x
object
An object to convert to an Arrow Table
schema
Schema
default:"NULL"
An optional schema
# Use as_arrow_table() for a single object
as_arrow_table(data.frame(col1 = 1, col2 = "two"))

# Use arrow_table() to create from columns
arrow_table(col1 = 1, col2 = "two")

concat_tables()

Concatenate one or more Tables into a single table. This operation does not copy array data.
...
Table | RecordBatch
One or more Table or RecordBatch objects. RecordBatch objects will be automatically converted to Tables
unify_schemas
logical
default:"TRUE"
If TRUE, the schemas will be unified with fields of the same name being merged, then each table will be promoted to the unified schema before being concatenated
tbl <- arrow_table(name = rownames(mtcars), mtcars)
prius <- arrow_table(name = "Prius", mpg = 58, cyl = 4, disp = 1.8)
combined <- concat_tables(tbl, prius)
tail(combined)$to_data_frame()

rbind() and cbind()

# Row binding
tbl1 <- arrow_table(mtcars[1:16, ])
tbl2 <- arrow_table(mtcars[17:32, ])
rbind(tbl1, tbl2)

# Column binding
tbl_a <- arrow_table(mtcars[, 1:5])
tbl_b <- arrow_table(mtcars[, 6:11])
cbind(tbl_a, tbl_b)

Build docs developers (and LLMs) love