Overview
Layout functions define how logical coordinates in a tensor’s index space map to offsets in the 1D array held in memory. All layout functions implement the interface defined by IdentityTensorLayout<> and are used by TensorRef and derived classes.
Header: cutlass/layout/matrix.h
RowMajor
Mapping function for row-major matrices.
Class Definition
class RowMajor {
public:
static int const kRank = 2;
static int const kStrideRank = 1;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
Constructors
CUTLASS_HOST_DEVICE
RowMajor(LongIndex ldm = 0);
CUTLASS_HOST_DEVICE
RowMajor(Stride stride);
Member Functions
packed
CUTLASS_HOST_DEVICE
static RowMajor packed(MatrixCoord const &extent);
Returns a layout for a tightly packed tensor, i.e. one with no padding between consecutive rows.
operator()
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord const &coord) const;
Returns the offset of a coordinate in linear memory. Assumes the coordinate follows the (row, column) convention.
Implementation:
return LongIndex(coord.row()) * LongIndex(stride_[0]) + coord.column();
inverse
CUTLASS_HOST_DEVICE
MatrixCoord inverse(LongIndex offset) const;
Inverse of layout function, mapping linear offset to logical coordinate.
stride
CUTLASS_HOST_DEVICE
Stride stride() const;
CUTLASS_HOST_DEVICE
Stride & stride();
CUTLASS_HOST_DEVICE
typename Stride::Index stride(int idx) const;
CUTLASS_HOST_DEVICE
typename Stride::Index & stride(int idx);
capacity
CUTLASS_HOST_DEVICE
LongIndex capacity(MatrixCoord const &extent) const;
Computes the number of contiguous elements needed to store a tensor with the given extent.
Example
// Create row-major layout with leading dimension
cutlass::layout::RowMajor layout(lda);
// Create tensor reference
cutlass::TensorRef<float, cutlass::layout::RowMajor> tensor(ptr, layout);
// Access element at (i, j)
float value = tensor.at(cutlass::MatrixCoord(i, j));
ColumnMajor
Mapping function for column-major matrices.
Class Definition
class ColumnMajor {
public:
static int const kRank = 2;
static int const kStrideRank = 1;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
Constructors
CUTLASS_HOST_DEVICE
ColumnMajor(LongIndex ldm = 0);
CUTLASS_HOST_DEVICE
ColumnMajor(Stride stride);
Member Functions
packed
CUTLASS_HOST_DEVICE
static ColumnMajor packed(MatrixCoord const &extent);
operator()
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord const &coord) const;
Returns the offset of a coordinate in linear memory. Assumes the coordinate follows the (row, column) convention.
Implementation:
return LongIndex(coord.column()) * LongIndex(stride_[0]) + coord.row();
inverse
CUTLASS_HOST_DEVICE
MatrixCoord inverse(LongIndex offset) const;
stride
CUTLASS_HOST_DEVICE
Stride stride() const;
CUTLASS_HOST_DEVICE
Stride & stride();
capacity
CUTLASS_HOST_DEVICE
LongIndex capacity(MatrixCoord const &extent) const;
Example
// Column-major with leading dimension
cutlass::layout::ColumnMajor layout(lda);
cutlass::TensorRef<float, cutlass::layout::ColumnMajor> tensor(ptr, layout);
RowMajorInterleaved
Mapping function for interleaved matrices. The matrix is structured as a row-major arrangement of fixed-size columns.
Template Signature
template <int Interleave>
struct RowMajorInterleaved;
Template Parameters
- Interleave - Size of interleaved columns
Class Definition
template <int Interleave>
struct RowMajorInterleaved {
static int const kRank = 2;
static int const kStrideRank = 1;
static int const kInterleave = Interleave;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
operator()
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord const &coord) const;
Implementation:
Index row_major = coord.row() / kInterleave;
Index row_minor = coord.row() % kInterleave;
return LongIndex(row_major) * LongIndex(stride_[0]) +
LongIndex(coord.column()) * kInterleave + row_minor;
Example
// 32-way interleaved row-major layout
using Layout = cutlass::layout::RowMajorInterleaved<32>;
Layout layout(ldm);
cutlass::TensorRef<int8_t, Layout> tensor(ptr, layout);
ColumnMajorInterleaved
Mapping function for interleaved matrices. The matrix is structured as a column-major arrangement of fixed-size rows.
Template Signature
template <int Interleave>
struct ColumnMajorInterleaved;
Template Parameters
- Interleave - Size of interleaved rows
Class Definition
template <int Interleave>
struct ColumnMajorInterleaved {
static int const kRank = 2;
static int const kStrideRank = 1;
static int const kInterleave = Interleave;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
operator()
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord const &coord) const;
Implementation:
Index column_major = coord.column() / kInterleave;
Index column_minor = coord.column() % kInterleave;
return LongIndex(column_major) * LongIndex(stride_[0]) +
LongIndex(coord.row()) * kInterleave + column_minor;
ContiguousMatrix
Mapping function for matrices whose layout is either row-major or column-major, where the choice between the two is known only at runtime.
Enumeration
enum class Matrix {
kColumnMajor, ///< leading dimension refers to stride between columns
kRowMajor ///< leading dimension refers to stride between rows
};
Class Definition
struct ContiguousMatrix {
static int const kRank = 2;
static int const kStrideRank = 1;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
Constructor
CUTLASS_HOST_DEVICE
ContiguousMatrix(
Index ldm = 0,
Matrix layout = Matrix::kColumnMajor
);
operator()
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord const &coord) const;
Implementation:
if (layout_ == Matrix::kColumnMajor) {
return coord.row() + coord.column() * stride_[0];
}
else if (layout_ == Matrix::kRowMajor) {
return coord.row() * stride_[0] + coord.column();
}
Example
// Runtime layout selection
cutlass::layout::Matrix kind = is_column_major ?
cutlass::layout::Matrix::kColumnMajor :
cutlass::layout::Matrix::kRowMajor;
cutlass::layout::ContiguousMatrix layout(ldm, kind);
cutlass::TensorRef<float, cutlass::layout::ContiguousMatrix> tensor(ptr, layout);
AffineRank2ColumnMajor
Mapping function for scenarios where both rows and columns are separated by a stride. Row stride is smaller than column stride.
Class Definition
struct AffineRank2ColumnMajor {
static int const kRank = 2;
static int const kStrideRank = 2;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
Constructors
CUTLASS_HOST_DEVICE
AffineRank2ColumnMajor(Stride const &stride = Stride());
CUTLASS_HOST_DEVICE
AffineRank2ColumnMajor(
LongIndex row_stride,
LongIndex column_stride
);
CUTLASS_HOST_DEVICE
AffineRank2ColumnMajor(LongIndex stride);
operator()
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord const &coord) const;
Implementation:
return dot(coord, stride_);
AffineRank2RowMajor
Mapping function for scenarios where both rows and columns are separated by a stride. Column stride is smaller than row stride.
Class Definition
struct AffineRank2RowMajor {
static int const kRank = 2;
static int const kStrideRank = 2;
using Index = int32_t;
using LongIndex = int64_t;
using TensorCoord = MatrixCoord;
using Stride = Coord<kStrideRank, LongIndex>;
};
Constructors
CUTLASS_HOST_DEVICE
AffineRank2RowMajor(Stride const &stride = Stride());
CUTLASS_HOST_DEVICE
AffineRank2RowMajor(
LongIndex row_stride,
LongIndex column_stride
);
Layout Transpose
Defines transposes of matrix layouts.
template <typename Layout>
struct LayoutTranspose;
template <>
struct LayoutTranspose<layout::RowMajor> {
using type = layout::ColumnMajor;
};
template <>
struct LayoutTranspose<layout::ColumnMajor> {
using type = layout::RowMajor;
};
Example
// Get transpose layout type
using TransposedLayout = typename cutlass::layout::LayoutTranspose<
cutlass::layout::RowMajor>::type;
// TransposedLayout is cutlass::layout::ColumnMajor
See Also
- TensorRef - Uses layout functions to map coordinates
- Gemm - Accepts layout template parameters