mini_jit
-
enum class mini_jit::exec_t : uint32_t
execution type
Values:
-
enumerator seq
-
enumerator prim
-
enumerator undefined
-
enum class mini_jit::ptype_t : uint32_t
primitive type
Values:
-
enumerator zero
-
enumerator identity
-
enumerator relu
-
enumerator gemm
-
enumerator brgemm
-
enumerator square
-
enumerator reciprocal
-
enumerator increment
-
enumerator decrement
-
enumerator add
-
enumerator sub
-
enumerator mul
-
enumerator div
-
enumerator min
-
enumerator max
-
enumerator fast_sigmoid
-
enumerator sigmoid_interp
-
enumerator sigmoid_taylor
-
enumerator none
-
enum class mini_jit::dim_t : uint32_t
dimension type
Values:
-
enumerator c
-
enumerator m
-
enumerator n
-
enumerator k
-
enum class mini_jit::error_t : int32_t
error codes
Values:
-
enumerator success
-
enumerator wrong_dimension
-
enumerator wrong_ptype
-
enumerator operation_not_supported
-
enumerator wrong_matrix_ordering_format
-
enumerator wrong_dtype
-
enumerator wrong_exec_type
-
class Benchmark
Subclassed by mini_jit::benchmarks::EinsumTreeBench, mini_jit::benchmarks::FastSigmoidPrimitiveBench, mini_jit::benchmarks::IdentityPrimitiveBench, mini_jit::benchmarks::IdentityTransPrimitiveBench, mini_jit::benchmarks::MatmulBrMNKBench, mini_jit::benchmarks::MatmulMNKBench, mini_jit::benchmarks::ReLUPrimitiveBench, mini_jit::benchmarks::ReLUTransPrimitiveBench, mini_jit::benchmarks::ReciprocalPrimitiveBench, mini_jit::benchmarks::SigmoidInterpolationPrimitiveBench, mini_jit::benchmarks::SigmoidTaylorPrimitiveBench, mini_jit::benchmarks::SquarePrimitiveBench, mini_jit::benchmarks::SquareTransPrimitiveBench, mini_jit::benchmarks::TensorOperationBench, mini_jit::benchmarks::ZeroEorPrimitiveBench, mini_jit::benchmarks::ZeroXZRPrimitiveBench
Public Functions
-
inline virtual ~Benchmark()
-
virtual void run() = 0
Runs the benchmark.
-
inline benchmark_result getResult()
Returns the result of the benchmark.
-
struct benchmark_result
-
class Binary
Public Types
-
using kernel_t = void (*)(void const *a, void const *b, void *c, int64_t ld_a, int64_t ld_b, int64_t ld_c)
Public Functions
-
inline ~Binary() noexcept
Destructor.
-
error_t generate(uint32_t m, uint32_t n, uint32_t trans_c, mini_jit::dtype_t dtype, mini_jit::ptype_t ptype)
Generate a kernel for a binary primitive.
- Parameters:
m – Number of rows.
n – Number of columns.
trans_c – 0 if C is stored in column-major order, 1 if C is stored in row-major order.
dtype – Data type of the matrices.
ptype – Primitive type.
- Returns:
error_t::success on success, another error_t value otherwise.
-
class Brgemm
Public Types
-
using kernel_t = void (*)(void const *a, void const *b, void *c, int64_t ld_a, int64_t ld_b, int64_t ld_c, int64_t br_stride_a, int64_t br_stride_b)
Public Functions
-
inline ~Brgemm() noexcept
Destructor.
-
error_t generate(uint32_t m, uint32_t n, uint32_t k, uint32_t br_size, uint32_t trans_a, uint32_t trans_b, uint32_t trans_c, mini_jit::dtype_t dtype)
Generate a kernel for batch-reduce matrix multiplication.
- Parameters:
m – number of rows in A and C.
n – number of columns in B and C.
k – number of columns in A and rows in B.
br_size – batch-reduce size.
trans_a – 0 if A is stored in column-major order, 1 if A is stored in row-major order.
trans_b – 0 if B is stored in column-major order, 1 if B is stored in row-major order.
trans_c – 0 if C is stored in column-major order, 1 if C is stored in row-major order.
dtype – data type of the matrices.
- Returns:
error_t::success on success, another error_t value otherwise.
-
class Kernel
Public Functions
-
inline Kernel()
Constructor.
-
~Kernel() noexcept
Destructor.
-
void add_instr(uint32_t ins)
Adds an instruction to the code buffer.
- Parameters:
ins – instruction which is added.
-
void add_instr(std::vector<uint32_t> ins)
Adds a vector of instructions to the code buffer.
- Parameters:
ins – instructions which are added.
-
void add_label(std::string const &label)
Adds a label to the code buffer.
- Parameters:
label – label which is added.
-
int getInstrCountFromLabel(std::string const &label) const
Returns how many instructions come after the given label.
- Parameters:
label – label to search for.
- Returns:
number of instructions after the label.
-
std::size_t get_size() const
Gets the size of the code buffer.
- Returns:
size of the code buffer in bytes.
-
void set_kernel()
Sets the kernel based on the code buffer.
-
void const *get_kernel() const
Gets a pointer to the executable kernel.
-
void write(char const *filename) const
Writes the code buffer to the given file.
- Parameters:
filename – name of the file to write to.
-
class TensorOperation
Public Functions
-
error_t setup(dtype_t dtype, ptype_t prim_first_touch, ptype_t prim_main, ptype_t prim_last_touch, std::span<const dim_t> dim_types, std::span<const exec_t> exec_types, std::span<const int64_t> dim_sizes, std::span<const int64_t> strides_in0, std::span<const int64_t> strides_in1, std::span<const int64_t> strides_out)
Setup for a binary tensor contraction or a unary tensor operation.
- Parameters:
dtype – Datatype of all tensor elements.
prim_first_touch – Type of the first touch primitive.
prim_main – Type of the main primitive.
prim_last_touch – Type of the last touch primitive.
dim_types – Dimension type of the loops (c, m, n, or k).
exec_types – Execution type of the loops (seq, shared, or prim).
dim_sizes – Sizes of the dimensions.
strides_in0 – Strides of the first input tensor.
strides_in1 – Strides of the second input tensor (ignored if unary).
strides_out – Strides of the output tensor.
- Returns:
error_t::success on success, another error_t value otherwise.
-
void execute(void const *tensor_in0, void const *tensor_in1, void *tensor_out)
Execute the tensor operation.
- Parameters:
tensor_in0 – First input tensor.
tensor_in1 – Second input tensor (use nullptr if unary).
tensor_out – Output tensor.
-
void execute_iter(int64_t id_loop, char const *ptr_in0, char const *ptr_in1, char *ptr_out, bool first_access, bool last_access)
General-purpose loop implementation featuring first and last touch operations. No threading is applied.
- Parameters:
id_loop – Dimension id of the loop which is executed.
ptr_in0 – Pointer to the first input tensor’s data.
ptr_in1 – Pointer to the second input tensor’s data (use nullptr if unary).
ptr_out – Pointer to the output tensor’s data.
first_access – True if this is the first access to the output tensor’s data.
last_access – True if this is the last access to the output tensor’s data.
-
void execute_iter_parallel(char const *ptr_in0, char const *ptr_in1, char *ptr_out, bool first_access, bool last_access)
General-purpose loop implementation featuring first and last touch operations with parallelization.
- Parameters:
ptr_in0 – Pointer to the first input tensor’s data.
ptr_in1 – Pointer to the second input tensor’s data (use nullptr if unary).
ptr_out – Pointer to the output tensor’s data.
first_access – True if this is the first access to the output tensor’s data.
last_access – True if this is the last access to the output tensor’s data.
-
inline int dtype_size() const
-
class Unary
Public Types
-
using kernel_t = void (*)(void const *a, void *b, int64_t ld_a, int64_t ld_b, void *extra)
Public Functions
-
inline ~Unary() noexcept
Destructor.
-
error_t generate(uint32_t m, uint32_t n, uint32_t trans_b, mini_jit::dtype_t dtype, mini_jit::ptype_t ptype)
Generate a kernel for a unary primitive.
- Parameters:
m – Number of rows in A and B.
n – Number of columns in A and B.
trans_b – 0 if B is stored in column-major order, 1 if B is stored in row-major order.
dtype – Data type of the matrices.
ptype – Primitive type.
- Returns:
error_t::success on success, another error_t value otherwise.
-
kernel_t get_kernel() const
Get the generated kernel: B := op(A).
- Returns:
pointer to the generated kernel.
-
void set_extra(void *extra)
Set extra/context pointer for kernels that need it (e.g., lookup table).
-
void *get_extra() const
Get the extra/context pointer.