MNN/source/backend/opencl/execution/cl/raster_buf.cl

103 lines
3.9 KiB
Common Lisp

// Enable the half-precision extension when the build defines MNN_SUPPORT_FP16.
#ifdef MNN_SUPPORT_FP16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif

// Leading kernel parameters carrying the logical 2-D work size.
// The expansion ends with a comma, so it must be followed by another parameter.
#define GLOBAL_SIZE_2_DIMS __private const int global_size_dim0, __private const int global_size_dim1,

// Early-out for work-items that fall outside the logical 2-D work size.
// Arguments are parenthesized so that compound expressions (e.g. `a ? b : c`)
// compare as a whole against the bound. Must be used inside a kernel that
// declares GLOBAL_SIZE_2_DIMS, since it reads global_size_dim0/1 by name.
#define DEAL_NON_UNIFORM_DIM2(input1, input2) \
    if ((input1) >= global_size_dim0 || (input2) >= global_size_dim1) { \
        return; \
    }

// Image sampler: unnormalized coordinates, clamp addressing, nearest filtering.
__constant sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

// Leading kernel parameters carrying the logical 3-D work size.
// The expansion ends with a comma, so it must be followed by another parameter.
#define GLOBAL_SIZE_3_DIMS \
    __private const int global_size_dim0, __private const int global_size_dim1, __private const int global_size_dim2,

// Early-out for work-items that fall outside the logical 3-D work size.
// Arguments are parenthesized for the same reason as DEAL_NON_UNIFORM_DIM2.
// Must be used inside a kernel that declares GLOBAL_SIZE_3_DIMS.
#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) \
    if ((input1) >= global_size_dim0 || (input2) >= global_size_dim1 || (input3) >= global_size_dim2) { \
        return; \
    }
// Zero-fill a buffer viewed as a 2-D grid with row length global_size_dim0.
//
// Each in-range work-item (x = global id 0, y = global id 1) writes a single
// zero to output[y * global_size_dim0 + x]. Work-items with
// x >= global_size_dim0 or y >= global_size_dim1 return without writing.
// FLOAT is not defined in this file (presumably supplied by build options).
__kernel void buffer_set_zero(
    GLOBAL_SIZE_2_DIMS
    __global FLOAT *output
) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    DEAL_NON_UNIFORM_DIM2(col, row);

    // Linear element index of (col, row) in the row-major grid.
    const int linearIndex = row * global_size_dim0 + col;
    output[linearIndex] = (FLOAT)(0.0f);
}
// Zero-fill a 2-D image.
//
// Each in-range work-item writes an all-zero FLOAT4 to the pixel at
// (global id 0, global id 1). Work-items outside the
// global_size_dim0 x global_size_dim1 extent return without writing.
// WI_F and FLOAT4 are not defined in this file (presumably an image-write
// wrapper and a 4-wide vector type supplied by build options).
__kernel void image_set_zero(
    GLOBAL_SIZE_2_DIMS
    __write_only image2d_t output
) {
    const int px = get_global_id(0);
    const int py = get_global_id(1);

    DEAL_NON_UNIFORM_DIM2(px, py);

    const int2 coord = (int2)(px, py);
    WI_F(output, coord, (FLOAT4)(0.0f));
}
// Strided element-wise copy from `input` to `output`.
//
// For an in-range work-item with x = global id 0, y = global id 1,
// z = global id 2, exactly one FLOAT is copied:
//
//   output[outputOffset + z*outputStride0 + y*outputStride1 + x*outputStride2]
//     = input[inputOffset + z*inputStride0 + y*inputStride1 + x*inputStride2]
//
// Note the pairing: Stride0 goes with z, Stride1 with y, Stride2 with x.
// Offsets and strides are in units of FLOAT elements. Work-items outside the
// global_size_dim0/1/2 extent return without touching memory.
//
// `input` is only read, so it is const-qualified; this does not change how the
// host binds the argument.
// NOTE(review): indices are computed in 32-bit int; assumes the host passes
// offsets/strides that keep them in range and inside both buffers.
__kernel void raster_buffer(
    GLOBAL_SIZE_3_DIMS
    const __global FLOAT *input,
    __private const int inputOffset,
    __private const int inputStride0,
    __private const int inputStride1,
    __private const int inputStride2,
    __global FLOAT *output,
    __private const int outputOffset,
    __private const int outputStride0,
    __private const int outputStride1,
    __private const int outputStride2
) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    DEAL_NON_UNIFORM_DIM3(x, y, z);

    const int inputIndex = inputOffset + z * inputStride0 + y * inputStride1 + x * inputStride2;
    const int outputIndex = outputOffset + z * outputStride0 + y * outputStride1 + x * outputStride2;

    output[outputIndex] = input[inputIndex];
}
// Strided copy of 4-element packs from `input` to `output`.
//
// For an in-range work-item with x = global id 0, y = global id 1,
// z = global id 2, four consecutive FLOATs are copied:
//
//   src = inputOffset  + (z*inputStride0  + y*inputStride1  + x*inputStride2)  * 4
//   dst = outputOffset + (z*outputStride0 + y*outputStride1 + x*outputStride2) * 4
//   output[dst .. dst+3] = input[src .. src+3]
//
// The strides therefore count 4-element packs, while the offsets are applied
// unscaled (in FLOAT elements). Work-items outside the global_size_dim0/1/2
// extent return without touching memory.
//
// inputHeight/inputWidth/inputChannel and outputHeight/outputWidth/
// outputChannel are not read by this kernel; they are kept so the argument
// list seen by the host stays unchanged.
//
// `input` is only read, so it is const-qualified, and the pointer casts that
// previously wrapped the vload4/vstore4 operands are dropped: the operands
// already have the right pointer type, and the cast would strip the const.
// NOTE(review): indices are computed in 32-bit int; assumes the host passes
// offsets/strides that keep all four lanes inside both buffers.
__kernel void raster_nc4hw4_buffer(
    GLOBAL_SIZE_3_DIMS
    const __global FLOAT *input,
    __private const int inputOffset,
    __private const int inputStride0,
    __private const int inputStride1,
    __private const int inputStride2,
    __private const int inputHeight,
    __private const int inputWidth,
    __private const int inputChannel,
    __global FLOAT *output,
    __private const int outputOffset,
    __private const int outputStride0,
    __private const int outputStride1,
    __private const int outputStride2,
    __private const int outputHeight,
    __private const int outputWidth,
    __private const int outputChannel
) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    DEAL_NON_UNIFORM_DIM3(x, y, z);

    const int inputIndex = inputOffset + (z * inputStride0 + y * inputStride1 + x * inputStride2) * 4;
    const int outputIndex = outputOffset + (z * outputStride0 + y * outputStride1 + x * outputStride2) * 4;

    // Move one 4-wide pack per work-item.
    const FLOAT4 values = vload4(0, input + inputIndex);
    vstore4(values, 0, output + outputIndex);
}