MNN/source/backend/opencl/execution/cl/raster_buf_mnn_cl.cpp

122 lines
4.8 KiB
C++

#include "opencl_source_map.hpp"
namespace MNN {
#ifndef MNN_OPENCL_BUFFER_CLOSED
const char* raster_buf =
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
"#define GLOBAL_SIZE_2_DIMS __private const int global_size_dim0,__private const int global_size_dim1,\n"
"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "" return; "" }\n"
"__constant sampler_t SAMPLER=CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
"#define GLOBAL_SIZE_3_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,__private const int global_size_dim2,\n"
"#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1 || input3 >= global_size_dim2) { "" return; "" }\n"
"__kernel void buffer_set_zero(\n"
" GLOBAL_SIZE_2_DIMS\n"
" __global OUTPUT_TYPE *output\n"
" ) {\n"
" const int x=get_global_id(0);\n"
" const int y=get_global_id(1);\n"
" \n"
" DEAL_NON_UNIFORM_DIM2(x,y);\n"
" \n"
" output[y*global_size_dim0+x]=(OUTPUT_TYPE)(0.0f);\n"
"}\n"
"#define MNN_DATA_FORMAT_NCHW 0\n"
"#define MNN_DATA_FORMAT_NHWC 1\n"
"#define MNN_DATA_FORMAT_NC4HW4 2\n"
"__kernel void raster_direct_buffer(\n"
" GLOBAL_SIZE_3_DIMS\n"
" __private const int size_x,\n"
" __global INPUT_TYPE *input,\n"
" __private const int inputOffset,\n"
" __private const int combineSrcOffset,\n"
" __private const int inputStride0,\n"
" __private const int inputStride1,\n"
" __private const int inputStride2,\n"
" __private const int src_width,\n"
" __private const int src_height,\n"
" __private const int src_channel,\n"
" __private const int src_batch,\n"
" __global OUTPUT_TYPE *output,\n"
" __private const int outputOffset,\n"
" __private const int combineDstOffset,\n"
" __private const int outputStride0,\n"
" __private const int outputStride1,\n"
" __private const int outputStride2,\n"
" __private const int dst_width,\n"
" __private const int dst_height,\n"
" __private const int dst_channel,\n"
" __private const int dst_batch\n"
" ) {\n"
" const int idx=get_global_id(0);\n"
" const int y=get_global_id(1);\n"
" const int z=get_global_id(2);\n"
" \n"
" DEAL_NON_UNIFORM_DIM3(idx,y,z);\n"
" const int x=idx % size_x;\n"
" const int id=idx/size_x;\n"
" \n"
" int inputIndex=inputOffset+id*combineSrcOffset+z*inputStride0+y*inputStride1+x*inputStride2;\n"
" int outputIndex=outputOffset+id*combineDstOffset+z*outputStride0+y*outputStride1+x*outputStride2;\n"
" int inputIndexReal=0;\n"
" int outputIndexReal=0;\n"
"#if INPUT_FORMAT == MNN_DATA_FORMAT_NCHW\n"
" inputIndexReal=inputIndex;\n"
"#elif INPUT_FORMAT == MNN_DATA_FORMAT_NHWC\n"
" inputIndexReal=inputIndex;\n"
"#elif INPUT_FORMAT == MNN_DATA_FORMAT_NC4HW4\n"
" int in_w=inputIndex % src_width; inputIndex /= src_width;\n"
" int in_h=inputIndex % src_height; inputIndex /= src_height;\n"
" int in_c=inputIndex % src_channel;\n"
" int in_b=inputIndex/src_channel;\n"
" inputIndexReal=(((in_b+(in_c/4)*src_batch)*src_height+in_h)*src_width+in_w)*4+(in_c % 4);\n"
"#endif\n"
" \n"
"#if OUTPUT_FORMAT == MNN_DATA_FORMAT_NCHW\n"
" outputIndexReal=outputIndex;\n"
"#elif OUTPUT_FORMAT == MNN_DATA_FORMAT_NHWC\n"
" outputIndexReal=outputIndex;\n"
"#elif OUTPUT_FORMAT == MNN_DATA_FORMAT_NC4HW4\n"
" int out_w=outputIndex % dst_width; outputIndex /= dst_width;\n"
" int out_h=outputIndex % dst_height; outputIndex /= dst_height;\n"
" int out_c=outputIndex % dst_channel;\n"
" int out_b=outputIndex/dst_channel;\n"
" outputIndexReal=(((out_b+(out_c/4)*dst_batch)*dst_height+out_h)*dst_width+out_w)*4+(out_c % 4);\n"
"#endif\n"
" output[outputIndexReal]=(OUTPUT_TYPE)input[inputIndexReal];\n"
"}\n"
"__kernel void raster_nc4hw4_buffer(\n"
" GLOBAL_SIZE_3_DIMS\n"
" __global INPUT_TYPE *input,\n"
" __private const int inputOffset,\n"
" __private const int inputStride0,\n"
" __private const int inputStride1,\n"
" __private const int inputStride2,\n"
" __private const int inputHeight,\n"
" __private const int inputWidth,\n"
" __private const int inputChannel,\n"
" __global OUTPUT_TYPE *output,\n"
" __private const int outputOffset,\n"
" __private const int outputStride0,\n"
" __private const int outputStride1,\n"
" __private const int outputStride2,\n"
" __private const int outputHeight,\n"
" __private const int outputWidth,\n"
" __private const int outputChannel\n"
" ) {\n"
" const int x=get_global_id(0);\n"
" const int y=get_global_id(1);\n"
" const int z=get_global_id(2);\n"
" \n"
" DEAL_NON_UNIFORM_DIM3(x,y,z);\n"
" \n"
" int inputIndex=inputOffset+(z*inputStride0+y*inputStride1+x*inputStride2)*4;\n"
" int outputIndex=outputOffset+(z*outputStride0+y*outputStride1+x*outputStride2)*4;\n"
" \n"
" OUTPUT_TYPE4 values=CONVERT_OUTPUT4(vload4(0,(__global INPUT_TYPE *)(input+inputIndex)));\n"
" vstore4(values,0,(__global OUTPUT_TYPE *)(output+outputIndex));\n"
"}\n"
;
#endif
}