mirror of https://github.com/alibaba/MNN.git
122 lines
4.8 KiB
C++
122 lines
4.8 KiB
C++
#include "opencl_source_map.hpp"
|
|
namespace MNN {
|
|
#ifndef MNN_OPENCL_BUFFER_CLOSED
|
|
const char* raster_buf =
|
|
"#ifdef MNN_SUPPORT_FP16\n"
|
|
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
|
|
"#endif\n"
|
|
"#define GLOBAL_SIZE_2_DIMS __private const int global_size_dim0,__private const int global_size_dim1,\n"
|
|
"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "" return; "" }\n"
|
|
"__constant sampler_t SAMPLER=CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
|
|
"#define GLOBAL_SIZE_3_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,__private const int global_size_dim2,\n"
|
|
"#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1 || input3 >= global_size_dim2) { "" return; "" }\n"
|
|
"__kernel void buffer_set_zero(\n"
|
|
" GLOBAL_SIZE_2_DIMS\n"
|
|
" __global OUTPUT_TYPE *output\n"
|
|
" ) {\n"
|
|
" const int x=get_global_id(0);\n"
|
|
" const int y=get_global_id(1);\n"
|
|
" \n"
|
|
" DEAL_NON_UNIFORM_DIM2(x,y);\n"
|
|
" \n"
|
|
" output[y*global_size_dim0+x]=(OUTPUT_TYPE)(0.0f);\n"
|
|
"}\n"
|
|
"#define MNN_DATA_FORMAT_NCHW 0\n"
|
|
"#define MNN_DATA_FORMAT_NHWC 1\n"
|
|
"#define MNN_DATA_FORMAT_NC4HW4 2\n"
|
|
"__kernel void raster_direct_buffer(\n"
|
|
" GLOBAL_SIZE_3_DIMS\n"
|
|
" __private const int size_x,\n"
|
|
" __global INPUT_TYPE *input,\n"
|
|
" __private const int inputOffset,\n"
|
|
" __private const int combineSrcOffset,\n"
|
|
" __private const int inputStride0,\n"
|
|
" __private const int inputStride1,\n"
|
|
" __private const int inputStride2,\n"
|
|
" __private const int src_width,\n"
|
|
" __private const int src_height,\n"
|
|
" __private const int src_channel,\n"
|
|
" __private const int src_batch,\n"
|
|
" __global OUTPUT_TYPE *output,\n"
|
|
" __private const int outputOffset,\n"
|
|
" __private const int combineDstOffset,\n"
|
|
" __private const int outputStride0,\n"
|
|
" __private const int outputStride1,\n"
|
|
" __private const int outputStride2,\n"
|
|
" __private const int dst_width,\n"
|
|
" __private const int dst_height,\n"
|
|
" __private const int dst_channel,\n"
|
|
" __private const int dst_batch\n"
|
|
" ) {\n"
|
|
" const int idx=get_global_id(0);\n"
|
|
" const int y=get_global_id(1);\n"
|
|
" const int z=get_global_id(2);\n"
|
|
" \n"
|
|
" DEAL_NON_UNIFORM_DIM3(idx,y,z);\n"
|
|
" const int x=idx % size_x;\n"
|
|
" const int id=idx/size_x;\n"
|
|
" \n"
|
|
" int inputIndex=inputOffset+id*combineSrcOffset+z*inputStride0+y*inputStride1+x*inputStride2;\n"
|
|
" int outputIndex=outputOffset+id*combineDstOffset+z*outputStride0+y*outputStride1+x*outputStride2;\n"
|
|
" int inputIndexReal=0;\n"
|
|
" int outputIndexReal=0;\n"
|
|
"#if INPUT_FORMAT == MNN_DATA_FORMAT_NCHW\n"
|
|
" inputIndexReal=inputIndex;\n"
|
|
"#elif INPUT_FORMAT == MNN_DATA_FORMAT_NHWC\n"
|
|
" inputIndexReal=inputIndex;\n"
|
|
"#elif INPUT_FORMAT == MNN_DATA_FORMAT_NC4HW4\n"
|
|
" int in_w=inputIndex % src_width; inputIndex /= src_width;\n"
|
|
" int in_h=inputIndex % src_height; inputIndex /= src_height;\n"
|
|
" int in_c=inputIndex % src_channel;\n"
|
|
" int in_b=inputIndex/src_channel;\n"
|
|
" inputIndexReal=(((in_b+(in_c/4)*src_batch)*src_height+in_h)*src_width+in_w)*4+(in_c % 4);\n"
|
|
"#endif\n"
|
|
" \n"
|
|
"#if OUTPUT_FORMAT == MNN_DATA_FORMAT_NCHW\n"
|
|
" outputIndexReal=outputIndex;\n"
|
|
"#elif OUTPUT_FORMAT == MNN_DATA_FORMAT_NHWC\n"
|
|
" outputIndexReal=outputIndex;\n"
|
|
"#elif OUTPUT_FORMAT == MNN_DATA_FORMAT_NC4HW4\n"
|
|
" int out_w=outputIndex % dst_width; outputIndex /= dst_width;\n"
|
|
" int out_h=outputIndex % dst_height; outputIndex /= dst_height;\n"
|
|
" int out_c=outputIndex % dst_channel;\n"
|
|
" int out_b=outputIndex/dst_channel;\n"
|
|
" outputIndexReal=(((out_b+(out_c/4)*dst_batch)*dst_height+out_h)*dst_width+out_w)*4+(out_c % 4);\n"
|
|
"#endif\n"
|
|
" output[outputIndexReal]=(OUTPUT_TYPE)input[inputIndexReal];\n"
|
|
"}\n"
|
|
"__kernel void raster_nc4hw4_buffer(\n"
|
|
" GLOBAL_SIZE_3_DIMS\n"
|
|
" __global INPUT_TYPE *input,\n"
|
|
" __private const int inputOffset,\n"
|
|
" __private const int inputStride0,\n"
|
|
" __private const int inputStride1,\n"
|
|
" __private const int inputStride2,\n"
|
|
" __private const int inputHeight,\n"
|
|
" __private const int inputWidth,\n"
|
|
" __private const int inputChannel,\n"
|
|
" __global OUTPUT_TYPE *output,\n"
|
|
" __private const int outputOffset,\n"
|
|
" __private const int outputStride0,\n"
|
|
" __private const int outputStride1,\n"
|
|
" __private const int outputStride2,\n"
|
|
" __private const int outputHeight,\n"
|
|
" __private const int outputWidth,\n"
|
|
" __private const int outputChannel\n"
|
|
" ) {\n"
|
|
" const int x=get_global_id(0);\n"
|
|
" const int y=get_global_id(1);\n"
|
|
" const int z=get_global_id(2);\n"
|
|
" \n"
|
|
" DEAL_NON_UNIFORM_DIM3(x,y,z);\n"
|
|
" \n"
|
|
" int inputIndex=inputOffset+(z*inputStride0+y*inputStride1+x*inputStride2)*4;\n"
|
|
" int outputIndex=outputOffset+(z*outputStride0+y*outputStride1+x*outputStride2)*4;\n"
|
|
" \n"
|
|
" OUTPUT_TYPE4 values=CONVERT_OUTPUT4(vload4(0,(__global INPUT_TYPE *)(input+inputIndex)));\n"
|
|
" vstore4(values,0,(__global OUTPUT_TYPE *)(output+outputIndex));\n"
|
|
"}\n"
|
|
;
|
|
#endif
|
|
}
|