MNN/source/backend/opencl/execution/cl/range_buf_mnn_cl.cpp

41 lines
1.2 KiB
C++

#include "opencl_source_map.hpp"
// Embedded OpenCL C source for the `range_buf` kernel, stored as a C string.
// NOTE(review): the file name and the `""` splice points inside the #define
// lines suggest this text is generated from a .cl file -- confirm before
// hand-editing, and treat every character inside the literals as runtime data.
namespace MNN {
// The kernel text is only compiled into the binary when the buffer path is
// enabled, i.e. when MNN_OPENCL_BUFFER_CLOSED is NOT defined.
#ifndef MNN_OPENCL_BUFFER_CLOSED
// range_buf: OpenCL C program text defining `__kernel void range_buf(...)`.
//
// Macros the kernel text uses but does not define (they must be supplied from
// outside this string; presumably via OpenCL build options -- confirm against
// the code that builds this program):
//   INPUT_TYPE       scalar type of the two input buffers
//   OUTPUT_TYPE      scalar type of the output buffer
//   OUTPUT_TYPE4     4-wide vector type used for the computed values
//   CONVERT_OUTPUT4  conversion applied to an int4 before the multiply
// Optional switches tested with #ifdef inside the text:
//   MNN_SUPPORT_FP16 enables the cl_khr_fp16 extension pragma
//   PACK_LEAVE       enables the element-by-element tail store (see below)
//
// Kernel arguments, in order:
//   global_size_dim0, global_size_dim1  bounds for the two global ids
//   input0   buffer whose element 0 is read as `start`
//   input2   buffer whose element 0 is read as `step`
//            (presumably the third input of the Range op, hence the name --
//            confirm against the caller)
//   output   destination buffer
//   size     element count used by the PACK_LEAVE tail path only
//
// Per work-item behaviour: with x = get_global_id(0), the item computes the
// four values start + (4*x + k) * step for k = 0..3 and stores them at
// output[4*x .. 4*x+3].
const char* range_buf =
// Enable half-precision support when the build defines MNN_SUPPORT_FP16.
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
// GLOBAL_SIZE_2_DIMS expands to the two leading kernel parameters; it ends with
// a trailing comma so the kernel's own parameter list can follow it directly.
// The `""` below is adjacent-literal concatenation and adds no characters.
"#define GLOBAL_SIZE_2_DIMS ""__private const int global_size_dim0,__private const int global_size_dim1,\n"
// DEAL_NON_UNIFORM_DIM2 returns early from the kernel when either id is at or
// beyond its bound.
"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "" return; "" }\n"
"__kernel void range_buf(GLOBAL_SIZE_2_DIMS\n"
" __global const INPUT_TYPE* input0,\n"
" __global const INPUT_TYPE* input2,\n"
" __global OUTPUT_TYPE* output,\n"
" __private const int size\n"
" ) {\n"
// y is read only for the bounds check; all indexing below depends on x alone.
" const int x=get_global_id(0);\n"
" const int y=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(x,y);\n"
" \n"
// Each work-item covers four consecutive output elements starting at 4*x.
" int index=x << 2;\n"
" int4 index4=(int4)(index,index+1,index+2,index+3);\n"
// Only element 0 of each input buffer is read.
" INPUT_TYPE start=input0[0];\n"
" INPUT_TYPE step=input2[0];\n"
// value[k] = start + (index + k) * step, evaluated in the output vector type.
" OUTPUT_TYPE4 value=(OUTPUT_TYPE4)start+CONVERT_OUTPUT4(index4)*(OUTPUT_TYPE4)step;\n"
// With PACK_LEAVE: when the last lane (index+3) would fall at or past `size`,
// copy only the first size-index lanes one by one (nothing is written if that
// count is <= 0); otherwise fall through to the vector store.
"#ifdef PACK_LEAVE\n"
" if(index+3 >= size){\n"
" OUTPUT_TYPE* value_ptr=(OUTPUT_TYPE*)&value;\n"
" for(int i=0; i<size-index; ++i){\n"
" output[index+i]=value_ptr[i];\n"
" }\n"
" }else{\n"
"#endif\n"
// Without PACK_LEAVE this is the only store: all four lanes are always written
// and `size` is not consulted, so the output buffer must have room for them
// (presumably the caller pads or guarantees a multiple of 4 -- confirm).
" vstore4(value,0,output+index);\n"
"#ifdef PACK_LEAVE\n"
" }\n"
"#endif\n"
"}\n"
;
#endif // MNN_OPENCL_BUFFER_CLOSED
} // namespace MNN