// Mirror of https://github.com/alibaba/MNN.git
// Code-viewer metadata for the original file: 31 lines, 1.5 KiB, C++
#include "opencl_source_map.hpp"
|
||
|
namespace MNN {

/*
 * OpenCL C source text for the image-based `unary` kernel.
 *
 * The value is one string built from adjacent literals, one literal per line
 * of kernel source. Comments placed between the literals are C++ comments and
 * are NOT part of the kernel text.
 *
 * The kernel text relies on names it does not define itself: RI_DATA, WI_DATA,
 * OPERATOR, OUTPUT_TYPE_I4 and CONVERT_OUTPUT_I4 (and, optionally,
 * MNN_SUPPORT_FP16). They must be provided when the program is built --
 * presumably as compiler -D options; confirm against the code that compiles
 * this source.
 *
 * What the kernel does, per work-item:
 *   - global id 0/1/2 are read as channel_block_idx / w / hb;
 *   - the work-item returns early if any id is outside
 *     global_size_dim0/1/2;
 *   - it reads one texel from `input` at (channel_block_idx * global_size_dim1
 *     + w, hb), converts it to float4 as `in`, evaluates OPERATOR, converts
 *     the result with CONVERT_OUTPUT_I4 and writes it to the same coordinate
 *     of `output`.
 */
const char* unary =
// Enable the half-precision extension only when MNN_SUPPORT_FP16 is defined.
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
// Helper macros: the three leading size parameters of the kernel signature and
// the out-of-range early return. The "" seams are empty-literal joins that
// contribute no characters, so each macro body stays on a single line.
"#define GLOBAL_SIZE_3_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,__private const int global_size_dim2,\n"
"#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1 || input3 >= global_size_dim2) { "" return; "" }\n"
// gelu(in) = 0.5 * in * (1 + dst), where value = 0.79788458 * (in + 0.044715 * in^3)
// and dst is a rational function of value, saturated to +1 for value > 5 and
// to -1 for value <= -5 (presumably a tanh approximation -- confirm).
// The kernel body below does not call gelu directly; presumably it exists so
// that an OPERATOR expansion can use it -- confirm against the caller.
"inline float4 gelu(float4 in){\n"
" float4 value=0.79788458f*(0.044715f*in*in*in+in);\n"
" float4 x2=value*value;\n"
" float4 dst=value>(float4)5.0f ? (float4)1.0f : (value <= -(float4)5.0f ? -(float4)1.0f :\n"
" (value*(135135.0f+x2*(17325.0f+x2*(378.0f+x2))))/(135135.0f+x2*(62370.0f+x2*(3150.0f+x2*28.0f))));\n"
" return (1.0f+dst)*in*0.5f;\n"
"}\n"
// Sampler used for every read: unnormalized coordinates, clamped addressing,
// nearest filtering.
"__constant sampler_t SAMPLER=CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
// Kernel entry point.
"__kernel void unary(GLOBAL_SIZE_3_DIMS __read_only image2d_t input,__write_only image2d_t output) {\n"
" const int channel_block_idx=get_global_id(0);\n"
" const int w=get_global_id(1);\n"
" const int hb=get_global_id(2);\n"
" DEAL_NON_UNIFORM_DIM3(channel_block_idx,w,hb);\n"
" const int width=global_size_dim1;\n"
" const int pos=mad24(channel_block_idx,width,w);\n"
" float4 in=convert_float4(RI_DATA(input,SAMPLER,(int2)(pos,hb)));\n"
" OUTPUT_TYPE_I4 out=CONVERT_OUTPUT_I4(OPERATOR);\n"
" \n"
" WI_DATA(output,(int2)(pos,hb),out);\n"
"}\n"
;

} // namespace MNN
|