mirror of https://github.com/alibaba/MNN.git
144 lines
3.8 KiB
C++
144 lines
3.8 KiB
C++
#include "opencl_source_map.hpp"
|
|
namespace MNN {
|
|
#ifndef MNN_OPENCL_BUFFER_CLOSED
|
|
const char* binary_buf =
|
|
"#ifdef MNN_SUPPORT_FP16\n"
|
|
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
|
|
"#endif\n"
|
|
"#define PI 3.141592653589f\n"
|
|
"__kernel void binary_buf(__private int global_dim0,__private int global_dim1,\n"
|
|
" __global INPUT_TYPE* input0,__global INPUT_TYPE* input1,__global OUTPUT_TYPE* output,\n"
|
|
" __private const int size,\n"
|
|
" __private const int activationType) {\n"
|
|
" int2 pos=(int2)(get_global_id(0),get_global_id(1));//NCHW,1\n"
|
|
" \n"
|
|
" if (pos.x<global_dim0 && pos.y<global_dim1) {\n"
|
|
" int offset=pos.x << 2;\n"
|
|
"#ifdef PACK_LEAVE\n"
|
|
" if(offset+3 >= size){\n"
|
|
" int remain=size-offset;\n"
|
|
" #ifdef INT_COMPUTE_MOD\n"
|
|
" int4 in0,in1;\n"
|
|
" int* in0_ptr=(int*)&in0;\n"
|
|
" int* in1_ptr=(int*)&in1;\n"
|
|
" \n"
|
|
" for(int i=0; i<remain; ++i){\n"
|
|
" #ifdef A_SINGLE\n"
|
|
" in0_ptr[i]=(int)input0[0];\n"
|
|
" #else\n"
|
|
" in0_ptr[i]=(int)input0[offset+i];\n"
|
|
" #endif\n"
|
|
" \n"
|
|
" #ifdef B_SINGLE\n"
|
|
" in1_ptr[i]=(int)input1[0];\n"
|
|
" #else\n"
|
|
" in1_ptr[i]=(int)input1[offset+i];\n"
|
|
" #endif\n"
|
|
" }\n"
|
|
" int4 out=in0 % in1;\n"
|
|
" out=((out<(int4)0 && in1>(int4)0) || (out>(int4)0 && in1<(int4)0)) ? out+in1 : out;\n"
|
|
" if(activationType == 1) {\n"
|
|
" out=out>0 ? out : 0;\n"
|
|
" }\n"
|
|
" int* out_ptr=(int*)&out;\n"
|
|
" for(int i=0; i<remain; ++i){\n"
|
|
" output[offset+i]=(OUTPUT_TYPE)out_ptr[i];\n"
|
|
" }\n"
|
|
" #else\n"
|
|
" float4 in0,in1;\n"
|
|
" float* in0_ptr=(float*)&in0;\n"
|
|
" float* in1_ptr=(float*)&in1;\n"
|
|
" \n"
|
|
" for(int i=0; i<remain; ++i){\n"
|
|
" #ifdef A_SINGLE\n"
|
|
" in0_ptr[i]=(float)input0[0];\n"
|
|
" #else\n"
|
|
" in0_ptr[i]=(float)input0[offset+i];\n"
|
|
" #endif\n"
|
|
" \n"
|
|
" #ifdef B_SINGLE\n"
|
|
" in1_ptr[i]=(float)input1[0];\n"
|
|
" #else\n"
|
|
" in1_ptr[i]=(float)input1[offset+i];\n"
|
|
" #endif\n"
|
|
" }\n"
|
|
" float4 out=OPERATOR;\n"
|
|
" if(activationType == 1) {\n"
|
|
" out=fmax(out,(float4)0);\n"
|
|
" }\n"
|
|
" float* out_ptr=(float*)&out;\n"
|
|
" for(int i=0; i<remain; ++i){\n"
|
|
" output[offset+i]=(OUTPUT_TYPE)out_ptr[i];\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
" }else {\n"
|
|
"#endif\n"
|
|
" #ifdef INT_COMPUTE_MOD\n"
|
|
" #ifdef A_SINGLE\n"
|
|
" int data0=input0[0];\n"
|
|
" int4 in0=(int4)(data0,data0,data0,data0);\n"
|
|
" #else\n"
|
|
" int4 in0=convert_int4(vload4(0,input0+offset));\n"
|
|
" #endif\n"
|
|
" \n"
|
|
" #ifdef B_SINGLE\n"
|
|
" int data1=input1[0];\n"
|
|
" int4 in1=(int4)(data1,data1,data1,data1);\n"
|
|
" #else\n"
|
|
" int4 in1=convert_int4(vload4(0,input1+offset));\n"
|
|
" #endif\n"
|
|
" \n"
|
|
" int4 out=in0 % in1;\n"
|
|
" out=((out<(int4)0 && in1>(int4)0) || (out>(int4)0 && in1<(int4)0)) ? out+in1 : out;\n"
|
|
" \n"
|
|
" if(activationType == 1) {\n"
|
|
" out=out>0 ? out : 1;\n"
|
|
" }\n"
|
|
" vstore4(CONVERT_OUTPUT4(out),0,output+offset);\n"
|
|
" #else\n"
|
|
" #ifdef A_SINGLE\n"
|
|
" float data0=input0[0];\n"
|
|
" float4 in0=(float4)(data0,data0,data0,data0);\n"
|
|
" #else\n"
|
|
" float4 in0=convert_float4(vload4(0,input0+offset));\n"
|
|
" #endif\n"
|
|
" \n"
|
|
" #ifdef B_SINGLE\n"
|
|
" float data1=input1[0];\n"
|
|
" float4 in1=(float4)(data1,data1,data1,data1);\n"
|
|
" #else\n"
|
|
" float4 in1=convert_float4(vload4(0,input1+offset));\n"
|
|
" #endif\n"
|
|
" \n"
|
|
" float4 out=OPERATOR;\n"
|
|
" \n"
|
|
" if(activationType == 1) {\n"
|
|
" out=fmax(out,(float4)0);\n"
|
|
" }\n"
|
|
" vstore4(CONVERT_OUTPUT4(out),0,output+offset);\n"
|
|
" #endif\n"
|
|
"#ifdef PACK_LEAVE\n"
|
|
" }\n"
|
|
"#endif\n"
|
|
" }\n"
|
|
"}\n"
|
|
"__kernel void prelu_buf(__private int global_dim0,__private int global_dim1,\n"
|
|
" __global INPUT_TYPE* input0,__global INPUT_TYPE* input1,__global OUTPUT_TYPE* output,\n"
|
|
" __private const int4 shape\n"
|
|
" ) {\n"
|
|
" int2 pos=(int2)(get_global_id(0),get_global_id(1));//NC4,HW\n"
|
|
" \n"
|
|
" if (pos.x<global_dim0 && pos.y<global_dim1) {\n"
|
|
" int b=pos.x/shape.w;\n"
|
|
" int c=pos.x % shape.w;\n"
|
|
" int offset=(b+c*shape.x)*(shape.y*shape.z)+pos.y;\n"
|
|
" float4 in0=convert_float4(vload4(offset,input0));\n"
|
|
" float4 in1=convert_float4(vload4(pos.x % shape.w,input1));\n"
|
|
" float4 out=OPERATOR;\n"
|
|
" vstore4(CONVERT_OUTPUT4(out),offset,output);\n"
|
|
" }\n"
|
|
"}\n"
|
|
;
|
|
#endif
|
|
}
|