// Mirror of https://github.com/alibaba/MNN.git
// C++ source, 40 lines, 1.5 KiB
#include "opencl_source_map.hpp"
|
||
|
namespace MNN {

#ifndef MNN_OPENCL_BUFFER_CLOSED
// OpenCL C source text of the `scale_buf` kernel, kept as a C string.
// The whole definition is compiled out when MNN_OPENCL_BUFFER_CLOSED is defined.
//
// What the kernel text does, per work-item (x, y):
//   * x = get_global_id(0), y = get_global_id(1); the work-item returns
//     immediately when x >= global_size_dim0 or y >= global_size_dim1
//     (DEAL_NON_UNIFORM_DIM2).
//   * y is split into out_c_idx = y % channelBlock and
//     out_b_idx = y / channelBlock.
//   * One 4-wide vector is loaded from `input` at element offset
//     ((out_b_idx + out_c_idx * batch) * inside + x) * 4, multiplied by the
//     out_c_idx-th 4-wide vector of `scale`, and, only when BIAS is defined,
//     the out_c_idx-th 4-wide vector of `bias` is added.
//   * The result is stored to `output` at the same element offset.
//
// FLOAT, COMPUTE_FLOAT4, CONVERT_COMPUTE_FLOAT4 and CONVERT_FLOAT4 are not
// defined inside this string; presumably they are supplied as build options
// when the program is compiled -- TODO confirm against the code that builds it.
//
// The `""` sequences inside some lines are empty adjacent string literals; the
// compiler concatenates them away, so they contribute nothing to the text.
const char* scale_buf =
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
"#define GLOBAL_SIZE_2_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,\n"
"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "" return; "" }\n"
"__kernel void scale_buf(GLOBAL_SIZE_2_DIMS\n"
" __global const FLOAT* input,\n"
" __global const FLOAT* scale,\n"
"#ifdef BIAS\n"
" __global const FLOAT* bias,\n"
"#endif\n"
" __global FLOAT* output,\n"
" __private const int channelBlock,\n"
" __private const int batch,\n"
" __private const int inside) {\n"
" const int x=get_global_id(0); // inside(width*height)\n"
" const int y=get_global_id(1); // channelBlock*batch\n"
" \n"
" DEAL_NON_UNIFORM_DIM2(x,y);\n"
" const int out_c_idx=y % channelBlock;\n"
" const int out_b_idx=y/channelBlock;\n"
" const int offset=((out_b_idx+out_c_idx*batch)*inside+x)*4;\n"
" COMPUTE_FLOAT4 in_value=CONVERT_COMPUTE_FLOAT4(vload4(0,input+offset));\n"
" COMPUTE_FLOAT4 scale_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,scale));\n"
" #ifdef BIAS\n"
" COMPUTE_FLOAT4 bias_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,bias));\n"
" COMPUTE_FLOAT4 out_value=in_value*scale_value+bias_value;\n"
" #else\n"
" COMPUTE_FLOAT4 out_value=in_value*scale_value;\n"
" #endif\n"
" vstore4(CONVERT_FLOAT4(out_value),0,output+offset);\n"
"}\n"
;
#endif

} // namespace MNN
|