MNN/source/backend/opencl/execution/cl/scale_buf_mnn_cl.cpp

#include "opencl_source_map.hpp" 
namespace MNN {
#ifndef MNN_OPENCL_BUFFER_CLOSED
// OpenCL C source text of the `scale_buf` kernel, stored as one C string
// assembled from adjacent literals.
//
// The kernel multiplies each loaded 4-element input vector by a per-channel
// block scale vector and, when BIAS is defined, adds a per-channel-block bias
// vector, then stores the result at the same offset in the output buffer.
//
// NOTE(review): FLOAT, COMPUTE_FLOAT4, CONVERT_COMPUTE_FLOAT4 and
// CONVERT_FLOAT4 are not defined in this text; presumably they are supplied
// as build options when the program is compiled -- confirm against the host
// code that builds this kernel.
const char* scale_buf =
    // Enable the half-precision extension only when requested.
    "#ifdef MNN_SUPPORT_FP16\n"
    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
    "#endif\n"

    // Helper macros: the two global-size kernel arguments and the
    // out-of-range early exit that uses them.
    "#define GLOBAL_SIZE_2_DIMS "
    " __private const int global_size_dim0,"
    "__private const int global_size_dim1,\n"
    "#define DEAL_NON_UNIFORM_DIM2(input1, input2) "
    " if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "
    " return; "
    " }\n"

    // Kernel signature; the bias pointer exists only when BIAS is defined.
    "__kernel void scale_buf(GLOBAL_SIZE_2_DIMS\n"
    " __global const FLOAT* input,\n"
    " __global const FLOAT* scale,\n"
    "#ifdef BIAS\n"
    " __global const FLOAT* bias,\n"
    "#endif\n"
    " __global FLOAT* output,\n"
    " __private const int channelBlock,\n"
    " __private const int batch,\n"
    " __private const int inside) {\n"

    // Work-item coordinates, then bail out for ids beyond the global sizes.
    " const int x=get_global_id(0); // inside(width*height)\n"
    " const int y=get_global_id(1); // channelBlock*batch\n"
    " \n"
    " DEAL_NON_UNIFORM_DIM2(x,y);\n"

    // Split y into channel-block and batch indices and derive the element
    // offset; the trailing *4 matches the 4-wide vload4/vstore4 accesses.
    " const int out_c_idx=y % channelBlock;\n"
    " const int out_b_idx=y/channelBlock;\n"
    " const int offset=((out_b_idx+out_c_idx*batch)*inside+x)*4;\n"

    // Load input and scale (scale is indexed by channel block only).
    " COMPUTE_FLOAT4 in_value=CONVERT_COMPUTE_FLOAT4(vload4(0,input+offset));\n"
    " COMPUTE_FLOAT4 scale_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,scale));\n"

    // out = in * scale (+ bias when BIAS is defined).
    " #ifdef BIAS\n"
    " COMPUTE_FLOAT4 bias_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,bias));\n"
    " COMPUTE_FLOAT4 out_value=in_value*scale_value+bias_value;\n"
    " #else\n"
    " COMPUTE_FLOAT4 out_value=in_value*scale_value;\n"
    " #endif\n"

    // Write the result back at the same offset the input was read from.
    " vstore4(CONVERT_FLOAT4(out_value),0,output+offset);\n"
    "}\n";
#endif  // MNN_OPENCL_BUFFER_CLOSED
}  // namespace MNN
MNN:Sync: Sync Internal 3.1.3 2025-04-28 11:38:44 +08:00			`#include "opencl_source_map.hpp"`
			`namespace MNN {`
			`#ifndef MNN_OPENCL_BUFFER_CLOSED`
			`const char* scale_buf =`
			`"#ifdef MNN_SUPPORT_FP16\n"`
			`"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"`
			`"#endif\n"`
			`"#define GLOBAL_SIZE_2_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,\n"`
			`"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 \|\| input2 >= global_size_dim1) { "" return; "" }\n"`
			`"__kernel void scale_buf(GLOBAL_SIZE_2_DIMS\n"`
			`" __global const FLOAT* input,\n"`
			`" __global const FLOAT* scale,\n"`
			`"#ifdef BIAS\n"`
			`" __global const FLOAT* bias,\n"`
			`"#endif\n"`
			`" __global FLOAT* output,\n"`
			`" __private const int channelBlock,\n"`
			`" __private const int batch,\n"`
			`" __private const int inside) {\n"`
			`" const int x=get_global_id(0); // inside(width*height)\n"`
			`" const int y=get_global_id(1); // channelBlock*batch\n"`
			`" \n"`
			`" DEAL_NON_UNIFORM_DIM2(x,y);\n"`
			`" const int out_c_idx=y % channelBlock;\n"`
			`" const int out_b_idx=y/channelBlock;\n"`
			`" const int offset=((out_b_idx+out_c_idx*batch)*inside+x)*4;\n"`
			`" COMPUTE_FLOAT4 in_value=CONVERT_COMPUTE_FLOAT4(vload4(0,input+offset));\n"`
			`" COMPUTE_FLOAT4 scale_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,scale));\n"`
			`" #ifdef BIAS\n"`
			`" COMPUTE_FLOAT4 bias_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,bias));\n"`
			`" COMPUTE_FLOAT4 out_value=in_value*scale_value+bias_value;\n"`
			`" #else\n"`
			`" COMPUTE_FLOAT4 out_value=in_value*scale_value;\n"`
			`" #endif\n"`
			`" vstore4(CONVERT_FLOAT4(out_value),0,output+offset);\n"`
			`"}\n"`
			`;`
			`#endif`
			`}`