mirror of https://github.com/alibaba/MNN.git
123 lines
6.7 KiB
C++
123 lines
6.7 KiB
C++
|
#include "opencl_source_map.hpp"
|
||
|
namespace MNN {
|
||
|
#ifndef MNN_OPENCL_BUFFER_CLOSED
|
||
|
const char* interp_buf =
|
||
|
"#ifdef MNN_SUPPORT_FP16\n"
|
||
|
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
|
||
|
"#endif\n"
|
||
|
"#define GLOBAL_SIZE_3_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,__private const int global_size_dim2,\n"
|
||
|
"#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1 || input3 >= global_size_dim2) { "" return; "" }\n"
|
||
|
"__kernel void nearest_buf(GLOBAL_SIZE_3_DIMS __global const FLOAT* input,\n"
|
||
|
" __global FLOAT* output,\n"
|
||
|
" __private const float height_scale,\n"
|
||
|
" __private const float width_scale,\n"
|
||
|
" __private const float height_offset,\n"
|
||
|
" __private const float width_offset,\n"
|
||
|
" __private const int input_height,\n"
|
||
|
" __private const int input_width,\n"
|
||
|
" __private const int out_height,\n"
|
||
|
" __private const int out_width,\n"
|
||
|
" __private const int batch) {\n"
|
||
|
" const int output_channel_block_idx=get_global_id(0);\n"
|
||
|
" const int output_width_block_idx=get_global_id(1);\n"
|
||
|
" const int output_batch_height_block_idx=get_global_id(2);\n"
|
||
|
" DEAL_NON_UNIFORM_DIM3(output_channel_block_idx,output_width_block_idx,output_batch_height_block_idx);\n"
|
||
|
" const int output_batch_idx=output_batch_height_block_idx/out_height;\n"
|
||
|
" const int output_height_idx=output_batch_height_block_idx % out_height;\n"
|
||
|
" const float in_h_idx=output_height_idx*height_scale+height_offset;\n"
|
||
|
" const float in_w_idx=output_width_block_idx*width_scale+width_offset;\n"
|
||
|
"#ifdef USE_ROUND\n"
|
||
|
" const int in_h_index=min(max(0,(int)floor(in_h_idx+0.499f)),input_height-1);\n"
|
||
|
" const int in_w_index=min(max(0,(int)floor(in_w_idx+0.499f)),input_width-1);\n"
|
||
|
"#else\n"
|
||
|
" const int in_h_index=min(max(0,(int)floor(in_h_idx)),input_height-1);\n"
|
||
|
" const int in_w_index=min(max(0,(int)floor(in_w_idx)),input_width-1);\n"
|
||
|
"#endif\n"
|
||
|
" const int inp_offset=((output_batch_idx+output_channel_block_idx*batch)*input_height+in_h_index)*input_width+in_w_index;\n"
|
||
|
" FLOAT4 value=vload4(inp_offset,input);\n"
|
||
|
" const int out_offset=((output_batch_idx+output_channel_block_idx*batch)*out_height+output_height_idx)*out_width+output_width_block_idx;\n"
|
||
|
" vstore4(value,out_offset,output);\n"
|
||
|
"}\n"
|
||
|
"__kernel void bilinear_buf(GLOBAL_SIZE_3_DIMS __global const FLOAT* input,\n"
|
||
|
" __global FLOAT* output,\n"
|
||
|
" __private const float height_scale,\n"
|
||
|
" __private const float width_scale,\n"
|
||
|
" __private const float height_offset,\n"
|
||
|
" __private const float width_offset,\n"
|
||
|
" __private const int input_height,\n"
|
||
|
" __private const int input_width,\n"
|
||
|
" __private const int out_height,\n"
|
||
|
" __private const int out_width,\n"
|
||
|
" __private const int batch) {\n"
|
||
|
" const int output_channel_block_idx=get_global_id(0);\n"
|
||
|
" const int output_width_block_idx=get_global_id(1);\n"
|
||
|
" const int output_batch_height_block_idx=get_global_id(2);\n"
|
||
|
" DEAL_NON_UNIFORM_DIM3(output_channel_block_idx,output_width_block_idx,output_batch_height_block_idx);\n"
|
||
|
" \n"
|
||
|
" const int output_batch_idx=output_batch_height_block_idx/out_height;\n"
|
||
|
" const int output_height_idx=output_batch_height_block_idx % out_height;\n"
|
||
|
" const float in_h_idx=output_height_idx*height_scale+height_offset;\n"
|
||
|
" const float in_w_idx=output_width_block_idx*width_scale+width_offset;\n"
|
||
|
" const int in_h0_index=min(max(0,(int)floor(in_h_idx)),input_height-1);\n"
|
||
|
" const int in_w0_index=min(max(0,(int)floor(in_w_idx)),input_width-1);\n"
|
||
|
" const int in_h1_index=min(max(0,(int)floor(in_h_idx)+1),input_height-1);\n"
|
||
|
" const int in_w1_index=min(max(0,(int)floor(in_w_idx)+1),input_width-1);\n"
|
||
|
" \n"
|
||
|
" float factor_w=(in_w_idx-(int)floor(in_w_idx));\n"
|
||
|
" float factor_h=(in_h_idx-(int)floor(in_h_idx));\n"
|
||
|
" \n"
|
||
|
" const int inp_offset_base=(output_batch_idx+output_channel_block_idx*batch)*input_height;\n"
|
||
|
" const int inp_offset_00=(inp_offset_base+in_h0_index)*input_width+in_w0_index;\n"
|
||
|
" const int inp_offset_01=(inp_offset_base+in_h0_index)*input_width+in_w1_index;\n"
|
||
|
" const int inp_offset_10=(inp_offset_base+in_h1_index)*input_width+in_w0_index;\n"
|
||
|
" const int inp_offset_11=(inp_offset_base+in_h1_index)*input_width+in_w1_index;\n"
|
||
|
" FLOAT4 value_00=vload4(inp_offset_00,input);\n"
|
||
|
" FLOAT4 value_01=vload4(inp_offset_01,input);\n"
|
||
|
" FLOAT4 value_10=vload4(inp_offset_10,input);\n"
|
||
|
" FLOAT4 value_11=vload4(inp_offset_11,input);\n"
|
||
|
" FLOAT4 value=CONVERT_FLOAT4((float4)((1.0-factor_w)*(1.0-factor_h))*convert_float4(value_00)+(float4)(factor_w*(1.0-factor_h))*convert_float4(value_01)+(float4)((1.0-factor_w)*factor_h)*convert_float4(value_10)+(float4)(factor_w*factor_h)*convert_float4(value_11));\n"
|
||
|
" \n"
|
||
|
" const int out_offset=((output_batch_idx+output_channel_block_idx*batch)*out_height+output_height_idx)*out_width+output_width_block_idx;\n"
|
||
|
" \n"
|
||
|
" vstore4(value,out_offset,output);\n"
|
||
|
"}\n"
|
||
|
"__kernel void nearest3D_buf(GLOBAL_SIZE_3_DIMS __global const FLOAT* input,\n"
|
||
|
" __global FLOAT* output,\n"
|
||
|
" __private const float depth_scale,\n"
|
||
|
" __private const float height_scale,\n"
|
||
|
" __private const float width_scale,\n"
|
||
|
" __private const float depth_offset,\n"
|
||
|
" __private const float height_offset,\n"
|
||
|
" __private const float width_offset,\n"
|
||
|
" __private const int input_depth,\n"
|
||
|
" __private const int input_height,\n"
|
||
|
" __private const int input_width,\n"
|
||
|
" __private const int out_depth,\n"
|
||
|
" __private const int out_height,\n"
|
||
|
" __private const int out_width,\n"
|
||
|
" __private const int batch) {\n"
|
||
|
" const int output_channel_block_idx=get_global_id(0);\n"
|
||
|
" const int output_height_width_block_idx=get_global_id(1);\n"
|
||
|
" const int output_batch_depth_block_idx=get_global_id(2);\n"
|
||
|
" DEAL_NON_UNIFORM_DIM3(output_channel_block_idx,output_height_width_block_idx,output_batch_depth_block_idx);\n"
|
||
|
" const int output_batch_idx=output_batch_depth_block_idx/out_depth;\n"
|
||
|
" const int output_depth_idx=output_batch_depth_block_idx % out_depth;\n"
|
||
|
" const int output_height_idx=output_height_width_block_idx/out_height;\n"
|
||
|
" const int output_width_idx=output_height_width_block_idx % out_height;\n"
|
||
|
" const float in_d_idx=output_depth_idx*depth_scale+depth_offset;\n"
|
||
|
" const float in_h_idx=output_height_idx*height_scale+height_offset;\n"
|
||
|
" const float in_w_idx=output_width_idx*width_scale+width_offset;\n"
|
||
|
" const int in_d_index=min(max(0,(int)floor(in_d_idx)),input_depth-1);\n"
|
||
|
" const int in_h_index=min(max(0,(int)floor(in_h_idx)),input_height-1);\n"
|
||
|
" const int in_w_index=min(max(0,(int)floor(in_w_idx)),input_width-1);\n"
|
||
|
" const int inp_offset=(((output_batch_idx+output_channel_block_idx*batch)\n"
|
||
|
"*input_depth+in_d_index)*input_height+in_h_index)*input_width+in_w_index;\n"
|
||
|
" const int out_offset=(((output_batch_idx+output_channel_block_idx*batch)\n"
|
||
|
"*out_depth+output_depth_idx)*out_height+output_height_idx)*out_width+output_width_idx;\n"
|
||
|
" FLOAT4 value=vload4(inp_offset,input);\n"
|
||
|
" vstore4(value,out_offset,output);\n"
|
||
|
"}\n"
|
||
|
;
|
||
|
#endif
|
||
|
}
|