2019-06-24 11:32:41 +08:00
|
|
|
// Make std430 the default memory layout for buffer blocks declared in this shader.
layout(std430) buffer;
|
|
|
|
// Destination 3D image at binding 0; the shader only writes to it (via imageStore in main).
// NOTE(review): FORMAT and PRECISION are not defined in this file — presumably macros
// injected by the host before compilation; confirm against the shader loader.
layout(FORMAT, binding=0) writeonly uniform PRECISION image3D uOutput;
|
|
|
|
// Source data: a read-only buffer block at binding 2, exposed as a
// runtime-sized array of vec4 and indexed linearly by main().
layout(binding = 2) readonly buffer kernel
{
    vec4 data[];
} uKernel;
|
|
|
|
|
|
|
|
// Integer stride used by main() when computing the source index into uKernel.data.
// NOTE(review): the name suggests kernel width * kernel height — confirm with the host code.
layout(location = 3) uniform int uFxFy;
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
// Integer factor applied to the y term of the source index in main().
// NOTE(review): presumably the number of input-channel groups of 4 — confirm with the host code.
layout(location = 4) uniform int uIc_4;
|
2019-06-24 11:32:41 +08:00
|
|
|
|
|
|
|
// Work-group size of 1x1x1: every work group consists of a single invocation.
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
|
|
|
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
//weight buffer : oc ic h w -> oc/4, ic/4, ky kx ic4 oc4
|
|
|
|
//index : ky kx, oc/4, ic/4
|
|
|
|
//weight image : ky kx, oc/4, ic/4*ic4 oc4
|
2019-06-24 11:32:41 +08:00
|
|
|
// Copies four consecutive vec4 values from uKernel.data into four
// x-adjacent texels of uOutput.
void main()
{
    // Scale the invocation id by 4 along x: each invocation owns texels
    // x .. x+3 of its (y, z) row.
    ivec3 texel = ivec3(gl_GlobalInvocationID) * ivec3(4, 1, 1);

    // Linear index (in vec4 units) of the first source element.
    int srcBase = texel.x * uFxFy
                + 4 * texel.y * uIc_4 * uFxFy
                + 4 * texel.z;

    for (int i = 0; i < 4; ++i)
    {
        vec4 value = uKernel.data[srcBase + i];
        imageStore(uOutput, ivec3(texel.x + i, texel.y, texel.z), value);
    }
}
|