mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			461 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			461 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  CPUImageProcess.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2021/10/27.
 | |
| //  Copyright © 2018 Alibaba. All rights reserved.
 | |
| //
 | |
| 
 | |
| #include "backend/cpu/CPUImageProcess.hpp"
 | |
| #include "compute/ImageProcessFunction.hpp"
 | |
| #include <string.h>
 | |
| #include <mutex>
 | |
| #include "core/Macro.h"
 | |
| #ifdef MNN_USE_NEON
 | |
| #include <arm_neon.h>
 | |
| #endif
 | |
| #ifdef MNN_USE_SSE
 | |
| #if defined(_MSC_VER)
 | |
| #include <intrin.h>
 | |
| #else
 | |
| #include <x86intrin.h>
 | |
| #endif
 | |
| #endif
 | |
| #include <map>
 | |
| #include <utility>
 | |
| 
 | |
| namespace MNN {
 | |
// Maximum number of pixels processed per tile of an output row.
#define CACHE_SIZE 256
// Returns `func` from the enclosing function when (source, dest) is exactly the (src, dst)
// format pair. Requires `source` and `dest` to be in scope where the macro is expanded.
// Note: expands to a bare `if ... return` without a trailing semicolon.
#define CHECKFORMAT(src, dst, func) \
    if (source == ImageFormatType_##src && dest == ImageFormatType_##dst) return func
// Like CHECKFORMAT, but returns the `func` member of `coreFunctions` when that pointer is
// non-null, and the free function `func` otherwise. The expansion already ends with `;`.
#define CHECKFORMAT_CORE(src, dst, func) \
    if (source == ImageFormatType_##src && dest == ImageFormatType_##dst) \
        return coreFunctions ? coreFunctions->func : func;
 | |
| 
 | |
| BLITTER CPUImageProcess::choose(ImageFormatType source, ImageFormatType dest) {
 | |
|     // YUV only different in sampler
 | |
|     if (source == ImageFormatType_YUV_NV12) {
 | |
|         source = ImageFormatType_YUV_NV21;
 | |
|     }
 | |
|     if (source == ImageFormatType_YUV_I420) {
 | |
|         source = ImageFormatType_YUV_NV21;
 | |
|     }
 | |
|     CHECKFORMAT(RGBA, RGBA, MNNCopyC4);
 | |
|     CHECKFORMAT_CORE(RGBA, BGRA, MNNRGBAToBGRA);
 | |
|     CHECKFORMAT(RGBA, BGR, MNNRGBAToBGR);
 | |
|     CHECKFORMAT(RGBA, RGB, MNNBGRAToBGR);
 | |
|     CHECKFORMAT(RGBA, GRAY, MNNRGBAToGRAY);
 | |
| 
 | |
|     CHECKFORMAT_CORE(BGRA, RGBA, MNNRGBAToBGRA);
 | |
|     CHECKFORMAT(BGRA, BGRA, MNNCopyC4);
 | |
|     CHECKFORMAT(BGRA, BGR, MNNBGRAToBGR);
 | |
|     CHECKFORMAT(BGRA, RGB, MNNRGBAToBGR);
 | |
|     CHECKFORMAT(BGRA, GRAY, MNNBGRAToGRAY);
 | |
| 
 | |
|     CHECKFORMAT(RGB, RGB, MNNCopyC3);
 | |
|     CHECKFORMAT(RGB, BGR, MNNRGBToBGR);
 | |
|     CHECKFORMAT(RGB, GRAY, MNNRGBToGRAY);
 | |
|     CHECKFORMAT(RGB, RGBA, MNNC3ToC4);
 | |
|     CHECKFORMAT(RGB, YCrCb, MNNRGBToCrCb);
 | |
|     CHECKFORMAT(RGB, YUV, MNNRGBToYUV);
 | |
|     CHECKFORMAT(RGB, XYZ, MNNRGBToXYZ);
 | |
|     CHECKFORMAT(RGB, HSV, MNNRGBToHSV);
 | |
|     CHECKFORMAT(RGB, BGR555, MNNRGBToBGR555);
 | |
|     CHECKFORMAT(RGB, BGR565, MNNRGBToBGR565);
 | |
|     CHECKFORMAT(RGB, HSV_FULL, MNNRGBToHSV_FULL);
 | |
| 
 | |
|     CHECKFORMAT(BGR, BGR, MNNCopyC3);
 | |
|     CHECKFORMAT(BGR, RGB, MNNRGBToBGR);
 | |
|     CHECKFORMAT(BGR, GRAY, MNNBRGToGRAY);
 | |
|     CHECKFORMAT(BGR, BGRA, MNNC3ToC4);
 | |
|     CHECKFORMAT(BGR, YCrCb, MNNBGRToCrCb);
 | |
|     CHECKFORMAT(BGR, YUV, MNNBGRToYUV);
 | |
|     CHECKFORMAT(BGR, XYZ, MNNBGRToXYZ);
 | |
|     CHECKFORMAT(BGR, HSV, MNNBGRToHSV);
 | |
|     CHECKFORMAT(BGR, BGR555, MNNBGRToBGR555);
 | |
|     CHECKFORMAT(BGR, BGR565, MNNBGRToBGR565);
 | |
|     CHECKFORMAT(BGR, HSV_FULL, MNNBGRToHSV_FULL);
 | |
| 
 | |
|     CHECKFORMAT(GRAY, RGBA, MNNGRAYToC4);
 | |
|     CHECKFORMAT(GRAY, BGRA, MNNGRAYToC4);
 | |
|     CHECKFORMAT(GRAY, BGR, MNNGRAYToC3);
 | |
|     CHECKFORMAT(GRAY, RGB, MNNGRAYToC3);
 | |
|     CHECKFORMAT(GRAY, GRAY, MNNCopyC1);
 | |
| 
 | |
|     CHECKFORMAT(YUV_NV21, GRAY, MNNCopyC1);
 | |
|     CHECKFORMAT_CORE(YUV_NV21, RGB, MNNNV21ToRGB);
 | |
|     CHECKFORMAT_CORE(YUV_NV21, BGR, MNNNV21ToBGR);
 | |
|     CHECKFORMAT_CORE(YUV_NV21, RGBA, MNNNV21ToRGBA);
 | |
|     CHECKFORMAT_CORE(YUV_NV21, BGRA, MNNNV21ToBGRA);
 | |
|     return nullptr;
 | |
| }
 | |
| 
 | |
| BLITTER CPUImageProcess::choose(int channelByteSize) {
 | |
|     switch (channelByteSize) {
 | |
|         case 4:
 | |
|             return MNNC4blitH;
 | |
|         case 3:
 | |
|             return MNNC3blitH;
 | |
|         case 1:
 | |
|             return MNNC1blitH;
 | |
|         default:
 | |
|             return nullptr;
 | |
|     }
 | |
| }
 | |
| 
 | |
| SAMPLER CPUImageProcess::choose(ImageFormatType format, FilterType type, bool identity) {
 | |
|     if (identity) {
 | |
|         switch (format) {
 | |
|             case ImageFormatType_RGBA:
 | |
|             case ImageFormatType_BGRA:
 | |
|                 return MNNSamplerC4Copy;
 | |
|             case ImageFormatType_GRAY:
 | |
|                 return MNNSamplerC1Copy;
 | |
| 
 | |
|             case ImageFormatType_RGB:
 | |
|             case ImageFormatType_BGR:
 | |
|                 return MNNSamplerC3Copy;
 | |
|             case ImageFormatType_YUV_NV21:
 | |
|                 return MNNSamplerNV21Copy;
 | |
|             case ImageFormatType_YUV_NV12:
 | |
|                 return MNNSamplerNV12Copy;
 | |
|             case ImageFormatType_YUV_I420:
 | |
|                 return MNNSamplerI420Copy;
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
|     if (FilterType_BILINEAR == type) {
 | |
|         switch (format) {
 | |
|             case ImageFormatType_RGBA:
 | |
|             case ImageFormatType_BGRA:
 | |
|                 return MNNSamplerC4Bilinear;
 | |
|             case ImageFormatType_GRAY:
 | |
|                 return MNNSamplerC1Bilinear;
 | |
| 
 | |
|             case ImageFormatType_RGB:
 | |
|             case ImageFormatType_BGR:
 | |
|                 return MNNSamplerC3Bilinear;
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // Nearest
 | |
|     switch (format) {
 | |
|         case ImageFormatType_RGBA:
 | |
|         case ImageFormatType_BGRA:
 | |
|             return MNNSamplerC4Nearest;
 | |
|         case ImageFormatType_GRAY:
 | |
|             return MNNSamplerC1Nearest;
 | |
| 
 | |
|         case ImageFormatType_RGB:
 | |
|         case ImageFormatType_BGR:
 | |
|             return MNNSamplerC3Nearest;
 | |
|         case ImageFormatType_YUV_NV12:
 | |
|             return MNNSamplerNV12Nearest;
 | |
|         case ImageFormatType_YUV_NV21:
 | |
|             return MNNSamplerNV21Nearest;
 | |
|         case ImageFormatType_YUV_I420:
 | |
|             return MNNSamplerI420Nearest;
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
|     MNN_PRINT("Don't support sampler for format:%d, type:%d", format, type);
 | |
|     return nullptr;
 | |
| }
 | |
| 
 | |
| BLIT_FLOAT CPUImageProcess::choose(ImageFormatType format, int dstBpp) {
 | |
|     if (4 == dstBpp) {
 | |
|         switch (format) {
 | |
|             case ImageFormatType_GRAY:
 | |
|                 return MNNC1ToFloatRGBA;
 | |
|             case ImageFormatType_RGBA:
 | |
|             case ImageFormatType_BGRA:
 | |
|                 return MNNC4ToFloatC4;
 | |
|             case ImageFormatType_RGB:
 | |
|             case ImageFormatType_BGR:
 | |
|                 return MNNC3ToFloatRGBA;
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
|     switch (format) {
 | |
|         case ImageFormatType_GRAY:
 | |
|             return MNNC1ToFloatC1;
 | |
|         case ImageFormatType_RGBA:
 | |
|         case ImageFormatType_BGRA:
 | |
|             return MNNC4ToFloatC4;
 | |
|         case ImageFormatType_RGB:
 | |
|         case ImageFormatType_BGR:
 | |
|             return MNNC3ToFloatC3;
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
|     return nullptr;
 | |
| }
 | |
| 
 | |
// Outcode bits used by the line clipping below: each bit marks one side of the image
// rectangle that a point lies beyond. They are constants, so declare them constexpr
// rather than as mutable file-scope variables.
static constexpr int LEFT   = 1 << 0;
static constexpr int RIGHT  = 1 << 1;
static constexpr int TOP    = 1 << 2;
static constexpr int BOTTOM = 1 << 3;
 | |
| inline static uint8_t _encode(const CV::Point& p, int iw, int ih) {
 | |
|     uint8_t mask = 0;
 | |
|     if (p.fX < 0) {
 | |
|         mask |= LEFT;
 | |
|     }
 | |
|     if (p.fX > iw - 1) {
 | |
|         mask |= RIGHT;
 | |
|     }
 | |
|     if (p.fY < 0) {
 | |
|         mask |= TOP;
 | |
|     }
 | |
|     if (p.fY > ih - 1) {
 | |
|         mask |= BOTTOM;
 | |
|     }
 | |
|     return mask;
 | |
| }
 | |
| static std::pair<int, int> _computeClip(CV::Point* points, int iw, int ih, const CV::Matrix& invert, int xStart, int count) {
 | |
|     auto code1 = _encode(points[0], iw, ih);
 | |
|     auto code2 = _encode(points[1], iw, ih);
 | |
|     int sta    = 0;
 | |
|     int end    = count;
 | |
| 
 | |
|     float x1     = points[0].fX;
 | |
|     float x2     = points[1].fX;
 | |
|     float y1     = points[0].fY;
 | |
|     float y2     = points[1].fY;
 | |
|     int code     = 0;
 | |
|     int pIndex   = 0;
 | |
|     float deltaY = y2 - y1;
 | |
|     float deltaX = x2 - x1;
 | |
|     if (deltaX > 0.01f || deltaX < -0.01f) {
 | |
|         deltaY = (y2 - y1) / (x2 - x1);
 | |
|     } else {
 | |
|         deltaY = 0;
 | |
|     }
 | |
|     if (deltaY > 0.01f || deltaY < -0.01f) {
 | |
|         deltaX = (x2 - x1) / (y2 - y1);
 | |
|     } else {
 | |
|         deltaX = 0;
 | |
|     }
 | |
|     while (code1 != 0 || code2 != 0) {
 | |
|         if ((code1 & code2) != 0) {
 | |
|             sta = end;
 | |
|             break;
 | |
|         }
 | |
|         if (code1 != 0) {
 | |
|             code   = code1;
 | |
|             pIndex = 0;
 | |
|         } else if (code2 != 0) {
 | |
|             code   = code2;
 | |
|             pIndex = 1;
 | |
|         }
 | |
|         if ((LEFT & code) != 0) {
 | |
|             points[pIndex].fY = points[pIndex].fY + deltaY * (0 - points[pIndex].fX);
 | |
|             points[pIndex].fX = 0;
 | |
|         } else if ((RIGHT & code) != 0) {
 | |
|             points[pIndex].fY = points[pIndex].fY + deltaY * (iw - 1 - points[pIndex].fX);
 | |
|             points[pIndex].fX = iw - 1;
 | |
|         } else if ((BOTTOM & code) != 0) {
 | |
|             points[pIndex].fX = points[pIndex].fX + deltaX * (ih - 1 - points[pIndex].fY);
 | |
|             points[pIndex].fY = ih - 1;
 | |
|         } else if ((TOP & code) != 0) {
 | |
|             points[pIndex].fX = points[pIndex].fX + deltaX * (0 - points[pIndex].fY);
 | |
|             points[pIndex].fY = 0;
 | |
|         }
 | |
|         auto tmp = invert.mapXY(points[pIndex].fX, points[pIndex].fY);
 | |
|         if (0 == pIndex) {
 | |
|             code1 = _encode(points[pIndex], iw, ih);
 | |
|             // FUNC_PRINT_ALL(tmp.fX, f);
 | |
|             // sta = (int)::ceilf(tmp.fX) - xStart;
 | |
|             sta = (int)::round(tmp.fX) - xStart;
 | |
|         } else {
 | |
|             code2 = _encode(points[pIndex], iw, ih);
 | |
|             // FUNC_PRINT_ALL(tmp.fX, f);
 | |
|             // end = (int)::ceilf(tmp.fX) - xStart + 1;
 | |
|             end = (int)::floor(tmp.fX) - xStart + 1;
 | |
|         }
 | |
|     }
 | |
|     if (end > count) {
 | |
|         end = count;
 | |
|     }
 | |
|     if (sta > end) {
 | |
|         sta = end;
 | |
|     }
 | |
|     return std::make_pair(sta, end);
 | |
| }
 | |
| 
 | |
| ErrorCode CPUImageProcess::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     auto input = inputs[0];
 | |
|     if (input->dimensions() == 3) {
 | |
|         ih = input->length(0);
 | |
|         iw = input->length(1);
 | |
|         ic = input->length(2);
 | |
|     } else {
 | |
|         ih = input->height();
 | |
|         iw = input->width();
 | |
|         ic = input->channel();
 | |
|     }
 | |
|     if (draw) {
 | |
|         blitter = choose(ic * inputs[0]->getType().bytes());
 | |
|         return NO_ERROR;
 | |
|     }
 | |
|     auto output = outputs[0];
 | |
|     oh = output->height();
 | |
|     ow = output->width();
 | |
|     oc = output->channel();
 | |
|     dtype = output->getType();
 | |
| 
 | |
|     bool identity = transform.isIdentity() && iw >= ow && ih >= oh;
 | |
|     // choose sampler
 | |
|     sampler  = choose(sourceFormat, filterType, identity);
 | |
|     if (nullptr == sampler) {
 | |
|         return INPUT_DATA_ERROR;
 | |
|     }
 | |
|     // choose blitter
 | |
|     if (sourceFormat != destFormat) {
 | |
|         blitter = choose(sourceFormat, destFormat);
 | |
|         if (nullptr == blitter) {
 | |
|             return INPUT_DATA_ERROR;
 | |
|         }
 | |
|         if (backend()) {
 | |
|             cacheBuffer.reset(Tensor::createDevice<uint8_t>(std::vector<int>{4 * CACHE_SIZE}));
 | |
|             backend()->onAcquireBuffer(cacheBuffer.get(), Backend::DYNAMIC);
 | |
|             samplerDest = cacheBuffer->host<uint8_t>();
 | |
|         } else {
 | |
|             samplerBuffer.reset(new uint8_t[4 * CACHE_SIZE]);
 | |
|             samplerDest = samplerBuffer.get();
 | |
|         }
 | |
|     }
 | |
|     // choose float blitter
 | |
|     if (dtype.code == halide_type_float) {
 | |
|         blitFloat = choose(destFormat, oc);
 | |
|         if (nullptr == blitFloat) {
 | |
|             return INPUT_DATA_ERROR;
 | |
|         }
 | |
|         if (backend()) {
 | |
|             cacheBufferRGBA.reset(Tensor::createDevice<uint8_t>(std::vector<int>{4 * CACHE_SIZE}));
 | |
|             backend()->onAcquireBuffer(cacheBufferRGBA.get(), Backend::DYNAMIC);
 | |
|             blitDest = cacheBufferRGBA->host<uint8_t>();
 | |
|         } else {
 | |
|             blitBuffer.reset(new uint8_t[4 * CACHE_SIZE]);
 | |
|             blitDest = blitBuffer.get();
 | |
|         }
 | |
|     }
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| ErrorCode CPUImageProcess::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     if (0 == mStride) {
 | |
|         mStride = iw * ic;
 | |
|     }
 | |
|     auto source = inputs[0]->host<uint8_t>();
 | |
|     void* dest = nullptr;
 | |
|     CV::Point points[2];
 | |
|     auto destBytes = dtype.bytes();
 | |
|     int tileCount = UP_DIV(ow, CACHE_SIZE);
 | |
|     const int* regions = nullptr;
 | |
|     if (draw) {
 | |
|         // change input to output
 | |
|         dest = source;
 | |
|         oh = inputs[1]->length(0);
 | |
|         ow = iw;
 | |
|         oc = ic;
 | |
|         destBytes = inputs[0]->getType().bytes();
 | |
|         // draw one
 | |
|         tileCount = 1;
 | |
|         // src is color
 | |
|         samplerDest = inputs[2]->host<uint8_t>();
 | |
|         // get region info ptr
 | |
|         regions = inputs[1]->host<int>();
 | |
|     } else {
 | |
|         dest = outputs[0]->host<void>();
 | |
|     }
 | |
|     for (int i = 0; i < oh; ++i) {
 | |
|         int dy = draw ? regions[3 * i] : i;
 | |
|         auto dstY = (uint8_t*)dest + dy * destBytes * ow * oc;
 | |
|         for (int tIndex = 0; tIndex < tileCount; ++tIndex) {
 | |
|             int xStart    = tIndex * CACHE_SIZE;
 | |
|             int count     = std::min(CACHE_SIZE, ow - xStart);
 | |
|             if (draw) {
 | |
|                 xStart = regions[3 * i + 1];
 | |
|                 count = regions[3 * i + 2] - xStart + 1;
 | |
|             }
 | |
|             auto dstStart = dstY + destBytes * oc * xStart;
 | |
|           
 | |
|             if (!blitFloat) {
 | |
|                 blitDest = dstStart;
 | |
|             }
 | |
|             if (!blitter) {
 | |
|                 samplerDest = blitDest;
 | |
|             }
 | |
| 
 | |
|             // Sample
 | |
|             if (!draw) {
 | |
|                 // Compute position
 | |
|                 points[0].fX = xStart;
 | |
|                 points[0].fY = dy;
 | |
| 
 | |
|                 points[1].fX = xStart + count;
 | |
|                 points[1].fY = dy;
 | |
|                 transform.mapPoints(points, 2);
 | |
|                 float deltaY = points[1].fY - points[0].fY;
 | |
|                 float deltaX = points[1].fX - points[0].fX;
 | |
| 
 | |
|                 int sta = 0;
 | |
|                 int end = count;
 | |
| 
 | |
|                 // FUNC_PRINT(sta);
 | |
|                 if (wrap == WrapType_ZERO) {
 | |
|                     // Clip: Cohen-Sutherland
 | |
|                     auto clip    = _computeClip(points, iw, ih, transformInvert, xStart, count);
 | |
|                     sta          = clip.first;
 | |
|                     end          = clip.second;
 | |
|                     points[0].fX = sta + xStart;
 | |
|                     points[0].fY = dy;
 | |
| 
 | |
|                     transform.mapPoints(points, 1);
 | |
|                     if (sta != 0 || end < count) {
 | |
|                         if (ic > 0) {
 | |
|                             if (sta > 0) {
 | |
|                                 ::memset(samplerDest, paddingValue, ic * sta);
 | |
|                             }
 | |
|                             if (end < count) {
 | |
|                                 ::memset(samplerDest + end * ic, paddingValue, (count - end) * ic);
 | |
|                             }
 | |
|                         } else {
 | |
|                             // TODO, Only support NV12 / NV21
 | |
|                             ::memset(samplerDest, paddingValue, count);
 | |
|                             ::memset(samplerDest + count, 128, UP_DIV(count, 2) * 2);
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|                 points[1].fX = (deltaX) / (float)(count);
 | |
|                 points[1].fY = (deltaY) / (float)(count);
 | |
| 
 | |
|                 sampler(source, samplerDest, points, sta, end - sta, count, iw, ih, mStride);
 | |
|             }
 | |
|             // Convert format
 | |
|             if (blitter) {
 | |
|                 blitter(samplerDest, blitDest, count);
 | |
|             }
 | |
|             // Turn float
 | |
|             if (blitFloat) {
 | |
|                 blitFloat(blitDest, (float*)dstStart, mean, normal, count);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| class CPUImageProcessCreator : public CPUBackend::Creator {
 | |
| public:
 | |
|     virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
 | |
|                                 const MNN::Op *op, Backend *backend) const {
 | |
|         auto process = op->main_as_ImageProcessParam();
 | |
|         return new CPUImageProcess(backend, process);
 | |
|     }
 | |
| };
 | |
| 
 | |
| REGISTER_CPU_OP_CREATOR(CPUImageProcessCreator, OpType_ImageProcess);
 | |
| } // namespace MNN
 |