| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | //  ImageProcessFunction.cpp
 | 
					
						
							|  |  |  | //  MNN
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //  Created by MNN on 2021/10/29.
 | 
					
						
							|  |  |  | //  Copyright © 2018 Alibaba. All rights reserved.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "backend/cpu/compute/ImageProcessFunction.hpp"
 | 
					
						
							|  |  |  | #include "core/Macro.h"
 | 
					
						
							|  |  |  | #include <algorithm>
 | 
					
						
							|  |  |  | #ifdef MNN_USE_NEON
 | 
					
						
							|  |  |  | #include <arm_neon.h>
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							// C-linkage prototypes for routines that are defined outside this file.
							// The *Fast, *C8 and *Unit entry points are only called from this file
							// inside MNN_USE_NEON sections.
							extern "C" {

							// NV21 conversion units.
							void MNNNV21ToRGBUnit(const unsigned char* source, unsigned char* dest, size_t countDiv8, const unsigned char* uv);
							void MNNNV21ToBGRUnit(const unsigned char* source, unsigned char* dest, size_t countDiv8, const unsigned char* uv);
							void MNNNV21ToRGBAUnit(const unsigned char* source, unsigned char* dest, size_t countDiv8, const unsigned char* uv);
							void MNNNV21ToBGRAUnit(const unsigned char* source, unsigned char* dest, size_t countDiv8, const unsigned char* uv);

							// Samplers.
							void MNNSamplerC4BilinearOpt(const unsigned char* source, unsigned char* dest, float* points, size_t count, size_t xMax, size_t yMax, size_t yStride);
							void MNNSamplerC1BilinearOpt(const unsigned char* source, unsigned char* dest, float* points, size_t count, size_t xMax, size_t yMax, size_t yStride);
							void MNNSamplerC4NearestOpt(const unsigned char* source, unsigned char* dest, float* points, size_t count, size_t iw, size_t ih, size_t yStride);
							void MNNSamplerC1NearestOpt(const unsigned char* source, unsigned char* dest, float* points, size_t count, size_t iw, size_t ih, size_t yStride);

							// Blits to float.
							void MNNBlitC1ToFloatRGBA(const unsigned char* source, float* dest, const float* mean, const float* normal, size_t count);
							void MNNBlitC3ToFloatRGBA(const unsigned char* source, float* dest, const float* mean, const float* normal, size_t count);

							// Channel reordering / dropping.
							void MNNRGBToBGRC8(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNBGRAToBGRC8(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNRGBAToBGRAFast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNRGBAToBGRFast(const unsigned char* source, unsigned char* dest, size_t count);

							// Channel expansion.
							void MNNGRAYToC4Fast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNGRAYToC3Fast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNC3ToC4Fast(const unsigned char* source, unsigned char* dest, size_t count);

							// Gray conversion.
							void MNNBGRAToGRAYFast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNRGBToGRAYFast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNRGBAToGRAYFast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNBGRToGRAYFast(const unsigned char* source, unsigned char* dest, size_t count);

							// Matrix based color conversion; `c` carries the coefficients.
							void MNNC3ToYUVFast(const unsigned char* source, unsigned char* dest, size_t count, int32_t* c);
							void MNNC3ToXYZFast(const unsigned char* source, unsigned char* dest, size_t count, int32_t* c);

							// Packed 16-bit outputs.
							void MNNRGBToBGR555Fast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNBGRToBGR555Fast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNBGRToBGR565Fast(const unsigned char* source, unsigned char* dest, size_t count);
							void MNNRGBToBGR565Fast(const unsigned char* source, unsigned char* dest, size_t count);

							}
					
						
							|  |  |  | 
 | 
					
						
							// Expand single-channel pixels to four channels.
							// Every source byte is replicated into the first three output channels and
							// the fourth channel is set to 255.
							//   source: input, `count` bytes are read.
							//   dest:   output, 4 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNGRAYToC4(const unsigned char* source, unsigned char* dest, size_t count) {
							    // Indices are size_t like `count`; an int index would truncate or
							    // overflow (4 * i) for very large buffers.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNGRAYToC4Fast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char gray = source[i];
							        dest[4 * i + 0] = gray;
							        dest[4 * i + 1] = gray;
							        dest[4 * i + 2] = gray;
							        dest[4 * i + 3] = 255;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Expand single-channel pixels to three channels by replicating each
							// source byte into all three output channels.
							//   source: input, `count` bytes are read.
							//   dest:   output, 3 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNGRAYToC3(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNGRAYToC3Fast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char gray = source[i];
							        dest[3 * i + 0] = gray;
							        dest[3 * i + 1] = gray;
							        dest[3 * i + 2] = gray;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Convert three-channel pixels to four channels: the three source channels
							// are copied in order and the fourth channel is set to 255.
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, 4 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNC3ToC4(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNC3ToC4Fast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char* in = source + 3 * i;
							        unsigned char* out      = dest + 4 * i;
							        out[0] = in[0];
							        out[1] = in[1];
							        out[2] = in[2];
							        out[3] = 255;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Swap the first and third channel of four-channel pixels; the second and
							// fourth channels are copied unchanged.
							//   source: input, 4 * `count` bytes are read.
							//   dest:   output, 4 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNRGBAToBGRA(const unsigned char* source, unsigned char* dest, size_t count) {
							    // Kept as size_t: the previous int start index narrowed countD8 * 8.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNRGBAToBGRAFast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char* in = source + 4 * i;
							        unsigned char* out      = dest + 4 * i;
							        out[0] = in[2];
							        out[1] = in[1];
							        out[2] = in[0];
							        out[3] = in[3];
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Convert four-channel pixels to three channels, reversing the order of
							// the first three channels and dropping the fourth.
							//   source: input, 4 * `count` bytes are read.
							//   dest:   output, 3 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNRGBAToBGR(const unsigned char* source, unsigned char* dest, size_t count) {
							    // Kept as size_t: the previous int start index narrowed countD8 * 8.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNRGBAToBGRFast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char* in = source + 4 * i;
							        unsigned char* out      = dest + 3 * i;
							        out[0] = in[2];
							        out[1] = in[1];
							        out[2] = in[0];
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Reverse the channel order of three-channel pixels.
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, 3 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNRGBToBGR(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNRGBToBGRC8(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char* in = source + 3 * i;
							        unsigned char* out      = dest + 3 * i;
							        out[0] = in[2];
							        out[1] = in[1];
							        out[2] = in[0];
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Drop the fourth channel of four-channel pixels, keeping the first three
							// channels in their original order.
							//   source: input, 4 * `count` bytes are read.
							//   dest:   output, 3 * `count` bytes are written.
							//   count:  number of pixels.
							void MNNBGRAToBGR(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNBGRAToBGRC8(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const unsigned char* in = source + 4 * i;
							        unsigned char* out      = dest + 3 * i;
							        out[0] = in[0];
							        out[1] = in[1];
							        out[2] = in[2];
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Convert four-channel B,G,R,A pixels to one gray byte each using the
							// integer weights (19 * r + 38 * g + 7 * b) >> 6. The fourth channel is
							// ignored.
							//   source: input, 4 * `count` bytes are read.
							//   dest:   output, `count` bytes are written.
							//   count:  number of pixels.
							void MNNBGRAToGRAY(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#if defined MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNBGRAToGRAYFast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const int b = source[4 * i + 0];
							        const int g = source[4 * i + 1];
							        const int r = source[4 * i + 2];
							        // The weights sum to 64, so the result always fits in a byte.
							        dest[i] = (unsigned char)((19 * r + 38 * g + 7 * b) >> 6);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Convert four-channel R,G,B,A pixels to one gray byte each using the
							// integer weights (19 * r + 38 * g + 7 * b) >> 6. The fourth channel is
							// ignored.
							//   source: input, 4 * `count` bytes are read.
							//   dest:   output, `count` bytes are written.
							//   count:  number of pixels.
							void MNNRGBAToGRAY(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#if defined MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNRGBAToGRAYFast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const int r = source[4 * i + 0];
							        const int g = source[4 * i + 1];
							        const int b = source[4 * i + 2];
							        // The weights sum to 64, so the result always fits in a byte.
							        dest[i] = (unsigned char)((19 * r + 38 * g + 7 * b) >> 6);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Clamp an int to the range of a byte: values below 0 become 0, values
							// above 255 become 255, everything else is returned unchanged.
							uint8_t saturate_cast(int v) {
							    if (v < 0) {
							        return 0;
							    }
							    if (v > 255) {
							        return 255;
							    }
							    return (uint8_t)v;
							}

							// Fixed-point descale: add half of 2^n for rounding, then shift right by n.
							#define CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
							// Weighted sum of the three lanes of a variable named `rgb` that must be in
							// scope at the expansion site, narrowed with a right shift by n.
							#define CV_MUL_SHIFT(rC, gC, bC, n) vshrn_n_u16((vmull_u8(rC, rgb.val[0]) + vmull_u8(gC, rgb.val[1]) + vmull_u8(bC, rgb.val[2])), n)
 | 
					
						
							|  |  |  | 
 | 
					
						
							// Convert three-channel pixels with a 3x3 fixed-point matrix (14 fractional
							// bits). The first output channel uses the "Y" row; the second and third
							// use the "Cr"/"Cb" rows, or the "U"/"V" rows when `yuv` is true, and are
							// offset by 128.
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, 3 * `count` bytes are written.
							//   count:  number of pixels.
							//   bgr:    when true, the coefficients for the first and third source
							//           channel are exchanged.
							//   yuv:    selects the U/V rows instead of the Cr/Cb rows.
							void MNNC3ToYUV(const unsigned char* source, unsigned char* dest, size_t count, bool bgr, bool yuv) {
							    static const int coeffs[] = {
							        // Y
							         4899,    9617,    1868,
							        // Cr
							         8192,   -6860,   -1332,
							        // Cb
							        -2765,   -5427,    8192,
							        // U
							        -2412,   -4734,    7146,
							        // V
							        10076,  -8438,   -1638
							    };
							    // Indices into `coeffs` for each (source channel, output row) pair.
							    int r0 = 0, r1 = 3, r2 = 6,
							        g0 = 1, g1 = 4, g2 = 7,
							        b0 = 2, b1 = 5, b2 = 8;
							    if (yuv) {
							        r1 = 9,  r2 = 12;
							        g1 = 10, g2 = 13;
							        b1 = 11, b2 = 14;
							    }
							    if (bgr) {
							        std::swap(r0, b0);
							        std::swap(r1, b1);
							        std::swap(r2, b2);
							    }
							    const int C0 = coeffs[r0], C1 = coeffs[g0], C2 = coeffs[b0],
							              C3 = coeffs[r1], C4 = coeffs[g1], C5 = coeffs[b1],
							              C6 = coeffs[r2], C7 = coeffs[g2], C8 = coeffs[b2];
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;

							#if defined MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    // NOTE(review): confirm MNNC3ToYUVFast saturates its outputs the same
							    // way as the scalar loop below.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        int32_t c[] = {C0, C1, C2, C3, C4, C5, C6, C7, C8};
							        MNNC3ToYUVFast(source, dest, countD8, c);
							        sta = countD8 * 8;
							    }
							#endif
							    const int shift = 14;
							    const int half  = 1 << (shift - 1);  // rounding term for the descale
							    for (size_t i = sta; i < count; ++i) {
							        const int r = source[3 * i + 0];
							        const int g = source[3 * i + 1];
							        const int b = source[3 * i + 2];
							        const int y = (r * C0 + g * C1 + b * C2 + half) >> shift;
							        const int u = ((r * C3 + g * C4 + b * C5 + half) >> shift) + 128;
							        const int v = ((r * C6 + g * C7 + b * C8 + half) >> shift) + 128;
							        // The chroma terms can leave [0, 255] (e.g. 256 for a saturated
							        // channel); clamp instead of letting the byte store wrap around.
							        dest[3 * i + 0] = (unsigned char)std::min(std::max(y, 0), 255);
							        dest[3 * i + 1] = (unsigned char)std::min(std::max(u, 0), 255);
							        dest[3 * i + 2] = (unsigned char)std::min(std::max(v, 0), 255);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNC3ToXYZ(const unsigned char* source, unsigned char* dest, size_t count, bool bgr) { | 
					
						
							|  |  |  |     static const int coeffs[] = { | 
					
						
							|  |  |  |         1689,    1465,    739, | 
					
						
							|  |  |  |         871,     2929,    296, | 
					
						
							|  |  |  |         79,      488,     3892 | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  |     int r0 = 0, r1 = 3, r2 = 6, b0 = 2, b1 = 5, b2 = 8; | 
					
						
							|  |  |  |     if (bgr) { | 
					
						
							|  |  |  |         std::swap(r0, b0); | 
					
						
							|  |  |  |         std::swap(r1, b1); | 
					
						
							|  |  |  |         std::swap(r2, b2); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     int C0 = coeffs[r0], C1 = coeffs[1], C2 = coeffs[b0], | 
					
						
							|  |  |  |         C3 = coeffs[r1], C4 = coeffs[4], C5 = coeffs[b1], | 
					
						
							|  |  |  |         C6 = coeffs[r2], C7 = coeffs[7], C8 = coeffs[b2]; | 
					
						
							|  |  |  |     int sta = 0; | 
					
						
							| 
									
										
										
										
											2024-09-12 12:57:57 +08:00
										 |  |  |      | 
					
						
							|  |  |  | #if defined MNN_USE_NEON
 | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |     int countD8 = (int)count / 8; | 
					
						
							|  |  |  |     if (countD8 > 0) { | 
					
						
							| 
									
										
										
										
											2024-09-12 12:57:57 +08:00
										 |  |  |         int32_t c[] = {C0, C1, C2, C3, C4, C5, C6, C7, C8}; | 
					
						
							|  |  |  |         MNNC3ToXYZFast(source, dest, countD8, c); | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |         sta = countD8 * 8; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2024-09-12 12:57:57 +08:00
										 |  |  |      | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |     for (int i = sta; i < count; ++i) { | 
					
						
							|  |  |  |         int r = source[3 * i + 0]; | 
					
						
							|  |  |  |         int g = source[3 * i + 1]; | 
					
						
							|  |  |  |         int b = source[3 * i + 2]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         int x = CV_DESCALE(r*C0 + g*C1 + b*C2, 12); | 
					
						
							|  |  |  |         int y = CV_DESCALE(r*C3 + g*C4 + b*C5, 12); | 
					
						
							|  |  |  |         int z = CV_DESCALE(r*C6 + g*C7 + b*C8, 12); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         dest[3 * i + 0] = saturate_cast(x); | 
					
						
							|  |  |  |         dest[3 * i + 1] = saturate_cast(y); | 
					
						
							|  |  |  |         dest[3 * i + 2] = saturate_cast(z); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-04 10:50:40 +08:00
										 // Convert three-channel pixels to hue / saturation / value bytes using
										 // 12-bit fixed-point scaling.
										 //   source: input, 3 * `count` bytes are read.
										 //   dest:   output, 3 * `count` bytes are written as (h, s, v).
										 //   count:  number of pixels.
										 //   bgr:    when true, the first and third source channel are exchanged
										 //           before conversion.
										 //   full:   hue range is 256 when true, 180 otherwise.
										 void MNNC3ToHSV(const unsigned char* source, unsigned char* dest, size_t count, bool bgr, bool full) {
										     const int hrange = full ? 256 : 180;
										     // size_t index matches `count` and avoids int overflow in 3 * i.
										     for (size_t i = 0; i < count; ++i) {
										         int r = source[3 * i + 0];
										         int g = source[3 * i + 1];
										         int b = source[3 * i + 2];
										         if (bgr) {
										             std::swap(r, b);
										         }
										         const int vmin = std::min({r, g, b});
										         const int v    = std::max({r, g, b});
										         const int diff = v - vmin;  // always within [0, 255]
										         // All-ones masks selecting which channel holds the maximum.
										         const int vr = (v == r) ? -1 : 0;
										         const int vg = (v == g) ? -1 : 0;

										         // v == 0 (black) would divide by zero and convert NaN to int, which
										         // is undefined; saturation is 0 in that case.
										         int s = 0;
										         if (v != 0) {
										             s = (int(diff * (255 << 12) * (1.0f / (float)v)) + (1 << (11))) >> 12;
										         }

										         int h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
										         // diff == 0 (gray) would divide by zero and convert infinity to int,
										         // which is undefined; the hue numerator is 0 there, so scale by 0.
										         const int hscale = (diff != 0) ? int((hrange << 12) / (6.f * diff) + 0.5) : 0;
										         h = ((h * hscale) + (1 << (11))) >> 12;
										         h += h < 0 ? hrange : 0;

										         dest[3 * i + 0] = (unsigned char)std::min(std::max(h, 0), 255);
										         dest[3 * i + 1] = (unsigned char)s;
										         dest[3 * i + 2] = (unsigned char)v;
										     }
										 }
					
						
							|  |  |  | 
 | 
					
						
							// Pack three-channel pixels into 16-bit values laid out as
							// (b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7).
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, written as `count` unsigned short values.
							//   count:  number of pixels.
							//   bgr:    when true, the first source channel is treated as blue and the
							//           third as red.
							void MNNC3ToBGR555(const unsigned char* source, unsigned char* dest, size_t count, bool bgr) {
							    // size_t index matches `count` and avoids int truncation / overflow.
							    size_t i = 0;
							#if defined MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routines.
							    // countD8 lives inside the #if so it is not an unused local otherwise.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        if (bgr) {
							            MNNBGRToBGR555Fast(source, dest, countD8);
							        } else {
							            MNNRGBToBGR555Fast(source, dest, countD8);
							        }
							        i = countD8 * 8;
							    }
							#endif
							    unsigned short* packed = reinterpret_cast<unsigned short*>(dest);
							    // Scalar loop for the pixels not covered above.
							    for (; i < count; ++i) {
							        int r = source[3 * i + 0];
							        int g = source[3 * i + 1];
							        int b = source[3 * i + 2];
							        if (bgr) {
							            std::swap(r, b);
							        }
							        packed[i] = (unsigned short)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7));
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Pack three-channel pixels into 16-bit values laid out as
							// (b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8).
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, written as `count` unsigned short values.
							//   count:  number of pixels.
							//   bgr:    when true, the first source channel is treated as blue and the
							//           third as red.
							void MNNC3ToBGR565(const unsigned char* source, unsigned char* dest, size_t count, bool bgr) {
							    // Kept as size_t: the previous int index narrowed countD8 * 8.
							    size_t i = 0;
							#if defined MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routines.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        if (bgr) {
							            MNNBGRToBGR565Fast(source, dest, countD8);
							        } else {
							            MNNRGBToBGR565Fast(source, dest, countD8);
							        }
							        i = countD8 * 8;
							    }
							#endif
							    unsigned short* packed = reinterpret_cast<unsigned short*>(dest);
							    // Scalar loop for the pixels not covered above.
							    for (; i < count; ++i) {
							        int r = source[3 * i + 0];
							        int g = source[3 * i + 1];
							        int b = source[3 * i + 2];
							        if (bgr) {
							            std::swap(r, b);
							        }
							        packed[i] = (unsigned short)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8));
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Convert three-channel R,G,B pixels to one gray byte each using the
							// integer weights (19 * r + 38 * g + 7 * b) >> 6.
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, `count` bytes are written.
							//   count:  number of pixels.
							void MNNRGBToGRAY(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNRGBToGRAYFast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif
							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const int r = source[3 * i + 0];
							        const int g = source[3 * i + 1];
							        const int b = source[3 * i + 2];
							        // opencv impl: int y = (9798 * r + 19235 * g + 3735 * b + (1 << 14)) >> 15;
							        // The weights sum to 64, so the result always fits in a byte.
							        dest[i] = (unsigned char)((19 * r + 38 * g + 7 * b) >> 6);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Convert three-channel pixels stored as B,G,R to one gray byte each using
							// the integer weights (19 * r + 38 * g + 7 * b) >> 6.
							//   source: input, 3 * `count` bytes are read.
							//   dest:   output, `count` bytes are written.
							//   count:  number of pixels.
							void MNNBRGToGRAY(const unsigned char* source, unsigned char* dest, size_t count) {
							    // size_t indices match `count` and avoid int truncation / overflow.
							    size_t sta = 0;
							#ifdef MNN_USE_NEON
							    // Leading whole groups of 8 pixels go to the optimized routine.
							    const size_t countD8 = count / 8;
							    if (countD8 > 0) {
							        MNNBGRToGRAYFast(source, dest, countD8);
							        sta = countD8 * 8;
							    }
							#endif

							    // Scalar loop for the pixels not covered above.
							    for (size_t i = sta; i < count; ++i) {
							        const int b = source[3 * i + 0];
							        const int g = source[3 * i + 1];
							        const int r = source[3 * i + 2];
							        // The weights sum to 64, so the result always fits in a byte.
							        dest[i] = (unsigned char)((19 * r + 38 * g + 7 * b) >> 6);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` pixels of Y + interleaved V/U data into packed RGBA.
							//
							// source: `count` luma bytes followed immediately (at source + count) by
							//         interleaved chroma bytes; for pixel i the V sample is read at
							//         offset (i / 2) * 2 and the U sample at (i / 2) * 2 + 1, so two
							//         neighbouring pixels share one V/U pair.
							// dest:   4 * count bytes written as R, G, B, 255.
							// count:  number of pixels.
							//
							// The conversion is 6-bit fixed point:
							//   R = Y + 73/64 * V, G = Y - 25/64 * U - 37/64 * V, B = Y + 130/64 * U
							// with U and V centred on 128 and each result clamped to [0, 255].
							//
							// With MNN_USE_NEON defined, whole blocks of 16 pixels are delegated to
							// MNNNV21ToRGBAUnit and only the remainder runs the scalar loop. Indices are
							// size_t so they are not narrowed to int or compared signed/unsigned.
							void MNNNV21ToRGBA(const unsigned char* source, unsigned char* dest, size_t count) {
							    const unsigned char* luma   = source;
							    const unsigned char* chroma = source + count;
							    size_t start                = 0;
							#ifdef MNN_USE_NEON
							    const size_t unit   = 16;
							    const size_t blocks = count / unit;
							    if (blocks > 0) {
							        MNNNV21ToRGBAUnit(source, dest, blocks, chroma);
							        start = blocks * unit;
							    }
							#endif
							    for (size_t i = start; i < count; ++i) {
							        const size_t pair = (i / 2) * 2;
							        const int V       = (int)chroma[pair + 0] - 128;
							        const int U       = (int)chroma[pair + 1] - 128;
							        const int Y       = (int)luma[i] << 6;

							        int R = (Y + 73 * V) >> 6;
							        int G = (Y - 25 * U - 37 * V) >> 6;
							        int B = (Y + 130 * U) >> 6;

							        // Clamp every channel to the byte range.
							        R = R < 0 ? 0 : (R > 255 ? 255 : R);
							        G = G < 0 ? 0 : (G > 255 ? 255 : G);
							        B = B < 0 ? 0 : (B > 255 ? 255 : B);

							        unsigned char* out = dest + 4 * i;
							        out[0]             = (uint8_t)R;
							        out[1]             = (uint8_t)G;
							        out[2]             = (uint8_t)B;
							        out[3]             = 255;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` pixels of Y + interleaved V/U data into packed RGB.
							//
							// source: `count` luma bytes followed immediately (at source + count) by
							//         interleaved chroma bytes; for pixel i the V sample is read at
							//         offset (i / 2) * 2 and the U sample at (i / 2) * 2 + 1, so two
							//         neighbouring pixels share one V/U pair.
							// dest:   3 * count bytes written as R, G, B.
							// count:  number of pixels.
							//
							// The conversion is 6-bit fixed point:
							//   R = Y + 73/64 * V, G = Y - 25/64 * U - 37/64 * V, B = Y + 130/64 * U
							// with U and V centred on 128 and each result clamped to [0, 255].
							//
							// For reference, the OpenCV implementation is:
							//   Y     = std::max(0, Y - 16) * 1220542;
							//   int R = (Y + (V * 1673527) + (1 << 19)) >> 20;
							//   int G = (Y + (-852492 * V + -409993 * U) + (1 << 19)) >> 20;
							//   int B = (Y + (2116026 * U) + (1 << 19)) >> 20;
							//
							// With MNN_USE_NEON defined, whole blocks of 16 pixels are delegated to
							// MNNNV21ToRGBUnit and only the remainder runs the scalar loop. Indices are
							// size_t so they are not narrowed to int or compared signed/unsigned.
							void MNNNV21ToRGB(const unsigned char* source, unsigned char* dest, size_t count) {
							    const unsigned char* luma   = source;
							    const unsigned char* chroma = source + count;
							    size_t start                = 0;
							#ifdef MNN_USE_NEON
							    const size_t unit   = 16;
							    const size_t blocks = count / unit;
							    if (blocks > 0) {
							        MNNNV21ToRGBUnit(source, dest, blocks, chroma);
							        start = blocks * unit;
							    }
							#endif
							    for (size_t i = start; i < count; ++i) {
							        const size_t pair = (i / 2) * 2;
							        const int V       = (int)chroma[pair + 0] - 128;
							        const int U       = (int)chroma[pair + 1] - 128;
							        const int Y       = (int)luma[i] << 6;

							        int R = (Y + 73 * V) >> 6;
							        int G = (Y - 25 * U - 37 * V) >> 6;
							        int B = (Y + 130 * U) >> 6;

							        // Clamp every channel to the byte range.
							        R = R < 0 ? 0 : (R > 255 ? 255 : R);
							        G = G < 0 ? 0 : (G > 255 ? 255 : G);
							        B = B < 0 ? 0 : (B > 255 ? 255 : B);

							        unsigned char* out = dest + 3 * i;
							        out[0]             = (uint8_t)R;
							        out[1]             = (uint8_t)G;
							        out[2]             = (uint8_t)B;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` pixels of Y + interleaved V/U data into packed BGRA.
							//
							// source: `count` luma bytes followed immediately (at source + count) by
							//         interleaved chroma bytes; for pixel i the V sample is read at
							//         offset (i / 2) * 2 and the U sample at (i / 2) * 2 + 1, so two
							//         neighbouring pixels share one V/U pair.
							// dest:   4 * count bytes written as B, G, R, 255.
							// count:  number of pixels.
							//
							// The conversion is 6-bit fixed point:
							//   R = Y + 73/64 * V, G = Y - 25/64 * U - 37/64 * V, B = Y + 130/64 * U
							// with U and V centred on 128 and each result clamped to [0, 255].
							//
							// With MNN_USE_NEON defined, whole blocks of 16 pixels are delegated to
							// MNNNV21ToBGRAUnit and only the remainder runs the scalar loop. Indices are
							// size_t so they are not narrowed to int or compared signed/unsigned.
							void MNNNV21ToBGRA(const unsigned char* source, unsigned char* dest, size_t count) {
							    const unsigned char* luma   = source;
							    const unsigned char* chroma = source + count;
							    size_t start                = 0;
							#ifdef MNN_USE_NEON
							    const size_t unit   = 16;
							    const size_t blocks = count / unit;
							    if (blocks > 0) {
							        MNNNV21ToBGRAUnit(source, dest, blocks, chroma);
							        start = blocks * unit;
							    }
							#endif
							    for (size_t i = start; i < count; ++i) {
							        const size_t pair = (i / 2) * 2;
							        const int V       = (int)chroma[pair + 0] - 128;
							        const int U       = (int)chroma[pair + 1] - 128;
							        const int Y       = (int)luma[i] << 6;

							        int R = (Y + 73 * V) >> 6;
							        int G = (Y - 25 * U - 37 * V) >> 6;
							        int B = (Y + 130 * U) >> 6;

							        // Clamp every channel to the byte range.
							        R = R < 0 ? 0 : (R > 255 ? 255 : R);
							        G = G < 0 ? 0 : (G > 255 ? 255 : G);
							        B = B < 0 ? 0 : (B > 255 ? 255 : B);

							        unsigned char* out = dest + 4 * i;
							        out[0]             = (uint8_t)B;
							        out[1]             = (uint8_t)G;
							        out[2]             = (uint8_t)R;
							        out[3]             = 255;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` pixels of Y + interleaved V/U data into packed BGR.
							//
							// source: `count` luma bytes followed immediately (at source + count) by
							//         interleaved chroma bytes; for pixel i the V sample is read at
							//         offset (i / 2) * 2 and the U sample at (i / 2) * 2 + 1, so two
							//         neighbouring pixels share one V/U pair.
							// dest:   3 * count bytes written as B, G, R.
							// count:  number of pixels.
							//
							// The conversion is 6-bit fixed point:
							//   R = Y + 73/64 * V, G = Y - 25/64 * U - 37/64 * V, B = Y + 130/64 * U
							// with U and V centred on 128 and each result clamped to [0, 255].
							//
							// With MNN_USE_NEON defined, whole blocks of 16 pixels are delegated to
							// MNNNV21ToBGRUnit and only the remainder runs the scalar loop. Indices are
							// size_t so they are not narrowed to int or compared signed/unsigned.
							void MNNNV21ToBGR(const unsigned char* source, unsigned char* dest, size_t count) {
							    const unsigned char* luma   = source;
							    const unsigned char* chroma = source + count;
							    size_t start                = 0;
							#ifdef MNN_USE_NEON
							    const size_t unit   = 16;
							    const size_t blocks = count / unit;
							    if (blocks > 0) {
							        MNNNV21ToBGRUnit(source, dest, blocks, chroma);
							        start = blocks * unit;
							    }
							#endif
							    for (size_t i = start; i < count; ++i) {
							        const size_t pair = (i / 2) * 2;
							        const int V       = (int)chroma[pair + 0] - 128;
							        const int U       = (int)chroma[pair + 1] - 128;
							        const int Y       = (int)luma[i] << 6;

							        int R = (Y + 73 * V) >> 6;
							        int G = (Y - 25 * U - 37 * V) >> 6;
							        int B = (Y + 130 * U) >> 6;

							        // Clamp every channel to the byte range.
							        R = R < 0 ? 0 : (R > 255 ? 255 : R);
							        G = G < 0 ? 0 : (G > 255 ? 255 : G);
							        B = B < 0 ? 0 : (B > 255 ? 255 : B);

							        unsigned char* out = dest + 3 * i;
							        out[0]             = (uint8_t)B;
							        out[1]             = (uint8_t)G;
							        out[2]             = (uint8_t)R;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` single-channel bytes to normalised floats:
							//   dest[i] = normal[0] * (source[i] - mean[0])
							//
							// source: count input bytes.
							// dest:   count output floats.
							// mean:   only mean[0] is read.
							// normal: only normal[0] is read.
							// count:  number of elements.
							//
							// With MNN_USE_NEON defined, blocks of 16 elements are widened
							// u8 -> s16 -> s32 -> f32 and evaluated as (value + (-mean)) * normal; the
							// remaining elements share the scalar loop below. Indices are size_t so
							// they are never compared as signed values against the unsigned count.
							void MNNC1ToFloatC1(const unsigned char* source, float* dest, const float* mean, const float* normal, size_t count) {
							    size_t start = 0;
							#ifdef MNN_USE_NEON
							    const size_t blocks       = count >> 4;
							    const float32x4_t negMean = vdupq_n_f32(-mean[0]);
							    const float32x4_t scale   = vdupq_n_f32(normal[0]);
							    for (size_t blk = 0; blk < blocks; ++blk) {
							        const uint8x16_t v = vld1q_u8(source + 16 * blk);
							        const int16x8_t lo = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v)));  // 0..7
							        const int16x8_t hi = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v))); // 8..15
							        float* out         = dest + 16 * blk;

							        const float32x4_t f0 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(lo)));  // 0..3
							        const float32x4_t f1 = vcvtq_f32_s32(vmovl_s16(vget_high_s16(lo))); // 4..7
							        const float32x4_t f2 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(hi)));  // 8..11
							        const float32x4_t f3 = vcvtq_f32_s32(vmovl_s16(vget_high_s16(hi))); // 12..15

							        vst1q_f32(out + 0, vmulq_f32(vaddq_f32(negMean, f0), scale));
							        vst1q_f32(out + 4, vmulq_f32(vaddq_f32(negMean, f1), scale));
							        vst1q_f32(out + 8, vmulq_f32(vaddq_f32(negMean, f2), scale));
							        vst1q_f32(out + 12, vmulq_f32(vaddq_f32(negMean, f3), scale));
							    }
							    start = blocks * 16;
							#endif
							    for (size_t i = start; i < count; ++i) {
							        dest[i] = normal[0] * (source[i] - mean[0]);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` packed 3-channel byte pixels to normalised floats,
							// keeping the interleaved layout:
							//   dest[3 * i + c] = normal[c] * (source[3 * i + c] - mean[c]),  c = 0..2
							//
							// source: 3 * count input bytes.
							// dest:   3 * count output floats.
							// mean:   three per-channel offsets.
							// normal: three per-channel scale factors.
							// count:  number of pixels.
							//
							// With MNN_USE_NEON defined, blocks of 16 pixels are de-interleaved with
							// vld3q_u8, each channel is widened u8 -> s16 -> s32 -> f32 and evaluated as
							// (value + (-mean)) * normal, then re-interleaved with vst3q_f32 four pixels
							// at a time. The per-channel work is one loop instead of three copies of
							// the same code. Remaining pixels share the scalar loop below. Indices are
							// size_t so a large `count` is neither truncated to int nor compared signed.
							void MNNC3ToFloatC3(const unsigned char* source, float* dest, const float* mean, const float* normal,
							                             size_t count) {
							    size_t start = 0;
							#ifdef MNN_USE_NEON
							    const size_t blocks = count / 16;
							    float32x4x3_t negMean;
							    float32x4x3_t scale;
							    for (int c = 0; c < 3; ++c) {
							        negMean.val[c] = vmovq_n_f32(-mean[c]);
							        scale.val[c]   = vmovq_n_f32(normal[c]);
							    }
							    for (size_t blk = 0; blk < blocks; ++blk) {
							        const uint8x16x3_t v = vld3q_u8(source + 16 * 3 * blk);
							        float32x4x3_t q0; // pixels 0..3
							        float32x4x3_t q1; // pixels 4..7
							        float32x4x3_t q2; // pixels 8..11
							        float32x4x3_t q3; // pixels 12..15
							        for (int c = 0; c < 3; ++c) {
							            const int16x8_t lo = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v.val[c])));  // 0..7
							            const int16x8_t hi = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v.val[c]))); // 8..15

							            const float32x4_t f0 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(lo)));
							            const float32x4_t f1 = vcvtq_f32_s32(vmovl_s16(vget_high_s16(lo)));
							            const float32x4_t f2 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(hi)));
							            const float32x4_t f3 = vcvtq_f32_s32(vmovl_s16(vget_high_s16(hi)));

							            q0.val[c] = vmulq_f32(vaddq_f32(negMean.val[c], f0), scale.val[c]);
							            q1.val[c] = vmulq_f32(vaddq_f32(negMean.val[c], f1), scale.val[c]);
							            q2.val[c] = vmulq_f32(vaddq_f32(negMean.val[c], f2), scale.val[c]);
							            q3.val[c] = vmulq_f32(vaddq_f32(negMean.val[c], f3), scale.val[c]);
							        }
							        float* out = dest + 48 * blk;
							        vst3q_f32(out + 0 * 3, q0);
							        vst3q_f32(out + 4 * 3, q1);
							        vst3q_f32(out + 8 * 3, q2);
							        vst3q_f32(out + 12 * 3, q3);
							    }
							    start = blocks * 16;
							#endif
							    for (size_t i = start; i < count; ++i) {
							        const unsigned char* in = source + 3 * i;
							        float* out              = dest + 3 * i;
							        out[0]                  = normal[0] * (in[0] - mean[0]);
							        out[1]                  = normal[1] * (in[1] - mean[1]);
							        out[2]                  = normal[2] * (in[2] - mean[2]);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Converts `count` packed 4-channel byte pixels to normalised floats:
							//   dest[4 * i + c] = normal[c] * (source[4 * i + c] - mean[c]),  c = 0..3
							//
							// source: 4 * count input bytes.
							// dest:   4 * count output floats.
							// mean:   four per-channel offsets.
							// normal: four per-channel scale factors.
							// count:  number of pixels.
							//
							// The pixel index is size_t so it matches the type of `count` and the
							// 4 * i offset is not computed in int.
							void MNNC4ToFloatC4(const unsigned char* source, float* dest, const float* mean, const float* normal, size_t count) {
							    for (size_t i = 0; i < count; ++i) {
							        const unsigned char* in = source + 4 * i;
							        float* out              = dest + 4 * i;
							        for (int c = 0; c < 4; ++c) {
							            out[c] = normal[c] * (in[c] - mean[c]);
							        }
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Expands `count` single-channel bytes into 4-float pixels.
							//
							// Lane 0 of every output pixel receives normal[0] * (source[i] - mean[0]);
							// lanes 1..3 are set to zero.
							//
							// source: count input bytes.
							// dest:   4 * count output floats.
							// mean:   only mean[0] is read by the scalar path.
							// normal: only normal[0] is read by the scalar path.
							// count:  number of pixels.
							//
							// With MNN_USE_NEON defined the whole call is forwarded to
							// MNNBlitC1ToFloatRGBA (defined elsewhere). The scalar path writes all four
							// lanes in a single pass and indexes with size_t to match `count`.
							void MNNC1ToFloatRGBA(const unsigned char* source, float* dest, const float* mean, const float* normal, size_t count) {
							#ifdef MNN_USE_NEON
							    MNNBlitC1ToFloatRGBA(source, dest, mean, normal, count);
							#else
							    for (size_t i = 0; i < count; ++i) {
							        float* out = dest + 4 * i;
							        out[0]     = normal[0] * (source[i] - mean[0]);
							        out[1]     = 0.0f;
							        out[2]     = 0.0f;
							        out[3]     = 0.0f;
							    }
							#endif
							}
					
						
							|  |  |  | 
 | 
					
						
							// Expands `count` packed 3-channel byte pixels into 4-float pixels.
							//
							// Lanes 0..2 receive normal[c] * (source[3 * i + c] - mean[c]); lane 3 is
							// set to 0.0f.
							//
							// source: 3 * count input bytes.
							// dest:   4 * count output floats.
							// mean:   three per-channel offsets.
							// normal: three per-channel scale factors.
							// count:  number of pixels.
							//
							// With MNN_USE_NEON defined the whole call is forwarded to
							// MNNBlitC3ToFloatRGBA (defined elsewhere). The scalar path indexes with
							// size_t so the offsets are not computed in int.
							void MNNC3ToFloatRGBA(const unsigned char* source, float* dest, const float* mean, const float* normal, size_t count) {
							#ifdef MNN_USE_NEON
							    MNNBlitC3ToFloatRGBA(source, dest, mean, normal, count);
							#else
							    for (size_t i = 0; i < count; ++i) {
							        const unsigned char* in = source + 3 * i;
							        float* out              = dest + 4 * i;
							        out[0]                  = normal[0] * (in[0] - mean[0]);
							        out[1]                  = normal[1] * (in[1] - mean[1]);
							        out[2]                  = normal[2] * (in[2] - mean[2]);
							        out[3]                  = 0.0f;
							    }
							#endif
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							// Limits `v` to the closed range [minV, maxV]: the upper bound is applied
							// first, then the lower bound.
							static inline float __clamp(float v, float minV, float maxV) {
							    const float capped = (maxV < v) ? maxV : v;
							    return (capped < minV) ? minV : capped;
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void _sampleBilinearCommon(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t count, | 
					
						
							|  |  |  |                                   size_t iw, size_t ih, size_t yStride, size_t bpp) { | 
					
						
							|  |  |  |     float dy   = points[1].fY; | 
					
						
							|  |  |  |     float dx   = points[1].fX; | 
					
						
							|  |  |  |     float xMax = iw - 1; | 
					
						
							|  |  |  |     float yMax = ih - 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     MNN::CV::Point curPoints; | 
					
						
							|  |  |  |     curPoints.fX = points[0].fX; | 
					
						
							|  |  |  |     curPoints.fY = points[0].fY; | 
					
						
							|  |  |  |     for (int i = 0; i < count; ++i) { | 
					
						
							|  |  |  |         float y  = __clamp(curPoints.fY, 0, yMax); | 
					
						
							|  |  |  |         float x  = __clamp(curPoints.fX, 0, xMax); | 
					
						
							|  |  |  |         int y0   = (int)y; | 
					
						
							|  |  |  |         int x0   = (int)x; | 
					
						
							|  |  |  |         int y1   = (int)ceilf(y); | 
					
						
							|  |  |  |         int x1   = (int)ceilf(x); | 
					
						
							|  |  |  |         float xF = x - (float)x0; | 
					
						
							|  |  |  |         float yF = y - (float)y0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for (int b = 0; b < bpp; ++b) { | 
					
						
							|  |  |  |             unsigned char c00 = source[y0 * yStride + bpp * x0 + b]; | 
					
						
							|  |  |  |             unsigned char c01 = source[y0 * yStride + bpp * x1 + b]; | 
					
						
							|  |  |  |             unsigned char c10 = source[y1 * yStride + bpp * x0 + b]; | 
					
						
							|  |  |  |             unsigned char c11 = source[y1 * yStride + bpp * x1 + b]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             float v = | 
					
						
							|  |  |  |                 (1.0f - xF) * (1.0f - yF) * c00 + xF * (1.0f - yF) * c01 + yF * (1.0 - xF) * c10 + xF * yF * (c11); | 
					
						
							|  |  |  |             v                 = std::min(std::max(v, 0.0f), 255.0f); | 
					
						
							| 
									
										
										
										
											2024-09-12 12:57:57 +08:00
										 |  |  |             dest[bpp * i + b] = (unsigned char)roundf(v); | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |         curPoints.fY += dy; | 
					
						
							|  |  |  |         curPoints.fX += dx; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerC4Bilinear(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                           size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  | #ifdef MNN_USE_NEON
 | 
					
						
							|  |  |  |     MNNSamplerC4BilinearOpt(source, dest + 4 * sta, reinterpret_cast<float*>(points), count, iw - 1, ih - 1, yStride); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     _sampleBilinearCommon(source, dest + 4 * sta, points, count, iw, ih, yStride, 4); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void MNNSamplerC3Bilinear(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                           size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     _sampleBilinearCommon(source, dest + 3 * sta, points, count, iw, ih, yStride, 3); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void MNNSamplerC1Bilinear(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                           size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  | #ifdef MNN_USE_NEON
 | 
					
						
							|  |  |  |     MNNSamplerC1BilinearOpt(source, dest + sta, reinterpret_cast<float*>(points), count, iw - 1, ih - 1, yStride); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     _sampleBilinearCommon(source, dest + sta, points, count, iw, ih, yStride, 1); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void MNNSamplerNearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, size_t count, | 
					
						
							|  |  |  |                        size_t iw, size_t ih, size_t yStride, int bpp) { | 
					
						
							|  |  |  |     dest = dest + bpp * sta; | 
					
						
							|  |  |  |     MNN::CV::Point curPoints; | 
					
						
							|  |  |  |     curPoints.fX = points[0].fX; | 
					
						
							|  |  |  |     curPoints.fY = points[0].fY; | 
					
						
							|  |  |  |     float dy     = points[1].fY; | 
					
						
							|  |  |  |     float dx     = points[1].fX; | 
					
						
							|  |  |  |     float xMax   = iw - 1; | 
					
						
							|  |  |  |     float yMax   = ih - 1; | 
					
						
							|  |  |  |     for (int i = 0; i < count; ++i) { | 
					
						
							|  |  |  |         int y = (int)roundf(__clamp(curPoints.fY, 0, yMax)); | 
					
						
							|  |  |  |         int x = (int)roundf(__clamp(curPoints.fX, 0, xMax)); | 
					
						
							|  |  |  |         curPoints.fY += dy; | 
					
						
							|  |  |  |         curPoints.fX += dx; | 
					
						
							|  |  |  |         auto sourcePos = y * yStride + bpp * x; | 
					
						
							|  |  |  |         for (int j = 0; j < bpp; ++j) { | 
					
						
							|  |  |  |             dest[bpp * i + j] = source[sourcePos + j]; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerC4Nearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                          size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  | #ifdef MNN_USE_NEON
 | 
					
						
							|  |  |  |     MNNSamplerC4NearestOpt(source, dest + 4 * sta, (float*)points, count, iw - 1, ih - 1, yStride); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     MNNSamplerNearest(source, dest, points, sta, count, iw, ih, yStride, 4); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerC1Nearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                          size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  | #ifdef MNN_USE_NEON
 | 
					
						
							|  |  |  |     MNNSamplerC1NearestOpt(source, dest + sta, (float*)points, count, iw - 1, ih - 1, yStride); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     MNNSamplerNearest(source, dest, points, sta, count, iw, ih, yStride, 1); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerC3Nearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                          size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     MNNSamplerNearest(source, dest, points, sta, count, iw, ih, yStride, 3); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerCopyCommon(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                           size_t count, size_t iw, size_t ih, size_t yStride, int bpp) { | 
					
						
							|  |  |  |     dest = dest + bpp * sta; | 
					
						
							|  |  |  |     MNN::CV::Point curPoints; | 
					
						
							|  |  |  |     curPoints.fX   = points[0].fX; | 
					
						
							|  |  |  |     curPoints.fY   = points[0].fY; | 
					
						
							|  |  |  |     float xMax     = iw - 1; | 
					
						
							|  |  |  |     float yMax     = ih - 1; | 
					
						
							|  |  |  |     int y          = (int)roundf(__clamp(curPoints.fY, 0, yMax)); | 
					
						
							|  |  |  |     int x          = (int)roundf(__clamp(curPoints.fX, 0, xMax)); | 
					
						
							|  |  |  |     auto sourcePos = y * yStride + bpp * x; | 
					
						
							|  |  |  |     ::memcpy(dest, source + sourcePos, bpp * count); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerI420Copy(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                         size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     MNN::CV::Point curPoints; | 
					
						
							|  |  |  |     curPoints.fX    = points[0].fX; | 
					
						
							|  |  |  |     curPoints.fY    = points[0].fY; | 
					
						
							|  |  |  |     float xMax      = iw - 1; | 
					
						
							|  |  |  |     float yMax      = ih - 1; | 
					
						
							|  |  |  |     int y           = (int)roundf(__clamp(curPoints.fY, 0, yMax)); | 
					
						
							|  |  |  |     int x           = (int)roundf(__clamp(curPoints.fX, 0, xMax)); | 
					
						
							|  |  |  |     auto uvPlane = (((int)iw + 1) / 2) * ((int(ih) + 1) / 2); | 
					
						
							|  |  |  |     int sourcePosY  = y * (int)iw + x; | 
					
						
							|  |  |  |     auto sourcePosU = source + (int)iw * (int)ih + (y / 2) * (((int)iw + 1) / 2) + (x / 2); | 
					
						
							|  |  |  |     auto sourcePosV = source + (int)iw * (int)ih + (y / 2) * (((int)iw + 1) / 2) + (x / 2) + uvPlane; | 
					
						
							|  |  |  |     auto uvCount = (count + 1) / 2; | 
					
						
							|  |  |  |     ::memcpy(dest + sta, source + sourcePosY, count); | 
					
						
							|  |  |  |     auto uDest = dest + (capacity) + (sta / 2) * 2; | 
					
						
							|  |  |  |     for (int i=0; i<uvCount; ++i) { | 
					
						
							|  |  |  |         uDest[2 * i + 0] = sourcePosV[i]; | 
					
						
							|  |  |  |         uDest[2 * i + 1] = sourcePosU[i]; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void MNNSamplerI420Nearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                            size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     auto srcY  = source; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto dstY  = dest + sta; | 
					
						
							|  |  |  |     auto dstUV = dest + (capacity) + (sta / 2) * 2; | 
					
						
							|  |  |  |     auto stride = yStride; | 
					
						
							|  |  |  |     if (yStride == 0) { | 
					
						
							|  |  |  |         stride = iw; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     auto srcU = source + stride * ih; | 
					
						
							|  |  |  |     MNNSamplerC1Nearest(srcY, dstY, points, 0, count, capacity, iw, ih, stride); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     MNN::CV::Point uvPoints[2]; | 
					
						
							|  |  |  |     uvPoints[0].fX = (points[0].fX - 0.01f) / 2.0f; | 
					
						
							|  |  |  |     uvPoints[0].fY = (points[0].fY - 0.01f) / 2.0f; | 
					
						
							| 
									
										
										
										
											2022-12-30 15:18:58 +08:00
										 |  |  |     uvPoints[1].fX = points[1].fX / 2.0f; | 
					
						
							|  |  |  |     uvPoints[1].fY = points[1].fY / 2.0f; | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |     if (yStride == 0) { | 
					
						
							|  |  |  |         stride =  ((iw + 1) / 2); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     auto srcV = srcU + stride * ((ih + 1) / 2); | 
					
						
							|  |  |  |     auto uvCount = (count + 1) / 2; | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         MNN::CV::Point curPoints; | 
					
						
							|  |  |  |         curPoints.fX = uvPoints[0].fX; | 
					
						
							|  |  |  |         curPoints.fY = uvPoints[0].fY; | 
					
						
							|  |  |  |         float dy     = uvPoints[1].fY; | 
					
						
							|  |  |  |         float dx     = uvPoints[1].fX; | 
					
						
							| 
									
										
										
										
											2022-12-30 15:18:58 +08:00
										 |  |  |         float xMax   = ((iw + 1) / 2) - 1; | 
					
						
							|  |  |  |         float yMax   = ((ih + 1) / 2) - 1; | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         for (int i = 0; i < uvCount; ++i) { | 
					
						
							|  |  |  |             int y = (int)roundf(__clamp(curPoints.fY, 0, yMax)); | 
					
						
							|  |  |  |             int x = (int)roundf(__clamp(curPoints.fX, 0, xMax)); | 
					
						
							|  |  |  |             curPoints.fY += dy; | 
					
						
							|  |  |  |             curPoints.fX += dx; | 
					
						
							|  |  |  |             auto offset = y * stride + x; | 
					
						
							|  |  |  |             dstUV[2 * i + 0] = srcV[offset]; | 
					
						
							|  |  |  |             dstUV[2 * i + 1] = srcU[offset]; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerNV21Copy(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                         size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     MNN::CV::Point curPoints; | 
					
						
							|  |  |  |     curPoints.fX    = points[0].fX; | 
					
						
							|  |  |  |     curPoints.fY    = points[0].fY; | 
					
						
							|  |  |  |     float xMax      = iw - 1; | 
					
						
							|  |  |  |     float yMax      = ih - 1; | 
					
						
							|  |  |  |     int y           = (int)roundf(__clamp(curPoints.fY, 0, yMax)); | 
					
						
							|  |  |  |     int x           = (int)roundf(__clamp(curPoints.fX, 0, xMax)); | 
					
						
							| 
									
										
										
										
											2023-02-28 10:41:24 +08:00
										 |  |  |     int stride = (int)yStride; | 
					
						
							|  |  |  |     int hstride = (int)yStride; | 
					
						
							|  |  |  |     if (yStride == 0) { | 
					
						
							|  |  |  |         stride = (int)iw; | 
					
						
							|  |  |  |         hstride = (((int)iw + 1) / 2) * 2; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     int sourcePosY  = y * stride + x; | 
					
						
							|  |  |  |     int sourcePosUV = (int)stride * (int)ih + (y / 2) * hstride + (x / 2) * 2; | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     ::memcpy(dest + sta, source + sourcePosY, count); | 
					
						
							|  |  |  |     ::memcpy(dest + (capacity) + (sta / 2) * 2, source + sourcePosUV, ((count + 1) / 2) * 2); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerNV21Nearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                            size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     auto srcY  = source; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto dstY  = dest + sta; | 
					
						
							|  |  |  |     auto dstUV = dest + (capacity) + (sta / 2) * 2; | 
					
						
							|  |  |  |     auto stride = yStride; | 
					
						
							|  |  |  |     if (yStride == 0) { | 
					
						
							|  |  |  |         stride = iw; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     auto srcUV = source + stride * ih; | 
					
						
							|  |  |  |     MNNSamplerC1Nearest(srcY, dstY, points, 0, count, capacity, iw, ih, stride); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     MNN::CV::Point uvPoints[2]; | 
					
						
							|  |  |  |     uvPoints[0].fX = (points[0].fX - 0.01f) / 2.0f; | 
					
						
							|  |  |  |     uvPoints[0].fY = (points[0].fY - 0.01f) / 2.0f; | 
					
						
							|  |  |  |     uvPoints[1].fX = points[1].fX; | 
					
						
							|  |  |  |     uvPoints[1].fY = points[1].fY; | 
					
						
							|  |  |  |     if (yStride == 0) { | 
					
						
							|  |  |  |         stride =  ((iw + 1) / 2) * 2; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     MNNSamplerNearest(srcUV, dstUV, uvPoints, 0, (count + 1) / 2, (iw + 1) / 2, (ih + 1) / 2, stride, 2); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							// Exchanges the two bytes of every byte pair: for each of the `countC2` pairs,
							// dest[2i] receives source[2i + 1] and dest[2i + 1] receives source[2i].
							// Each pair is fully read before it is written, so `dest` may be the same
							// buffer as `source` (in-place swap).
							//
							// With MNN_USE_NEON, whole groups of 16 pairs (32 bytes) are handled with
							// de-interleaving loads/stores; the scalar loop processes whatever is left.
							// All indices are size_t so they match the type of `countC2`.
							static void _swapUV(const unsigned char* source, unsigned char* dest, size_t countC2) {
							    size_t done = 0;
							#ifdef MNN_USE_NEON
							    const size_t blockCount = countC2 / 16;
							    for (size_t b = 0; b < blockCount; ++b) {
							        // vld2q_u8 splits even/odd bytes into val[0]/val[1]; storing them in
							        // the opposite order swaps every pair.
							        uint8x16x2_t lanes     = vld2q_u8(source + b * 32);
							        const uint8x16_t first = lanes.val[0];
							        lanes.val[0]           = lanes.val[1];
							        lanes.val[1]           = first;
							        vst2q_u8(dest + b * 32, lanes);
							    }
							    done = blockCount * 16;
							#endif
							    for (size_t i = done; i < countC2; ++i) {
							        const unsigned char first = source[2 * i];
							        dest[2 * i]               = source[2 * i + 1];
							        dest[2 * i + 1]           = first;
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerNV12Copy(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                         size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     MNNSamplerNV21Copy(source, dest, points, sta, count, capacity, iw, ih, yStride); | 
					
						
							|  |  |  |     auto destUV = dest + (capacity) + (sta / 2) * 2; | 
					
						
							|  |  |  |     auto countC2 = ((count + 1) / 2); | 
					
						
							|  |  |  |     _swapUV(destUV, destUV, countC2); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void MNNSamplerNV12Nearest(const unsigned char* source, unsigned char* dest, MNN::CV::Point* points, size_t sta, | 
					
						
							|  |  |  |                            size_t count, size_t capacity, size_t iw, size_t ih, size_t yStride) { | 
					
						
							|  |  |  |     MNNSamplerNV21Nearest(source, dest, points, sta, count, capacity, iw, ih, yStride); | 
					
						
							|  |  |  |     auto destUV = dest + (capacity) + (sta / 2) * 2; | 
					
						
							|  |  |  |     auto countC2 = ((count + 1) / 2); | 
					
						
							|  |  |  |     _swapUV(destUV, destUV, countC2); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-02-18 11:30:27 +08:00
										 |  |  | 
 | 
					
						
							// Fills `dest` with `count` copies of the single 3-byte pixel at `source`.
							// `dest` must have room for 3 * count bytes; nothing is read or written when
							// count is 0. The index is size_t to match `count` (a signed int counter would
							// be converted on every comparison and overflow for very large counts).
							void MNNC3blitH(const unsigned char* source, unsigned char* dest, size_t count) {
							    for (size_t i = 0; i < count; ++i) {
							        memcpy(dest + 3 * i, source, 3);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Fills `dest` with `count` copies of the single 4-byte pixel at `source`.
							// `dest` must have room for 4 * count bytes; nothing is read or written when
							// count is 0. The index is size_t to match `count` (a signed int counter would
							// be converted on every comparison and overflow for very large counts).
							void MNNC4blitH(const unsigned char* source, unsigned char* dest, size_t count) {
							    for (size_t i = 0; i < count; ++i) {
							        memcpy(dest + 4 * i, source, 4);
							    }
							}
					
						
							|  |  |  | 
 | 
					
						
							// Fills `dest` with `count` copies of the single byte at `source`.
							// This is exactly a memset, so the per-byte memcpy loop is replaced by one
							// call. `source` is not dereferenced when count is 0, matching the loop form.
							void MNNC1blitH(const unsigned char* source, unsigned char* dest, size_t count) {
							    if (count == 0) {
							        return;
							    }
							    memset(dest, source[0], count);
							}