//
//  CPUDet.cpp
//  MNN
//
//  Created by MNN on 2018/08/07.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <cmath>
#include <cstring> // for ::memcpy
#include <limits>
#include "CPUDet.hpp"
#include "CPUBackend.hpp"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "core/Concurrency.h"
#include "backend/cpu/compute/CommonOptFunction.h"

namespace MNN {
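// onResize: reserve per-thread scratch space. Each thread gets its own
// M x ROUND_UP(M, pack) working copy of one matrix plus M row pointers,
// so pivoting can "swap" rows by exchanging pointers instead of moving data.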
ErrorCode CPUDet::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto numberThread = ((CPUBackend*)backend())->threadNumber();
    auto M = inputs[0]->length(1);
    auto core = static_cast<CPUBackend*>(backend())->functions();
    mTempMat.reset(Tensor::createDevice<float>({numberThread, M, ROUND_UP(M, core->pack)}));
    mTempRowPtrs.reset(Tensor::createDevice<float*>({numberThread, M}));
    auto success = backend()->onAcquireBuffer(mTempMat.get(), Backend::DYNAMIC);
    success &= backend()->onAcquireBuffer(mTempRowPtrs.get(), Backend::DYNAMIC);
    if (!success) {
        return OUT_OF_MEMORY;
    }
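    // Acquire-then-release inside onResize is the DYNAMIC-buffer idiom in MNN:
    // the memory plan keeps the scratch tensors usable during onExecute while
    // letting the allocator share the space with later ops.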
    backend()->onReleaseBuffer(mTempMat.get(), Backend::DYNAMIC);
    backend()->onReleaseBuffer(mTempRowPtrs.get(), Backend::DYNAMIC);
    return NO_ERROR;
}

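// onExecute: compute det(A) for each matrix in the batch via Gaussian
// elimination with partial pivoting. The determinant is the product of the
// pivots, with the sign flipped once per row swap.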
ErrorCode CPUDet::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto core = static_cast<CPUBackend*>(backend())->functions();
    auto input  = inputs[0], output = outputs[0];
    auto batch = input->length(0), M = input->length(1), step = ROUND_UP(M, core->pack);
    auto computeDet = [&](int b, int tId) -> float {
#define F_IS_ZERO(v) (fabs(v) < 1e-6)
#define ADDR(row) (mTempRowPtrs->host<float*>()[tId * M + row])
#define VAL(row, col) (*(ADDR(row) + col))
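        // elimRow: row2 += ratio * row1, zeroing column row1 of row2 with the
        // vectorized kernel. The span [sta, end) is widened to a multiple of
        // core->pack, extending right into the row padding and, if needed,
        // left over already-eliminated (near-zero) columns; neither can feed
        // back into the pivots, so the result is unaffected. The clamp bounds
        // in params span the full float range, making the clamp a no-op.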
        auto elimRow = [&](int row1, int row2) {
            auto ratio = -VAL(row2, row1) / VAL(row1, row1);
            float params[] = {1.f, ratio, std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max()};
            int sta = row1, end = M;
            int extra = (core->pack - (end - sta) % core->pack) % core->pack;
            if (step - M >= extra) {
                end = M + extra;
            } else {
                sta -= extra - (step - M);
                end = step;
            }
            auto p1 = ADDR(row1) + sta, p2 = ADDR(row2) + sta;
            core->MNNAxByClampBroadcastUnit(p2, p2, p1, 1, core->pack, core->pack, (end - sta) / core->pack, params);
        };
        float result = 1;
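        // Stage this batch's matrix into the thread-local scratch copy;
        // elimination below works in place on the copy, never on the input.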
        for (int i = 0; i < M; ++i) {
            auto tempPtr = mTempMat->host<float>() + (tId * M + i) * step;
            ::memcpy(tempPtr, input->host<float>() + (b * M + i) * M, M * sizeof(float));
            mTempRowPtrs->host<float*>()[tId * M + i] = tempPtr;
        }
        for (int i = 0; i < M; ++i) {
            // Pivot: if the diagonal entry is (near) zero, swap in a row
            // below with a usable entry in this column.
            if (F_IS_ZERO(VAL(i, i))) {
                bool swapd = false;
                for (int j = i + 1; j < M; ++j) {
                    if (!F_IS_ZERO(VAL(j, i))) {
                        std::swap(ADDR(i), ADDR(j));
                        result = -result; // a row swap flips the determinant's sign
                        swapd = true;
                        break;
                    }
                }
                if (!swapd) {
                    // The entire column is zero: the matrix is singular.
                    return 0;
                }
            }
            result *= VAL(i, i);
            for (int j = i + 1; j < M; ++j) {
                elimRow(i, j);
            }
        }
        return result;
    };

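    // Batches are strided across threads; each thread writes distinct output
    // entries and touches only its own tId slice of the scratch tensors, so
    // no synchronization is needed.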
    int numberThread = ((CPUBackend*)backend())->threadNumber();
    MNN_CONCURRENCY_BEGIN(tId, numberThread) {
        for (int b = tId; b < batch; b += numberThread) {
            output->host<float>()[b] = computeDet(b, tId);
        }
    }
    MNN_CONCURRENCY_END();
    return NO_ERROR;
}

class CPUDetCreator : public CPUBackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        return new CPUDet(backend);
    }
};

REGISTER_CPU_OP_CREATOR(CPUDetCreator, OpType_Det);
} // namespace MNN