//
//  WingoradGenerater.cpp
//  MNN
//
//  Created by MNN on 2018/08/20.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <math.h>
#include <string.h>
#include "math/WingoradGenerater.hpp"
#include "core/Macro.h"

namespace MNN {
namespace Math {

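// Lagrange-basis denominators over the interpolation points a: returns an
// alpha x 1 column with f[x] = prod_{i != x} (a[x] - a[i]).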
static std::shared_ptr<Tensor> computeF(const float* a, int alpha) {
    std::shared_ptr<Tensor> res;
    res.reset(Matrix::create(alpha, 1));
    auto diagData = res->host<float>();
    for (int x = 0; x < alpha; ++x) {
        float product = 1.0f;
        for (int i = 0; i < alpha; ++i) {
            if (x == i) {
                continue;
            }
            product *= (a[x] - a[i]);
        }
        diagData[x] = product;
    }
    return res;
}

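// Builds the n x (n + 1) matrix [I | t] with t[y] = -a[y]^n; combined with the
// scaled Lagrange matrix in computeB below, it folds the degree-n term into
// the inverse interpolation.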
static std::shared_ptr<Tensor> computeT(const float* a, int n) {
    std::shared_ptr<Tensor> result(Matrix::create(n + 1, n));
    for (int y = 0; y < n; ++y) {
        auto line = result->host<float>() + result->stride(0) * y;
        ::memset(line, 0, result->length(0) * sizeof(float));
        line[y] = 1.0f;
        line[n] = -::powf(a[y], (float)n);
    }
    // Matrix::print(result.get());
    return result;
}

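// Lagrange numerator polynomials: row k of the returned n x n matrix holds the
// coefficients (constant term first) of l_k(x) = prod_{i != k} (x - a[i]).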
static std::shared_ptr<Tensor> computeL(const float* a, int n) {
    MNN_ASSERT(n >= 1);
    std::shared_ptr<Tensor> result(Matrix::create(n, n));
    for (int k = 0; k < n; ++k) {
        std::shared_ptr<Tensor> poly(Matrix::create(1, 1));
        auto p = poly->host<float>();
        p[0]   = 1.0f;
        std::shared_ptr<Tensor> poly2(Matrix::create(2, 1));
        auto p2 = poly2->host<float>();
        for (int i = 0; i < n; ++i) {
            if (i == k) {
                continue;
            }
            p2[0] = -a[i];
            p2[1] = 1.0f;
            poly  = Matrix::polyMulti(poly, poly2);
        }
        ::memcpy(result->host<float>() + result->buffer().dim[0].stride * k, poly->host<float>(), n * sizeof(float));
    }
    return result;
}

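// Assembles the alpha x alpha data-transform matrix B: the top alpha - 1 rows
// are the f-normalized Lagrange matrix (transposed) multiplied by T over the
// first alpha - 1 points; the last row is the unit vector e_{alpha-1},
// handling the extra "point at infinity".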
static std::shared_ptr<Tensor> computeB(const float* a, int alpha) {
    auto LT    = computeL(a, alpha - 1);
    auto fdiag = computeF(a, alpha - 1);
    Matrix::divPerLine(LT.get(), LT.get(), fdiag.get());

    std::shared_ptr<Tensor> L(Matrix::create(alpha - 1, alpha - 1));
    Matrix::transpose(L.get(), LT.get());

    auto T = computeT(a, alpha - 1);
    std::shared_ptr<Tensor> BT(Matrix::create(alpha, alpha - 1));
    Matrix::multi(BT.get(), L.get(), T.get());

    std::shared_ptr<Tensor> B(Matrix::create(alpha, alpha));
    for (int y = 0; y < alpha - 1; ++y) {
        ::memcpy(B->host<float>() + B->stride(0) * y, BT->host<float>() + BT->stride(0) * y, alpha * sizeof(float));
    }
    auto BLast = B->host<float>() + B->stride(0) * (alpha - 1);
    for (int x = 0; x < alpha - 1; ++x) {
        BLast[x] = 0;
    }
    BLast[alpha - 1] = 1.0f;

    return B;
}

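// Vandermonde-style evaluation matrix: entry (y, x) is a[x]^y for the first
// m - 1 columns (with 0^0 taken as 1), and the last column is zero except for
// a 1 in the final row, which carries the highest-degree term.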
static std::shared_ptr<Tensor> computeA(const float* a, int m, int n) {
    std::shared_ptr<Tensor> res;
    res.reset(Matrix::create(m, n));
    for (int y = 0; y < n; ++y) {
        auto line = res->host<float>() + res->buffer().dim[0].stride * y;
        for (int x = 0; x < m - 1; ++x) {
            if (x == 0 && y == 0) {
                line[x] = 1.0f;
            } else {
                line[x] = ::powf(a[x], (float)y);
            }
        }
        if (y == n - 1) {
            line[m - 1] = 1.0f;
        } else {
            line[m - 1] = 0.0f;
        }
    }
    return res;
}

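// Scaling diagonal used by the generator: the same products as computeF, but
// over the first alpha - 1 points only, with the last entry pinned to 1 and
// the first entry forced positive.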
static std::shared_ptr<Tensor> computeFDiag(const float* a, int alpha) {
    std::shared_ptr<Tensor> res;
    res.reset(Matrix::create(alpha, 1));
    auto diagData = res->host<float>();
    for (int x = 0; x < alpha - 1; ++x) {
        float product = 1.0f;
        for (int i = 0; i < alpha - 1; ++i) {
            if (x == i) {
                continue;
            }
            product *= (a[x] - a[i]);
        }
        diagData[x] = product;
    }
    diagData[alpha - 1] = 1.0f;
    if (diagData[0] < 0) {
        diagData[0] = -diagData[0];
    }
    return res;
}

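// Generates the Winograd transforms for F(computeUnit, kernelSize) with
// alpha = computeUnit + kernelSize - 1, cf. Lavin & Gray, "Fast Algorithms
// for Convolutional Neural Networks": Y = A^T((G g G^T) (*) (B^T d B))A,
// where (*) is the element-wise product. Interpolation points are
// 0, interp, -interp, 2 * interp, -2 * interp, ... With dividedInG, the
// scaling diagonal f is divided out of G and multiplied back into B, so the
// normalization is absorbed by the (offline) weight transform.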
WinogradGenerater::WinogradGenerater(int computeUnit, int kernelSize, float interp, bool dividedInG) {
    MNN_ASSERT(computeUnit > 0 && kernelSize > 0);
    mUnit       = computeUnit;
    mKernelSize = kernelSize;

    int n     = computeUnit;
    int r     = kernelSize;
    int alpha = n + r - 1;
    mG.reset(Matrix::create(r, alpha));
    mB.reset(Matrix::create(alpha, alpha));
    mA.reset(Matrix::create(n, alpha));

    std::shared_ptr<Tensor> polyBuffer(Matrix::create(alpha, 1));

    auto a   = polyBuffer->host<float>();
    a[0]     = 0.0f;
    int sign = 1;
    for (int i = 0; i < alpha - 1; ++i) {
        int value = 1 + i / 2;
        a[i + 1]  = sign * value * interp;
        sign *= -1;
    }
    // Matrix::print(polyBuffer.get());
    {
        auto A = computeA(a, alpha, n);
        Matrix::transpose(mA.get(), A.get());
    }
    auto fdiag = computeFDiag(a, alpha);
    // Matrix::print(fdiag.get());
    {
        auto A = computeA(a, alpha, r);
        Matrix::transpose(mG.get(), A.get());
        if (dividedInG) {
            Matrix::divPerLine(mG.get(), mG.get(), fdiag.get());
        }
    }
    {
        auto B = computeB(a, alpha);
        if (dividedInG) {
            Matrix::transpose(mB.get(), B.get());
            Matrix::mulPerLine(B.get(), mB.get(), fdiag.get());
            Matrix::transpose(mB.get(), B.get());
        } else {
            mB = B;
        }
    }
}
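// Allocates the transformed-weight tensor, laid out as
// {alpha * alpha, UP_DIV(co, unitCo), UP_DIV(ci, unitCi), unitCi, unitCo};
// when alloc is true the buffer lives on the host, otherwise only the shape
// is created for a device-side tensor.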
std::shared_ptr<Tensor> WinogradGenerater::allocTransformWeight(const Tensor* source, int unitCi, int unitCo, bool alloc) {
    int ci = source->channel();
    int co = source->batch();
    MNN_ASSERT(source->width() == source->height() && source->width() == mG->length(1));
    int ciC4 = UP_DIV(ci, unitCi);
    int coC4 = UP_DIV(co, unitCo);
    if (alloc) {
        return std::shared_ptr<Tensor>(Tensor::create<float>({mB->length(0) * mB->length(1), coC4, ciC4, unitCi, unitCo}));
    }
    return std::shared_ptr<Tensor>(Tensor::createDevice<float>({mB->length(0) * mB->length(1), coC4, ciC4, unitCi, unitCo}));
}

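// Transforms each kernelSize x kernelSize kernel K to G * K * G^T and
// scatters the alpha * alpha results along dimension 0 of weightDest, packing
// channels into unitCi x unitCo blocks; ciFirst chooses whether ci or co is
// the innermost index of a block.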
void WinogradGenerater::transformWeight(const Tensor* weightDest, const Tensor* source, bool ciFirst) {
    std::shared_ptr<Tensor> GT(Math::Matrix::create(mG->length(0), mG->length(1)));
    Math::Matrix::transpose(GT.get(), mG.get());
    int ci          = source->length(1);
    int co          = source->length(0);
    int kernelCount = source->length(2);
    int unitCi      = weightDest->length(3);
    int unitCo      = weightDest->length(4);
    auto alpha      = mB->length(0);

    if (ci % unitCi != 0 || co % unitCo != 0) {
        ::memset(weightDest->host<float>(), 0, weightDest->size());
    }
    std::shared_ptr<Tensor> M(Math::Matrix::create(kernelCount, alpha));
    std::shared_ptr<Tensor> K(Math::Matrix::createShape(kernelCount, kernelCount));
    std::shared_ptr<Tensor> K_Transform(Math::Matrix::create(alpha, alpha));
    auto weightPtr      = source->host<float>();
    auto KTransformData = K_Transform->host<float>();
    int lCi = unitCo;
    int lCo = 1;
    if (ciFirst) {
        lCi = 1;
        lCo = unitCi;
    }
    for (int oz = 0; oz < co; ++oz) {
        auto srcOz = weightPtr + oz * ci * kernelCount * kernelCount;

        int ozC4 = oz / unitCo;
        int mx   = oz % unitCo;

        auto dstOz = weightDest->host<float>() + weightDest->stride(1) * ozC4 + mx * lCo;
        for (int sz = 0; sz < ci; ++sz) {
            int szC4         = sz / unitCi;
            int my           = sz % unitCi;
            auto srcSz       = srcOz + kernelCount * kernelCount * sz;
            K->buffer().host = (uint8_t*)srcSz;
            // M = G * K
            Math::Matrix::multi(M.get(), mG.get(), K.get());
            // K_Transform = M * GT
            Math::Matrix::multi(K_Transform.get(), M.get(), GT.get());

            auto dstSz = dstOz + szC4 * weightDest->stride(2) + my * lCi;

            for (int i = 0; i < alpha * alpha; ++i) {
                *(dstSz + i * weightDest->stride(0)) = KTransformData[i];
            }
        }
    }
}
} // namespace Math
} // namespace MNN