mirror of https://github.com/alibaba/MNN.git
Merge pull request #4036 from ihb2032/opt/rvv-minmax-float
opt(RVV): Optimize max and min float functions with intrinsics
GitOrigin-RevId: d246089d9de5602aeb58e91d1169923d58ed9712
This commit is contained in:
parent
4e47bbdb40
commit
3f3ccf2e75
|
|
@ -0,0 +1,25 @@
|
|||
#include <riscv_vector.h>
|
||||
#include <cfloat>
|
||||
|
||||
#define UNIT 4
|
||||
|
||||
void MNNMaxFloat(float *input, float *maxBuffer, int32_t inputCountUnit) {
|
||||
const float init = -FLT_MAX;
|
||||
for (int j = 0; j < UNIT; ++j) {
|
||||
float local = init;
|
||||
size_t i = 0;
|
||||
|
||||
while (i < (size_t)inputCountUnit) {
|
||||
size_t vl = __riscv_vsetvl_e32m8(inputCountUnit - i);
|
||||
float *p0 = input + (i * UNIT * 2) + j * 2;
|
||||
float *p1 = p0 + 1;
|
||||
vfloat32m8_t v0 = __riscv_vlse32_v_f32m8(p0, UNIT * 2 * sizeof(float), vl);
|
||||
vfloat32m8_t v1 = __riscv_vlse32_v_f32m8(p1, UNIT * 2 * sizeof(float), vl);
|
||||
vfloat32m8_t vmax = __riscv_vfmax_vv_f32m8(v0, v1, vl);
|
||||
vfloat32m1_t vred = __riscv_vfredmax_vs_f32m8_f32m1(vmax, __riscv_vfmv_s_f_f32m1(local, 1), vl);
|
||||
local = __riscv_vfmv_f_s_f32m1_f32(vred);
|
||||
i += vl;
|
||||
}
|
||||
maxBuffer[j] = local;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#include <riscv_vector.h>
|
||||
#include <cfloat>
|
||||
|
||||
#define UNIT 4
|
||||
|
||||
void MNNMinFloat(float *input, float *minBuffer, int32_t inputCountUnit) {
|
||||
const float init = FLT_MAX;
|
||||
for (int j = 0; j < UNIT; ++j) {
|
||||
float local = init;
|
||||
size_t i = 0;
|
||||
|
||||
while (i < (size_t)inputCountUnit) {
|
||||
size_t vl = __riscv_vsetvl_e32m8(inputCountUnit - i);
|
||||
float *p0 = input + (i * UNIT * 2) + j * 2;
|
||||
float *p1 = p0 + 1;
|
||||
vfloat32m8_t v0 = __riscv_vlse32_v_f32m8(p0, UNIT * 2 * sizeof(float), vl);
|
||||
vfloat32m8_t v1 = __riscv_vlse32_v_f32m8(p1, UNIT * 2 * sizeof(float), vl);
|
||||
vfloat32m8_t vmin = __riscv_vfmin_vv_f32m8(v0, v1, vl);
|
||||
vfloat32m1_t vred = __riscv_vfredmin_vs_f32m8_f32m1(vmin, __riscv_vfmv_s_f_f32m1(local, 1), vl);
|
||||
local = __riscv_vfmv_f_s_f32m1_f32(vred);
|
||||
i += vl;
|
||||
}
|
||||
minBuffer[j] = local;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue