Merge 67b8d3c817 into bc71278670

2025-10-07 12:46:30 +05:30 · 2025-10-07 12:46:30 +05:30 · ad8c2a8814
parent bc71278670 67b8d3c817
commit ad8c2a8814
6 changed files with 2397 additions and 3 deletions
--- a/ml/backend/ggml/ggml/src/CMakeLists.txt
+++ b/ml/backend/ggml/ggml/src/CMakeLists.txt
@ -364,7 +364,7 @@ if (GGML_CPU_ALL_VARIANTS)
            ggml_add_cpu_backend_variant(power8_2       POWER8  VSX)
            ggml_add_cpu_backend_variant(power9         POWER9  VSX)
            ggml_add_cpu_backend_variant(power10        POWER10 VSX)
-            ggml_add_cpu_backend_variant(power11        POWER11 VSX)
+            ggml_add_cpu_backend_variant(power11        POWER10 VSX)
        else()
            message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
        endif()
--- a/ml/backend/ggml/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
@ -0,0 +1,82 @@
 # include "ggml-backend-impl.h"
 #if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
 #if defined(__linux__)
 #include <sys/auxv.h>
 #endif
 #include <string>
 // Runtime description of the host PowerPC CPU.
 //
 // On Linux the constructor reads the AT_PLATFORM string from the auxiliary
 // vector (for example "power9", "power10" or "power7+") and derives the POWER
 // generation from it. On other systems nothing is queried and every member
 // keeps its default value.
 struct powerpc_features {
    // Raw AT_PLATFORM string; stays empty when it could not be read.
    std::string platform = "";
    // POWER generation parsed from `platform`, or -1 when it is unknown.
    int power_version    = -1;
    // True when power_version is 9 or newer.
    bool has_vsx         = false;

    // Returns the number that directly follows the "power" prefix of `name`
    // ("power10" -> 10, "power7+" -> 7, "power6x" -> 6), or -1 when `name`
    // does not start with "power", has no digit right after the prefix, or
    // carries an implausibly long digit run. Never throws.
    static int parse_power_version(const std::string & name) {
        static const char prefix[] = "power";
        const std::string::size_type prefix_len = sizeof(prefix) - 1;
        // TBD: Do systems exist that return this in uppercase?
        if (name.compare(0, prefix_len, prefix) != 0) {
            return -1;
        }
        // Longer digit runs cannot be a real generation number; rejecting
        // them also keeps the accumulation below from overflowing an int.
        const int max_digits = 4;
        int version = -1;
        int digits  = 0;
        for (std::string::size_type i = prefix_len; i < name.size(); i++) {
            const char c = name[i];
            // Plain range check: locale independent and well defined for
            // every char value, unlike std::isdigit on a raw char.
            if (c < '0' || c > '9') {
                break; // a trailing marker such as '+' or 'x' ends the number
            }
            if (++digits > max_digits) {
                return -1;
            }
            version = (digits == 1 ? 0 : version) * 10 + (c - '0');
        }
        return version;
    }

    powerpc_features() {
 #if defined(__linux__)
        const unsigned long auxval = getauxval(AT_PLATFORM);
        if (auxval) {
            // A non-zero AT_PLATFORM value is used as a C-string pointer.
            platform      = std::string(reinterpret_cast<const char*>(auxval));
            power_version = parse_power_version(platform);
        }
 #endif
        has_vsx = power_version >= 9;
    }
 };
 static int ggml_backend_cpu_powerpc_score() {
    int score = 1;
    powerpc_features pf;
 // Platform scores
 #if defined(GGML_USE_POWER7)
    if (pf.power_version < 7) { return 0; }
    score += 1<<1;
 #endif
 #if defined(GGML_USE_POWER8)
    if (pf.power_version < 8) { return 0; }
    score += 1<<2;
 #endif
 #if defined(GGML_USE_POWER9)
    if (pf.power_version < 9) { return 0; }
    score += 1<<3;
 #endif
 #if defined(GGML_USE_POWER10)
    if (pf.power_version < 10) { return 0; }
    score += 1<<4;
 #endif
 #if defined(GGML_USE_POWER11)
    if (pf.power_version < 11) { return 0; }
    score += 1<<5;
 #endif
 // Feature scores
 #if defined(GGML_USE_VSX)
    if (!pf.has_vsx) { return 0; }
    score += 1<<6;
 #endif
    return score;
 }
 GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
 #endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
--- a/ml/backend/ggml/ggml/src/ggml-cpu/arch/powerpc/powerpc.go
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/arch/powerpc/powerpc.go
@ -0,0 +1,5 @@
 package powerpc
 // #cgo CXXFLAGS: -std=c++17
 // #cgo CPPFLAGS: -I${SRCDIR}/../.. -I${SRCDIR}/../../.. -I${SRCDIR}/../../../../include
 import "C"
--- a/ml/backend/ggml/ggml/src/ggml-cpu/arch/powerpc/quants.c
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/arch/powerpc/quants.c
--- a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
@ -117,6 +117,9 @@ inline float32x4_t mul(float32x4_t x, float32x4_t y) { return vec_mul(x, y); }
 #endif
 #if defined(__MMA__)
 #ifndef vector
 #define vector __vector
 #endif
 typedef vector unsigned char vec_t;
 typedef __vector_quad acc_t;
 #endif
--- a/ml/backend/ggml/ggml/src/ggml-cpu/simd-mappings.h
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/simd-mappings.h
@ -621,7 +621,6 @@ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
 #define GGML_F16_VEC_REDUCE         GGML_F32Cx8_REDUCE
 #elif defined(__POWER9_VECTOR__)
 #define GGML_SIMD
 // F32 POWER9
@ -629,7 +628,7 @@ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
 #define GGML_F32_STEP 32
 #define GGML_F32_EPR  4
-#define GGML_F32x4              vector float
+#define GGML_F32x4              __vector float
 #define GGML_F32x4_ZERO         {0.0f}
 #define GGML_F32x4_SET1         vec_splats
 #define GGML_F32x4_LOAD(p)      vec_xl(0, p)