2025-04-15 03:12:36 +08:00
|
|
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
|
From: jmorganca <jmorganca@gmail.com>
|
|
|
|
Date: Thu, 6 Jun 2024 23:55:47 -0700
|
|
|
|
Subject: [PATCH] ggml-backend: malloc and free using the same compiler
|
|
|
|
|
|
|
|
On Windows, the CUDA backend must be compiled with MSVC but generic
|
|
|
|
portions compiled with CGo use either GCC or Clang. Since
|
|
|
|
ggml_backend_buffer_t spans these two components, it can be allocated
|
|
|
|
and freed using different compilers. Specifically, it is malloced by
|
|
|
|
MSVC and freed by Clang, which can cause problems.
|
|
|
|
|
|
|
|
This moves freeing of the buffers into the backends to avoid the
|
|
|
|
problem.
|
|
|
|
---
|
2025-08-15 05:42:58 +08:00
|
|
|
ggml/src/ggml-backend.cpp | 9 +++++++--
|
|
|
|
ggml/src/ggml-cann/ggml-cann.cpp | 2 ++
|
|
|
|
ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++
|
2025-10-03 05:47:10 +08:00
|
|
|
ggml/src/ggml-metal/ggml-metal.cpp | 2 ++
|
2025-08-15 05:42:58 +08:00
|
|
|
ggml/src/ggml-opencl/ggml-opencl.cpp | 1 +
|
|
|
|
ggml/src/ggml-rpc/ggml-rpc.cpp | 1 +
|
|
|
|
ggml/src/ggml-sycl/ggml-sycl.cpp | 3 +++
|
|
|
|
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 ++
|
2025-10-03 05:47:10 +08:00
|
|
|
8 files changed, 21 insertions(+), 2 deletions(-)
|
2025-04-15 03:12:36 +08:00
|
|
|
|
|
|
|
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
index ff9135fe..8ba86f82 100644
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-backend.cpp
|
|
|
|
+++ b/ggml/src/ggml-backend.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -113,7 +113,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
2025-04-15 03:12:36 +08:00
|
|
|
if (buffer->iface.free_buffer != NULL) {
|
|
|
|
buffer->iface.free_buffer(buffer);
|
|
|
|
}
|
|
|
|
- delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -586,6 +585,7 @@ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
2025-04-15 03:12:36 +08:00
|
|
|
|
|
|
|
free(ctx->buffers);
|
|
|
|
free(ctx);
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -2075,6 +2075,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
2025-04-15 03:12:36 +08:00
|
|
|
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
2025-10-03 05:47:10 +08:00
|
|
|
GGML_ASSERT(buffer);
|
2025-04-15 03:12:36 +08:00
|
|
|
ggml_aligned_free(buffer->context, buffer->size);
|
|
|
|
+ delete buffer;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void ggml_backend_cpu_ptr_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -2127,7 +2132,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
|
2025-04-15 03:12:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
|
|
|
|
- /* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
|
|
|
|
+ /* .free_buffer = */ ggml_backend_cpu_ptr_buffer_free_buffer, // ptr is not owned by the buffer, but the buffer object itself still needs to be freed
|
|
|
|
/* .get_base = */ ggml_backend_cpu_buffer_get_base,
|
|
|
|
/* .init_tensor = */ NULL, // no initialization required
|
|
|
|
/* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
|
|
|
|
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
index b51b554e..3ba0f5a6 100755
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-cann/ggml-cann.cpp
|
|
|
|
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -843,6 +843,7 @@ static void ggml_backend_cann_buffer_free_buffer(
|
2025-04-15 03:12:36 +08:00
|
|
|
ggml_backend_cann_buffer_context* ctx =
|
|
|
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -1630,6 +1631,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
|
2025-04-15 03:12:36 +08:00
|
|
|
*/
|
|
|
|
static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
|
|
|
|
ACL_CHECK(aclrtFreeHost(buffer->context));
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
2025-10-03 05:47:10 +08:00
|
|
|
index b7e81b21..fdf8c63d 100644
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
|
|
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
2025-08-15 05:42:58 +08:00
|
|
|
@@ -567,6 +567,7 @@ struct ggml_backend_cuda_buffer_context {
|
2025-04-15 03:12:36 +08:00
|
|
|
static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
|
2025-08-15 05:42:58 +08:00
|
|
|
@@ -822,6 +823,7 @@ struct ggml_backend_cuda_split_buffer_context {
|
2025-04-15 03:12:36 +08:00
|
|
|
static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
2025-08-15 05:42:58 +08:00
|
|
|
@@ -1103,6 +1105,7 @@ static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {
|
2025-04-15 03:12:36 +08:00
|
|
|
|
|
|
|
static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
CUDA_CHECK(cudaFreeHost(buffer->context));
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void * ggml_cuda_host_malloc(size_t size) {
|
2025-10-03 05:47:10 +08:00
|
|
|
diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
|
|
|
|
index e11555a7..909e17de 100644
|
|
|
|
--- a/ggml/src/ggml-metal/ggml-metal.cpp
|
|
|
|
+++ b/ggml/src/ggml-metal/ggml-metal.cpp
|
|
|
|
@@ -25,6 +25,7 @@ static void ggml_backend_metal_buffer_shared_free_buffer(ggml_backend_buffer_t b
|
|
|
|
GGML_ASSERT(ggml_metal_buffer_is_shared(ctx));
|
|
|
|
|
|
|
|
ggml_metal_buffer_free(ctx);
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
2025-04-15 03:12:36 +08:00
|
|
|
|
2025-10-03 05:47:10 +08:00
|
|
|
static void * ggml_backend_metal_buffer_shared_get_base(ggml_backend_buffer_t buffer) {
|
|
|
|
@@ -99,6 +100,7 @@ static void ggml_backend_metal_buffer_private_free_buffer(ggml_backend_buffer_t
|
|
|
|
GGML_ASSERT(!ggml_metal_buffer_is_shared(ctx));
|
|
|
|
|
|
|
|
ggml_metal_buffer_free(ctx);
|
|
|
|
+ delete buffer;
|
2025-04-15 03:12:36 +08:00
|
|
|
}
|
|
|
|
|
2025-10-03 05:47:10 +08:00
|
|
|
static void * ggml_backend_metal_buffer_private_get_base(ggml_backend_buffer_t buffer) {
|
2025-04-15 03:12:36 +08:00
|
|
|
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
index 0cf3b924..09d706b5 100644
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
|
|
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -3215,6 +3215,7 @@ struct ggml_backend_opencl_buffer_context {
|
2025-04-15 03:12:36 +08:00
|
|
|
static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
|
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
index f99681c8..59591770 100644
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
|
|
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -505,6 +505,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
2025-04-15 03:12:36 +08:00
|
|
|
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_FREE_BUFFER, &request, sizeof(request), nullptr, 0);
|
2025-08-15 05:42:58 +08:00
|
|
|
RPC_STATUS_ASSERT(status);
|
2025-04-15 03:12:36 +08:00
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
|
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
index 4ac919ea..447ea3c4 100644
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
|
|
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
|
2025-08-15 05:42:58 +08:00
|
|
|
@@ -331,6 +331,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
|
2025-04-15 03:12:36 +08:00
|
|
|
ggml_sycl_set_device(ctx->device);
|
|
|
|
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
catch (sycl::exception const &exc) {
|
|
|
|
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
2025-08-15 05:42:58 +08:00
|
|
|
@@ -792,6 +793,7 @@ struct ggml_backend_sycl_split_buffer_context {
|
2025-04-15 03:12:36 +08:00
|
|
|
static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
2025-08-15 05:42:58 +08:00
|
|
|
@@ -1134,6 +1136,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
|
2025-04-15 03:12:36 +08:00
|
|
|
|
|
|
|
static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
ggml_sycl_host_free(buffer->context);
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
|
|
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
index 2608cbd0..061cd078 100644
|
2025-04-15 03:12:36 +08:00
|
|
|
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
|
|
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -11603,6 +11603,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
2025-04-15 03:12:36 +08:00
|
|
|
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
|
|
|
ggml_vk_destroy_buffer(ctx->dev_buffer);
|
|
|
|
delete ctx;
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
|
2025-10-03 05:47:10 +08:00
|
|
|
@@ -11746,6 +11747,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
|
2025-04-15 03:12:36 +08:00
|
|
|
static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
|
|
VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
|
|
|
|
ggml_vk_host_free(vk_instance.devices[0], buffer->context);
|
|
|
|
+ delete buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|