ollama/llama/patches/0028-vulkan-pci-and-memory....

253 lines
11 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Fri Sep 5 08:25:03 2025 -0700
Subject: [PATCH] Vulkan PCI and Memory
---
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 176 ++++++++++++++++++++++-----
1 file changed, 145 insertions(+), 31 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index adea7783..fb7204ce 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -12423,31 +12423,99 @@ std::string ggml_backend_vk_get_device_id(int device) {
return ggml_vk_get_device_id(dev_idx);
}
-void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
- GGML_ASSERT(device < (int) vk_instance.device_indices.size());
- GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
+//////////////////////////
+
+struct ggml_backend_vk_device_context {
+    size_t device = 0;              // position used to index vk_instance.device_indices
+    std::string name;
+    std::string description;
+    bool is_integrated_gpu = false; // set when the device type is vk::PhysicalDeviceType::eIntegratedGpu
+    // Combined string id in the form "dddd:bb:dd.f" (domain:bus:device.function)
+    std::string pci_id;
+    std::string id;
+    std::string uuid;               // "GPU-" followed by the hex-formatted Vulkan deviceUUID
+    int major = 0;                  // reported as props->compute_major
+    int minor = 0;                  // reported as props->compute_minor
+    int driver_major = 0;           // reported as props->driver_major
+    int driver_minor = 0;           // reported as props->driver_minor
+    int pci_bus_id = 0;             // pciBus from vk::PhysicalDevicePCIBusInfoPropertiesEXT
+    int pci_device_id = 0;          // pciDevice from vk::PhysicalDevicePCIBusInfoPropertiesEXT
+    int pci_domain_id = 0;          // pciDomain from vk::PhysicalDevicePCIBusInfoPropertiesEXT
+};
+
+// Reports free and total memory for the Vulkan device described by ctx through *free / *total.
+// Non-integrated GPUs first try the vendor management helpers (ggml_hip_* for AMD, ggml_nvml_* for
+// NVIDIA); otherwise the heap sizes and memory-budget values returned by Vulkan are summed.
+void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
+    GGML_ASSERT(ctx->device < vk_instance.device_indices.size());
+    GGML_ASSERT(ctx->device < vk_instance.device_supports_membudget.size());
+
+    vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
-    vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
-    vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
-    vk::PhysicalDeviceMemoryProperties2 memprops = {};
-    bool membudget_supported = vk_instance.device_supports_membudget[device];
+    vk::PhysicalDeviceProperties2 props2;
+    vkdev.getProperties2(&props2);
-    if (membudget_supported) {
-        memprops.pNext = &budgetprops;
+    if (!ctx->is_integrated_gpu)
+    {
+        // Use vendor specific management libraries for best VRAM reporting if available
+        switch (props2.properties.vendorID) {
+        case VK_VENDOR_ID_AMD:
+            if (ggml_hip_mgmt_init() == 0) {
+                int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
+                if (status == 0) {
+                    GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+                    ggml_hip_mgmt_release();
+                    return;
+                }
+                ggml_hip_mgmt_release();
+            }
+            break;
+        case VK_VENDOR_ID_NVIDIA:
+            if (ggml_nvml_init() == 0) {
+                int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
+                if (status == 0) {
+                    GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+                    ggml_nvml_release();
+                    return;
+                }
+                ggml_nvml_release();
+            }
+            break;
+        }
+    }
-    vkdev.getMemoryProperties2(&memprops);
+    // else fallback to memory budget if supported
-    for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
-        const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
+    *total = 0;
+    *free = 0;
+    vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
+    vk::PhysicalDeviceMemoryProperties2 memprops2;
+    memprops2.pNext = &mem_budget_props;
+    vkdev.getMemoryProperties2(&memprops2);
+    const uint32_t heap_count = memprops2.memoryProperties.memoryHeapCount;
+    for (uint32_t i = 0; i < heap_count; ++i) {
+        const vk::MemoryHeap & heap = memprops2.memoryProperties.memoryHeaps[i];
+        // Device-local heaps always count; on iGPUs the remaining (shared) heaps count too
+        if (ctx->is_integrated_gpu || (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+            *total += heap.size;
+            // NOTE(review): the code this replaces reported heapBudget - heapUsage; confirm that
+            // using the raw budget (which may include this process's own usage) is intended
+            *free += mem_budget_props.heapBudget[i];
+        }
+    }
+    // A budget sum of 0 (e.g. no budget data was filled in) is reported as "everything free"
+    if (*total > 0 && *free > 0) {
+        return;
+    } else if (*total > 0) {
+        *free = *total;
+        return;
+    }
+    // else just report the physical memory
+    for (uint32_t i = 0; i < heap_count; ++i) {
+        const vk::MemoryHeap & heap = memprops2.memoryProperties.memoryHeaps[i];
         if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
             *total = heap.size;
-
-            if (membudget_supported && i < budgetprops.heapUsage.size()) {
-                *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
-            } else {
-                *free = heap.size;
-            }
+            *free = heap.size;
             break;
         }
     }
@@ -12502,16 +12570,17 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
return std::string(pci_bus_id);
}
-//////////////////////////
-
-struct ggml_backend_vk_device_context {
- size_t device;
- std::string name;
- std::string description;
- bool is_integrated_gpu;
- std::string pci_bus_id;
- std::string id;
-};
+// Parses a PCI id of the form "dddd:bb:dd.f" (all hex); null output pointers are skipped.
+static bool ggml_backend_vk_parse_pci_bus_id(const std::string & id, int *domain, int *bus, int *device) {
+    unsigned int d = 0, b = 0, dev = 0, func = 0;
+    // Domain may be wider than 4 digits (e.g. "10000:e1:00.0"); an empty id fails the field count below
+    int n = sscanf(id.c_str(), "%8x:%2x:%2x.%1x", &d, &b, &dev, &func);
+    if (n < 4) return false;
+    if (domain) *domain = (int) d;
+    if (bus) *bus = (int) b;
+    if (device) *device = (int) dev;
+    return true;
+}
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
@@ -12530,7 +12599,7 @@ static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
- ggml_backend_vk_get_device_memory(ctx->device, free, total);
+ ggml_backend_vk_get_device_memory(ctx, free, total); // whole context: the callee reads its device index, iGPU flag, PCI ids and UUID
}
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
@@ -12556,7 +12625,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->description = ggml_backend_vk_device_get_description(dev);
props->id = ggml_backend_vk_device_get_id(dev);
props->type = ggml_backend_vk_device_get_type(dev);
- props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
+ props->device_id = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = {
/* .async = */ false,
@@ -12564,6 +12633,16 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
/* .buffer_from_host_ptr = */ false,
/* .events = */ false,
};
+
+ props->compute_major = ctx->major;
+ props->compute_minor = ctx->minor;
+ props->driver_major = ctx->driver_major;
+ props->driver_minor = ctx->driver_minor;
+ props->integrated = ctx->is_integrated_gpu;
+ props->pci_bus_id = ctx->pci_bus_id;
+ props->pci_device_id = ctx->pci_device_id;
+ props->pci_domain_id = ctx->pci_domain_id;
+ props->library = GGML_VK_NAME;
}
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
@@ -12992,6 +13071,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (!initialized) {
+ std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
+
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
char desc[256];
@@ -13000,13 +13081,46 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
ctx->name = GGML_VK_NAME + std::to_string(i);
ctx->description = desc;
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
- ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
+ ctx->pci_id = ggml_backend_vk_get_device_pci_id(i);
ctx->id = ggml_backend_vk_get_device_id(i);
devices.push_back(new ggml_backend_device {
/* .iface = */ ggml_backend_vk_device_i,
/* .reg = */ reg,
/* .context = */ ctx,
});
+
+ // Gather additional information about the device
+ int dev_idx = vk_instance.device_indices[i];
+ vk::PhysicalDeviceProperties props1;
+ vk_devices[dev_idx].getProperties(&props1);
+ vk::PhysicalDeviceProperties2 props2;
+ vk::PhysicalDeviceIDProperties device_id_props;
+ vk::PhysicalDevicePCIBusInfoPropertiesEXT pci_bus_props;
+ vk::PhysicalDeviceDriverProperties driver_props;
+ props2.pNext = &device_id_props;
+ device_id_props.pNext = &pci_bus_props;
+ pci_bus_props.pNext = &driver_props;
+ vk_devices[dev_idx].getProperties2(&props2);
+ std::ostringstream oss;
+ oss << std::hex << std::setfill('0');
+ oss << "GPU-";
+ int byteIdx = 0;
+ for (int i = 0; i < 16; ++i, ++byteIdx) {
+ oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
+ if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
+ oss << '-';
+ }
+ }
+ ctx->uuid = oss.str();
+ ctx->pci_bus_id = pci_bus_props.pciBus;
+ ctx->pci_device_id = pci_bus_props.pciDevice;
+ ctx->pci_domain_id = pci_bus_props.pciDomain;
+ ctx->id = std::to_string(i);
+ ctx->major = 0;
+ ctx->minor = 0;
+ // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
+ ctx->driver_major = 0;
+ ctx->driver_minor = 0;
}
initialized = true;
}
--
2.51.0