metal : release buffers when freeing metal context (ggml-org#2062)

apage43 · web-flow · commit 2f8cd979ecd1 · 2023-07-01T21:14:59.000+03:00
diff --git a/ggml-metal.m b/ggml-metal.m
@@ -202,7 +202,9 @@ @implementation GGMLMetalClass
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
diff --git a/llama.cpp b/llama.cpp
@@ -253,7 +253,13 @@ struct llama_model {
 
 struct llama_context {
     llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
-
+#ifdef GGML_USE_METAL
+    ~llama_context() {
+        if (ctx_metal) {
+            ggml_metal_free(ctx_metal);
+        }
+    }
+#endif
     std::mt19937 rng;
 
     bool has_evaluated_once = false;

Original file line number	Diff line number	Diff line change
`@@ -202,7 +202,9 @@ @implementation GGMLMetalClass`
`202`	`202`
`203`	`203`	`void ggml_metal_free(struct ggml_metal_context * ctx) {`
`204`	`204`	`    fprintf(stderr, "%s: deallocating\n", __func__);`
`205`		`-`
	`205`	`+    for (int i = 0; i < ctx->n_buffers; ++i) {`
	`206`	`+        [ctx->buffers[i].metal release];`
	`207`	`+    }`
`206`	`208`	`    free(ctx);`
`207`	`209`	`}`
`208`	`210`