@@ -6856,10 +6856,10 @@ static void ggml_vk_print_tensor(const ggml_tensor * tensor, const char * name)
         const size_t tensor_size = ggml_nbytes(tensor);
         tensor_data = malloc(tensor_size);
 
-        ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
+        ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
 
-        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-        ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
+        vk_buffer buffer_gpu = buf_ctx->dev_buffer;
+        ggml_vk_buffer_read(buffer_gpu, vk_tensor_offset(tensor) + tensor->view_offs, tensor_data, tensor_size);
     }
 
     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
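
The new code path drops the per-tensor ggml_tensor_extra_gpu bookkeeping: the device buffer is taken from the backend buffer context (buf_ctx->dev_buffer) and the byte offset inside that buffer is recomputed from the tensor itself via vk_tensor_offset. As a rough sketch, assuming the Vulkan backend hands out tensor data pointers as offsets from a shared dummy base pointer (vk_ptr_base), such a helper could look like this:

    // Sketch: recover a tensor's byte offset inside its device buffer.
    // Assumes tensor->data (or its view source's data) was set to
    // vk_ptr_base + offset when the backend buffer was initialized.
    static uint64_t vk_tensor_offset(const ggml_tensor * tensor) {
        if (tensor->view_src) {
            return (uint8_t *) tensor->view_src->data - (uint8_t *) vk_ptr_base;
        }
        return (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
    }

Because the helper resolves the view source's base offset, tensor->view_offs still has to be added on top, which is why the read above uses vk_tensor_offset(tensor) + tensor->view_offs.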
@@ -6933,9 +6933,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
             memcpy(src0_clone->data, src0->data, src0_size);
             memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
         } else if (ggml_backend_buffer_is_vk(src0->buffer)) {
-            ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
-            vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-            uint64_t offset = extra->offset + src0->view_offs;
+            ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+            vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+            uint64_t offset = vk_tensor_offset(src0) + src0->view_offs;
             if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
@@ -6975,9 +6975,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
             memcpy(src1_clone->data, src1->data, src1_size);
             memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
         } else if (ggml_backend_buffer_is_vk(src1->buffer)) {
-            ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
-            vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-            uint64_t offset = extra->offset + src1->view_offs;
+            ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
+            vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+            uint64_t offset = vk_tensor_offset(src1) + src1->view_offs;
             if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
@@ -7017,9 +7017,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
             memcpy(src2_clone->data, src2->data, src2_size);
             memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
         } else if (ggml_backend_buffer_is_vk(src2->buffer)) {
-            ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src2->extra;
-            vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-            uint64_t offset = extra->offset + src2->view_offs;
+            ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)src2->buffer->context;
+            vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+            uint64_t offset = vk_tensor_offset(src2) + src2->view_offs;
             if (!ggml_is_contiguous(src2) && ggml_vk_dim01_contiguous(src2)) {
                 for (int i3 = 0; i3 < src2->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src2->ne[2]; i2++) {
@@ -7074,7 +7074,7 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
     } else if (tensor->op == GGML_OP_PAD) {
         tensor_clone = ggml_pad(ggml_ctx, src0_clone, tensor->ne[0] - src0_clone->ne[0], tensor->ne[1] - src0_clone->ne[1], tensor->ne[2] - src0_clone->ne[2], tensor->ne[3] - src0_clone->ne[3]);
     } else if (tensor->op == GGML_OP_REPEAT) {
-        tensor_clone = ggml_repeat(ggml_ctx, src0_clone, src1_clone);
+        tensor_clone = ggml_repeat(ggml_ctx, src0_clone, tensor);
     } else if (tensor->op == GGML_OP_ADD) {
         tensor_clone = ggml_add(ggml_ctx, src0_clone, src1_clone);
     } else if (tensor->op == GGML_OP_ACC) {
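
The GGML_OP_REPEAT case now passes the result tensor itself as the shape argument. ggml_repeat(ctx, a, b) only reads b's ne[] dimensions to determine the output shape and never touches b's data, and for a REPEAT node the target shape is recorded in the node itself, so the original tensor is the more reliable shape source when rebuilding the op for the CPU reference run. A small standalone usage sketch with made-up dimensions:

    // Hypothetical example of the same call pattern: tile a 2x3 tensor
    // into a 4x6 result. Only the shape of the second argument matters.
    struct ggml_tensor * a     = ggml_new_tensor_2d(ggml_ctx, GGML_TYPE_F32, 2, 3);
    struct ggml_tensor * shape = ggml_new_tensor_2d(ggml_ctx, GGML_TYPE_F32, 4, 6);
    struct ggml_tensor * rep   = ggml_repeat(ggml_ctx, a, shape);  // rep has shape 4x6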
@@ -7219,14 +7219,15 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) {
         size_t tensor_size = ggml_nbytes(tensor);
         tensor_data = malloc(tensor_size);
 
-        ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
+        ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
 
-        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-        if (extra->offset + tensor->view_offs + tensor_size >= buffer_gpu->size) {
-            tensor_size = buffer_gpu->size - (extra->offset + tensor->view_offs);
+        vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+        uint64_t offset = vk_tensor_offset(tensor) + tensor->view_offs;
+        if (offset + tensor_size >= buffer_gpu->size) {
+            tensor_size = buffer_gpu->size - offset;
         }
 
-        ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
+        ggml_vk_buffer_read(buffer_gpu, offset, tensor_data, tensor_size);
     }
 
     float first_error_result = -1.0f;
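
In ggml_vk_check_results_1 the read is also clamped: for a view that sits near the end of the device allocation, offset + ggml_nbytes(tensor) can run past buffer_gpu->size, so tensor_size is reduced before calling ggml_vk_buffer_read. A sketch of that arithmetic with made-up numbers:

    // Made-up example: a 512-byte tensor view starting 256 bytes before
    // the end of a 1 MiB device buffer; the read is clamped to 256 bytes.
    uint64_t buffer_size = 1024 * 1024;        // buffer_gpu->size
    uint64_t offset      = buffer_size - 256;  // vk_tensor_offset(tensor) + tensor->view_offs
    size_t   tensor_size = 512;                // ggml_nbytes(tensor)
    if (offset + tensor_size >= buffer_size) {
        tensor_size = buffer_size - offset;    // now 256
    }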