Skip to content

Commit

Permalink
metal : reusing llama.cpp logging (ggerganov#3152)
Browse files Browse the repository at this point in the history
* metal : reusing llama.cpp logging

* cmake : build fix

* metal : logging callback

* metal : logging va_args memory fix

* metal : minor cleanup

* metal : setting function like logging macro to capital letters

* llama.cpp : trailing whitespace fix

* ggml : log level enum used by llama

* Makefile : cleanup ggml-metal recipe

* ggml : ggml_log_callback typedef

* ggml : minor

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
  • Loading branch information
Ricardicus and ggerganov committed Sep 27, 2023
1 parent 527e57c commit dc68974
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 67 deletions.
2 changes: 1 addition & 1 deletion examples/llama-bench/llama-bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads)
}
}

static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
static void llama_null_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
(void) level;
(void) text;
(void) user_data;
Expand Down
4 changes: 4 additions & 0 deletions ggml-metal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

#pragma once

#include "ggml.h"

#include <stddef.h>
#include <stdbool.h>

Expand All @@ -33,6 +35,8 @@ struct ggml_cgraph;
extern "C" {
#endif

void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);

struct ggml_metal_context;

// number of command buffers to use
Expand Down
116 changes: 74 additions & 42 deletions ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

// TODO: temporary - reuse llama.cpp logging
#ifdef GGML_METAL_NDEBUG
#define metal_printf(...)
#define GGML_METAL_LOG_INFO(...)
#define GGML_METAL_LOG_WARN(...)
#define GGML_METAL_LOG_ERROR(...)
#else
#define metal_printf(...) fprintf(stderr, __VA_ARGS__)
#define GGML_METAL_LOG_INFO(...) ggml_metal_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
#define GGML_METAL_LOG_WARN(...) ggml_metal_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
#define GGML_METAL_LOG_ERROR(...) ggml_metal_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#endif

#define UNUSED(x) (void)(x)
Expand Down Expand Up @@ -120,8 +123,37 @@ @interface GGMLMetalClass : NSObject
@implementation GGMLMetalClass
@end

ggml_log_callback ggml_metal_log_callback = NULL;
void * ggml_metal_log_user_data = NULL;

void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
ggml_metal_log_callback = log_callback;
ggml_metal_log_user_data = user_data;
}

static void ggml_metal_log(enum ggml_log_level level, const char* format, ...){
if (ggml_metal_log_callback != NULL) {
va_list args;
va_start(args, format);
char buffer[128];
int len = vsnprintf(buffer, 128, format, args);
if (len < 128) {
ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
} else {
char* buffer2 = malloc(len+1);
vsnprintf(buffer2, len+1, format, args);
buffer2[len] = 0;
ggml_metal_log_callback(level, buffer2, ggml_metal_log_user_data);
free(buffer2);
}
va_end(args);
}
}



struct ggml_metal_context * ggml_metal_init(int n_cb) {
metal_printf("%s: allocating\n", __func__);
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);

id <MTLDevice> device;
NSString * s;
Expand All @@ -131,14 +163,14 @@ @implementation GGMLMetalClass
NSArray * devices = MTLCopyAllDevices();
for (device in devices) {
s = [device name];
metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
}
#endif

// Pick and show default Metal device
device = MTLCreateSystemDefaultDevice();
s = [device name];
metal_printf("%s: picking default device: %s\n", __func__, [s UTF8String]);
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);

// Configure context
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
Expand All @@ -165,7 +197,7 @@ @implementation GGMLMetalClass
ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];

if (error) {
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}
}
Expand All @@ -179,11 +211,11 @@ @implementation GGMLMetalClass
//NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path UTF8String]);

NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
if (error) {
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}

Expand All @@ -195,7 +227,7 @@ @implementation GGMLMetalClass
ctx->library = [ctx->device newLibraryWithSource:src options:nil error:&error];
#endif
if (error) {
metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}
}
Expand All @@ -207,11 +239,11 @@ @implementation GGMLMetalClass
#define GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
GGML_METAL_LOG_INFO("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
(int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
(int) ctx->pipeline_##name.threadExecutionWidth); \
if (error) { \
metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
return NULL; \
}

Expand Down Expand Up @@ -270,21 +302,21 @@ @implementation GGMLMetalClass
#undef GGML_METAL_ADD_KERNEL
}

metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
#if TARGET_OS_OSX
metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
if (ctx->device.maxTransferRate != 0) {
metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
} else {
metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
}
#endif

return ctx;
}

void ggml_metal_free(struct ggml_metal_context * ctx) {
metal_printf("%s: deallocating\n", __func__);
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
#define GGML_METAL_DEL_KERNEL(name) \
[ctx->function_##name release]; \
[ctx->pipeline_##name release];
Expand Down Expand Up @@ -360,7 +392,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
void * data = NULL;
const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
if (result != 0) {
metal_printf("%s: error: posix_memalign failed\n", __func__);
GGML_METAL_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
return NULL;
}

Expand Down Expand Up @@ -388,7 +420,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
// Metal buffer based on the host memory pointer
//
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
//metal_printf("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
//GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);

const int64_t tsize = ggml_nbytes(t);

Expand All @@ -400,13 +432,13 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
*offs = (size_t) ioffs;

//metal_printf("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
//GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);

return ctx->buffers[i].metal;
}
}

metal_printf("%s: error: buffer is nil\n", __func__);
GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);

return nil;
}
Expand All @@ -418,7 +450,7 @@ bool ggml_metal_add_buffer(
size_t size,
size_t max_size) {
if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
metal_printf("%s: too many buffers\n", __func__);
GGML_METAL_LOG_ERROR("%s: error: too many buffers\n", __func__);
return false;
}

Expand All @@ -428,7 +460,7 @@ bool ggml_metal_add_buffer(
const int64_t ioffs = (int64_t) data - (int64_t) ctx->buffers[i].data;

if (ioffs >= 0 && ioffs < (int64_t) ctx->buffers[i].size) {
metal_printf("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
GGML_METAL_LOG_ERROR("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
return false;
}
}
Expand All @@ -449,11 +481,11 @@ bool ggml_metal_add_buffer(
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];

if (ctx->buffers[ctx->n_buffers].metal == nil) {
metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
return false;
}

metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);

++ctx->n_buffers;
} else {
Expand All @@ -473,31 +505,31 @@ bool ggml_metal_add_buffer(
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];

if (ctx->buffers[ctx->n_buffers].metal == nil) {
metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
return false;
}

metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
if (i + size_step < size) {
metal_printf("\n");
GGML_METAL_LOG_INFO("\n");
}

++ctx->n_buffers;
}
}

#if TARGET_OS_OSX
metal_printf(", (%8.2f / %8.2f)",
GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);

if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
GGML_METAL_LOG_WARN(", warning: current allocated size is greater than the recommended max working set size\n", __func__);
} else {
metal_printf("\n");
GGML_METAL_LOG_INFO("\n");
}
#else
metal_printf(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
#endif
}

Expand Down Expand Up @@ -610,7 +642,7 @@ void ggml_metal_graph_find_concurrency(
}

if (ctx->concur_list_len > GGML_MAX_CONCUR) {
metal_printf("%s: too many elements for metal ctx->concur_list!\n", __func__);
GGML_METAL_LOG_WARN("%s: too many elements for metal ctx->concur_list!\n", __func__);
}
}

Expand Down Expand Up @@ -664,7 +696,7 @@ void ggml_metal_graph_compute(
continue;
}

//metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
//GGML_METAL_LOG_INFO("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));

struct ggml_tensor * src0 = gf->nodes[i]->src[0];
struct ggml_tensor * src1 = gf->nodes[i]->src[1];
Expand Down Expand Up @@ -708,17 +740,17 @@ void ggml_metal_graph_compute(
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil;

//metal_printf("%s: op - %s\n", __func__, ggml_op_name(dst->op));
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
//if (src0) {
// metal_printf("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
// GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
// ggml_is_contiguous(src0), src0->name);
//}
//if (src1) {
// metal_printf("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
// GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
// ggml_is_contiguous(src1), src1->name);
//}
//if (dst) {
// metal_printf("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
// GGML_METAL_LOG_INFO("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
// dst->name);
//}

Expand Down Expand Up @@ -830,7 +862,7 @@ void ggml_metal_graph_compute(
} break;
default:
{
metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
GGML_ASSERT(false);
}
} break;
Expand Down Expand Up @@ -1019,7 +1051,7 @@ void ggml_metal_graph_compute(
} break;
default:
{
metal_printf("Asserting on type %d\n",(int)src0t);
GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
GGML_ASSERT(false && "not implemented");
}
};
Expand Down Expand Up @@ -1261,7 +1293,7 @@ void ggml_metal_graph_compute(
} break;
default:
{
metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
GGML_ASSERT(false);
}
}
Expand All @@ -1286,7 +1318,7 @@ void ggml_metal_graph_compute(

MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
if (status != MTLCommandBufferStatusCompleted) {
metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
GGML_ASSERT(false);
}
}
Expand Down
7 changes: 7 additions & 0 deletions ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,12 @@ extern "C" {
GGML_OBJECT_WORK_BUFFER
};

enum ggml_log_level {
GGML_LOG_LEVEL_ERROR = 2,
GGML_LOG_LEVEL_WARN = 3,
GGML_LOG_LEVEL_INFO = 4
};

// ggml object
struct ggml_object {
size_t offs;
Expand Down Expand Up @@ -1691,6 +1697,7 @@ extern "C" {
};

typedef void (*ggml_opt_callback)(void * data, float * sched);
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);

// optimization parameters
//
Expand Down
Loading

0 comments on commit dc68974

Please sign in to comment.