From 1a7ecd55e6cf6f25aedbad5c1d471f4e9cccc3de Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:14:53 +0800 Subject: [PATCH] timing for init step, clip for vulkan --- Makefile | 6 ++++-- gpttype_adapter.cpp | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index a413190c6ca06..646597100261b 100644 --- a/Makefile +++ b/Makefile @@ -470,6 +470,8 @@ llavaclip_default.o: examples/llava/clip.cpp examples/llava/clip.h $(CXX) $(CXXFLAGS) -c $< -o $@ llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ +llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h + $(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@ #this is only used for openblas and accelerate ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h @@ -663,10 +665,10 @@ koboldcpp_hipblas: endif ifdef VULKAN_BUILD -koboldcpp_vulkan: ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter_vulkan.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_vulkan.o $(OBJS_FULL) $(OBJS) +koboldcpp_vulkan: ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter_vulkan.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_FULL) $(OBJS) $(VULKAN_BUILD) ifdef NOAVX2_BUILD -koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_vulkan.o $(OBJS_SIMPLE) $(OBJS) +koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_SIMPLE) $(OBJS) $(VULKAN_BUILD) else koboldcpp_vulkan_noavx2: diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 15f80dba39f8a..4ad320826ad96 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1961,6 +1961,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs) dry_sequence_breakers.clear(); dry_max_token_repeat.clear(); + double time0 = 0, time1 = 0, time2 = 0; + timer_start(); + for(int x=0;xn_predict, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); + printf("\nCtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_params->n_predict, time0, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); fflush(stdout); output.status = 1; output.stopreason = last_stop_reason;