time=2025-09-30T20:01:00.367-04:00 level=INFO source=routes.go:1475 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY:localhost,127.0.0.1,.local,.googleapis.com,.google.com OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:C:\\Users\\research\\.ollama\\models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES:]"
time=2025-09-30T20:01:00.384-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-09-30T20:01:00.384-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-09-30T20:01:00.387-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.3)"
time=2025-09-30T20:01:00.389-04:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-30T20:01:00.390-04:00 level=INFO source=gpu_windows.go:167 msg=packages count=1
time=2025-09-30T20:01:00.390-04:00 level=INFO source=gpu_windows.go:183 msg="efficiency cores detected" maxEfficiencyClass=1
time=2025-09-30T20:01:00.390-04:00 level=INFO source=gpu_windows.go:214 msg="" package=0 cores=16 efficiency=10 threads=22
time=2025-09-30T20:01:00.528-04:00 level=INFO source=gpu.go:311 msg="detected OS VRAM overhead" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" overhead="891.0 MiB"
time=2025-09-30T20:01:00.528-04:00 level=WARN source=cuda_common.go:60 msg="old CUDA driver detected - please upgrade to a newer driver for best performance" version=12.9
time=2025-09-30T20:01:00.530-04:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda variant=v12 compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" total="8.0 GiB" available="6.9 GiB"
time=2025-09-30T20:01:00.530-04:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="8.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/30 - 20:07:26 | 200 | 538.7µs | 127.0.0.1 | HEAD "/"
[GIN] 2025/09/30 - 20:07:26 | 200 | 11.8492ms | 127.0.0.1 | GET "/api/tags"
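The two [GIN] entries above are the first client requests of this session: a HEAD probe of the root and a tag listing. A minimal sketch of the same checks, assuming only that the server is reachable at the default localhost:11434 address the log shows it binding to:

```python
# Probe the server the way the first [GIN] requests above do:
# GET /api/version and GET /api/tags (stdlib only; the host is assumed).
import json
import urllib.request

BASE = "http://localhost:11434"

with urllib.request.urlopen(f"{BASE}/api/version") as resp:
    print("version:", json.load(resp)["version"])   # the log shows 0.12.3

with urllib.request.urlopen(f"{BASE}/api/tags") as resp:
    for model in json.load(resp)["models"]:
        print(model["name"], model["size"])
```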
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
llama_model_loader: - kv 3: general.finetune str = Instruct
llama_model_loader: - kv 4: general.basename str = Llama-3.2
llama_model_loader: - kv 5: general.size_label str = 3B
llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
llama_model_loader: - kv 8: llama.block_count u32 = 28
llama_model_loader: - kv 9: llama.context_length u32 = 131072
llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
llama_model_loader: - kv 18: general.file_type u32 = 15
llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
llama_model_loader: - kv 29: general.quantization_version u32 = 2
llama_model_loader: - type f32: 58 tensors
llama_model_loader: - type q4_K: 168 tensors
llama_model_loader: - type q6_K: 29 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 1.87 GiB (5.01 BPW)
load: printing all EOG tokens:
load: - 128001 ('<|end_of_text|>')
load: - 128008 ('<|eom_id|>')
load: - 128009 ('<|eot_id|>')
load: special tokens cache size = 256
load: token to piece cache size = 0.7999 MB
print_info: arch = llama
print_info: vocab_only = 1
print_info: model type = ?B
print_info: model params = 3.21 B
print_info: general.name = Llama 3.2 3B Instruct
print_info: vocab type = BPE
print_info: n_vocab = 128256
print_info: n_merges = 280147
print_info: BOS token = 128000 '<|begin_of_text|>'
print_info: EOS token = 128009 '<|eot_id|>'
print_info: EOT token = 128009 '<|eot_id|>'
print_info: EOM token = 128008 '<|eom_id|>'
print_info: LF token = 198 'Ċ'
print_info: EOG token = 128001 '<|end_of_text|>'
print_info: EOG token = 128008 '<|eom_id|>'
print_info: EOG token = 128009 '<|eot_id|>'
print_info: max token length = 256
llama_model_load: vocab only - skipping tensors
time=2025-09-30T20:10:08.421-04:00 level=INFO source=server.go:399 msg="starting runner" cmd="C:\\Users\\research\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --model C:\\Users\\research\\.ollama\\models\\blobs\\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff --port 52867"
time=2025-09-30T20:10:08.442-04:00 level=INFO source=server.go:504 msg="system memory" total="31.4 GiB" free="18.0 GiB" free_swap="19.8 GiB"
time=2025-09-30T20:10:08.443-04:00 level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff library=cuda parallel=1 required="3.1 GiB" gpus=1
time=2025-09-30T20:10:08.443-04:00 level=INFO source=server.go:544 msg=offload library=cuda layers.requested=-1 layers.model=29 layers.offload=29 layers.split=[29] memory.available="[6.9 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.1 GiB" memory.required.partial="3.1 GiB" memory.required.kv="448.0 MiB" memory.required.allocations="[3.1 GiB]" memory.weights.total="1.9 GiB" memory.weights.repeating="1.6 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="256.5 MiB" memory.graph.partial="570.7 MiB"
time=2025-09-30T20:10:08.455-04:00 level=INFO source=runner.go:864 msg="starting go runner"
load_backend: loaded CPU backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\ggml-cpu-alderlake.dll
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9, VMM: yes, ID: GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a
load_backend: loaded CUDA backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\cuda_v12\ggml-cuda.dll
time=2025-09-30T20:10:09.556-04:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX_VNNI=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 CUDA.0.ARCHS=500,600,610,700,750,800,860,870,890,900,1200 CUDA.0.USE_GRAPHS=1 CUDA.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(clang)
time=2025-09-30T20:10:09.557-04:00 level=INFO source=runner.go:900 msg="Server listening on 127.0.0.1:52867"
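The "30 key-value pairs and 255 tensors" line comes straight from the GGUF header of the blob. A minimal header check, assuming read access to the blob path shown in the log (GGUF stores a 4-byte magic, a u32 version, then u64 tensor and key-value counts, all little-endian):

```python
# Read just the GGUF header of the blob named in the log and confirm the
# counts llama_model_loader reports (path copied from the log above).
import struct

BLOB = (r"C:\Users\research\.ollama\models\blobs"
        r"\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff")

with open(BLOB, "rb") as f:
    magic, version = struct.unpack("<4sI", f.read(8))
    n_tensors, n_kv = struct.unpack("<QQ", f.read(16))

assert magic == b"GGUF"
print(f"GGUF v{version}: {n_kv} key-value pairs, {n_tensors} tensors")
# Expected from the log: GGUF v3: 30 key-value pairs, 255 tensors
```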
time=2025-09-30T20:10:09.560-04:00 level=INFO source=runner.go:799 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:4096 KvCacheType: NumThreads:6 GPULayers:29[ID:GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a Layers:29(0..28)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-09-30T20:10:09.561-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-09-30T20:10:09.561-04:00 level=INFO source=server.go:1285 msg="waiting for server to become available" status="llm server loading model"
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 4070 Laptop GPU) - 7056 MiB free
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
[metadata key/value and tensor-type dump identical to the listing above; omitted]
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 1.87 GiB (5.01 BPW)
load: printing all EOG tokens:
load: - 128001 ('<|end_of_text|>')
load: - 128008 ('<|eom_id|>')
load: - 128009 ('<|eot_id|>')
load: special tokens cache size = 256
load: token to piece cache size = 0.7999 MB
print_info: arch = llama
print_info: vocab_only = 0
print_info: n_ctx_train = 131072
print_info: n_embd = 3072
print_info: n_layer = 28
print_info: n_head = 24
print_info: n_head_kv = 8
print_info: n_rot = 128
print_info: n_swa = 0
print_info: is_swa_any = 0
print_info: n_embd_head_k = 128
print_info: n_embd_head_v = 128
print_info: n_gqa = 3
print_info: n_embd_k_gqa = 1024
print_info: n_embd_v_gqa = 1024
print_info: f_norm_eps = 0.0e+00
print_info: f_norm_rms_eps = 1.0e-05
print_info: f_clamp_kqv = 0.0e+00
print_info: f_max_alibi_bias = 0.0e+00
print_info: f_logit_scale = 0.0e+00
print_info: f_attn_scale = 0.0e+00
print_info: n_ff = 8192
print_info: n_expert = 0
print_info: n_expert_used = 0
print_info: causal attn = 1
print_info: pooling type = 0
print_info: rope type = 0
print_info: rope scaling = linear
print_info: freq_base_train = 500000.0
print_info: freq_scale_train = 1
print_info: n_ctx_orig_yarn = 131072
print_info: rope_finetuned = unknown
print_info: model type = 3B
print_info: model params = 3.21 B
print_info: general.name = Llama 3.2 3B Instruct
print_info: vocab type = BPE
print_info: n_vocab = 128256
print_info: n_merges = 280147
print_info: BOS token = 128000 '<|begin_of_text|>'
print_info: EOS token = 128009 '<|eot_id|>'
print_info: EOT token = 128009 '<|eot_id|>'
print_info: EOM token = 128008 '<|eom_id|>'
print_info: LF token = 198 'Ċ'
print_info: EOG token = 128001 '<|end_of_text|>'
print_info: EOG token = 128008 '<|eom_id|>'
print_info: EOG token = 128009 '<|eot_id|>'
print_info: max token length = 256
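The print_info block is internally consistent, and the reported file size follows from the parameter count and bits per weight. A quick check using only values printed above:

```python
# Cross-check the print_info values above (all constants come from the log).
n_embd, n_head, n_head_kv, n_embd_head_k = 3072, 24, 8, 128
n_params = 3.21e9        # "model params = 3.21 B"
bpw = 5.01               # "file size = 1.87 GiB (5.01 BPW)"

assert n_embd_head_k * n_head == n_embd         # 128 * 24 = 3072
assert n_head // n_head_kv == 3                 # n_gqa = 3
assert n_embd_head_k * n_head_kv == 1024        # n_embd_k_gqa = n_embd_v_gqa

print(f"{n_params * bpw / 8 / 2**30:.2f} GiB")  # 1.87 GiB, matching the log
```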
load_tensors: loading model tensors, this can take a while... (mmap = false)
load_tensors: offloading 28 repeating layers to GPU
load_tensors: offloading output layer to GPU
load_tensors: offloaded 29/29 layers to GPU
load_tensors: CUDA0 model buffer size = 1918.35 MiB
load_tensors: CPU model buffer size = 308.23 MiB
llama_context: constructing llama_context
llama_context: n_seq_max = 1
llama_context: n_ctx = 4096
llama_context: n_ctx_per_seq = 4096
llama_context: n_batch = 512
llama_context: n_ubatch = 512
llama_context: causal_attn = 1
llama_context: flash_attn = 0
llama_context: kv_unified = false
llama_context: freq_base = 500000.0
llama_context: freq_scale = 1
llama_context: n_ctx_per_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
llama_context: CUDA_Host output buffer size = 0.50 MiB
llama_kv_cache_unified: CUDA0 KV buffer size = 448.00 MiB
llama_kv_cache_unified: size = 448.00 MiB ( 4096 cells, 28 layers, 1/1 seqs), K (f16): 224.00 MiB, V (f16): 224.00 MiB
llama_context: CUDA0 compute buffer size = 256.50 MiB
llama_context: CUDA_Host compute buffer size = 18.01 MiB
llama_context: graph nodes = 986
llama_context: graph splits = 2
time=2025-09-30T20:10:17.326-04:00 level=INFO source=server.go:1289 msg="llama runner started in 8.90 seconds"
time=2025-09-30T20:10:17.326-04:00 level=INFO source=sched.go:470 msg="loaded runners" count=1
time=2025-09-30T20:10:17.326-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-09-30T20:10:17.332-04:00 level=INFO source=server.go:1289 msg="llama runner started in 8.91 seconds"
[GIN] 2025/09/30 - 20:10:19 | 200 | 11.4275454s | 127.0.0.1 | POST "/api/generate"
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
[vocab-only metadata dump and print_info block identical to the first one above; omitted]
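The 448.00 MiB KV buffer reported by llama_kv_cache_unified above is exactly what the printed shapes predict: 28 layers, 4096 cells, and 1024-wide K and V projections stored as f16:

```python
# Reproduce the KV cache size from the values printed in the load above.
n_layer, n_ctx, n_embd_kv, f16 = 28, 4096, 1024, 2   # 2 bytes per f16

k_bytes = n_layer * n_ctx * n_embd_kv * f16          # 224.00 MiB for K
v_bytes = k_bytes                                    # same shape for V
print((k_bytes + v_bytes) / 2**20, "MiB")            # 448.0 MiB
```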
time=2025-10-02T13:16:39.849-04:00 level=INFO source=server.go:399 msg="starting runner" cmd="C:\\Users\\research\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --model C:\\Users\\research\\.ollama\\models\\blobs\\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff --port 52694"
time=2025-10-02T13:16:39.876-04:00 level=INFO source=server.go:504 msg="system memory" total="31.4 GiB" free="17.3 GiB" free_swap="16.3 GiB"
time=2025-10-02T13:16:39.877-04:00 level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff library=cuda parallel=1 required="3.1 GiB" gpus=1
time=2025-10-02T13:16:39.877-04:00 level=INFO source=server.go:544 msg=offload library=cuda layers.requested=-1 layers.model=29 layers.offload=29 layers.split=[29] memory.available="[6.9 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.1 GiB" memory.required.partial="3.1 GiB" memory.required.kv="448.0 MiB" memory.required.allocations="[3.1 GiB]" memory.weights.total="1.9 GiB" memory.weights.repeating="1.6 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="256.5 MiB" memory.graph.partial="570.7 MiB"
time=2025-10-02T13:16:39.919-04:00 level=INFO source=runner.go:864 msg="starting go runner"
load_backend: loaded CPU backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\ggml-cpu-alderlake.dll
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9, VMM: yes, ID: GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a
load_backend: loaded CUDA backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\cuda_v12\ggml-cuda.dll
time=2025-10-02T13:16:40.538-04:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX_VNNI=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 CUDA.0.ARCHS=500,600,610,700,750,800,860,870,890,900,1200 CUDA.0.USE_GRAPHS=1 CUDA.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(clang)
time=2025-10-02T13:16:40.539-04:00 level=INFO source=runner.go:900 msg="Server listening on 127.0.0.1:52694"
time=2025-10-02T13:16:40.547-04:00 level=INFO source=runner.go:799 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:4096 KvCacheType: NumThreads:6 GPULayers:29[ID:GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a Layers:29(0..28)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-02T13:16:40.548-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T13:16:40.548-04:00 level=INFO source=server.go:1285 msg="waiting for server to become available" status="llm server loading model"
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 4070 Laptop GPU) - 7056 MiB free
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
[full model load output (metadata dump, print_info, tensor offload, llama_context setup) identical to the first load above; omitted]
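The msg=offload entry repeats with identical numbers on every load. Its listed parts do not quite sum to memory.required.full; the roughly half-gibibyte remainder is presumably allocation headroom, which is an assumption here, not something the log states:

```python
# Sum the offload components reported above (values in MiB from the log);
# the gap to memory.required.full="3.1 GiB" is assumed to be headroom.
weights_gpu = 1918.35    # CUDA0 model buffer size
kv_cache    = 448.0      # memory.required.kv
graph_full  = 256.5      # memory.graph.full

parts = weights_gpu + kv_cache + graph_full
print(f"{parts / 1024:.2f} GiB accounted for, of 3.1 GiB required")
```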
time=2025-10-02T13:16:45.305-04:00 level=INFO source=server.go:1289 msg="llama runner started in 5.46 seconds"
time=2025-10-02T13:16:45.305-04:00 level=INFO source=sched.go:470 msg="loaded runners" count=1
time=2025-10-02T13:16:45.305-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T13:16:45.305-04:00 level=INFO source=server.go:1289 msg="llama runner started in 5.46 seconds"
[GIN] 2025/10/02 - 13:16:45 | 200 | 6.3880802s | 127.0.0.1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:17:51 | 400 | 12.4638ms | ::1 | POST "/v1/chat/completions"
[GIN] 2025/10/02 - 13:21:11 | 200 | 788.1793ms | ::1 | POST "/v1/chat/completions"
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
[vocab-only metadata dump and print_info block identical to the first one above; omitted]
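The 400 on /v1/chat/completions followed by a 200 suggests the first request body was rejected; an unknown model name or malformed JSON would both do it, though the log does not say which. A request shaped like the one that succeeded, as a sketch; the tag "llama3.2:3b" is an assumption based on the "Llama 3.2 3B Instruct" metadata, not something the log confirms:

```python
# OpenAI-compatible chat call against the endpoint logged above.
import json
import urllib.request

body = {
    "model": "llama3.2:3b",   # assumed tag; the log only names the GGUF blob
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
}
req = urllib.request.Request(
    "http://localhost:11434/v1/chat/completions",
    data=json.dumps(body).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```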
time=2025-10-02T13:37:19.568-04:00 level=INFO source=server.go:399 msg="starting runner" cmd="C:\\Users\\research\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --model C:\\Users\\research\\.ollama\\models\\blobs\\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff --port 64583"
time=2025-10-02T13:37:19.594-04:00 level=INFO source=server.go:504 msg="system memory" total="31.4 GiB" free="16.5 GiB" free_swap="15.6 GiB"
time=2025-10-02T13:37:19.594-04:00 level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff library=cuda parallel=1 required="3.1 GiB" gpus=1
time=2025-10-02T13:37:19.594-04:00 level=INFO source=server.go:544 msg=offload library=cuda layers.requested=-1 layers.model=29 layers.offload=29 layers.split=[29] memory.available="[6.9 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.1 GiB" memory.required.partial="3.1 GiB" memory.required.kv="448.0 MiB" memory.required.allocations="[3.1 GiB]" memory.weights.total="1.9 GiB" memory.weights.repeating="1.6 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="256.5 MiB" memory.graph.partial="570.7 MiB"
time=2025-10-02T13:37:19.633-04:00 level=INFO source=runner.go:864 msg="starting go runner"
load_backend: loaded CPU backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\ggml-cpu-alderlake.dll
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9, VMM: yes, ID: GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a
load_backend: loaded CUDA backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\cuda_v12\ggml-cuda.dll
time=2025-10-02T13:37:19.747-04:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX_VNNI=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 CUDA.0.ARCHS=500,600,610,700,750,800,860,870,890,900,1200 CUDA.0.USE_GRAPHS=1 CUDA.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(clang)
time=2025-10-02T13:37:19.749-04:00 level=INFO source=runner.go:900 msg="Server listening on 127.0.0.1:64583"
time=2025-10-02T13:37:19.755-04:00 level=INFO source=runner.go:799 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:4096 KvCacheType: NumThreads:6 GPULayers:29[ID:GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a Layers:29(0..28)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-02T13:37:19.755-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T13:37:19.756-04:00 level=INFO source=server.go:1285 msg="waiting for server to become available" status="llm server loading model"
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 4070 Laptop GPU) - 7056 MiB free
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
[full model load output (metadata dump, print_info, tensor offload, llama_context setup) identical to the first load above; omitted]
time=2025-10-02T13:37:20.758-04:00 level=INFO source=server.go:1289 msg="llama runner started in 1.19 seconds"
time=2025-10-02T13:37:20.758-04:00 level=INFO source=sched.go:470 msg="loaded runners" count=1
time=2025-10-02T13:37:20.758-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T13:37:20.758-04:00 level=INFO source=server.go:1289 msg="llama runner started in 1.19 seconds"
[GIN] 2025/10/02 - 13:37:21 | 200 | 2.1605434s | ::1 | POST "/v1/chat/completions"
[GIN] 2025/10/02 - 13:41:21 | 200 | 973.6852ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:21 | 200 | 523.0421ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:25 | 200 | 2.3254139s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:27 | 200 | 2.111413s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:28 | 200 | 1.6160798s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:30 | 200 | 1.82832s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:45 | 200 | 11.8503084s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:52 | 200 | 4.4086316s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:41:57 | 200 | 5.0869978s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:42:07 | 200 | 7.7999355s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:42:20 | 200 | 726.9911ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:42:28 | 200 | 1.1798488s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:42:37 | 200 | 581.546ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:42:45 | 200 | 798.4825ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:42:54 | 200 | 584.4367ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:43:08 | 200 | 2.7074481s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:43:18 | 200 | 2.0521342s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:43:27 | 200 | 775.8149ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:43:37 | 200 | 419.0029ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:43:46 | 200 | 581.7723ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:43:54 | 200 | 578.5869ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:04 | 200 | 549.0511ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:14 | 200 | 874.7725ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:19 | 200 | 398.6407ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:24 | 200 | 1.587624s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:33 | 200 | 674.154ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:44 | 200 | 1.4854066s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:44:59 | 200 | 655.8417ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:45:05 | 200 | 810.1995ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:45:21 | 200 | 11.3562801s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:45:36 | 200 | 670.2497ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:45:46 | 200 | 523.108ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:45:59 | 200 | 729.6798ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:10 | 200 | 814.4303ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:24 | 200 | 8.6651689s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:29 | 200 | 13.8299996s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:37 | 200 | 21.6256859s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:47 | 200 | 31.1256052s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:47 | 200 | 29.0715418s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:56 | 200 | 29.7358491s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:46:58 | 200 | 27.2525341s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:02 | 200 | 21.696993s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:05 | 200 | 14.716109s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:06 | 200 | 7.6501685s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:08 | 200 | 9.8896524s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:10 | 200 | 11.1813304s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:11 | 200 | 502.1818ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:17 | 200 | 4.1896015s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:23 | 200 | 7.4860262s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:29 | 200 | 10.5949322s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:29 | 200 | 10.2870735s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:44 | 200 | 19.7536192s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:45 | 200 | 4.6411359s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:46 | 200 | 1.6135381s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:47 | 200 | 1.2132791s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:47 | 200 | 588.5449ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:55 | 200 | 2.23621s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:47:55 | 200 | 2.1966382s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:08 | 200 | 12.4222678s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:16 | 200 | 18.5653689s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:19 | 200 | 18.0791208s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:19 | 200 | 15.1990567s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:20 | 200 | 8.0357655s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:28 | 200 | 7.7433813s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:28 | 200 | 1.1343039s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:38 | 200 | 9.9163448s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:38 | 200 | 8.7400894s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:49 | 200 | 14.1232605s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:48:58 | 200 | 23.1913994s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:05 | 200 | 22.5353405s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:05 | 200 | 15.6846864s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:05 | 200 | 11.7045012s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:12 | 200 | 8.205637s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:13 | 200 | 600.207ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:14 | 200 | 427.7481ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:15 | 200 | 369.7804ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:22 | 200 | 5.3564781s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:28 | 200 | 10.8868731s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:29 | 200 | 6.6168853s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:30 | 200 | 7.6491262s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:31 | 200 | 7.192303s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:38 | 200 | 13.2373589s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 13:49:39 | 200 | 2.4837692s | ::1 | POST "/api/generate"
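This burst of POST /api/generate calls shows response times climbing past 30 seconds with several requests finishing in the same second, consistent with concurrent clients queuing behind a single slot (the config above has OLLAMA_NUM_PARALLEL:1). One such call as a sketch; the model tag is again an assumption, and num_ctx is included only to show how the n_ctx_per_seq < n_ctx_train notice could be addressed at request time:

```python
# One non-streaming /api/generate request of the kind logged above.
import json
import urllib.request

body = {
    "model": "llama3.2:3b",            # assumed tag
    "prompt": "Summarize GGUF in one sentence.",
    "stream": False,
    "options": {"num_ctx": 8192},      # larger context; larger KV cache too
}
req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps(body).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["response"])
```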
time=2025-10-02T13:49:53.216-04:00 level=INFO source=routes.go:1475 msg="server config" env="[same settings as the first server config entry above; omitted]"
time=2025-10-02T13:49:53.218-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-10-02T13:49:53.218-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-10-02T13:49:53.218-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.3)"
time=2025-10-02T13:49:53.218-04:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-10-02T13:49:53.219-04:00 level=INFO source=gpu_windows.go:167 msg=packages count=1
time=2025-10-02T13:49:53.219-04:00 level=INFO source=gpu_windows.go:183 msg="efficiency cores detected" maxEfficiencyClass=1
time=2025-10-02T13:49:53.219-04:00 level=INFO source=gpu_windows.go:214 msg="" package=0 cores=16 efficiency=10 threads=22
time=2025-10-02T13:49:53.322-04:00 level=INFO source=gpu.go:311 msg="detected OS VRAM overhead" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" overhead="892.0 MiB"
time=2025-10-02T13:49:53.322-04:00 level=WARN source=cuda_common.go:60 msg="old CUDA driver detected - please upgrade to a newer driver for best performance" version=12.9
time=2025-10-02T13:49:53.325-04:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda variant=v12 compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" total="8.0 GiB" available="6.9 GiB"
time=2025-10-02T13:49:53.325-04:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="8.0 GiB" threshold="20.0 GiB"
time=2025-09-29T13:12:06.515-04:00 level=INFO source=routes.go:1475 msg="server config" env="[same settings as the first server config entry above; omitted]"
time=2025-09-29T13:12:06.520-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-09-29T13:12:06.520-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-09-29T13:12:06.521-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.3)"
time=2025-09-29T13:12:06.521-04:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-29T13:12:06.521-04:00 level=INFO source=gpu_windows.go:167 msg=packages count=1
time=2025-09-29T13:12:06.521-04:00 level=INFO source=gpu_windows.go:183 msg="efficiency cores detected" maxEfficiencyClass=1
time=2025-09-29T13:12:06.521-04:00 level=INFO source=gpu_windows.go:214 msg="" package=0 cores=16 efficiency=10 threads=22
time=2025-09-29T13:12:06.640-04:00 level=INFO source=gpu.go:311 msg="detected OS VRAM overhead" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" overhead="891.0 MiB"
time=2025-09-29T13:12:06.640-04:00 level=WARN source=cuda_common.go:60 msg="old CUDA driver detected - please upgrade to a newer driver for best performance" version=12.9
time=2025-09-29T13:12:06.643-04:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda variant=v12 compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" total="8.0 GiB" available="6.9 GiB"
time=2025-09-29T13:12:06.643-04:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="8.0 GiB" threshold="20.0 GiB"
time=2025-09-26T13:22:57.046-04:00 level=INFO source=routes.go:1475 msg="server config" env="[same settings as the first server config entry above; omitted]"
time=2025-09-26T13:22:57.050-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-09-26T13:22:57.051-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-09-26T13:22:57.051-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.1)"
time=2025-09-26T13:22:57.051-04:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-26T13:22:57.052-04:00 level=INFO source=gpu_windows.go:167 msg=packages count=1
time=2025-09-26T13:22:57.052-04:00 level=INFO source=gpu_windows.go:183 msg="efficiency cores detected" maxEfficiencyClass=1
time=2025-09-26T13:22:57.052-04:00 level=INFO source=gpu_windows.go:214 msg="" package=0 cores=16 efficiency=10 threads=22
time=2025-09-26T13:22:57.191-04:00 level=INFO source=gpu.go:311 msg="detected OS VRAM overhead" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" overhead="891.0 MiB"
time=2025-09-26T13:22:57.191-04:00 level=WARN source=cuda_common.go:60 msg="old CUDA driver detected - please upgrade to a newer driver for best performance" version=12.9
time=2025-09-26T13:22:57.193-04:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda variant=v12 compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" total="8.0 GiB" available="6.9 GiB"
time=2025-09-26T13:22:57.194-04:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="8.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/27 - 16:35:19 | 200 | 0s | ::1 | GET "/api/version"
[GIN] 2025/09/27 - 16:35:19 | 200 | 26.773ms | ::1 | GET "/api/tags"
[GIN] 2025/09/27 - 16:35:19 | 200 | 136.7947ms | ::1 | POST "/api/show"
time=2025-10-02T14:23:09.068-04:00 level=INFO source=routes.go:1475 msg="server config" env="[same settings as the first server config entry above; omitted]"
time=2025-10-02T14:23:09.069-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-10-02T14:23:09.070-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-10-02T14:23:09.070-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.3)"
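Every session logs the same defaults: OLLAMA_CONTEXT_LENGTH:4096, OLLAMA_KEEP_ALIVE:5m0s, OLLAMA_FLASH_ATTENTION:false. The repeated runner starts throughout the log fit the five-minute keep-alive unloading the model between bursts of traffic, though that is an inference, not something the log states. A sketch of launching the server with a few of those settings overridden (variable names come from the log; the chosen values are illustrative only):

```python
# Start `ollama serve` with overrides for settings seen in the log above.
import os
import subprocess

env = os.environ.copy()
env["OLLAMA_CONTEXT_LENGTH"] = "8192"   # log default is 4096
env["OLLAMA_KEEP_ALIVE"] = "30m"        # log default is 5m0s; fewer reloads
env["OLLAMA_FLASH_ATTENTION"] = "1"     # log shows it disabled

subprocess.run(["ollama", "serve"], env=env)
```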
time=2025-10-02T14:23:09.068-04:00 level=INFO source=routes.go:1475 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY:localhost,127.0.0.1,.local,.googleapis.com,.google.com OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:C:\\Users\\research\\.ollama\\models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES:]"
time=2025-10-02T14:23:09.069-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-10-02T14:23:09.070-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-10-02T14:23:09.070-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.3)"
time=2025-10-02T14:23:09.070-04:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-10-02T14:23:09.070-04:00 level=INFO source=gpu_windows.go:167 msg=packages count=1
time=2025-10-02T14:23:09.070-04:00 level=INFO source=gpu_windows.go:183 msg="efficiency cores detected" maxEfficiencyClass=1
time=2025-10-02T14:23:09.070-04:00 level=INFO source=gpu_windows.go:214 msg="" package=0 cores=16 efficiency=10 threads=22
time=2025-10-02T14:23:09.174-04:00 level=INFO source=gpu.go:311 msg="detected OS VRAM overhead" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" overhead="892.0 MiB"
time=2025-10-02T14:23:09.174-04:00 level=WARN source=cuda_common.go:60 msg="old CUDA driver detected - please upgrade to a newer driver for best performance" version=12.9
time=2025-10-02T14:23:09.176-04:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda variant=v12 compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" total="8.0 GiB" available="6.9 GiB"
time=2025-10-02T14:23:09.176-04:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="8.0 GiB" threshold="20.0 GiB"
[GIN] 2025/10/02 - 14:23:09 | 200 | 0s | 127.0.0.1 | HEAD "/"
[GIN] 2025/10/02 - 14:23:09 | 200 | 1.0621ms | 127.0.0.1 | GET "/api/tags"
[GIN] 2025/10/02 - 14:25:50 | 400 | 0s | ::1 | POST "/api/generate"
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
llama_model_loader: - kv 3: general.finetune str = Instruct
llama_model_loader: - kv 4: general.basename str = Llama-3.2
llama_model_loader: - kv 5: general.size_label str = 3B
llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
llama_model_loader: - kv 8: llama.block_count u32 = 28
llama_model_loader: - kv 9: llama.context_length u32 = 131072
llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
llama_model_loader: - kv 18: general.file_type u32 = 15
llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
llama_model_loader: - kv 29: general.quantization_version u32 = 2
llama_model_loader: - type f32: 58 tensors
llama_model_loader: - type q4_K: 168 tensors
llama_model_loader: - type q6_K: 29 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 1.87 GiB (5.01 BPW)
load: printing all EOG tokens:
load: - 128001 ('<|end_of_text|>')
load: - 128008 ('<|eom_id|>')
load: - 128009 ('<|eot_id|>')
load: special tokens cache size = 256
load: token to piece cache size = 0.7999 MB
print_info: arch = llama
print_info: vocab_only = 1
print_info: model type = ?B
print_info: model params = 3.21 B
print_info: general.name = Llama 3.2 3B Instruct
print_info: vocab type = BPE
print_info: n_vocab = 128256
print_info: n_merges = 280147
print_info: BOS token = 128000 '<|begin_of_text|>'
print_info: EOS token = 128009 '<|eot_id|>'
print_info: EOT token = 128009 '<|eot_id|>'
print_info: EOM token = 128008 '<|eom_id|>'
print_info: LF token = 198 'Ċ'
print_info: EOG token = 128001 '<|end_of_text|>'
print_info: EOG token = 128008 '<|eom_id|>'
print_info: EOG token = 128009 '<|eot_id|>'
print_info: max token length = 256
llama_model_load: vocab only - skipping tensors
time=2025-10-02T14:33:59.227-04:00 level=INFO source=server.go:399 msg="starting runner" cmd="C:\\Users\\research\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --model C:\\Users\\research\\.ollama\\models\\blobs\\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff --port 53501"
time=2025-10-02T14:33:59.251-04:00 level=INFO source=server.go:504 msg="system memory" total="31.4 GiB" free="17.5 GiB" free_swap="15.2 GiB"
time=2025-10-02T14:33:59.251-04:00 level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff library=cuda parallel=1 required="3.1 GiB" gpus=1
time=2025-10-02T14:33:59.251-04:00 level=INFO source=server.go:544 msg=offload library=cuda layers.requested=-1 layers.model=29 layers.offload=29 layers.split=[29] memory.available="[6.9 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.1 GiB" memory.required.partial="3.1 GiB" memory.required.kv="448.0 MiB" memory.required.allocations="[3.1 GiB]" memory.weights.total="1.9 GiB" memory.weights.repeating="1.6 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="256.5 MiB" memory.graph.partial="570.7 MiB"
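The memory.required.kv="448.0 MiB" figure in the offload record above follows directly from values printed elsewhere in this log: 28 layers, a 4096-token context (OLLAMA_CONTEXT_LENGTH), n_embd_k_gqa = n_embd_v_gqa = 1024, and the f16 cache type the llama_kv_cache_unified lines below confirm. A quick arithmetic check, using only those logged numbers:

    # Reproduce memory.required.kv="448.0 MiB" from values in this log.
    n_layer, n_ctx, n_embd_gqa, f16_bytes = 28, 4096, 1024, 2

    k_bytes = n_layer * n_ctx * n_embd_gqa * f16_bytes  # K cache -> 224.00 MiB
    v_bytes = n_layer * n_ctx * n_embd_gqa * f16_bytes  # V cache -> 224.00 MiB
    print((k_bytes + v_bytes) / 2**20, "MiB")           # -> 448.0 MiB

Because the cache scales linearly in context length, doubling n_ctx to 8192 would roughly double this to 896 MiB on the same model.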
time=2025-10-02T14:33:59.265-04:00 level=INFO source=runner.go:864 msg="starting go runner"
load_backend: loaded CPU backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\ggml-cpu-alderlake.dll
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9, VMM: yes, ID: GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a
load_backend: loaded CUDA backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\cuda_v12\ggml-cuda.dll
time=2025-10-02T14:33:59.381-04:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX_VNNI=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 CUDA.0.ARCHS=500,600,610,700,750,800,860,870,890,900,1200 CUDA.0.USE_GRAPHS=1 CUDA.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(clang)
time=2025-10-02T14:33:59.381-04:00 level=INFO source=runner.go:900 msg="Server listening on 127.0.0.1:53501"
time=2025-10-02T14:33:59.390-04:00 level=INFO source=runner.go:799 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:4096 KvCacheType: NumThreads:6 GPULayers:29[ID:GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a Layers:29(0..28)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-02T14:33:59.391-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T14:33:59.391-04:00 level=INFO source=server.go:1285 msg="waiting for server to become available" status="llm server loading model"
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 4070 Laptop GPU) - 7056 MiB free
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
llama_model_loader: - kv 3: general.finetune str = Instruct
llama_model_loader: - kv 4: general.basename str = Llama-3.2
llama_model_loader: - kv 5: general.size_label str = 3B
llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
llama_model_loader: - kv 8: llama.block_count u32 = 28
llama_model_loader: - kv 9: llama.context_length u32 = 131072
llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
llama_model_loader: - kv 18: general.file_type u32 = 15
llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
llama_model_loader: - kv 29: general.quantization_version u32 = 2
llama_model_loader: - type f32: 58 tensors
llama_model_loader: - type q4_K: 168 tensors
llama_model_loader: - type q6_K: 29 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 1.87 GiB (5.01 BPW)
load: printing all EOG tokens:
load: - 128001 ('<|end_of_text|>')
load: - 128008 ('<|eom_id|>')
load: - 128009 ('<|eot_id|>')
load: special tokens cache size = 256
load: token to piece cache size = 0.7999 MB
print_info: arch = llama
print_info: vocab_only = 0
print_info: n_ctx_train = 131072
print_info: n_embd = 3072
print_info: n_layer = 28
print_info: n_head = 24
print_info: n_head_kv = 8
print_info: n_rot = 128
print_info: n_swa = 0
print_info: is_swa_any = 0
print_info: n_embd_head_k = 128
print_info: n_embd_head_v = 128
print_info: n_gqa = 3
print_info: n_embd_k_gqa = 1024
print_info: n_embd_v_gqa = 1024
print_info: f_norm_eps = 0.0e+00
print_info: f_norm_rms_eps = 1.0e-05
print_info: f_clamp_kqv = 0.0e+00
print_info: f_max_alibi_bias = 0.0e+00
print_info: f_logit_scale = 0.0e+00
print_info: f_attn_scale = 0.0e+00
print_info: n_ff = 8192
print_info: n_expert = 0
print_info: n_expert_used = 0
print_info: causal attn = 1
print_info: pooling type = 0
print_info: rope type = 0
print_info: rope scaling = linear
print_info: freq_base_train = 500000.0
print_info: freq_scale_train = 1
print_info: n_ctx_orig_yarn = 131072
print_info: rope_finetuned = unknown
print_info: model type = 3B
print_info: model params = 3.21 B
print_info: general.name = Llama 3.2 3B Instruct
print_info: vocab type = BPE
print_info: n_vocab = 128256
print_info: n_merges = 280147
print_info: BOS token = 128000 '<|begin_of_text|>'
print_info: EOS token = 128009 '<|eot_id|>'
print_info: EOT token = 128009 '<|eot_id|>'
print_info: EOM token = 128008 '<|eom_id|>'
print_info: LF token = 198 'Ċ'
print_info: EOG token = 128001 '<|end_of_text|>'
print_info: EOG token = 128008 '<|eom_id|>'
print_info: EOG token = 128009 '<|eot_id|>'
print_info: max token length = 256
load_tensors: loading model tensors, this can take a while... (mmap = false)
load_tensors: offloading 28 repeating layers to GPU
load_tensors: offloading output layer to GPU
load_tensors: offloaded 29/29 layers to GPU
load_tensors: CUDA0 model buffer size = 1918.35 MiB
load_tensors: CPU model buffer size = 308.23 MiB
llama_context: constructing llama_context
llama_context: n_seq_max = 1
llama_context: n_ctx = 4096
llama_context: n_ctx_per_seq = 4096
llama_context: n_batch = 512
llama_context: n_ubatch = 512
llama_context: causal_attn = 1
llama_context: flash_attn = 0
llama_context: kv_unified = false
llama_context: freq_base = 500000.0
llama_context: freq_scale = 1
llama_context: n_ctx_per_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
llama_context: CUDA_Host output buffer size = 0.50 MiB
llama_kv_cache_unified: CUDA0 KV buffer size = 448.00 MiB
llama_kv_cache_unified: size = 448.00 MiB ( 4096 cells, 28 layers, 1/1 seqs), K (f16): 224.00 MiB, V (f16): 224.00 MiB
llama_context: CUDA0 compute buffer size = 256.50 MiB
llama_context: CUDA_Host compute buffer size = 18.01 MiB
llama_context: graph nodes = 986
llama_context: graph splits = 2
time=2025-10-02T14:34:00.393-04:00 level=INFO source=server.go:1289 msg="llama runner started in 1.17 seconds"
time=2025-10-02T14:34:00.393-04:00 level=INFO source=sched.go:470 msg="loaded runners" count=1
time=2025-10-02T14:34:00.393-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T14:34:00.394-04:00 level=INFO source=server.go:1289 msg="llama runner started in 1.17 seconds"
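The "n_ctx_per_seq (4096) < n_ctx_train (131072)" notice above means the server-wide default window (OLLAMA_CONTEXT_LENGTH:4096) is much smaller than what this model was trained for. A request can ask for a larger window through the options.num_ctx field of /api/generate; note that the model tag "llama3.2:3b" below is an assumption, since the log only shows the blob path and the name "Llama 3.2 3B Instruct":

    # Request a larger context for one call via options.num_ctx.
    # Assumptions: local server on port 11434, model tag "llama3.2:3b".
    import json
    import urllib.request

    req = urllib.request.Request(
        "http://localhost:11434/api/generate",
        data=json.dumps({
            "model": "llama3.2:3b",        # assumed tag
            "prompt": "Summarize the text that follows...",
            "stream": False,
            "options": {"num_ctx": 8192},  # KV cache grows linearly with this
        }).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        print(json.load(resp)["response"])

On this 8 GiB GPU the trade-off is direct: per the arithmetic earlier, 8192 tokens of f16 cache costs about 896 MiB instead of 448 MiB.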
[GIN] 2025/10/02 - 14:34:07 | 200 | 8.1140319s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:14 | 200 | 15.5818456s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:29 | 200 | 30.5283387s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:41 | 200 | 42.5634113s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:51 | 200 | 42.1494869s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:51 | 200 | 35.8277634s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:53 | 200 | 22.1427475s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:53 | 200 | 10.3890497s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:34:59 | 200 | 7.3850196s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:02 | 200 | 9.6418308s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:06 | 200 | 11.7860508s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:07 | 200 | 11.6668005s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:07 | 200 | 6.6585297s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:08 | 200 | 1.8010852s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:10 | 200 | 3.0247898s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:14 | 200 | 6.5107871s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:18 | 200 | 8.4541475s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:21 | 200 | 9.8492367s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:22 | 200 | 7.6998287s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:22 | 200 | 7.1220983s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:23 | 200 | 4.3940857s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:23 | 200 | 1.3974011s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:31 | 200 | 9.2615732s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:32 | 200 | 9.2998573s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:32 | 200 | 9.4954242s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:34 | 200 | 9.9008518s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:36 | 200 | 4.7192817s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:37 | 200 | 4.6725913s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:43 | 200 | 9.6675409s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:43 | 200 | 8.3898071s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:54 | 200 | 17.0157331s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:55 | 200 | 16.9997067s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:35:58 | 200 | 15.2620421s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:02 | 200 | 16.4597336s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:13 | 200 | 17.4744537s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:13 | 200 | 17.502287s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:17 | 200 | 16.4570281s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:20 | 200 | 16.8005335s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:23 | 200 | 7.8683112s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:24 | 200 | 8.515099s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:25 | 200 | 5.8643048s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:30 | 200 | 9.1494366s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:30 | 200 | 5.2605589s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:30 | 200 | 4.1559468s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:31 | 200 | 4.5140778s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:37 | 200 | 6.925057s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:47 | 200 | 16.7570887s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:47 | 200 | 15.961644s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:48 | 200 | 16.0214272s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:48 | 200 | 9.3225182s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:36:59 | 200 | 11.0277955s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:37:03 | 200 | 15.5995844s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:37:04 | 200 | 16.3096375s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:37:05 | 200 | 16.0883839s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:37:13 | 200 | 14.6223983s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:39:00 | 200 | 0s | 127.0.0.1 | HEAD "/"
[GIN] 2025/10/02 - 14:39:00 | 200 | 704.3µs | 127.0.0.1 | GET "/api/tags"
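The burst above is easier to judge as summary numbers than as raw lines. A throwaway sketch for pulling the /api/generate latencies out of access-log text like this; "ollama.log" is a hypothetical filename standing in for wherever this output was saved:

    # Summarize per-request latency from [GIN] access-log lines.
    import re
    import statistics

    pat = re.compile(
        r'\[GIN\] \S+ - \S+ \| 200 \| ([\d.]+)(µs|ms|s) \| \S+ \| POST "/api/generate"'
    )
    scale = {"µs": 1e-6, "ms": 1e-3, "s": 1.0}

    with open("ollama.log", encoding="utf-8") as f:  # hypothetical path
        secs = [float(v) * scale[u] for v, u in pat.findall(f.read())]

    print(f"n={len(secs)} median={statistics.median(secs):.2f}s max={max(secs):.2f}s")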
time=2025-10-02T14:44:34.660-04:00 level=INFO source=routes.go:1475 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY:localhost,127.0.0.1,.local,.googleapis.com,.google.com OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:C:\\Users\\research\\.ollama\\models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES:]"
time=2025-10-02T14:44:34.660-04:00 level=INFO source=images.go:518 msg="total blobs: 6"
time=2025-10-02T14:44:34.661-04:00 level=INFO source=images.go:525 msg="total unused blobs removed: 0"
time=2025-10-02T14:44:34.662-04:00 level=INFO source=routes.go:1528 msg="Listening on [::]:11434 (version 0.12.3)"
time=2025-10-02T14:44:34.662-04:00 level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-10-02T14:44:34.662-04:00 level=INFO source=gpu_windows.go:167 msg=packages count=1
time=2025-10-02T14:44:34.662-04:00 level=INFO source=gpu_windows.go:183 msg="efficiency cores detected" maxEfficiencyClass=1
time=2025-10-02T14:44:34.662-04:00 level=INFO source=gpu_windows.go:214 msg="" package=0 cores=16 efficiency=10 threads=22
time=2025-10-02T14:44:34.762-04:00 level=INFO source=gpu.go:311 msg="detected OS VRAM overhead" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" overhead="892.0 MiB"
time=2025-10-02T14:44:34.762-04:00 level=WARN source=cuda_common.go:60 msg="old CUDA driver detected - please upgrade to a newer driver for best performance" version=12.9
time=2025-10-02T14:44:34.764-04:00 level=INFO source=types.go:131 msg="inference compute" id=GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a library=cuda variant=v12 compute=8.9 driver=12.9 name="NVIDIA GeForce RTX 4070 Laptop GPU" total="8.0 GiB" available="6.9 GiB"
time=2025-10-02T14:44:34.764-04:00 level=INFO source=routes.go:1569 msg="entering low vram mode" "total vram"="8.0 GiB" threshold="20.0 GiB"
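OLLAMA_KEEP_ALIVE:5m0s in the config above means a loaded model is evicted after five idle minutes, so the next request pays the full load cost again (the runner start that follows below). A request can override the retention per call via the keep_alive field; as before, the "llama3.2:3b" tag is an assumption, not something the log states:

    # Hold the model in VRAM longer than the 5m default for one session.
    # Assumptions: local server on port 11434, model tag "llama3.2:3b".
    import json
    import urllib.request

    req = urllib.request.Request(
        "http://localhost:11434/api/generate",
        data=json.dumps({
            "model": "llama3.2:3b",  # assumed tag
            "prompt": "warm-up",
            "stream": False,
            "keep_alive": "30m",     # keep the runner resident for 30 minutes
        }).encode(),
        headers={"Content-Type": "application/json"},
    )
    urllib.request.urlopen(req).read()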
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
llama_model_loader: - kv 3: general.finetune str = Instruct
llama_model_loader: - kv 4: general.basename str = Llama-3.2
llama_model_loader: - kv 5: general.size_label str = 3B
llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
llama_model_loader: - kv 8: llama.block_count u32 = 28
llama_model_loader: - kv 9: llama.context_length u32 = 131072
llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
llama_model_loader: - kv 18: general.file_type u32 = 15
llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
llama_model_loader: - kv 29: general.quantization_version u32 = 2
llama_model_loader: - type f32: 58 tensors
llama_model_loader: - type q4_K: 168 tensors
llama_model_loader: - type q6_K: 29 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 1.87 GiB (5.01 BPW)
load: printing all EOG tokens:
load: - 128001 ('<|end_of_text|>')
load: - 128008 ('<|eom_id|>')
load: - 128009 ('<|eot_id|>')
load: special tokens cache size = 256
load: token to piece cache size = 0.7999 MB
print_info: arch = llama
print_info: vocab_only = 1
print_info: model type = ?B
print_info: model params = 3.21 B
print_info: general.name = Llama 3.2 3B Instruct
print_info: vocab type = BPE
print_info: n_vocab = 128256
print_info: n_merges = 280147
print_info: BOS token = 128000 '<|begin_of_text|>'
print_info: EOS token = 128009 '<|eot_id|>'
print_info: EOT token = 128009 '<|eot_id|>'
print_info: EOM token = 128008 '<|eom_id|>'
print_info: LF token = 198 'Ċ'
print_info: EOG token = 128001 '<|end_of_text|>'
print_info: EOG token = 128008 '<|eom_id|>'
print_info: EOG token = 128009 '<|eot_id|>'
print_info: max token length = 256
llama_model_load: vocab only - skipping tensors
time=2025-10-02T14:46:19.765-04:00 level=INFO source=server.go:399 msg="starting runner" cmd="C:\\Users\\research\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --model C:\\Users\\research\\.ollama\\models\\blobs\\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff --port 62246"
time=2025-10-02T14:46:19.789-04:00 level=INFO source=server.go:504 msg="system memory" total="31.4 GiB" free="17.3 GiB" free_swap="15.1 GiB"
time=2025-10-02T14:46:19.789-04:00 level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff library=cuda parallel=1 required="3.1 GiB" gpus=1
time=2025-10-02T14:46:19.789-04:00 level=INFO source=server.go:544 msg=offload library=cuda layers.requested=-1 layers.model=29 layers.offload=29 layers.split=[29] memory.available="[6.9 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.1 GiB" memory.required.partial="3.1 GiB" memory.required.kv="448.0 MiB" memory.required.allocations="[3.1 GiB]" memory.weights.total="1.9 GiB" memory.weights.repeating="1.6 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="256.5 MiB" memory.graph.partial="570.7 MiB"
time=2025-10-02T14:46:19.802-04:00 level=INFO source=runner.go:864 msg="starting go runner"
load_backend: loaded CPU backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\ggml-cpu-alderlake.dll
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9, VMM: yes, ID: GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a
load_backend: loaded CUDA backend from C:\Users\research\AppData\Local\Programs\Ollama\lib\ollama\cuda_v12\ggml-cuda.dll
time=2025-10-02T14:46:19.920-04:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX_VNNI=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 CUDA.0.ARCHS=500,600,610,700,750,800,860,870,890,900,1200 CUDA.0.USE_GRAPHS=1 CUDA.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(clang)
time=2025-10-02T14:46:19.922-04:00 level=INFO source=runner.go:900 msg="Server listening on 127.0.0.1:62246"
time=2025-10-02T14:46:19.930-04:00 level=INFO source=runner.go:799 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:4096 KvCacheType: NumThreads:6 GPULayers:29[ID:GPU-d46d3ac7-fa8e-b3de-6f28-22cf0609879a Layers:29(0..28)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-10-02T14:46:19.930-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T14:46:19.930-04:00 level=INFO source=server.go:1285 msg="waiting for server to become available" status="llm server loading model"
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 4070 Laptop GPU) - 7056 MiB free
llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from C:\Users\research\.ollama\models\blobs\sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = llama
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
llama_model_loader: - kv 3: general.finetune str = Instruct
llama_model_loader: - kv 4: general.basename str = Llama-3.2
llama_model_loader: - kv 5: general.size_label str = 3B
llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
llama_model_loader: - kv 8: llama.block_count u32 = 28
llama_model_loader: - kv 9: llama.context_length u32 = 131072
llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
llama_model_loader: - kv 18: general.file_type u32 = 15
llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
llama_model_loader: - kv 29: general.quantization_version u32 = 2
llama_model_loader: - type f32: 58 tensors
llama_model_loader: - type q4_K: 168 tensors
llama_model_loader: - type q6_K: 29 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 1.87 GiB (5.01 BPW)
load: printing all EOG tokens:
load: - 128001 ('<|end_of_text|>')
load: - 128008 ('<|eom_id|>')
load: - 128009 ('<|eot_id|>')
load: special tokens cache size = 256
load: token to piece cache size = 0.7999 MB
print_info: arch = llama
print_info: vocab_only = 0
print_info: n_ctx_train = 131072
print_info: n_embd = 3072
print_info: n_layer = 28
print_info: n_head = 24
print_info: n_head_kv = 8
print_info: n_rot = 128
print_info: n_swa = 0
print_info: is_swa_any = 0
print_info: n_embd_head_k = 128
print_info: n_embd_head_v = 128
print_info: n_gqa = 3
print_info: n_embd_k_gqa = 1024
print_info: n_embd_v_gqa = 1024
print_info: f_norm_eps = 0.0e+00
print_info: f_norm_rms_eps = 1.0e-05
print_info: f_clamp_kqv = 0.0e+00
print_info: f_max_alibi_bias = 0.0e+00
print_info: f_logit_scale = 0.0e+00
print_info: f_attn_scale = 0.0e+00
print_info: n_ff = 8192
print_info: n_expert = 0
print_info: n_expert_used = 0
print_info: causal attn = 1
print_info: pooling type = 0
print_info: rope type = 0
print_info: rope scaling = linear
print_info: freq_base_train = 500000.0
print_info: freq_scale_train = 1
print_info: n_ctx_orig_yarn = 131072
print_info: rope_finetuned = unknown
print_info: model type = 3B
print_info: model params = 3.21 B
print_info: general.name = Llama 3.2 3B Instruct
print_info: vocab type = BPE
print_info: n_vocab = 128256
print_info: n_merges = 280147
print_info: BOS token = 128000 '<|begin_of_text|>'
print_info: EOS token = 128009 '<|eot_id|>'
print_info: EOT token = 128009 '<|eot_id|>'
print_info: EOM token = 128008 '<|eom_id|>'
print_info: LF token = 198 'Ċ'
print_info: EOG token = 128001 '<|end_of_text|>'
print_info: EOG token = 128008 '<|eom_id|>'
print_info: EOG token = 128009 '<|eot_id|>'
print_info: max token length = 256
load_tensors: loading model tensors, this can take a while... (mmap = false)
load_tensors: offloading 28 repeating layers to GPU
load_tensors: offloading output layer to GPU
load_tensors: offloaded 29/29 layers to GPU
load_tensors: CUDA0 model buffer size = 1918.35 MiB
load_tensors: CPU model buffer size = 308.23 MiB
llama_context: constructing llama_context
llama_context: n_seq_max = 1
llama_context: n_ctx = 4096
llama_context: n_ctx_per_seq = 4096
llama_context: n_batch = 512
llama_context: n_ubatch = 512
llama_context: causal_attn = 1
llama_context: flash_attn = 0
llama_context: kv_unified = false
llama_context: freq_base = 500000.0
llama_context: freq_scale = 1
llama_context: n_ctx_per_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
llama_context: CUDA_Host output buffer size = 0.50 MiB
llama_kv_cache_unified: CUDA0 KV buffer size = 448.00 MiB
llama_kv_cache_unified: size = 448.00 MiB ( 4096 cells, 28 layers, 1/1 seqs), K (f16): 224.00 MiB, V (f16): 224.00 MiB
llama_context: CUDA0 compute buffer size = 256.50 MiB
llama_context: CUDA_Host compute buffer size = 18.01 MiB
llama_context: graph nodes = 986
llama_context: graph splits = 2
time=2025-10-02T14:46:20.932-04:00 level=INFO source=server.go:1289 msg="llama runner started in 1.17 seconds"
time=2025-10-02T14:46:20.932-04:00 level=INFO source=sched.go:470 msg="loaded runners" count=1
time=2025-10-02T14:46:20.932-04:00 level=INFO source=server.go:1251 msg="waiting for llama runner to start responding"
time=2025-10-02T14:46:20.932-04:00 level=INFO source=server.go:1289 msg="llama runner started in 1.17 seconds"
[GIN] 2025/10/02 - 14:46:21 | 200 | 1.9086347s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:21 | 200 | 398.349ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:22 | 200 | 410.4979ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:25 | 200 | 3.5164349s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:26 | 200 | 483.0366ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:26 | 200 | 570.8641ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:27 | 200 | 392.9669ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:27 | 200 | 495.8512ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:28 | 200 | 586.5487ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:30 | 200 | 1.9024266s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:30 | 200 | 652.2019ms | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:32 | 200 | 2.1804509s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:33 | 200 | 3.0325069s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:33 | 200 | 3.5178319s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:34 | 200 | 3.8872204s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:35 | 200 | 1.8752438s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:35 | 200 | 1.9052963s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:36 | 200 | 2.4668505s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:36 | 200 | 1.5901553s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:37 | 200 | 1.4457185s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:39 | 200 | 3.6191916s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:43 | 200 | 7.4555149s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:44 | 200 | 7.3666575s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:45 | 200 | 8.8097063s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:47 | 200 | 8.029839s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:46:51 | 200 | 7.7062925s | ::1 | POST "/api/generate"
[GIN]
2025/10/02 - 14:46:56 | 200 | 12.1223027s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:06 | 200 | 20.0659336s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:15 | 200 | 27.5533745s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:23 | 200 | 29.7106226s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:25 | 200 | 26.3761731s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:29 | 200 | 19.790255s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:30 | 200 | 11.8220373s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:31 | 200 | 5.7485617s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:33 | 200 | 6.5018322s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:34 | 200 | 4.476375s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:36 | 200 | 5.964991s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:37 | 200 | 5.8870285s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:43 | 200 | 9.5937335s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:44 | 200 | 10.2574387s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:44 | 200 | 8.5938936s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:52 | 200 | 12.4493404s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:53 | 200 | 9.9232324s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:53 | 200 | 7.5989309s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:47:59 | 200 | 12.6173707s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:07 | 200 | 14.6620368s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:07 | 200 | 14.7207936s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:07 | 200 | 14.391168s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:09 | 200 | 9.5383278s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:10 | 200 | 2.887109s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:15 | 200 | 7.4706264s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:15 | 200 | 7.7066357s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:16 | 200 | 7.2528757s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:16 | 200 | 4.4280598s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:16 | 200 | 1.1134078s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:17 | 200 | 1.1268616s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:17 | 200 | 893.5278ms | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:18 | 200 | 1.0669484s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:19 | 200 | 1.4460095s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:19 | 200 | 2.1779019s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:23 | 200 | 5.5127627s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:24 | 200 | 5.7025648s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:25 | 200 | 6.7349436s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:25 | 200 | 5.959243s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:26 | 200 | 2.497106s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:26 | 200 | 861.1312ms | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:26 | 200 | 873.6601ms | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:27 | 200 | 706.4258ms | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:27 | 200 | 1.3101527s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:28 | 200 | 1.6347534s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:31 | 200 | 5.1263664s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:38 | 200 | 11.1305324s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:47 | 200 | 18.69467s | ::1 | 
POST "/api/generate" [GIN] 2025/10/02 - 14:48:47 | 200 | 18.3168687s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:54 | 200 | 22.5485174s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:57 | 200 | 16.5798207s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:48:59 | 200 | 12.3752708s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:00 | 200 | 10.2806591s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:03 | 200 | 8.5626781s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:03 | 200 | 4.604008s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:05 | 200 | 4.637643s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:05 | 200 | 3.5036238s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:06 | 200 | 2.7759036s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:13 | 200 | 8.8654123s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:19 | 200 | 13.6385828s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:28 | 200 | 22.4852726s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:33 | 200 | 25.6689071s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:34 | 200 | 19.1193643s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:39 | 200 | 18.4448554s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:47 | 200 | 16.7700779s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:52 | 200 | 17.9393229s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:52 | 200 | 18.0765172s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:49:59 | 200 | 17.8364236s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:00 | 200 | 12.0145333s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:07 | 200 | 14.139078s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:09 | 200 | 14.9904523s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:10 | 200 | 8.8647845s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:11 | 200 | 8.9565731s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:11 | 200 | 2.6143735s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:12 | 200 | 1.3959569s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:17 | 200 | 5.9963681s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:18 | 200 | 6.5558284s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:19 | 200 | 7.3565849s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:20 | 200 | 5.4254726s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:20 | 200 | 1.3199744s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:21 | 200 | 1.468871s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:22 | 200 | 1.6778171s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:23 | 200 | 2.1557477s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:35 | 200 | 13.4210853s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:35 | 200 | 13.926926s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:41 | 200 | 19.0080088s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:42 | 200 | 19.4547201s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:42 | 200 | 19.4542305s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:43 | 200 | 7.3282971s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:43 | 200 | 6.5633971s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:53 | 200 | 11.0645987s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:54 | 200 | 11.4435896s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:54 | 200 | 11.3666799s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:55 | 200 | 11.2762014s | ::1 | POST "/api/generate" [GIN] 2025/10/02 
- 14:50:55 | 200 | 11.5645463s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:50:57 | 200 | 2.8599961s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:51:05 | 200 | 10.3455799s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:51:24 | 200 | 28.9159969s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:51:27 | 200 | 32.2622906s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:51:34 | 200 | 39.2924444s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:51:35 | 200 | 38.1541073s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:51:41 | 200 | 34.0039203s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:03 | 200 | 36.7039753s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:11 | 200 | 41.4333094s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:13 | 200 | 38.2850254s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:21 | 200 | 42.522783s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:27 | 200 | 41.5327312s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:35 | 200 | 29.5474992s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:36 | 200 | 23.1488147s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:43 | 200 | 20.4302947s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:44 | 200 | 15.5161551s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:48 | 200 | 16.7711379s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:49 | 200 | 12.2413996s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:52:58 | 200 | 20.7486466s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:08 | 200 | 23.8883478s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:13 | 200 | 27.9401466s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:24 | 200 | 35.8285788s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:29 | 200 | 40.4026759s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:33 | 200 | 32.7854143s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:40 | 200 | 31.9543313s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:40 | 200 | 27.2461937s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:41 | 200 | 15.0778253s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:41 | 200 | 12.3260017s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:45 | 200 | 10.5484784s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:53:53 | 200 | 12.4790372s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:02 | 200 | 20.7787663s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:02 | 200 | 20.8688905s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:03 | 200 | 22.1467132s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:04 | 200 | 22.5628654s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:04 | 200 | 23.1393936s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:10 | 200 | 27.9186793s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:19 | 200 | 31.9405255s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:23 | 200 | 27.0978493s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:23 | 200 | 20.9215843s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:30 | 200 | 26.5321941s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:34 | 200 | 30.2386905s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:35 | 200 | 30.556604s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:37 | 200 | 31.7510259s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:40 | 200 | 27.5382831s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:44 | 200 | 22.9416537s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:45 | 
200 | 21.1098235s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:48 | 200 | 23.6960899s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:54:58 | 200 | 26.6024148s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:11 | 200 | 36.6645049s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:14 | 200 | 39.1484638s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:14 | 200 | 37.5217673s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:20 | 200 | 39.9192194s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:22 | 200 | 36.8198568s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:30 | 200 | 42.782245s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:30 | 200 | 41.5985785s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:38 | 200 | 39.8276135s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:42 | 200 | 28.6615301s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:47 | 200 | 33.5108607s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:48 | 200 | 33.8153957s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:49 | 200 | 26.9092818s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:49 | 200 | 27.3965711s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:50 | 200 | 18.9293863s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:50 | 200 | 17.8454017s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:51 | 200 | 12.1548711s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:51 | 200 | 5.6410955s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:51 | 200 | 4.1275388s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:58 | 200 | 9.0774248s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:58 | 200 | 8.9176565s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:58 | 200 | 8.8930533s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:55:59 | 200 | 8.6881483s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:09 | 200 | 18.1687203s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:09 | 200 | 18.4048729s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:18 | 200 | 27.2667143s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:22 | 200 | 30.9674372s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:23 | 200 | 24.9998193s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:24 | 200 | 25.3131907s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:34 | 200 | 35.1729034s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:46 | 200 | 45.2349532s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:46 | 200 | 36.814288s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:55 | 200 | 43.6802011s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:55 | 200 | 34.4772737s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:55 | 200 | 33.04759s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:56 | 200 | 32.5369145s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:56 | 200 | 32.3398238s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:57 | 200 | 20.6323068s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:58 | 200 | 11.8673916s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:56:58 | 200 | 10.6218492s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:00 | 200 | 4.5474759s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:00 | 200 | 2.3644213s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:00 | 200 | 2.7972243s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:07 | 200 | 8.8992729s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:07 | 200 | 9.0164268s | ::1 | 
POST "/api/generate" [GIN] 2025/10/02 - 14:57:08 | 200 | 9.4262412s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:08 | 200 | 8.7343139s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:09 | 200 | 9.1100388s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:15 | 200 | 15.1066942s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:15 | 200 | 15.1343509s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:15 | 200 | 14.8354658s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:20 | 200 | 13.3543177s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:21 | 200 | 13.1348834s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:21 | 200 | 12.9542413s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:21 | 200 | 13.0302103s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:21 | 200 | 12.7968758s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:27 | 200 | 11.6804596s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:27 | 200 | 12.119634s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:28 | 200 | 13.0389109s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:29 | 200 | 8.4213927s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:29 | 200 | 8.1326072s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:29 | 200 | 7.8116541s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:30 | 200 | 8.2049023s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:30 | 200 | 8.4218618s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:31 | 200 | 4.1974642s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:38 | 200 | 9.5027137s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:44 | 200 | 14.7983411s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:44 | 200 | 14.984444s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:49 | 200 | 18.7661294s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:50 | 200 | 19.3630456s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:59 | 200 | 28.0147632s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:57:59 | 200 | 28.2238373s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:05 | 200 | 34.0725365s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:06 | 200 | 27.4163493s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:12 | 200 | 27.3641571s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:15 | 200 | 28.4119696s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:24 | 200 | 33.8108885s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:25 | 200 | 33.677655s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:25 | 200 | 26.4526179s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:25 | 200 | 26.180658s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:26 | 200 | 20.1612679s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:26 | 200 | 19.0155397s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:26 | 200 | 14.8196319s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:27 | 200 | 12.2288933s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:27 | 200 | 2.5457271s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:28 | 200 | 3.1811529s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:30 | 200 | 5.0409094s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:31 | 200 | 4.5316301s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:31 | 200 | 4.9328031s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:32 | 200 | 4.9465419s | ::1 | POST "/api/generate" [GIN] 2025/10/02 - 14:58:33 | 200 | 5.73554s | ::1 | POST "/api/generate" [GIN] 
2025/10/02 - 14:58:38 | 200 | 10.5332329s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:58:43 | 200 | 14.8562793s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:58:50 | 200 | 22.1070034s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:58:51 | 200 | 20.5706995s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:00 | 200 | 28.8238418s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:00 | 200 | 28.8854493s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:00 | 200 | 28.7414565s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:05 | 200 | 32.1191172s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:05 | 200 | 26.964434s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:05 | 200 | 20.2230671s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:06 | 200 | 14.9368152s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:06 | 200 | 12.7073269s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:06 | 200 | 6.4492355s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:06 | 200 | 6.1280127s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:07 | 200 | 4.8785368s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:07 | 200 | 2.1439534s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:15 | 200 | 10.149628s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:20 | 200 | 14.0112134s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:21 | 200 | 15.1889552s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:25 | 200 | 18.9987749s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:29 | 200 | 22.3050614s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:29 | 200 | 22.2991606s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:29 | 200 | 21.8822163s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:29 | 200 | 21.5794492s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:30 | 200 | 12.9584588s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:31 | 200 | 9.6609161s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:31 | 200 | 9.9940233s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:40 | 200 | 11.9918799s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:40 | 200 | 10.9511856s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:42 | 200 | 12.8695403s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:42 | 200 | 12.1054382s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:50 | 200 | 18.6677521s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:50 | 200 | 18.93985s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 14:59:56 | 200 | 24.9695484s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:04 | 200 | 32.5652557s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:04 | 200 | 24.5884261s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:12 | 200 | 30.7909225s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:12 | 200 | 30.0214074s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:13 | 200 | 28.327768s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:13 | 200 | 23.0780567s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:16 | 200 | 24.9723806s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:20 | 200 | 24.150813s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:21 | 200 | 16.518475s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:21 | 200 | 15.8435201s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:22 | 200 | 9.1610648s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:22 | 200 | 8.8518214s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:22 | 200 | 9.1112052s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:23 | 200 | 9.0390294s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:23 | 200 | 6.0544683s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:23 | 200 | 2.3357888s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:24 | 200 | 2.0661923s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:29 | 200 | 6.7064549s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:29 | 200 | 6.9390645s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:35 | 200 | 12.75864s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:36 | 200 | 12.7872576s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:41 | 200 | 17.3661952s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:41 | 200 | 17.5178313s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:42 | 200 | 17.2719888s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:47 | 200 | 22.6765094s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:47 | 200 | 18.8032527s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:48 | 200 | 18.677358s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:48 | 200 | 12.8051135s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:49 | 200 | 12.3132536s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:49 | 200 | 8.0740622s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:50 | 200 | 7.9602304s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:51 | 200 | 8.1813693s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:57 | 200 | 9.590343s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:57 | 200 | 9.5464349s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:00:58 | 200 | 9.6330254s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:00 | 200 | 11.6571944s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:01 | 200 | 11.3781415s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:01 | 200 | 11.4495712s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:07 | 200 | 17.2240465s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:11 | 200 | 19.0220309s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:11 | 200 | 13.4278959s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:12 | 200 | 13.4265993s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:12 | 200 | 12.8981627s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:13 | 200 | 11.8061444s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:13 | 200 | 11.8056822s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:15 | 200 | 12.8114809s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:16 | 200 | 6.9952155s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:16 | 200 | 4.7056737s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:16 | 200 | 4.2964175s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:17 | 200 | 4.4045464s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:24 | 200 | 11.2208365s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:27 | 200 | 14.4713773s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:33 | 200 | 19.1787469s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:51 | 200 | 34.6695476s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:51 | 200 | 35.0134998s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:55 | 200 | 38.2500917s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:56 | 200 | 39.2715207s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:57 | 200 | 38.5934882s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:57 | 200 | 31.2016054s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:57 | 200 | 29.8297176s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:01:58 | 200 | 22.276229s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:10 | 200 | 18.6322456s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:11 | 200 | 17.7311624s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:12 | 200 | 17.1294164s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:22 | 200 | 25.9562145s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:23 | 200 | 26.2928574s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:23 | 200 | 26.246486s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:24 | 200 | 26.1404894s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:24 | 200 | 24.9645489s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:25 | 200 | 13.946094s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:32 | 200 | 19.7153011s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:32 | 200 | 18.1777954s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:39 | 200 | 15.8829926s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:57 | 200 | 33.5728799s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:57 | 200 | 33.4293968s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:58 | 200 | 33.602965s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:58 | 200 | 33.22711s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:02:58 | 200 | 33.3534183s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:06 | 200 | 33.1451765s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:07 | 200 | 32.6952s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:12 | 200 | 33.2000604s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:12 | 200 | 15.1036657s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:13 | 200 | 14.9154479s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:13 | 200 | 15.2956261s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:17 | 200 | 19.221141s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:18 | 200 | 18.9975429s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:18 | 200 | 11.6206874s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:27 | 200 | 19.1478543s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:28 | 200 | 15.7163762s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:28 | 200 | 15.3731s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:31 | 200 | 17.8352622s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:31 | 200 | 13.9140013s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:37 | 200 | 19.3276808s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:40 | 200 | 22.2070437s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:47 | 200 | 25.6287252s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:03:52 | 200 | 24.2647177s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:05 | 200 | 36.917536s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:05 | 200 | 35.0700915s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:14 | 200 | 42.2497823s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:21 | 200 | 47.8815326s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:26 | 200 | 43.4704746s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:26 | 200 | 36.0079077s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:38 | 200 | 46.1981043s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:39 | 200 | 37.2753003s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:39 | 200 | 33.4966968s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:04:52 | 200 | 43.8259628s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:04 | 200 | 47.4231492s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:04 | 200 | 42.237473s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:06 | 200 | 39.8191358s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:27 | 200 | 57.241347s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:28 | 200 | 49.5518262s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:30 | 200 | 51.4002779s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:31 | 200 | 50.2276865s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:37 | 200 | 40.4380896s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:39 | 200 | 34.7083157s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:41 | 200 | 35.3096512s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:44 | 200 | 37.0388367s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:51 | 200 | 22.8285645s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:51 | 200 | 20.5931969s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:52 | 200 | 21.0213303s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:05:52 | 200 | 20.4407158s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:01 | 200 | 20.5462671s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:01 | 200 | 20.6507648s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:02 | 200 | 20.817072s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:10 | 200 | 24.0005782s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:18 | 200 | 26.0769959s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:18 | 200 | 26.2123861s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:19 | 200 | 26.1836617s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:28 | 200 | 32.1279714s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:57 | 200 | 56.4320001s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:57 | 200 | 56.3114884s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:06:58 | 200 | 56.0948686s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:00 | 200 | 50.3092406s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:05 | 200 | 47.4720869s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:06 | 200 | 47.3558396s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:06 | 200 | 47.4616152s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:06 | 200 | 36.3750998s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:07 | 200 | 9.3263468s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:07 | 200 | 9.6468807s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:12 | 200 | 10.8756874s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:13 | 200 | 8.9451012s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:14 | 200 | 9.0573822s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:21 | 200 | 15.923044s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:30 | 200 | 24.4052007s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:31 | 200 | 24.4629511s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:32 | 200 | 24.889315s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:32 | 200 | 24.9085642s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:33 | 200 | 21.1788296s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:33 | 200 | 19.1749496s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:41 | 200 | 25.7120963s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:50 | 200 | 26.1612324s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:50 | 200 | 19.2247581s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:51 | 200 | 19.1472359s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:51 | 200 | 18.9134697s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:52 | 200 | 18.8896652s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:57 | 200 | 23.2790589s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:57 | 200 | 24.0514389s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:58 | 200 | 16.3112882s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:58 | 200 | 8.2449185s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:59 | 200 | 7.8444989s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:07:59 | 200 | 7.8832151s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:07 | 200 | 15.3115099s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:08 | 200 | 14.6358286s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:09 | 200 | 11.3693623s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:10 | 200 | 11.4257491s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:10 | 200 | 11.7224619s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:11 | 200 | 11.9287845s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:11 | 200 | 11.8072743s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:11 | 200 | 11.4104105s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:13 | 200 | 4.3091511s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:25 | 200 | 15.3552921s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:36 | 200 | 26.1412592s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:36 | 200 | 26.0717817s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:36 | 200 | 25.728336s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:47 | 200 | 35.8426158s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:47 | 200 | 35.9831829s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:08:56 | 200 | 45.2825053s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:10 | 200 | 55.0737211s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:10 | 200 | 45.9718945s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:11 | 200 | 34.7427961s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:19 | 200 | 42.786923s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:20 | 200 | 41.7516558s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:30 | 200 | 42.7113927s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:30 | 200 | 41.3192201s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:31 | 200 | 32.1001665s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:31 | 200 | 20.6302286s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:42 | 200 | 28.9501277s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:42 | 200 | 27.9185354s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:43 | 200 | 23.254419s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:51 | 200 | 28.3388372s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:51 | 200 | 20.9284248s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:09:59 | 200 | 28.0269974s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:04 | 200 | 32.6688197s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:13 | 200 | 39.1574677s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:16 | 200 | 33.2382002s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:18 | 200 | 35.319227s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:26 | 200 | 42.0571349s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:30 | 200 | 38.4639473s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:37 | 200 | 44.2858736s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:49 | 200 | 48.5067302s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:50 | 200 | 45.7080302s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:10:58 | 200 | 42.1485884s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:06 | 200 | 49.6318814s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:06 | 200 | 45.5933094s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:14 | 200 | 44.1015082s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:14 | 200 | 44.8242701s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:23 | 200 | 43.4580618s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:23 | 200 | 33.8703372s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:25 | 200 | 33.7821646s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:30 | 200 | 28.3693544s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:36 | 200 | 30.2860833s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:36 | 200 | 28.2712697s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:36 | 200 | 21.8107263s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:37 | 200 | 21.4364714s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:47 | 200 | 23.5061453s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:56 | 200 | 29.9774553s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:57 | 200 | 29.9472143s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:57 | 200 | 27.3041774s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:57 | 200 | 21.1032275s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:11:58 | 200 | 21.4797233s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:05 | 200 | 26.1602867s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:15 | 200 | 35.6519832s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:16 | 200 | 26.6508127s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:16 | 200 | 18.9897219s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:20 | 200 | 21.9150194s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:24 | 200 | 25.9176353s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:25 | 200 | 25.4708062s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:25 | 200 | 25.3136197s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:31 | 200 | 23.8653501s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:32 | 200 | 16.046312s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:32 | 200 | 16.0046432s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:39 | 200 | 19.2864611s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:39 | 200 | 16.7249614s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:40 | 200 | 15.4535928s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:40 | 200 | 15.7325564s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:45 | 200 | 18.3026562s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:46 | 200 | 14.2096848s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:46 | 200 | 14.1193439s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:47 | 200 | 13.6149821s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:55 | 200 | 15.1731677s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:55 | 200 | 14.9538696s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:55 | 200 | 14.789857s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:56 | 200 | 13.5055632s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:56 | 200 | 9.9266323s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:57 | 200 | 9.8922871s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:57 | 200 | 9.4133061s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:58 | 200 | 8.3345268s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:12:58 | 200 | 3.0269569s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:08 | 200 | 12.8937472s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:11 | 200 | 15.0031601s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:11 | 200 | 15.003359s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:13 | 200 | 16.3332156s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:15 | 200 | 17.3393038s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:15 | 200 | 17.3210343s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:15 | 200 | 17.4560439s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:20 | 200 | 19.8742165s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:20 | 200 | 9.6115113s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:21 | 200 | 9.6281973s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:27 | 200 | 13.9323992s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:28 | 200 | 14.4559724s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:29 | 200 | 14.0311067s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:32 | 200 | 17.5463263s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:37 | 200 | 21.6499945s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:38 | 200 | 17.9316722s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:39 | 200 | 17.975839s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:39 | 200 | 17.2202615s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:47 | 200 | 20.3402928s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:50 | 200 | 22.4725126s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:57 | 200 | 28.7588945s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:58 | 200 | 23.6297778s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:58 | 200 | 20.2913961s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:58 | 200 | 19.4790033s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:59 | 200 | 19.8603915s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:13:59 | 200 | 20.3026232s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:02 | 200 | 12.9099122s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:02 | 200 | 10.0693181s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:02 | 200 | 4.7380811s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:03 | 200 | 4.9359784s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:03 | 200 | 4.8891478s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:10 | 200 | 10.7926851s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:10 | 200 | 10.7243704s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:15 | 200 | 16.0421761s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:23 | 200 | 21.4204642s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:29 | 200 | 26.9477361s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:30 | 200 | 26.9995937s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:30 | 200 | 27.0407261s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:37 | 200 | 33.3210819s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:38 | 200 | 27.6880321s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:47 | 200 | 33.8774887s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:54 | 200 | 37.4186824s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:56 | 200 | 30.1049773s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:56 | 200 | 26.1356125s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:14:56 | 200 | 26.1090711s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:04 | 200 | 32.9979672s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:15 | 200 | 37.7146257s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:16 | 200 | 36.5184936s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:16 | 200 | 27.4007848s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:16 | 200 | 20.139854s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:17 | 200 | 20.6061623s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:17 | 200 | 20.6878093s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:17 | 200 | 20.101682s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:18 | 200 | 12.5833534s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:18 | 200 | 2.5756499s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:15:57 | 200 | 41.1196331s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:07 | 200 | 50.0567856s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:07 | 200 | 50.4402237s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:09 | 200 | 52.2862702s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:10 | 200 | 52.413702s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:21 | 200 | 1m2s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:21 | 200 | 1m3s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:25 | 200 | 1m6s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:25 | 200 | 26.0960324s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:26 | 200 | 18.4696585s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:29 | 200 | 20.8487521s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:34 | 200 | 24.4787289s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:35 | 200 | 24.6858919s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:35 | 200 | 14.4475476s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:36 | 200 | 11.9342551s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:36 | 200 | 11.0277769s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:37 | 200 | 11.0219256s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:38 | 200 | 11.2544812s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:39 | 200 | 7.4761242s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:46 | 200 | 11.6755437s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:46 | 200 | 11.1900973s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:46 | 200 | 11.1481305s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:47 | 200 | 10.6983756s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:47 | 200 | 10.6466637s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:48 | 200 | 10.8488271s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:48 | 200 | 10.5959635s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:49 | 200 | 10.0955801s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:49 | 200 | 3.7037792s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:50 | 200 | 3.0914836s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:50 | 200 | 2.9092956s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:50 | 200 | 2.92132s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:52 | 200 | 4.3959212s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:53 | 200 | 4.3187315s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:53 | 200 | 4.5205864s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:53 | 200 | 4.7316855s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:59 | 200 | 9.3172062s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:16:59 | 200 | 9.3823278s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:00 | 200 | 9.7184775s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:06 | 200 | 15.6469664s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:10 | 200 | 17.2872562s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:18 | 200 | 25.168173s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:19 | 200 | 25.1023911s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:27 | 200 | 33.3258315s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:31 | 200 | 31.5463204s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:39 | 200 | 39.5836426s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:40 | 200 | 38.6471014s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:17:52 | 200 | 46.0315569s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:00 | 200 | 50.3543839s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:11 | 200 | 52.7144075s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:21 | 200 | 59.0684788s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:32 | 200 | 1m2s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:36 | 200 | 1m3s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:37 | 200 | 57.1840739s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:42 | 200 | 1m0s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:42 | 200 | 47.1910389s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:43 | 200 | 40.9255046s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:50 | 200 | 37.5316216s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:51 | 200 | 27.8350329s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:53 | 200 | 16.8169782s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:18:57 | 200 | 20.7607059s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:03 | 200 | 25.3932345s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:04 | 200 | 21.933779s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:14 | 200 | 32.0431698s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:15 | 200 | 31.5126732s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:15 | 200 | 24.2195605s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:19 | 200 | 27.1168045s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:21 | 200 | 25.0147414s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:25 | 200 | 26.6553029s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:26 | 200 | 22.1768927s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:26 | 200 | 21.8626216s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:37 | 200 | 22.8254974s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:38 | 200 | 22.424822s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:47 | 200 | 29.7330973s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:48 | 200 | 28.2911594s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:49 | 200 | 28.1402917s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:51 | 200 | 25.0006323s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:19:53 | 200 | 26.5527182s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:03 | 200 | 35.4422958s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:14 | 200 | 36.4823089s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:15 | 200 | 36.6415863s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:15 | 200 | 28.7203881s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:16 | 200 | 27.4093354s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:24 | 200 | 34.2830301s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:24 | 200 | 32.7302099s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:24 | 200 | 29.6528248s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:38 | 200 | 35.1379232s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:43 | 200 | 29.0812238s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:44 | 200 | 29.0472695s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:51 | 200 | 35.6186956s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:51 | 200 | 35.2608423s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:51 | 200 | 27.4274871s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:53 | 200 | 28.3042965s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:53 | 200 | 27.8320067s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:20:53 | 200 | 9.2008579s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:01 | 200 | 16.3910903s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:03 | 200 | 18.3314507s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:03 | 200 | 12.048828s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:03 | 200 | 11.8744904s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:04 | 200 | 12.00153s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:04 | 200 | 11.306053s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:04 | 200 | 10.6407538s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:05 | 200 | 10.1249916s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:05 | 200 | 4.3559833s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:11 | 200 | 8.2826799s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:12 | 200 | 8.4197098s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:12 | 200 | 8.5078686s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:12 | 200 | 8.3744763s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:13 | 200 | 8.5148421s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:13 | 200 | 8.8345285s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:19 | 200 | 14.3384429s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:26 | 200 | 20.8942433s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:27 | 200 | 15.1664173s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:33 | 200 | 20.9784967s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:33 | 200 | 20.8232324s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:34 | 200 | 21.2832393s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:34 | 200 | 21.1231618s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:34 | 200 | 21.0834055s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:35 | 200 | 14.3338435s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:37 | 200 | 9.2710995s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:42 | 200 | 13.9855379s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:43 | 200 | 10.5560545s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:50 | 200 | 17.2318653s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:53 | 200 | 19.3855754s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:55 | 200 | 21.6131022s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:21:56 | 200 | 21.4570317s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:04 | 200 | 28.7420202s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:07 | 200 | 30.0825455s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:10 | 200 | 27.7954363s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:13 | 200 | 28.6488656s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:16 | 200 | 24.7882435s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:20 | 200 | 26.6901728s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:25 | 200 | 29.6660868s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:25 | 200 | 29.6438291s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:26 | 200 | 20.3784339s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:26 | 200 | 17.3780794s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:27 | 200 | 16.7613186s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:27 | 200 | 12.2441737s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:28 | 200 | 11.0727247s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:28 | 200 | 8.2892206s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:28 | 200 | 2.8615311s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:28 | 200 | 2.6053136s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:29 | 200 | 2.413674s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:29 | 200 | 2.2997545s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:29 | 200 | 2.5449148s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:29 | 200 | 2.2292342s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:34 | 200 | 6.2671733s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:34 | 200 | 5.8411403s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:34 | 200 | 5.8605843s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:42 | 200 | 12.9417012s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:42 | 200 | 13.2269598s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:43 | 200 | 13.3643397s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:55 | 200 | 25.2024858s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:22:55 | 200 | 25.1773791s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:05 | 200 | 30.8941344s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:13 | 200 | 38.2821026s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:18 | 200 | 42.6127565s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:20 | 200 | 37.4288551s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:24 | 200 | 41.6045796s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:25 | 200 | 40.4940737s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:35 | 200 | 40.0588813s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:42 | 200 | 44.6042053s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:50 | 200 | 42.4784804s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:57 | 200 | 42.1588468s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:23:58 | 200 | 37.9380439s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:02 | 200 | 40.7285739s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:08 | 200 | 43.5707308s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:14 | 200 | 47.6888244s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:20 | 200 | 43.1228392s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:26 | 200 | 41.9901299s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:28 | 200 | 36.1732194s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:28 | 200 | 28.5807839s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:29 | 200 | 28.3135226s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:29 | 200 | 27.3860408s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:33 | 200 | 24.1990351s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:39 | 200 | 24.7876818s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:44 | 200 | 22.0807433s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:47 | 200 | 19.1365838s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:48 | 200 | 19.9762691s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:49 | 200 | 20.7501053s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:51 | 200 | 22.499866s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:24:58 | 200 | 28.4064634s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:05 | 200 | 31.374423s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:22 | 200 | 41.5632153s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:31 | 200 | 45.5124201s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:41 | 200 | 53.1355418s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:42 | 200 | 53.1540551s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:56 | 200 | 1m5s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:25:57 | 200 | 1m3s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:10 | 200 | 1m9s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:27 | 200 | 1m20s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:27 | 200 | 1m2s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:34 | 200 | 1m0s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:37 | 200 | 55.737322s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:42 | 200 | 58.0656368s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:47 | 200 | 49.7794631s | ::1 | POST "/api/generate"
time=2025-10-02T15:26:47.086-04:00 level=WARN source=runner.go:127 msg="truncating input prompt" limit=4096 prompt=56774 keep=5 new=4096
[GIN] 2025/10/02 - 15:26:49 | 200 | 49.4050185s | ::1 | POST "/api/generate"
time=2025-10-02T15:26:49.186-04:00 level=WARN source=runner.go:127 msg="truncating input prompt" limit=4096 prompt=30507 keep=5 new=4096
[GIN] 2025/10/02 - 15:26:51 | 200 | 39.1698307s | ::1 | POST "/api/generate"
time=2025-10-02T15:26:51.198-04:00 level=WARN source=runner.go:127 msg="truncating input prompt" limit=4096 prompt=38378 keep=5 new=4096
[GIN] 2025/10/02 - 15:26:53 | 200 | 25.5444947s | ::1 | POST "/api/generate"
time=2025-10-02T15:26:53.184-04:00 level=WARN source=runner.go:127 msg="truncating input prompt" limit=4096 prompt=24526 keep=5 new=4096
[GIN] 2025/10/02 - 15:26:55 | 200 | 25.8703666s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:26:59 | 200 | 22.9492557s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:01 | 200 | 21.8141877s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:04 | 200 | 20.5627429s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:04 | 200 | 15.5558382s | ::1 | POST "/api/generate"
time=2025-10-02T15:27:04.627-04:00 level=WARN source=runner.go:127 msg="truncating input prompt" limit=4096 prompt=22413 keep=5 new=4096
[GIN] 2025/10/02 - 15:27:06 | 200 | 17.5074507s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:10 | 200 | 9.8215958s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:15 | 200 | 12.4338915s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:16 | 200 | 11.6631099s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:16 | 200 | 11.2738212s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:18 | 200 | 5.7758308s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:18 | 200 | 1.9108046s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:18 | 200 | 1.1009243s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:19 | 200 | 1.4607715s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:27 | 200 | 9.2116934s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:40 | 200 | 21.7339738s | ::1 | POST "/api/generate"
[GIN] 2025/10/02 - 15:27:51 | 200 | 33.0791147s | ::1 | POST "/api/generate"
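
Note: every 200 line above is a completed client call to the public Ollama REST endpoint POST /api/generate, and the multi-second latencies are full generation times (GIN logs the duration of the whole handler, so the request only returns once the model has finished). For reference, a minimal client sketch in Go (the server's own implementation language) is shown below; the endpoint, port, and JSON fields come from the public Ollama API, while the model tag and prompt are purely illustrative.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// generateRequest mirrors the documented /api/generate JSON body.
type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Stream bool   `json:"stream"`
}

// generateResponse picks out just the generated text from the reply.
type generateResponse struct {
	Response string `json:"response"`
}

func main() {
	// Illustrative model tag and prompt; any locally pulled model works.
	body, _ := json.Marshal(generateRequest{
		Model:  "llama3.2",
		Prompt: "Why is the sky blue?",
		Stream: false, // one JSON object at completion instead of a token stream
	})
	resp, err := http.Post("http://127.0.0.1:11434/api/generate",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out generateResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	// Prints "200 OK" plus the completion, matching one access-log line above.
	fmt.Println(resp.Status, out.Response)
}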
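
The WARN lines from runner.go show a separate problem: prompts far beyond the 4096-token limit (e.g. prompt=56774) are silently cut down to new=4096 tokens, yet the request still returns 200, so the data loss is invisible to the caller. The documented per-request fix is the num_ctx option in the request's "options" object. A minimal sketch under assumptions: the 16384 value is only an example, and the real ceiling is the model's maximum context and available VRAM; the oversized prompt here is synthetic.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
)

func main() {
	// Synthetic stand-in for the oversized prompts behind the WARN lines.
	longPrompt := strings.Repeat("some very long document text ", 5000)

	// "options" overrides model settings for this request only; num_ctx
	// raises the context window so the prompt is not clipped at 4096.
	body, _ := json.Marshal(map[string]any{
		"model":   "llama3.2", // illustrative tag
		"prompt":  longPrompt,
		"stream":  false,
		"options": map[string]any{"num_ctx": 16384},
	})
	resp, err := http.Post("http://127.0.0.1:11434/api/generate",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	reply, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, len(reply), "bytes")
}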