-
-
Notifications
You must be signed in to change notification settings - Fork 97
/
Copy pathollama-curated.yaml
1130 lines (1116 loc) · 38.1 KB
/
ollama-curated.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Document header: OpenAPI version, API metadata, default server and the tag
# groups that operations below are filed under.
openapi: '3.0.3'
info:
  title: Ollama API
  description: API Spec for Ollama API. Please see https://github.com/jmorganca/ollama/blob/main/docs/api.md for more details.
  version: '0.1.36'
servers:
  - url: http://localhost:11434/api
    description: Ollama server URL
tags:
  - name: Completions
    description: Given a prompt, the model will generate a completion.
  - name: Chat
    description: Given a list of messages comprising a conversation, the model will return a response.
  - name: Embeddings
    description: Get a vector representation of a given input.
  - name: Models
    description: List and describe the various models available.
paths:
# GET /version: returns a VersionResponse as JSON.
/version:
  get:
    operationId: getVersion
    summary: Returns the version of the Ollama server.
    description: This endpoint returns the version of the Ollama server.
    responses:
      '200':
        description: Successful operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/VersionResponse'
# POST /generate: JSON request in, newline-delimited JSON response out.
/generate:
  post:
    tags:
      - Completions
    operationId: generateCompletion
    summary: Generate a response for a given prompt with a provided model.
    description: The final response object will include statistics and additional data from the request.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/GenerateCompletionRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/x-ndjson:
            schema:
              $ref: '#/components/schemas/GenerateCompletionResponse'
# POST /chat: JSON request in, newline-delimited JSON response out.
/chat:
  post:
    tags:
      - Chat
    operationId: generateChatCompletion
    summary: Generate the next message in a chat with a provided model.
    description: This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/GenerateChatCompletionRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/x-ndjson:
            schema:
              $ref: '#/components/schemas/GenerateChatCompletionResponse'
# POST /embeddings: JSON request and JSON response.
/embeddings:
  post:
    tags:
      - Embeddings
    operationId: generateEmbedding
    summary: Generate embeddings from a model.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/GenerateEmbeddingRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateEmbeddingResponse'
# POST /create: JSON request in, newline-delimited JSON status objects out.
/create:
  post:
    tags:
      - Models
    operationId: createModel
    summary: Create a model from a Modelfile.
    description: It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using Create a Blob and the value to the path indicated in the response.
    requestBody:
      description: Create a new model from a Modelfile.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateModelRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/x-ndjson:
            schema:
              $ref: '#/components/schemas/CreateModelResponse'
# GET /tags: returns a ModelsResponse as JSON.
/tags:
  get:
    tags:
      - Models
    operationId: listModels
    summary: List models that are available locally.
    responses:
      '200':
        description: Successful operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ModelsResponse'
# GET /ps: returns a ProcessResponse as JSON.
/ps:
  get:
    tags:
      - Models
    operationId: listRunningModels
    summary: List models that are running.
    responses:
      '200':
        description: Successful operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ProcessResponse'
# POST /show: takes a ModelInfoRequest and returns a ModelInfo, both JSON.
/show:
  post:
    tags:
      - Models
    operationId: showModelInfo
    summary: Show details about a model including modelfile, template, parameters, license, and system prompt.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ModelInfoRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ModelInfo'
# POST /copy: takes a CopyModelRequest; the 200 response declares no body.
/copy:
  post:
    tags:
      - Models
    operationId: copyModel
    summary: Creates a model with another name from an existing model.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CopyModelRequest'
    responses:
      '200':
        description: Successful operation.
# DELETE /delete: takes a DeleteModelRequest; the 200 response declares no body.
/delete:
  delete:
    tags:
      - Models
    operationId: deleteModel
    summary: Delete a model and its data.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/DeleteModelRequest'
    responses:
      '200':
        description: Successful operation.
# POST /pull: JSON request in, newline-delimited JSON progress objects out.
/pull:
  post:
    tags:
      - Models
    operationId: pullModel
    summary: Download a model from the ollama library.
    description: Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/PullModelRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/x-ndjson:
            schema:
              $ref: '#/components/schemas/PullModelResponse'
# POST /push: JSON request in, newline-delimited JSON progress objects out.
/push:
  post:
    tags:
      - Models
    operationId: pushModel
    summary: Upload a model to a model library.
    description: Requires registering for ollama.ai and adding a public key first.
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/PushModelRequest'
    responses:
      '200':
        description: Successful operation.
        content:
          application/x-ndjson:
            schema:
              $ref: '#/components/schemas/PushModelResponse'
# /blobs/{digest}: HEAD checks for a blob, POST uploads one as raw bytes.
# Both operations take the same required `digest` path parameter.
/blobs/{digest}:
  head:
    tags:
      - Models
    operationId: checkBlob
    summary: Ensures that the file blob used for a FROM or ADAPTER field exists on the server.
    description: This is checking your Ollama server and not Ollama.ai.
    parameters:
      - name: digest
        in: path
        required: true
        description: the SHA256 digest of the blob
        schema:
          type: string
        example: sha256:c8edda1f17edd2f1b60253b773d837bda7b9d249a61245931a4d7c9a8d350250
    responses:
      '200':
        description: Blob exists on the server
      '404':
        description: Blob was not found
  post:
    tags:
      - Models
    operationId: createBlob
    summary: Create a blob from a file. Returns the server file path.
    parameters:
      - name: digest
        in: path
        required: true
        description: the SHA256 digest of the blob
        schema:
          type: string
        example: sha256:c8edda1f17edd2f1b60253b773d837bda7b9d249a61245931a4d7c9a8d350250
    requestBody:
      content:
        application/octet-stream:
          schema:
            type: string
            format: binary
    responses:
      '201':
        description: Blob was successfully created
components:
schemas:
# Request body for the generate endpoint. `model` and `prompt` are required.
# This schema also defines the `model_name`, `stream` and `keep_alive` YAML
# anchors that later schemas in this file alias, so the anchors must stay here.
GenerateCompletionRequest:
  type: object
  description: Request class for the generate endpoint.
  properties:
    model:
      type: string
      description: &model_name |
        The model name.
        Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
      example: llama3.2
    prompt:
      type: string
      description: The prompt to generate a response.
      example: Why is the sky blue?
    suffix:
      type: string
      description: The text that comes after the inserted text.
    images:
      type: array
      description: (optional) a list of Base64-encoded images to include in the message (for multimodal models such as llava)
      items:
        type: string
        description: Base64-encoded image (for multimodal models such as llava)
        example: iVBORw0KGgoAAAANSUhEUgAAAAkAAAANCAIAAAD0YtNRAAAABnRSTlMA/AD+APzoM1ogAAAAWklEQVR4AWP48+8PLkR7uUdzcMvtU8EhdykHKAciEXL3pvw5FQIURaBDJkARoDhY3zEXiCgCHbNBmAlUiyaBkENoxZSDWnOtBmoAQu7TnT+3WuDOA7KBIkAGAGwiNeqjusp/AAAAAElFTkSuQmCC
    system:
      type: string
      description: The system prompt to use (overrides what is defined in the Modelfile).
    template:
      type: string
      description: The full prompt or prompt template (overrides what is defined in the Modelfile).
    context:
      type: array
      description: The context parameter returned from a previous request to [generateCompletion], this can be used to keep a short conversational memory.
      items:
        type: integer
        format: int64
    options:
      $ref: '#/components/schemas/RequestOptions'
    format:
      $ref: '#/components/schemas/ResponseFormat'
    raw:
      type: boolean
      description: |
        If `true` no formatting will be applied to the prompt and no context will be returned.
        You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
    stream:
      type: boolean
      description: &stream |
        If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
      default: false
    keep_alive: &keep_alive
      type: integer
      nullable: true
      description: |
        How long (in minutes) to keep the model loaded in memory.
        - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
        - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
        - If set to 0, the model will be unloaded immediately once finished.
        - If not set, the model will stay loaded for 5 minutes by default
  required:
    - model
    - prompt
# Optional per-request model parameters. Every property is nullable and none
# is required. Default values are written uniformly as "(Default: …)".
RequestOptions:
  type: object
  description: Additional model parameters listed in the documentation for the Modelfile such as `temperature`.
  properties:
    num_keep:
      type: integer
      nullable: true
      description: |
        Number of tokens to keep from the prompt.
    seed:
      type: integer
      nullable: true
      description: |
        Sets the random number seed to use for generation. Setting this to a specific number will make the model
        generate the same text for the same prompt. (Default: 0)
    num_predict:
      type: integer
      nullable: true
      description: |
        Maximum number of tokens to predict when generating text.
        (Default: 128, -1 = infinite generation, -2 = fill context)
    top_k:
      type: integer
      nullable: true
      description: |
        Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers,
        while a lower value (e.g. 10) will be more conservative. (Default: 40)
    top_p:
      type: number
      format: float
      nullable: true
      description: |
        Works together with top_k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value
        (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
    min_p:
      type: number
      format: float
      nullable: true
      description: |
        Alternative to the top_p, and aims to ensure a balance of quality and variety. min_p represents the minimum
        probability for a token to be considered, relative to the probability of the most likely token. For
        example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less
        than 0.05*0.9=0.045 are filtered out. (Default: 0.0)
    tfs_z:
      type: number
      format: float
      nullable: true
      description: |
        Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value
        (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (Default: 1)
    typical_p:
      type: number
      format: float
      nullable: true
      description: |
        Typical p is used to reduce the impact of less probable tokens from the output. (Default: 1)
    repeat_last_n:
      type: integer
      nullable: true
      description: |
        Sets how far back for the model to look back to prevent repetition.
        (Default: 64, 0 = disabled, -1 = num_ctx)
    temperature:
      type: number
      format: float
      nullable: true
      description: |
        The temperature of the model. Increasing the temperature will make the model answer more creatively.
        (Default: 0.8)
    repeat_penalty:
      type: number
      format: float
      nullable: true
      description: |
        Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more
        strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
    presence_penalty:
      type: number
      format: float
      nullable: true
      description: |
        Positive values penalize new tokens based on whether they appear in the text so far, increasing the
        model's likelihood to talk about new topics. (Default: 0)
    frequency_penalty:
      type: number
      format: float
      nullable: true
      description: |
        Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the
        model's likelihood to repeat the same line verbatim. (Default: 0)
    mirostat:
      type: integer
      nullable: true
      description: |
        Enable Mirostat sampling for controlling perplexity.
        (Default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
    mirostat_tau:
      type: number
      format: float
      nullable: true
      description: |
        Controls the balance between coherence and diversity of the output. A lower value will result in more
        focused and coherent text. (Default: 5.0)
    mirostat_eta:
      type: number
      format: float
      nullable: true
      description: |
        Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate
        will result in slower adjustments, while a higher learning rate will make the algorithm more responsive.
        (Default: 0.1)
    penalize_newline:
      type: boolean
      nullable: true
      description: |
        Penalize newlines in the output. (Default: true)
    stop:
      type: array
      nullable: true
      description: |
        Sequences where the API will stop generating further tokens. The returned text will not contain the stop
        sequence.
      items:
        type: string
    numa:
      type: boolean
      nullable: true
      description: |
        Enable NUMA support. (Default: false)
    num_ctx:
      type: integer
      nullable: true
      description: |
        Sets the size of the context window used to generate the next token. (Default: 2048)
    num_batch:
      type: integer
      nullable: true
      description: |
        Sets the number of batches to use for generation. (Default: 512)
    num_gpu:
      type: integer
      nullable: true
      description: |
        The number of layers to send to the GPU(s).
        On macOS it defaults to 1 to enable metal support, 0 to disable.
    main_gpu:
      type: integer
      nullable: true
      description: |
        The GPU to use for the main model. (Default: 0)
    low_vram:
      type: boolean
      nullable: true
      description: |
        Enable low VRAM mode. (Default: false)
    f16_kv:
      type: boolean
      nullable: true
      description: |
        Enable f16 key/value. (Default: true)
    logits_all:
      type: boolean
      nullable: true
      description: |
        Enable logits all. (Default: false)
    vocab_only:
      type: boolean
      nullable: true
      description: |
        Enable vocab only. (Default: false)
    use_mmap:
      type: boolean
      nullable: true
      description: |
        Enable mmap. (Default: false)
    use_mlock:
      type: boolean
      nullable: true
      description: |
        Enable mlock. (Default: false)
    num_thread:
      type: integer
      nullable: true
      description: |
        Sets the number of threads to use during computation. By default, Ollama will detect this for optimal
        performance. It is recommended to set this value to the number of physical CPU cores your system has
        (as opposed to the logical number of cores).
# String enum for the `format` request field; `json` is the only value.
ResponseFormat:
  type: string
  description: |
    The format to return a response in. Currently the only accepted value is json.
    Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.
    Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
  enum:
    - json
# Body returned by the version endpoint: an object with one string field.
VersionResponse:
  description: The response class for the version endpoint.
  type: object
  properties:
    version:
      description: The version of the Ollama server.
      type: string
# Response object for the generate endpoint. No property is marked required.
GenerateCompletionResponse:
  type: object
  description: The response class for the generate endpoint.
  properties:
    model:
      type: string
      description: *model_name
      example: llama3.2
    created_at:
      type: string
      format: date-time
      description: Date on which a model was created.
      # Quoted so YAML 1.1 loaders keep the example a string, not a datetime.
      example: '2023-08-04T19:22:45.499127Z'
    response:
      type: string
      description: The response for a given prompt with a provided model.
      example: The sky appears blue because of a phenomenon called Rayleigh scattering.
    done:
      type: boolean
      description: Whether the response has completed.
      example: true
    context:
      type: array
      description: |
        An encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory.
      items:
        type: integer
        format: int64
      example: [1, 2, 3]
    total_duration:
      type: integer
      format: int64
      description: Time spent in nanoseconds generating the response.
      example: 5589157167
    load_duration:
      type: integer
      format: int64
      description: Time spent in nanoseconds loading the model.
      example: 3013701500
    prompt_eval_count:
      type: integer
      description: Number of tokens in the prompt.
      example: 46
    prompt_eval_duration:
      type: integer
      format: int64
      description: Time spent in nanoseconds evaluating the prompt.
      example: 1160282000
    eval_count:
      type: integer
      description: Number of tokens in the response.
      example: 113
    eval_duration:
      type: integer
      format: int64
      description: Time in nanoseconds spent generating the response.
      example: 1325948000
# Request body for the chat endpoint. `model` and `messages` are required.
# The model description, stream description and keep_alive definition are
# aliases of anchors defined elsewhere in this document.
GenerateChatCompletionRequest:
  description: Request class for the chat endpoint.
  type: object
  required:
    - model
    - messages
  properties:
    model:
      type: string
      description: *model_name
      example: llama3.2
    messages:
      type: array
      description: The messages of the chat, this can be used to keep a chat memory
      items:
        $ref: '#/components/schemas/Message'
    format:
      $ref: '#/components/schemas/ResponseFormat'
    options:
      $ref: '#/components/schemas/RequestOptions'
    stream:
      type: boolean
      description: *stream
      default: false
    keep_alive: *keep_alive
    tools:
      type: array
      description: A list of tools the model may call.
      items:
        $ref: '#/components/schemas/Tool'
# Response object for the chat endpoint. `model`, `created_at`, `message`
# and `done` are required; the remaining fields are optional.
GenerateChatCompletionResponse:
  type: object
  description: The response class for the chat endpoint.
  properties:
    message:
      $ref: '#/components/schemas/Message'
    model:
      type: string
      description: *model_name
      example: llama3.2
    created_at:
      type: string
      format: date-time
      description: Date on which a model was created.
      # Quoted so YAML 1.1 loaders keep the example a string, not a datetime.
      example: '2023-08-04T19:22:45.499127Z'
    done:
      type: boolean
      description: Whether the response has completed.
      example: true
    done_reason:
      $ref: '#/components/schemas/DoneReason'
    total_duration:
      type: integer
      format: int64
      description: Time spent in nanoseconds generating the response.
      example: 5589157167
    load_duration:
      type: integer
      format: int64
      description: Time spent in nanoseconds loading the model.
      example: 3013701500
    prompt_eval_count:
      type: integer
      description: Number of tokens in the prompt.
      example: 46
    prompt_eval_duration:
      type: integer
      format: int64
      description: Time spent in nanoseconds evaluating the prompt.
      example: 1160282000
    eval_count:
      type: integer
      description: Number of tokens in the response.
      example: 113
    eval_duration:
      type: integer
      format: int64
      description: Time in nanoseconds spent generating the response.
      example: 1325948000
  required:
    - model
    - created_at
    - message
    - done
# String enum referenced by the chat response's `done_reason` field.
DoneReason:
  description: Reason why the model is done generating a response.
  type: string
  enum:
    # The generation hit a stop token.
    - stop
    # The maximum num_tokens was reached.
    - length
    # The request was sent with an empty body to load the model.
    - load
# One chat message. `role` and `content` are required; `images` and
# `tool_calls` are optional arrays.
Message:
  description: A message in the chat endpoint
  type: object
  required:
    - role
    - content
  properties:
    role:
      type: string
      description: The role of the message
      enum:
        - system
        - user
        - assistant
        - tool
    content:
      type: string
      description: The content of the message
      example: Why is the sky blue?
    images:
      type: array
      description: (optional) a list of Base64-encoded images to include in the message (for multimodal models such as llava)
      items:
        type: string
        description: Base64-encoded image (for multimodal models such as llava)
        example: iVBORw0KGgoAAAANSUhEUgAAAAkAAAANCAIAAAD0YtNRAAAABnRSTlMA/AD+APzoM1ogAAAAWklEQVR4AWP48+8PLkR7uUdzcMvtU8EhdykHKAciEXL3pvw5FQIURaBDJkARoDhY3zEXiCgCHbNBmAlUiyaBkENoxZSDWnOtBmoAQu7TnT+3WuDOA7KBIkAGAGwiNeqjusp/AAAAAElFTkSuQmCC
    tool_calls:
      type: array
      description: A list of tools the model wants to call.
      items:
        $ref: '#/components/schemas/ToolCall'
# A tool offered to the model; `function` is the only declared tool type.
Tool:
  description: A tool the model may call.
  type: object
  properties:
    type:
      description: The type of tool.
      type: string
      default: function
      enum:
        - function
    function:
      $ref: '#/components/schemas/ToolFunction'
# Definition of a callable function; all three properties are required.
ToolFunction:
  description: A function that the model may call.
  type: object
  required:
    - name
    - description
    - parameters
  properties:
    name:
      type: string
      description: The name of the function to be called.
    description:
      type: string
      description: |
        A description of what the function does, used by the model to choose when and how to call the function.
    parameters:
      $ref: '#/components/schemas/ToolFunctionParams'
# Free-form object: no properties are declared and any extra keys are allowed.
ToolFunctionParams:
  type: object
  description: The parameters the function accepts, described as a JSON Schema object.
  additionalProperties: true
# Wrapper object whose single property points at the function being called.
ToolCall:
  description: The tool the model wants to call.
  type: object
  properties:
    function:
      $ref: '#/components/schemas/ToolCallFunction'
# Function name plus its arguments; both properties are required.
ToolCallFunction:
  description: The function the model wants to call.
  type: object
  required:
    - name
    - arguments
  properties:
    name:
      type: string
      description: The name of the function to be called.
    arguments:
      $ref: '#/components/schemas/ToolCallFunctionArgs'
# Free-form object: no properties are declared and any extra keys are allowed.
ToolCallFunctionArgs:
  description: The arguments to pass to the function.
  type: object
  additionalProperties: true
# Request body for the embeddings endpoint. `model` and `prompt` are required.
GenerateEmbeddingRequest:
  type: object
  description: Generate embeddings from a model.
  required:
    - model
    - prompt
  properties:
    model:
      type: string
      description: *model_name
      example: llama3.2
    prompt:
      type: string
      description: Text to generate embeddings for.
      example: 'Here is an article about llamas...'
    options:
      $ref: '#/components/schemas/RequestOptions'
    keep_alive: *keep_alive
# Response body for the embeddings endpoint: a single array of doubles.
GenerateEmbeddingResponse:
  type: object
  description: Returns the embedding information.
  properties:
    embedding:
      type: array
      description: The embedding for the prompt.
      items:
        type: number
        format: double
      # Shortened sample. Every element must be a number, so no "..."
      # placeholder is used (it would parse as a string).
      example: [0.5670403838157654, 0.009260174818336964]
# Request body for the create-model endpoint. `model` and `modelfile` are
# required.
CreateModelRequest:
  type: object
  description: Create model request object.
  properties:
    model:
      type: string
      description: *model_name
      example: mario
    modelfile:
      type: string
      description: The contents of the Modelfile.
      # Double-quoted so that \n is a real line break between the two
      # Modelfile instructions rather than a literal backslash + "n".
      example: "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
    path:
      type: string
      description: Path to the Modelfile (optional)
    quantize:
      type: string
      nullable: true
      description: The quantization level of the model.
    stream:
      type: boolean
      description: *stream
      default: false
  required:
    - model
    - modelfile
# One status object emitted while a model is being created.
CreateModelResponse:
  type: object
  description: Response object for creating a model. When finished, `status` is `success`.
  properties:
    status:
      $ref: '#/components/schemas/CreateModelStatus'
# String enum of the status values declared for model creation.
CreateModelStatus:
  description: Status creating the model
  type: string
  enum:
    - creating system layer
    - parsing modelfile
    - success
# Body returned by the list-models endpoint: an array of Model objects.
ModelsResponse:
  type: object
  description: Response class for the list models endpoint.
  properties:
    models:
      description: List of models available locally.
      type: array
      items:
        $ref: '#/components/schemas/Model'
# One entry of the local model list. No property is marked required.
Model:
  type: object
  description: A model available locally.
  properties:
    model:
      type: string
      description: *model_name
      example: llama3.2
    modified_at:
      type: string
      format: date-time
      description: Model modification date.
      # Quoted so YAML 1.1 loaders keep the example a string, not a datetime.
      example: '2023-08-02T17:02:23.713454393-07:00'
    size:
      type: integer
      format: int64
      description: Size of the model on disk.
      example: 7323310500
    digest:
      type: string
      description: The model's digest.
      example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
    details:
      $ref: '#/components/schemas/ModelDetails'
# Descriptive metadata for a model; every property is an optional string
# except `families`, which is an array of strings.
ModelDetails:
  description: Details about a model.
  type: object
  properties:
    parent_model:
      description: The parent model of the model.
      type: string
    format:
      description: The format of the model.
      type: string
    family:
      description: The family of the model.
      type: string
    families:
      description: The families of the model.
      type: array
      items:
        type: string
    parameter_size:
      description: The size of the model's parameters.
      type: string
    quantization_level:
      description: The quantization level of the model.
      type: string
# Model metadata keyed by dotted `general.*` property names.
ModelInformation:
  type: object
  description: Details about a model.
  properties:
    general.architecture:
      type: string
      description: The architecture of the model.
    general.file_type:
      type: integer
      nullable: true
      description: The file type of the model.
    general.parameter_count:
      type: integer
      format: int64
      nullable: true
      description: The number of parameters in the model.
    general.quantization_version:
      type: integer
      nullable: true
      description: The quantization version of the model.
# Body returned by the running-models endpoint: an array of ProcessModel.
ProcessResponse:
  description: Response class for the list running models endpoint.
  type: object
  properties:
    models:
      description: List of running models.
      type: array
      items:
        $ref: '#/components/schemas/ProcessModel'
# One entry of the running-model list. No property is marked required.
ProcessModel:
  type: object
  description: A model that is currently loaded.
  properties:
    model:
      type: string
      description: *model_name
      example: llama3.2
    size:
      type: integer
      format: int64
      description: Size of the model on disk.
      example: 7323310500
    digest:
      type: string
      description: The model's digest.
      example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
    details:
      $ref: '#/components/schemas/ModelDetails'
    expires_at:
      type: string
      format: date-time
      # Quoted so YAML 1.1 loaders keep the example a string, not a datetime.
      example: '2023-08-02T17:02:23.713454393-07:00'
    size_vram:
      type: integer
      format: int64
      description: Size of the model in VRAM.
      example: 7323310500
# Request body for the show endpoint; `model` is the only, required, field.
ModelInfoRequest:
  type: object
  description: Request class for the show model info endpoint.
  required:
    - model
  properties:
    model:
      type: string
      description: *model_name
      example: llama3.2
# Body returned by the show endpoint. All string fields are nullable.
# The multi-line examples are double-quoted so that the \n and \" escapes
# are interpreted as newlines and quotes instead of being kept literally.
ModelInfo:
  type: object
  description: Details about a model including modelfile, template, parameters, license, and system prompt.
  properties:
    license:
      type: string
      nullable: true
      description: The model's license.
      example: <contents of license block>
    modelfile:
      type: string
      nullable: true
      description: The modelfile associated with the model.
      example: "Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama3:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n"
    parameters:
      type: string
      nullable: true
      description: The model parameters.
      example: "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>"
    template:
      type: string
      nullable: true
      description: The prompt template for the model.
      example: "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST]"
    system:
      type: string
      nullable: true
      description: The system prompt for the model.
    details:
      $ref: '#/components/schemas/ModelDetails'
    model_info:
      $ref: '#/components/schemas/ModelInformation'
    messages:
      type: array
      nullable: true
      description: The default messages for the model.
      items:
        $ref: '#/components/schemas/Message'
CopyModelRequest:
description: Request class for copying a model.
type: object
properties:
source:
type: string