# packages/ollama_dart/oas/ollama-curated.yaml

openapi: 3.0.3

info:
  title: Ollama API
  description: API Spec for Ollama API. Please see https://github.com/jmorganca/ollama/blob/main/docs/api.md for more details.
  version: 0.1.36

servers:
  - url: http://localhost:11434/api
    description: Ollama server URL

tags:
  - name: Completions
    description: Given a prompt, the model will generate a completion.
  - name: Chat
    description: Given a list of messages comprising a conversation, the model will return a response.
  - name: Embeddings
    description: Get a vector representation of a given input.
  - name: Models
    description: List and describe the various models available.

paths:
  # GET /version: takes no parameters and no request body. The only declared
  # response is a 200 carrying a JSON VersionResponse. No tags are attached.
  /version:
    get:
      operationId: getVersion
      summary: Returns the version of the Ollama server.
      description: This endpoint returns the version of the Ollama server.
      responses:
        '200':
          description: Successful operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VersionResponse'
  # POST /generate: accepts a JSON GenerateCompletionRequest body. The 200
  # response is declared as application/x-ndjson (newline-delimited JSON)
  # with GenerateCompletionResponse as its schema.
  /generate:
    post:
      operationId: generateCompletion
      tags:
        - Completions
      summary: Generate a response for a given prompt with a provided model.
      description: The final response object will include statistics and additional data from the request.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateCompletionRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/GenerateCompletionResponse'
  # POST /chat: accepts a JSON GenerateChatCompletionRequest body. The 200
  # response is declared as application/x-ndjson (newline-delimited JSON)
  # with GenerateChatCompletionResponse as its schema.
  /chat:
    post:
      operationId: generateChatCompletion
      tags:
        - Chat
      summary: Generate the next message in a chat with a provided model.
      description: This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateChatCompletionRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/GenerateChatCompletionResponse'
  # POST /embeddings: JSON in (GenerateEmbeddingRequest), JSON out
  # (GenerateEmbeddingResponse). The operation has a summary but no
  # long-form description.
  /embeddings:
    post:
      operationId: generateEmbedding
      tags:
        - Embeddings
      summary: Generate embeddings from a model.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateEmbeddingRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateEmbeddingResponse'
  # POST /create: accepts a JSON CreateModelRequest body. The 200 response is
  # declared as application/x-ndjson with CreateModelResponse as its schema.
  /create:
    post:
      operationId: createModel
      tags:
        - Models
      summary: Create a model from a Modelfile.
      description: It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using Create a Blob and the value to the path indicated in the response.
      requestBody:
        description: Create a new model from a Modelfile.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateModelRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/CreateModelResponse'
  # GET /tags: no parameters or request body; answers with a JSON
  # ModelsResponse.
  /tags:
    get:
      operationId: listModels
      tags:
        - Models
      summary: List models that are available locally.
      responses:
        '200':
          description: Successful operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelsResponse'
  # GET /ps: no parameters or request body; answers with a JSON
  # ProcessResponse.
  /ps:
    get:
      operationId: listRunningModels
      tags:
        - Models
      summary: List models that are running.
      responses:
        '200':
          description: Successful operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ProcessResponse'
  # POST /show: JSON in (ModelInfoRequest), JSON out (ModelInfo).
  /show:
    post:
      operationId: showModelInfo
      tags:
        - Models
      summary: Show details about a model including modelfile, template, parameters, license, and system prompt.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ModelInfoRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelInfo'
  # POST /copy: accepts a JSON CopyModelRequest body. The 200 response
  # declares no content, so success is signalled by the status code alone.
  /copy:
    post:
      operationId: copyModel
      tags:
        - Models
      summary: Creates a model with another name from an existing model.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CopyModelRequest'
      responses:
        '200':
          description: Successful operation.
  # DELETE /delete: the model to remove is named in a JSON DeleteModelRequest
  # body rather than in the URL. The 200 response declares no content.
  /delete:
    delete:
      operationId: deleteModel
      tags:
        - Models
      summary: Delete a model and its data.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DeleteModelRequest'
      responses:
        '200':
          description: Successful operation.
  # POST /pull: accepts a JSON PullModelRequest body. The 200 response is
  # declared as application/x-ndjson with PullModelResponse as its schema.
  /pull:
    post:
      operationId: pullModel
      tags:
        - Models
      summary: Download a model from the ollama library.
      description: Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PullModelRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/PullModelResponse'
  # POST /push: accepts a JSON PushModelRequest body. The 200 response is
  # declared as application/x-ndjson with PushModelResponse as its schema.
  /push:
    post:
      operationId: pushModel
      tags:
        - Models
      summary: Upload a model to a model library.
      description: Requires registering for ollama.ai and adding a public key first.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PushModelRequest'
      responses:
        '200':
          description: Successful operation.
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/PushModelResponse'
  # Blob endpoints keyed by the required `digest` path parameter.
  #   HEAD - existence check; declares 200 (exists) and 404 (not found).
  #   POST - uploads raw bytes as application/octet-stream; declares 201.
  # Both operations repeat the same `digest` parameter definition.
  /blobs/{digest}:
    head:
      operationId: checkBlob
      tags:
        - Models
      summary: Ensures that the file blob used for a FROM or ADAPTER field exists on the server.
      description: This is checking your Ollama server and not Ollama.ai.
      parameters:
        - in: path
          name: digest
          schema:
            type: string
          required: true
          description: the SHA256 digest of the blob
          example: sha256:c8edda1f17edd2f1b60253b773d837bda7b9d249a61245931a4d7c9a8d350250
      responses:
        '200':
          description: Blob exists on the server
        '404':
          description: Blob was not found
    post:
      operationId: createBlob
      tags:
        - Models
      summary: Create a blob from a file. Returns the server file path.
      parameters:
        - in: path
          name: digest
          schema:
            type: string
          required: true
          description: the SHA256 digest of the blob
          example: sha256:c8edda1f17edd2f1b60253b773d837bda7b9d249a61245931a4d7c9a8d350250
      requestBody:
        content:
          # Raw binary upload: the body is the blob itself, not JSON.
          application/octet-stream:
            schema:
              type: string
              format: binary
      responses:
        '201':
          description: Blob was successfully created

components:
  schemas:
    # Request body of POST /generate. Only `model` and `prompt` are required.
    #
    # This schema also defines three YAML anchors that other schemas in this
    # file reuse through aliases, so they must stay defined here:
    #   &model_name - description text of `model`
    #   &stream     - description text of `stream`
    #   &keep_alive - the whole `keep_alive` property definition
    GenerateCompletionRequest:
      type: object
      description: Request class for the generate endpoint.
      properties:
        model:
          type: string
          description: &model_name |
            The model name.

            Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
          example: llama3.2
        prompt:
          type: string
          description: The prompt to generate a response.
          example: Why is the sky blue?
        suffix:
          type: string
          description: The text that comes after the inserted text.
        images:
          type: array
          description: (optional) a list of Base64-encoded images to include in the message (for multimodal models such as llava)
          items:
            type: string
            description: Base64-encoded image (for multimodal models such as llava)
            example: iVBORw0KGgoAAAANSUhEUgAAAAkAAAANCAIAAAD0YtNRAAAABnRSTlMA/AD+APzoM1ogAAAAWklEQVR4AWP48+8PLkR7uUdzcMvtU8EhdykHKAciEXL3pvw5FQIURaBDJkARoDhY3zEXiCgCHbNBmAlUiyaBkENoxZSDWnOtBmoAQu7TnT+3WuDOA7KBIkAGAGwiNeqjusp/AAAAAElFTkSuQmCC
        system:
          type: string
          description: The system prompt to use (overrides what is defined in the Modelfile).
        template:
          type: string
          description: The full prompt or prompt template (overrides what is defined in the Modelfile).
        context:
          type: array
          description: The context parameter returned from a previous request to [generateCompletion], this can be used to keep a short conversational memory.
          items:
            type: integer
            format: int64
        options:
          $ref: '#/components/schemas/RequestOptions'
        format:
          $ref: '#/components/schemas/ResponseFormat'
        raw:
          type: boolean
          description: |
            If `true` no formatting will be applied to the prompt and no context will be returned.

            You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
        stream:
          type: boolean
          description: &stream |
            If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
          default: false
        keep_alive: &keep_alive
          type: integer
          nullable: true
          description: |
            How long (in minutes) to keep the model loaded in memory.

            - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
            - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
            - If set to 0, the model will be unloaded immediately once finished.
            - If not set, the model will stay loaded for 5 minutes by default
      required:
        - model
        - prompt
    # Optional per-request model parameters, referenced as `options` by the
    # request schemas in this file. Every property is declared `nullable` and
    # the schema has no `required` list, so any subset may be sent.
    #
    # Descriptions use literal block scalars (`|`); keep lines free of
    # trailing spaces, since whitespace before a line break in a block scalar
    # becomes part of the description value.
    RequestOptions:
      type: object
      description: Additional model parameters listed in the documentation for the Modelfile such as `temperature`.
      properties:
        num_keep:
          type: integer
          nullable: true
          description: |
            Number of tokens to keep from the prompt.
        seed:
          type: integer
          nullable: true
          description: |
            Sets the random number seed to use for generation. Setting this to a specific number will make the model
            generate the same text for the same prompt. (Default: 0)
        num_predict:
          type: integer
          nullable: true
          description: |
            Maximum number of tokens to predict when generating text.
            (Default: 128, -1 = infinite generation, -2 = fill context)
        top_k:
          type: integer
          nullable: true
          description: |
            Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers,
            while a lower value (e.g. 10) will be more conservative. (Default: 40)
        top_p:
          type: number
          format: float
          nullable: true
          description: |
            Works together with top_k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value
            (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
        min_p:
          type: number
          format: float
          nullable: true
          description: |
            Alternative to the top_p, and aims to ensure a balance of quality and variety. min_p represents the minimum
            probability for a token to be considered, relative to the probability of the most likely token. For
            example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less
            than 0.05*0.9=0.045 are filtered out. (Default: 0.0)
        tfs_z:
          type: number
          format: float
          nullable: true
          description: |
            Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value
            (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)
        typical_p:
          type: number
          format: float
          nullable: true
          description: |
            Typical p is used to reduce the impact of less probable tokens from the output. (default: 1)
        repeat_last_n:
          type: integer
          nullable: true
          description: |
            Sets how far back for the model to look back to prevent repetition.
            (Default: 64, 0 = disabled, -1 = num_ctx)
        temperature:
          type: number
          format: float
          nullable: true
          description: |
            The temperature of the model. Increasing the temperature will make the model answer more creatively.
            (Default: 0.8)
        repeat_penalty:
          type: number
          format: float
          nullable: true
          description: |
            Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more
            strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
        presence_penalty:
          type: number
          format: float
          nullable: true
          description: |
            Positive values penalize new tokens based on whether they appear in the text so far, increasing the
            model's likelihood to talk about new topics. (Default: 0)
        frequency_penalty:
          type: number
          format: float
          nullable: true
          description: |
            Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the
            model's likelihood to repeat the same line verbatim. (Default: 0)
        mirostat:
          type: integer
          nullable: true
          description: |
            Enable Mirostat sampling for controlling perplexity.
            (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
        mirostat_tau:
          type: number
          format: float
          nullable: true
          description: |
            Controls the balance between coherence and diversity of the output. A lower value will result in more
            focused and coherent text. (Default: 5.0)
        mirostat_eta:
          type: number
          format: float
          nullable: true
          description: |
            Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate
            will result in slower adjustments, while a higher learning rate will make the algorithm more responsive.
            (Default: 0.1)
        penalize_newline:
          type: boolean
          nullable: true
          description: |
            Penalize newlines in the output. (Default: true)
        stop:
          type: array
          nullable: true
          description: |
            Sequences where the API will stop generating further tokens. The returned text will not contain the stop
            sequence.
          items:
            type: string
        numa:
          type: boolean
          nullable: true
          description: |
            Enable NUMA support. (Default: false)
        num_ctx:
          type: integer
          nullable: true
          description: |
            Sets the size of the context window used to generate the next token. (Default: 2048)
        num_batch:
          type: integer
          nullable: true
          description: |
            Sets the number of batches to use for generation. (Default: 512)
        num_gpu:
          type: integer
          nullable: true
          description: |
            The number of layers to send to the GPU(s).
            On macOS it defaults to 1 to enable metal support, 0 to disable.
        main_gpu:
          type: integer
          nullable: true
          description: |
            The GPU to use for the main model. Default is 0.
        low_vram:
          type: boolean
          nullable: true
          description: |
            Enable low VRAM mode. (Default: false)
        f16_kv:
          type: boolean
          nullable: true
          description: |
            Enable f16 key/value. (Default: true)
        logits_all:
          type: boolean
          nullable: true
          description: |
            Enable logits all. (Default: false)
        vocab_only:
          type: boolean
          nullable: true
          description: |
            Enable vocab only. (Default: false)
        use_mmap:
          type: boolean
          nullable: true
          description: |
            Enable mmap. (Default: false)
        use_mlock:
          type: boolean
          nullable: true
          description: |
            Enable mlock. (Default: false)
        num_thread:
          type: integer
          nullable: true
          description: |
            Sets the number of threads to use during computation. By default, Ollama will detect this for optimal
            performance. It is recommended to set this value to the number of physical CPU cores your system has
            (as opposed to the logical number of cores).
    # String enum used as the `format` property of the generate and chat
    # requests. `json` is the only value the enum currently lists.
    ResponseFormat:
      type: string
      description: |
        The format to return a response in. Currently the only accepted value is json.

        Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.

        Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
      enum:
        - json
    # Body returned by GET /version: one optional string property, `version`.
    VersionResponse:
      type: object
      description: The response class for the version endpoint.
      properties:
        version:
          type: string
          description: The version of the Ollama server.
    # Response payload of POST /generate. The schema declares no `required`
    # list, so every property is optional. All *_duration fields are int64
    # values expressed in nanoseconds.
    GenerateCompletionResponse:
      type: object
      description: The response class for the generate endpoint.
      properties:
        model:
          type: string
          description: *model_name
          example: llama3.2
        created_at:
          type: string
          format: date-time
          description: Date on which a model was created.
          # Quoted so that YAML 1.1 loaders keep the example a string instead
          # of resolving it to a native timestamp.
          example: '2023-08-04T19:22:45.499127Z'
        response:
          type: string
          description: The response for a given prompt with a provided model.
          example: The sky appears blue because of a phenomenon called Rayleigh scattering.
        done:
          type: boolean
          description: Whether the response has completed.
          example: true
        context:
          type: array
          description: |
            An encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory.
          items:
            type: integer
            format: int64
          example: [ 1, 2, 3 ]
        total_duration:
          type: integer
          format: int64
          description: Total time spent in nanoseconds generating the response.
          example: 5589157167
        load_duration:
          type: integer
          format: int64
          description: Time spent in nanoseconds loading the model.
          example: 3013701500
        prompt_eval_count:
          type: integer
          description: Number of tokens in the prompt.
          example: 46
        prompt_eval_duration:
          type: integer
          format: int64
          description: Time spent in nanoseconds evaluating the prompt.
          example: 1160282000
        eval_count:
          type: integer
          description: Number of tokens in the response.
          example: 113
        eval_duration:
          type: integer
          format: int64
          description: Time in nanoseconds spent generating the response.
          example: 1325948000
    # Request body of POST /chat. `model` and `messages` are required.
    # The `model` and `stream` descriptions and the whole `keep_alive`
    # property are YAML aliases of anchors defined on
    # GenerateCompletionRequest.
    GenerateChatCompletionRequest:
      type: object
      description: Request class for the chat endpoint.
      properties:
        model:
          type: string
          description: *model_name
          example: llama3.2
        messages:
          type: array
          description: The messages of the chat, this can be used to keep a chat memory
          items:
            $ref: '#/components/schemas/Message'
        format:
          $ref: '#/components/schemas/ResponseFormat'
        options:
          $ref: '#/components/schemas/RequestOptions'
        stream:
          type: boolean
          description: *stream
          default: false
        keep_alive: *keep_alive
        tools:
          type: array
          description: A list of tools the model may call.
          items:
            $ref: '#/components/schemas/Tool'
      required:
        - model
        - messages
    # Response payload of POST /chat. `model`, `created_at`, `message` and
    # `done` are required; `done_reason` and the statistics fields are
    # optional. All *_duration fields are int64 values in nanoseconds.
    GenerateChatCompletionResponse:
      type: object
      description: The response class for the chat endpoint.
      properties:
        message:
          $ref: '#/components/schemas/Message'
        model:
          type: string
          description: *model_name
          example: llama3.2
        created_at:
          type: string
          format: date-time
          description: Date on which a model was created.
          # Quoted so that YAML 1.1 loaders keep the example a string instead
          # of resolving it to a native timestamp.
          example: '2023-08-04T19:22:45.499127Z'
        done:
          type: boolean
          description: Whether the response has completed.
          example: true
        done_reason:
          $ref: '#/components/schemas/DoneReason'
        total_duration:
          type: integer
          format: int64
          description: Total time spent in nanoseconds generating the response.
          example: 5589157167
        load_duration:
          type: integer
          format: int64
          description: Time spent in nanoseconds loading the model.
          example: 3013701500
        prompt_eval_count:
          type: integer
          description: Number of tokens in the prompt.
          example: 46
        prompt_eval_duration:
          type: integer
          format: int64
          description: Time spent in nanoseconds evaluating the prompt.
          example: 1160282000
        eval_count:
          type: integer
          description: Number of tokens in the response.
          example: 113
        eval_duration:
          type: integer
          format: int64
          description: Time in nanoseconds spent generating the response.
          example: 1325948000
      required:
        - model
        - created_at
        - message
        - done
    # String enum referenced by GenerateChatCompletionResponse.done_reason.
    # The per-value meanings live in the inline comments below; they are YAML
    # comments, so they are not part of the parsed spec.
    DoneReason:
      type: string
      description: Reason why the model is done generating a response.
      enum:
        - stop # The generation hit a stop token.
        - length # The maximum num_tokens was reached.
        - load # The request was sent with an empty body to load the model.
    # One chat message, used for request `messages` items and for the
    # response `message`. `role` and `content` are required; `images` and
    # `tool_calls` are optional.
    Message:
      type: object
      description: A message in the chat endpoint
      properties:
        role:
          type: string
          description: The role of the message
          enum: [ "system", "user", "assistant", "tool" ]
        content:
          type: string
          description: The content of the message
          example: Why is the sky blue?
        images:
          type: array
          description: (optional) a list of Base64-encoded images to include in the message (for multimodal models such as llava)
          items:
            type: string
            description: Base64-encoded image (for multimodal models such as llava)
            example: iVBORw0KGgoAAAANSUhEUgAAAAkAAAANCAIAAAD0YtNRAAAABnRSTlMA/AD+APzoM1ogAAAAWklEQVR4AWP48+8PLkR7uUdzcMvtU8EhdykHKAciEXL3pvw5FQIURaBDJkARoDhY3zEXiCgCHbNBmAlUiyaBkENoxZSDWnOtBmoAQu7TnT+3WuDOA7KBIkAGAGwiNeqjusp/AAAAAElFTkSuQmCC
        tool_calls:
          type: array
          description: A list of tools the model wants to call.
          items:
            $ref: '#/components/schemas/ToolCall'
      required:
        - role
        - content
    # Item type of GenerateChatCompletionRequest.tools. `type` is a
    # single-value enum that defaults to `function`; neither property is
    # listed as required.
    Tool:
      type: object
      description: A tool the model may call.
      properties:
        type:
          type: string
          enum:
            - function
          default: function
          description: The type of tool.
        function:
          $ref: '#/components/schemas/ToolFunction'
    # Function definition carried by a Tool. All three properties (`name`,
    # `description`, `parameters`) are required.
    ToolFunction:
      type: object
      description: A function that the model may call.
      properties:
        name:
          type: string
          description: The name of the function to be called.
        description:
          type: string
          description: |
            A description of what the function does, used by the model to choose when and how to call the function.
        parameters:
          $ref: '#/components/schemas/ToolFunctionParams'
      required:
        - name
        - description
        - parameters
    # Free-form object (`additionalProperties: true`): the spec does not
    # constrain its keys or values.
    ToolFunctionParams:
      type: object
      description: The parameters the function accepts, described as a JSON Schema object.
      additionalProperties: true
    # Item type of Message.tool_calls; wraps a single optional `function`.
    ToolCall:
      type: object
      description: The tool the model wants to call.
      properties:
        function:
          $ref: '#/components/schemas/ToolCallFunction'
    # The function invocation inside a ToolCall. Both `name` and `arguments`
    # are required.
    ToolCallFunction:
      type: object
      description: The function the model wants to call.
      properties:
        name:
          type: string
          description: The name of the function to be called.
        arguments:
          $ref: '#/components/schemas/ToolCallFunctionArgs'
      required:
        - name
        - arguments
    # Free-form object (`additionalProperties: true`): argument names and
    # value types are not constrained by the spec.
    ToolCallFunctionArgs:
      type: object
      description: The arguments to pass to the function.
      additionalProperties: true
    # Request body of POST /embeddings. `model` and `prompt` are required.
    # The `model` description and the `keep_alive` property are aliases of
    # anchors defined on GenerateCompletionRequest.
    GenerateEmbeddingRequest:
      description: Generate embeddings from a model.
      type: object
      properties:
        model:
          type: string
          description: *model_name
          example: llama3.2
        prompt:
          type: string
          description: Text to generate embeddings for.
          example: 'Here is an article about llamas...'
        options:
          $ref: '#/components/schemas/RequestOptions'
        keep_alive: *keep_alive
      required:
        - model
        - prompt
    # Response body of POST /embeddings: a single optional `embedding` array
    # of double-precision numbers.
    GenerateEmbeddingResponse:
      type: object
      description: Returns the embedding information.
      properties:
        embedding:
          type: array
          description: The embedding for the prompt.
          items:
            type: number
            format: double
          # The example is a truncated vector. It lists numbers only, because
          # a `...` placeholder would parse as a string element and break the
          # declared item type.
          example: [ 0.5670403838157654, 0.009260174818336964 ]
    # Request body of POST /create. `model` and `modelfile` are required;
    # `path`, `quantize` and `stream` are optional. The `model` and `stream`
    # descriptions are aliases of anchors defined on
    # GenerateCompletionRequest.
    CreateModelRequest:
      type: object
      description: Create model request object.
      properties:
        model:
          type: string
          description: *model_name
          example: mario
        modelfile:
          type: string
          description: The contents of the Modelfile.
          # Double-quoted so `\n` is a real line break between the two
          # Modelfile instructions; a plain scalar would keep it as a
          # literal backslash followed by "n".
          example: "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
        path:
          type: string
          description: Path to the Modelfile (optional)
        quantize:
          type: string
          nullable: true
          description: The quantization level of the model.
        stream:
          type: boolean
          description: *stream
          default: false
      required:
        - model
        - modelfile
    # Schema of the /create response; its only property is `status`, typed
    # by the CreateModelStatus enum.
    CreateModelResponse:
      description: Response object for creating a model. When finished, `status` is `success`.
      type: object
      properties:
        status:
          $ref: '#/components/schemas/CreateModelStatus'
    # Closed string enum for CreateModelResponse.status. Two of the values
    # contain spaces and are written as plain (unquoted) scalars.
    CreateModelStatus:
      type: string
      description: Status creating the model
      enum:
        - creating system layer
        - parsing modelfile
        - success
    # Body returned by GET /tags: a `models` array of Model objects.
    ModelsResponse:
      description: Response class for the list models endpoint.
      type: object
      properties:
        models:
          type: array
          description: List of models available locally.
          items:
            $ref: '#/components/schemas/Model'
    # Item type of ModelsResponse.models. No property is listed as required.
    Model:
      type: object
      description: A model available locally.
      properties:
        model:
          type: string
          description: *model_name
          example: llama3.2
        modified_at:
          type: string
          format: date-time
          description: Model modification date.
          # Quoted so that YAML 1.1 loaders keep the example a string instead
          # of resolving it to a native timestamp.
          example: '2023-08-02T17:02:23.713454393-07:00'
        size:
          type: integer
          format: int64
          description: Size of the model on disk.
          example: 7323310500
        digest:
          type: string
          description: The model's digest.
          example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
        details:
          $ref: '#/components/schemas/ModelDetails'
    # Descriptive metadata shared by Model.details and ProcessModel.details.
    # Every property is an optional string, except `families`, which is an
    # array of strings.
    ModelDetails:
      type: object
      description: Details about a model.
      properties:
        parent_model:
          type: string
          description: The parent model of the model.
        format:
          type: string
          description: The format of the model.
        family:
          type: string
          description: The family of the model.
        families:
          type: array
          description: The families of the model.
          items:
            type: string
        parameter_size:
          type: string
          description: The size of the model's parameters.
        quantization_level:
          type: string
          description: The quantization level of the model.
    # Model metadata whose property names contain dots
    # (`general.architecture`, ...). The dots are part of the literal key,
    # not a nesting path.
    ModelInformation:
      type: object
      description: Details about a model.
      properties:
        general.architecture:
          type: string
          description: The architecture of the model.
        general.file_type:
          type: integer
          nullable: true
          description: The file type of the model.
        general.parameter_count:
          type: integer
          format: int64
          nullable: true
          description: The number of parameters in the model.
        general.quantization_version:
          type: integer
          nullable: true
          description: The quantization version of the model.
    # Body returned by GET /ps: a `models` array of ProcessModel objects.
    ProcessResponse:
      type: object
      description: Response class for the list running models endpoint.
      properties:
        models:
          type: array
          description: List of running models.
          items:
            $ref: '#/components/schemas/ProcessModel'
    # Item type of ProcessResponse.models. No property is listed as required.
    # `size` and `size_vram` are separate int64 measurements.
    ProcessModel:
      type: object
      description: A model that is currently loaded.
      properties:
        model:
          type: string
          description: *model_name
          example: llama3.2
        size:
          type: integer
          format: int64
          description: Size of the model on disk.
          example: 7323310500
        digest:
          type: string
          description: The model's digest.
          example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
        details:
          $ref: '#/components/schemas/ModelDetails'
        expires_at:
          type: string
          format: date-time
          # Quoted so that YAML 1.1 loaders keep the example a string instead
          # of resolving it to a native timestamp.
          example: '2023-08-02T17:02:23.713454393-07:00'
        size_vram:
          type: integer
          format: int64
          description: Size of the model in VRAM.
          example: 7323310500
    # Request body with a single mandatory `model` field.
    ModelInfoRequest:
      type: object
      description: Request class for the show model info endpoint.
      required:
        - model
      properties:
        model:
          description: *model_name
          example: 'llama3.2'
          type: string
    # Response payload describing a single model. Every scalar field is
    # nullable; `details` and `model_info` point at the shared metadata schemas.
    ModelInfo:
      description: Details about a model including modelfile, template, parameters, license, and system prompt.
      type: object
      properties:
        license:
          type: string
          nullable: true
          description: The model's license.
          example: <contents of license block>
        modelfile:
          type: string
          nullable: true
          description: The modelfile associated with the model.
          # Literal block scalar so the example contains real newlines and
          # quotes; single-quoted YAML does not process `\n` or `\"` escapes.
          example: |
            Modelfile generated by "ollama show"
            # To build a new Modelfile based on this one, replace the FROM line with:
            # FROM llama3:latest

            FROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8
            TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>

            {{ end }}{{ .Prompt }} [/INST] """
            SYSTEM """"""
            PARAMETER stop [INST]
            PARAMETER stop [/INST]
            PARAMETER stop <<SYS>>
            PARAMETER stop <</SYS>>
        parameters:
          type: string
          nullable: true
          description: The model parameters.
          # One parameter per line; `|-` strips the trailing newline.
          example: |-
            stop [INST]
            stop [/INST]
            stop <<SYS>>
            stop <</SYS>>
        template:
          type: string
          nullable: true
          description: The prompt template for the model.
          # The empty line is part of the template text.
          example: |-
            [INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>

            {{ end }}{{ .Prompt }} [/INST]
        system:
          type: string
          nullable: true
          description: The system prompt for the model.
        details:
          $ref: '#/components/schemas/ModelDetails'
        model_info:
          $ref: '#/components/schemas/ModelInformation'
        messages:
          type: array
          nullable: true
          description: The default messages for the model.
          items:
            $ref: '#/components/schemas/Message'
    # Request body pairing an existing model name with the name for its copy;
    # both fields are mandatory.
    CopyModelRequest:
      type: object
      description: Request class for copying a model.
      required:
        - source
        - destination
      properties:
        source:
          description: Name of the model to copy.
          example: 'llama3.2'
          type: string
        destination:
          description: Name of the new model.
          example: 'llama3-backup'
          type: string
    # Request body with a single mandatory `model` field.
    DeleteModelRequest:
      type: object
      description: Request class for deleting a model.
      required:
        - model
      properties:
        model:
          description: *model_name
          example: 'llama3:13b'
          type: string
    # Request body for pulling a model. Only `model` is mandatory; `insecure`
    # and `stream` both default to false.
    PullModelRequest:
      description: Request class for pulling a model.
      type: object
      properties:
        model:
          type: string
          description: *model_name
          example: llama3.2
        insecure:
          type: boolean
          # Literal block scalar: lines must not carry trailing whitespace,
          # because it would become part of the description text.
          description: |
            Allow insecure connections to the library.

            Only use this if you are pulling from your own library during development.
          default: false
        username:
          type: string
          description: Ollama username.
        password:
          type: string
          description: Ollama password.
        stream:
          type: boolean
          description: *stream
          default: false
      required:
        - model
    # One progress object emitted while pulling a model; `status` takes its
    # values from PullModelStatus.
    PullModelResponse:
      # Literal block scalar: lines must not carry trailing whitespace,
      # because it would become part of the description text.
      description: |
        Response class for pulling a model.

        The first object is the manifest. Then there is a series of downloading responses. Until any of the downloads is completed, the `completed` key may not be included.

        The number of files to be downloaded depends on the number of layers specified in the manifest.
      type: object
      properties:
        status:
          $ref: '#/components/schemas/PullModelStatus'
        digest:
          type: string
          description: The model's digest.
          example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
        total:
          type: integer
          format: int64
          description: Total size of the model.
          example: 2142590208
        completed:
          type: integer
          format: int64
          description: Total bytes transferred.
          example: 2142590208
    # Closed set of status strings used by PullModelResponse.status.
    PullModelStatus:
      description: Status pulling the model.
      type: string
      example: 'pulling manifest'
      enum:
        - 'pulling manifest'
        - 'downloading digestname'
        - 'verifying sha256 digest'
        - 'writing manifest'
        - 'removing any unused layers'
        - 'success'
    # Request body for pushing a model. Only `model` is mandatory; `insecure`
    # and `stream` both default to false.
    PushModelRequest:
      description: Request class for pushing a model.
      type: object
      properties:
        model:
          type: string
          # Quoted because the text contains `:` and angle brackets.
          description: 'The name of the model to push in the form of <namespace>/<model>:<tag>.'
          example: 'mattw/pygmalion:latest'
        insecure:
          type: boolean
          # Literal block scalar: lines must not carry trailing whitespace,
          # because it would become part of the description text.
          description: |
            Allow insecure connections to the library.

            Only use this if you are pushing to your library during development.
          default: false
        username:
          type: string
          description: Ollama username.
        password:
          type: string
          description: Ollama password.
        stream:
          type: boolean
          description: *stream
          default: false
      required:
        - model
    # One progress object emitted while pushing a model. Mirrors the field set
    # of PullModelResponse, except that `status` is a free-form string here.
    PushModelResponse:
      type: object
      description: Response class for pushing a model.
      properties:
        status:
          type: string
          description: Status pushing the model.
        digest:
          type: string
          description: The model's digest.
          example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
        total:
          type: integer
          format: int64
          description: Total size of the model.
          example: 2142590208
        completed:
          type: integer
          format: int64
          description: Total bytes transferred.
          example: 2142590208