- Update to version 0.3.4:

* New embedding models - BGE-M3: a large embedding model from BAAI distinguished for its versatility in Multi-Functionality, Multi-Linguality, and Multi-Granularity. - BGE-Large: a large embedding model trained in english. - Paraphrase-Multilingual: A multilingual embedding model trained on parallel data for 50+ languages. * New embedding API with batch support - Ollama now supports a new API endpoint /api/embed for embedding generation: * This API endpoint supports new features: - Batches: generate embeddings for several documents in one request - Normalized embeddings: embeddings are now normalized, improving similarity results - Truncation: a new truncate parameter that will error if set to false - Metrics: responses include load_duration, total_duration and prompt_eval_count metrics OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/ollama?expand=0&rev=46
2024-08-15 18:56:53 +00:00 · 2024-08-15 18:56:53 +00:00 · aa82c484e7
commit aa82c484e7
17 changed files with 871 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,25 @@
 ## Default LFS
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.bsp filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.gem filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.jar filter=lfs diff=lfs merge=lfs -text
 *.lz filter=lfs diff=lfs merge=lfs -text
 *.lzma filter=lfs diff=lfs merge=lfs -text
 *.obscpio filter=lfs diff=lfs merge=lfs -text
 *.oxt filter=lfs diff=lfs merge=lfs -text
 *.pdf filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
 *.rpm filter=lfs diff=lfs merge=lfs -text
 *.tbz filter=lfs diff=lfs merge=lfs -text
 *.tbz2 filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.ttf filter=lfs diff=lfs merge=lfs -text
 *.txz filter=lfs diff=lfs merge=lfs -text
 *.whl filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 ## Specific LFS patterns
 vendor.tar.zstd filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 .osc
--- a/23
+++ b/23
@ -0,0 +1,23 @@
 <services>
  <service name="format_spec_file" mode="manual" />
  <service name="obs_scm" mode="manual">
    <param name="url">https://github.com/ollama/ollama.git</param>
    <param name="scm">git</param>
    <param name="revision">v0.3.4</param>
    <param name="versionformat">@PARENT_TAG@</param>
    <param name="versionrewrite-pattern">v(.*)</param>
    <param name="changesgenerate">enable</param>
    <param name="submodules">enable</param>
    <param name="exclude">macapp</param>
    <param name="package-meta">yes</param>
  </service>
  <service name="go_modules" mode="manual">
    <param name="compression">zstd</param>
  </service>
  <service name="set_version" mode="manual" />
  <service name="tar" mode="buildtime">
    <param name="package-meta">yes</param>
  </service>
 </services>
--- a/4
+++ b/4
@ -0,0 +1,4 @@
 <servicedata>
 <service name="tar_scm">
                <param name="url">https://github.com/ollama/ollama.git</param>
              <param name="changesrevision">ce1fb4447efc9958dcf279f7eb2ae6941bec1220</param></service></servicedata>
--- a/enable-lto.patch
+++ b/enable-lto.patch
@ -0,0 +1,28 @@
 diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
 index db2c6c3..8194cd9 100755
 --- a/llm/generate/gen_linux.sh
 +++ b/llm/generate/gen_linux.sh
@@ -52,6 +52,7 @@ if [ -z "${CUDACXX}" ]; then
     fi
 fi
 COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
 +COMMON_CMAKE_DEFS="${COMMON_CMAKE_DEFS} -DGGML_LTO=on -DCMAKE_BUILD_TYPE=Release"
 source $(dirname $0)/gen_common.sh
 init_vars
 git_module_setup
@@ -78,6 +79,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         init_vars
         echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
         CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
 +        CMAKE_DEFS="${CMAKE_DEFS} -DGGML_LTO=on"
         BUILD_DIR="../build/linux/${ARCH}/cpu"
         echo "Building custom CPU"
         build
@@ -94,6 +96,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
         COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
 +        COMMON_CPU_DEFS="${COMMON_CPU_DEFS} -DGGML_LTO=on -DCMAKE_BUILD_TYPE=Release"
         if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
             #
             # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
--- a/ollama-0.1.45.obscpio
+++ b/ollama-0.1.45.obscpio
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:ecc23b875cd051a46ed9c9da0481bfd1a1b11e859b63ceb782d673a6534bda5e
 size 189517838
--- a/ollama-0.2.6.obscpio
+++ b/ollama-0.2.6.obscpio
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:391fad97bacee37e8fab00273fd5d5a0a20912fd47c51907131ee1f274c7d2bf
 size 161902606
--- a/ollama-0.2.8.obscpio
+++ b/ollama-0.2.8.obscpio
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f1dfa7d3fc6d8dc35af4bd9a458a9f22ab613d07c1e5e48db2b2803ff7f77214
 size 151425038
--- a/ollama-0.3.0.obscpio
+++ b/ollama-0.3.0.obscpio
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:58ac37034b70dfa60b0be1114a82a00e407bd0fb18bff0ad7f4bce86a3c7373f
 size 153287182
--- a/ollama-0.3.3.obscpio
+++ b/ollama-0.3.3.obscpio
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:80ced6af29997569d44f79381c5cd1d4e51edd3f97d9f5aae0207162b6de26ba
 size 153645582
--- a/ollama-0.3.4.obscpio
+++ b/ollama-0.3.4.obscpio
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:26f61d9850e1242dc1d65c4ab881f7a36075066f12f887a6596efceebfda490b
 size 223257614
--- a/ollama-user.conf
+++ b/ollama-user.conf
@ -0,0 +1,2 @@
 #Type Name   ID GECOS    Home directory  Shell
 u     ollama -  "Ollama" /var/lib/ollama -
--- a/ollama.changes
+++ b/ollama.changes
@ -0,0 +1,637 @@
 -------------------------------------------------------------------
 Sun Aug 11 02:40:06 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
 - Update to version 0.3.4:
 * New embedding models
  - BGE-M3: a large embedding model from BAAI distinguished for 
    its versatility in Multi-Functionality, Multi-Linguality, and 
    Multi-Granularity.
  - BGE-Large: a large embedding model trained in English.
  - Paraphrase-Multilingual: A multilingual embedding model 
    trained on parallel data for 50+ languages.
 * New embedding API with batch support
   - Ollama now supports a new API endpoint /api/embed for 
     embedding generation:
 * This API endpoint supports new features:
   - Batches: generate embeddings for several documents in 
     one request
   - Normalized embeddings: embeddings are now normalized, 
     improving similarity results
   - Truncation: a new truncate parameter that will error if 
     set to false
   - Metrics: responses include load_duration, total_duration and 
     prompt_eval_count metrics
 -------------------------------------------------------------------
 Sat Aug 03 09:41:56 UTC 2024 - eyadlorenzo@gmail.com
 - Update to version 0.3.3:
  * The /api/embed endpoint now returns statistics: total_duration,
    load_duration, and prompt_eval_count
  * Added usage metrics to the /v1/embeddings OpenAI compatibility
    API
  * Fixed issue where /api/generate would respond with an empty 
    string if provided a context
  * Fixed issue where /api/generate would return an incorrect 
    value for context
  * /show modelfile will now render MESSAGE commands correctly
 - Update to version 0.3.2:
  * Fixed issue where ollama pull would not resume download 
    progress
  * Fixed issue where phi3 would report an error on older versions
 -------------------------------------------------------------------
 Tue Jul 30 07:08:37 UTC 2024 - Adrian Schröter <adrian@suse.de>
 - Update to version 0.3.1:
  * Added support for min_p sampling option
  * Lowered number of requests required when downloading models
    with ollama pull
  * ollama create will now autodetect required stop parameters
    when importing certain models
  * Fixed issue where /save would cause parameters to be saved
    incorrectly.
  * OpenAI-compatible API will now return a finish_reason of
    tool_calls if a tool call occurred.
 -------------------------------------------------------------------
 Mon Jul 29 09:59:58 UTC 2024 - Adrian Schröter <adrian@suse.de>
 - fix build on leap 15.6
 - exclude builds on 32bit due to build failures
 -------------------------------------------------------------------
 Sun Jul 28 11:32:19 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.3.0:
  * Ollama now supports tool calling with popular models such
    as Llama 3.1. This enables a model to answer a given prompt
    using tool(s) it knows about, making it possible for models to
    perform more complex tasks or interact with the outside world.
  * New models:
    ~ Llama 3.1
    ~ Mistral Large 2
    ~ Firefunction v2
    ~ Llama-3-Groq-Tool-Use
  * Fixed duplicate error message when running ollama create
 -------------------------------------------------------------------
 Wed Jul 24 14:28:08 UTC 2024 - adrian@suse.de
 - Update to version 0.2.8:
  * api embed docs (#5282)
  * convert: capture `head_dim` for mistral (#5818)
  * Update llama.cpp submodule commit to `d94c6e0c` (#5805)
  * server: collect nested tool call objects when parsing (#5824)
  * Remove no longer supported max vram var
  * Refine error reporting for subprocess crash
  * Remove out of space test temporarily (#5825)
  * llm: consider `head_dim` in llama arch (#5817)
  * Adjust windows ROCm discovery
  * add patch for tekken (#5807)
  * preserve last assistant message (#5802)
  * Fix generate test flakyness (#5804)
  * server: validate template (#5734)
  * OpenAI: Function Based Testing (#5752)
  * adjust openai chat msg processing (#5729)
  * fix parsing tool calls
  * server: check for empty tools array too (#5779)
  * always provide content even if empty (#5778)
  * server: only parse tool calls if tools are provided (#5771)
  * Fix context exhaustion integration test for small gpus
  * Refine scheduler unit tests for reliability
 -------------------------------------------------------------------
 Thu Jul 18 13:09:10 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Fixed issue with shared libraries 
 -------------------------------------------------------------------
 Thu Jul 18 12:27:54 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Added %check section
 - Use -v when building 
 - Update to version 0.2.6:
  * New models: MathΣtral is a 7B model designed for math 
    reasoning and scientific discovery by Mistral AI.
  * Fixed issue where uppercase roles such as USER would no longer
    work in the chat endpoints
  * Fixed issue where empty system message would be included in the
    prompt
 -------------------------------------------------------------------
 Sun Jul 14 17:48:36 UTC 2024 - eyadlorenzo@gmail.com
 - Update to version 0.2.5:
  * Fixed issue where a model's SYSTEM message would not be applied
 - Update to version 0.2.4:
  * Fixed issue where context, load_duration and total_duration 
    fields would not be set in the /api/generate endpoint.
  * Ollama will no longer error if loading models larger than 
    system memory if disk space is available
 - Update to version 0.2.3:
  * Fix issue where system prompt would not be applied
 - Update to version 0.2.2:
  * Fixed errors that occurred when using Ollama with Nvidia V100 
    GPUs
  * glm4 models will no longer fail to load from out of memory
    errors
  * Fixed error that would occur when running deepseek-v2 and 
    deepseek-coder-v2 models
  * Fixed a series of out of memory issues when using Nvidia
    GPUs
  * Fixed a series of errors that would occur when using multiple 
    Radeon GPUs
 - Update to version 0.2.1:
  * Fixed issue where setting OLLAMA_NUM_PARALLEL would cause 
    models to be reloaded after each request
 - Update to version 0.2.0:
  * Ollama 0.2.0 is now available with concurrency support. 
    This unlocks 2 specific features:
    ~ Ollama can now serve multiple requests at the same time
    ~ Ollama now supports loading different models at the same time
  * New models: GLM-4: A strong multi-lingual general language 
    model with competitive performance to Llama 3.
  * New models: CodeGeeX4: A versatile model for AI software 
    development scenarios, including code completion.
  * New models: Gemma 2: Improved output quality and base text 
    generation models now available
  * Ollama will now show a better error if a model architecture 
    isn't supported
  * Improved handling of quotes and spaces in Modelfile FROM lines
  * Ollama will now return an error if the system does not have 
    enough memory to run a model on Linux
 -------------------------------------------------------------------
 Sun Jul 07 19:18:11 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.48:
  * Fixed issue where Gemma 2 would continuously output when 
    reaching context limits
  * Fixed out of memory and core dump errors when running Gemma 2
  * /show info will now show additional model information in
    ollama run
  * Fixed issue where ollama show would result in an error on 
    certain vision models
 - Update to version 0.1.47:
  * Added support for Google Gemma 2 models (9B and 27B)
  * Fixed issues with ollama create when importing from Safetensors
 -------------------------------------------------------------------
 Mon Jun 24 10:11:17 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.46:
  * Docs (#5149)
  * fix: quantization with template
  * Fix use_mmap parsing for modelfiles
  * Refine mmap default logic on linux
  * Bump latest fedora cuda repo to 39
 -------------------------------------------------------------------
 Sat Jun 22 10:08:00 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.45:
  * New models: DeepSeek-Coder-V2: A 16B & 236B open-source
    Mixture-of-Experts code language model that achieves
    performance comparable to GPT4-Turbo in code-specific tasks.
  * ollama show <model> will now show model information such as
    context window size
  * Model loading on Windows with CUDA GPUs is now faster
  * Setting seed in the /v1/chat/completions OpenAI compatibility
    endpoint no longer changes temperature
  * Enhanced GPU discovery and multi-gpu support with concurrency
  * Introduced a workaround for AMD Vega RX 56 SDMA support on
    Linux
  * Fix memory prediction for deepseek-v2 and deepseek-coder-v2
    models
  * api/show endpoint returns extensive model metadata
  * GPU configuration variables are now reported in ollama serve
  * Update Linux ROCm to v6.1.1
 -------------------------------------------------------------------
 Tue Jun 18 12:12:41 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Added documentation files to .spec 
 - Update to version 0.1.44:
  * Fixed issue where unicode characters such as emojis would not 
    be loaded correctly when running ollama create
  * Fixed certain cases where Nvidia GPUs would not be detected and
    reported as compute capability 1.0 devices
 - Update to version 0.1.43:
  * New import.md guide for converting and importing models to 
    Ollama
  * Fixed issue where embedding vectors resulting from 
    /api/embeddings would not be accurate
  * JSON mode responses will no longer include invalid escape 
    characters
  * Removing a model will no longer show incorrect File not found 
    errors
  * Fixed issue where running ollama create would result in an 
    error on Windows with certain file formatting
 - Update to version 0.1.42:
  * New models: Qwen 2: a new series of large language models 
    from Alibaba group
  * Qwen 2: a new series of large language models from Alibaba 
    group
  * ollama pull is now faster if it detects a model is already 
    downloaded
  * ollama create will now automatically detect prompt templates
    for popular model architectures such as Llama, Gemma, Phi and 
    more.
  * Ollama can now be accessed from local apps built with Electron 
    and Tauri, as well as in developing apps in local html files
  * Update welcome prompt in Windows to llama3
  * Fixed issues where /api/ps and /api/tags would show invalid 
    timestamps in responses
 - Update to version 0.1.41:
  * Fixed issue on Windows 10 and 11 with Intel CPUs with 
    integrated GPUs where Ollama would encounter an error
 -------------------------------------------------------------------
 Sat Jun 01 21:12:20 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.40:
  * New model: Codestral: Codestral is Mistral AI’s first-ever code
  	model designed for code generation tasks.
  * New model: IBM Granite Code: now in 3B and 8B parameter sizes.
  * New model: Deepseek V2: A Strong, Economical, and Efficient 
  	Mixture-of-Experts Language Model
  * Fixed out of memory and incorrect token issues when running 
  	Codestral on 16GB Macs
  * Fixed issue where full-width characters (e.g. Japanese, 
  	Chinese, Russian) were deleted at end of the line when using 
  	ollama run
 -------------------------------------------------------------------
 Wed May 29 11:38:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.39:
  * New model: Cohere Aya 23: A new state-of-the-art, multilingual
  	LLM covering 23 different languages.
  * New model: Mistral 7B 0.3: A new version of Mistral 7B with 
  	initial support for function calling.
  * New model: Phi-3 Medium: a 14B parameters, lightweight, 
  	state-of-the-art open model by Microsoft.
  * New model: Phi-3 Mini 128K and Phi-3 Medium 128K: versions of
  	the Phi-3 models that support a context window size of 128K
  * New model: Granite code: A family of open foundation models by
  	IBM for Code Intelligence
  * It is now possible to import and quantize Llama 3 and its
  	finetunes from Safetensors format to Ollama.
  * Full changelog at 
  	https://github.com/ollama/ollama/releases/tag/v0.1.39
 -------------------------------------------------------------------
 Wed May 22 18:05:30 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Added 15.6 build
 -------------------------------------------------------------------
 Thu May 16 19:55:51 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.38:
  * New model: Falcon 2: A new 11B parameters causal decoder-only
    model built by TII and trained over 5T tokens.
  * New model: Yi 1.5: A new high-performing version of Yi, now 
    licensed as Apache 2.0. Available in 6B, 9B and 34B sizes.
  * Added ollama ps command
  * Added /clear command
  * Fixed issue where switching loaded models on Windows would take
    several seconds
  * Running /save will no longer abort the chat session if an
    incorrect name is provided
  * The /api/tags API endpoint will now correctly return an empty
    list [] instead of null if no models are provided
 -------------------------------------------------------------------
 Sun May 12 19:05:53 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.37:
  * Fixed issue where models with uppercase characters in the name
    would not show with ollama list
  * Fixed usage string for ollama create
  * Fix finish_reason being "" instead of null in the Open-AI
    compatible chat API.
 -------------------------------------------------------------------
 Sun May 12 15:20:28 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Use obs_scm service instead of the deprecated tar_scm
 - Use zstd for vendor tarball compression 
 -------------------------------------------------------------------
 Sun May 12 01:39:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
 - Update to version 0.1.36:
  * Fixed exit status 0xc0000005 error with AMD graphics cards on Windows
  * Fixed rare out of memory errors when loading a model to run with CPU
 - Update to version 0.1.35:
  * New models: Llama 3 ChatQA: A model from NVIDIA based on Llama
    3 that excels at conversational question answering (QA) and
    retrieval-augmented generation (RAG).
  * Quantization: ollama create can now quantize models when
    importing them using the --quantize or -q flag
  * Fixed issue where inference subprocesses wouldn't be cleaned up
    on shutdown.
  * Fixed a series of out of memory errors when loading models on
    multi-GPU systems
  * Ctrl+J characters will now properly add newlines in ollama run
  * Fixed issues when running ollama show for vision models
  * OPTIONS requests to the Ollama API will no longer result in
    errors
  * Fixed issue where partially downloaded files wouldn't be
    cleaned up
  * Added a new done_reason field in responses describing why
    generation stopped responding
  * Ollama will now more accurately estimate how much memory
    is available on multi-GPU systems especially when running
    different models one after another
 - Update to version 0.1.34:
  * New model: Llava Llama 3
  * New model: Llava Phi 3
  * New model: StarCoder2 15B Instruct
  * New model: CodeGemma 1.1
  * New model: StableLM2 12B
  * New model: Moondream 2
  * Fixed issues with LLaVa models where they would respond 
    incorrectly after the first request
  * Fixed out of memory errors when running large models such as 
    Llama 3 70B
  * Fixed various issues with Nvidia GPU discovery on Linux and 
    Windows
  * Fixed a series of Modelfile errors when running ollama create
  * Fixed no slots available error that occurred when cancelling a
    request and then sending follow up requests
  * Improved AMD GPU detection on Fedora
  * Improved reliability when using the experimental
    OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED flags
  * ollama serve will now shut down quickly, even if a model is
    loading
 - Update to version 0.1.33:
  * New model: Llama 3
  * New model: Phi 3 Mini
  * New model: Moondream
  * New model: Llama 3 Gradient 1048K
  * New model: Dolphin Llama 3
  * New model: Qwen 110B
  * Fixed issues where the model would not terminate, causing the 
    API to hang.
  * Fixed a series of out of memory errors on Apple Silicon Macs
  * Fixed out of memory errors when running Mixtral architecture 
    models
  * Added experimental concurrency features:
    ~ OLLAMA_NUM_PARALLEL: Handle multiple requests simultaneously 
      for a single model
    ~ OLLAMA_MAX_LOADED_MODELS: Load multiple models simultaneously
 -------------------------------------------------------------------
 Tue Apr 23 02:26:34 UTC 2024 - rrahl0@disroot.org
 - Update to version 0.1.32:
  * scale graph based on gpu count
  * Support unicode characters in model path (#3681)
  * darwin: no partial offloading if required memory greater than system
  * update llama.cpp submodule to `7593639` (#3665)
  * fix padding in decode
  * Revert "cmd: provide feedback if OLLAMA_MODELS is set on non-serve command (#3470)" (#3662)
  * Added Solar example at README.md (#3610)
  * Update langchainjs.md (#2030)
  * Added MindsDB information (#3595)
  * examples: add more Go examples using the API (#3599)
  * Update modelfile.md
  * Add llama2 / torch models for `ollama create` (#3607)
  * Terminate subprocess if receiving `SIGINT` or `SIGTERM` signals while model is loading (#3653)
  * app: gracefully shut down `ollama serve` on windows (#3641)
  * types/model: add path helpers (#3619)
  * update llama.cpp submodule to `4bd0f93` (#3627)
  * types/model: make ParseName variants less confusing (#3617)
  * types/model: remove (*Digest).Scan and Digest.Value (#3605)
  * Fix rocm deps with new subprocess paths
  * mixtral mem
  * Revert "types/model: remove (*Digest).Scan and Digest.Value (#3589)"
  * types/model: remove (*Digest).Scan and Digest.Value (#3589)
  * types/model: remove DisplayLong (#3587)
  * types/model: remove MarshalText/UnmarshalText from Digest (#3586)
  * types/model: init with Name and Digest types (#3541)
  * server: provide helpful workaround hint when stalling on pull (#3584)
  * partial offloading
  * refactor tensor query
  * api: start adding documentation to package api (#2878)
  * examples: start adding Go examples using api/ (#2879)
  * Handle very slow model loads
  * fix: rope
  * Revert "build.go: introduce a friendlier way to build Ollama (#3548)" (#3564)
  * build.go: introduce a friendlier way to build Ollama (#3548)
  * update llama.cpp submodule to `1b67731` (#3561)
  * ci: use go-version-file
  * Correct directory reference in macapp/README (#3555)
  * cgo quantize
  * no blob create if already exists
  * update generate scripts with new `LLAMA_CUDA` variable, set `HIP_PLATFORM` to avoid compiler errors (#3528)
  * Docs: Remove wrong parameter for Chat Completion (#3515)
  * no rope parameters
  * add command-r graph estimate
  * Fail fast if mingw missing on windows
  * use an older version of the mac os sdk in release (#3484)
  * Add test case for context exhaustion
  * CI missing archive
  * fix dll compress in windows building
  * CI subprocess path fix
  * Fix CI release glitches
  * update graph size estimate
  * Fix macOS builds on older SDKs (#3467)
  * cmd: provide feedback if OLLAMA_MODELS is set on non-serve command (#3470)
  * feat: add OLLAMA_DEBUG in ollama server help message (#3461)
  * Revert options as a ref in the server
  * default head_kv to 1
  * fix metal gpu
  * Bump to b2581
  * Refined min memory from testing
  * Release gpu discovery library after use
  * Safeguard for noexec
  * Detect too-old cuda driver
  * Integration test improvements
  * Apply 01-cache.diff
  * Switch back to subprocessing for llama.cpp
  * Simplify model conversion (#3422)
  * fix generate output
  * update memory calcualtions
  * refactor model parsing
  * Add chromem-go to community integrations (#3437)
  * Update README.md (#3436)
  * Community Integration: CRAG Ollama Chat (#3423)
  * Update README.md (#3378)
  * Community Integration: ChatOllama (#3400)
  * Update 90_bug_report.yml
  * Add gemma safetensors conversion (#3250)
  * CI automation for tagging latest images
  * Bump ROCm to 6.0.2 patch release
  * CI windows gpu builds
  * Update troubleshooting link
  * fix: trim quotes on OLLAMA_ORIGINS
 - add set_version to automatically switch over to the newer version
 -------------------------------------------------------------------
 Tue Apr 16 10:52:25 UTC 2024 - bwiedemann@suse.com
 - Update to version 0.1.31:
  * Backport MacOS SDK fix from main
  * Apply 01-cache.diff
  * fix: workflows
  * stub stub
  * mangle arch
  * only generate on changes to llm subdirectory
  * only generate cuda/rocm when changes to llm detected
  * Detect arrow keys on windows (#3363)
  * add license in file header for vendored llama.cpp code (#3351)
  * remove need for `$VSINSTALLDIR` since build will fail if `ninja` cannot be found (#3350)
  * change `github.com/jmorganca/ollama` to `github.com/ollama/ollama` (#3347)
  * malformed markdown link (#3358)
  * Switch runner for final release job
  * Use Rocky Linux Vault to get GCC 10.2 installed
  * Revert "Switch arm cuda base image to centos 7"
  * Switch arm cuda base image to centos 7
  * Bump llama.cpp to b2527
  * Fix ROCm link in `development.md`
  * adds ooo to community integrations (#1623)
  * Add cliobot to ollama supported list (#1873)
  * Add Dify.AI to community integrations (#1944)
  * enh: add ollero.nvim to community applications (#1905)
  * Add typechat-cli to Terminal apps (#2428)
  * add new Web & Desktop link in readme for alpaca webui (#2881)
  * Add LibreChat to Web & Desktop Apps (#2918)
  * Add Community Integration: OllamaGUI (#2927)
  * Add Community Integration: OpenAOE (#2946)
  * Add Saddle (#3178)
  * tlm added to README.md terminal section. (#3274)
  * Update README.md (#3288)
  * Update README.md (#3338)
  * Integration tests conditionally pull
  * add support for libcudart.so for CUDA devices (adds Jetson support)
  * llm: prevent race appending to slice (#3320)
  * Bump llama.cpp to b2510
  * Add Testcontainers into Libraries section (#3291)
  * Revamp go based integration tests
  * rename `.gitattributes`
  * Bump llama.cpp to b2474
  * Add docs for GPU selection and nvidia uvm workaround
  * doc: faq gpu compatibility (#3142)
  * Update faq.md
  * Better tmpdir cleanup
  * Update faq.md
  * update `faq.md`
  * dyn global
  * llama: remove server static assets (#3174)
  * add `llm/ext_server` directory to `linguist-vendored` (#3173)
  * Add Radeon gfx940-942 GPU support
  * Wire up more complete CI for releases
  * llm,readline: use errors.Is instead of simple == check (#3161)
  * server: replace blob prefix separator from ':' to '-' (#3146)
  * Add ROCm support to linux install script (#2966)
  * .github: fix model and feature request yml (#3155)
  * .github: add issue templates (#3143)
  * fix: clip memory leak
  * Update README.md
  * add `OLLAMA_KEEP_ALIVE` to environment variable docs for `ollama serve` (#3127)
  * Default Keep Alive environment variable (#3094)
  * Use stdin for term discovery on windows
  * Update ollama.iss
  * restore locale patch (#3091)
  * token repeat limit for prediction requests (#3080)
  * Fix iGPU detection for linux
  * add more docs on for the modelfile message command (#3087)
  * warn when json format is expected but not mentioned in prompt (#3081)
  * Adapt our build for imported server.cpp
  * Import server.cpp as of b2356
  * refactor readseeker
  * Add docs explaining GPU selection env vars
  * chore: fix typo (#3073)
  * fix gpu_info_cuda.c compile warning (#3077)
  * use `-trimpath` when building releases (#3069)
  * relay load model errors to the client (#3065)
  * Update troubleshooting.md
  * update llama.cpp submodule to `ceca1ae` (#3064)
  * convert: fix shape
  * Avoid rocm runner and dependency clash
  * fix `03-locale.diff`
  * Harden for deps file being empty (or short)
  * Add ollama executable peer dir for rocm
  * patch: use default locale in wpm tokenizer (#3034)
  * only copy deps for `amd64` in `build_linux.sh`
  * Rename ROCm deps file to avoid confusion (#3025)
  * add `macapp` to `.dockerignore`
  * add `bundle_metal` and `cleanup_metal` funtions to `gen_darwin.sh`
  * tidy cleanup logs
  * update llama.cpp submodule to `77d1ac7` (#3030)
  * disable gpu for certain model architectures and fix divide-by-zero on memory estimation
  * Doc how to set up ROCm builds on windows
  * Finish unwinding idempotent payload logic
  * update llama.cpp submodule to `c2101a2` (#3020)
  * separate out `isLocalIP`
  * simplify host checks
  * add additional allowed hosts
  * Update docs `README.md` and table of contents
  * add allowed host middleware and remove `workDir` middleware (#3018)
  * decode ggla
  * convert: fix default shape
  * fix: allow importing a model from name reference (#3005)
  * update llama.cpp submodule to `6cdabe6` (#2999)
  * Update api.md
  * Revert "adjust download and upload concurrency based on available bandwidth" (#2995)
  * cmd: tighten up env var usage sections (#2962)
  * default terminal width, height
  * Refined ROCm troubleshooting docs
  * Revamp ROCm support
  * update go to 1.22 in other places (#2975)
  * docs: Add LLM-X to Web Integration section (#2759)
  * fix some typos (#2973)
  * Convert Safetensors to an Ollama model (#2824)
  * Allow setting max vram for workarounds
  * cmd: document environment variables for serve command
  * Add Odin Runes, a Feature-Rich Java UI for Ollama, to README (#2440)
  * Update api.md
  * Add NotesOllama to Community Integrations (#2909)
  * Added community link for Ollama Copilot (#2582)
  * use LimitGroup for uploads
  * adjust group limit based on download speed
  * add new LimitGroup for dynamic concurrency
  * refactor download run
 -------------------------------------------------------------------
 Wed Mar 06 23:51:28 UTC 2024 - computersemiexpert@outlook.com
 - Update to version 0.1.28:
  * Fix embeddings load model behavior (#2848)
  * Add Community Integration: NextChat (#2780)
  * prepend image tags (#2789)
  * fix: print usedMemory size right (#2827)
  * bump submodule to `87c91c07663b707e831c59ec373b5e665ff9d64a` (#2828)
  * Add ollama user to video group
  * Add env var so podman will map cuda GPUs
 -------------------------------------------------------------------
 Tue Feb 27 08:33:15 UTC 2024 - Jan Engelhardt <jengelh@inai.de>
 - Edit description, answer _what_ the package is and use nominal
  phrase. (https://en.opensuse.org/openSUSE:Package_description_guidelines)
 -------------------------------------------------------------------
 Fri Feb 23 21:13:53 UTC 2024 - Loren Burkholder <computersemiexpert@outlook.com>
 - Added the Ollama package
 - Included a systemd service
--- a/ollama.obsinfo
+++ b/ollama.obsinfo
@ -0,0 +1,4 @@
 name: ollama
 version: 0.3.4
 mtime: 1723000849
 commit: de4fc297732cb60ff79a6c8010a7c79971c21b4a
--- a/ollama.service
+++ b/ollama.service
@ -0,0 +1,13 @@
 # systemd unit that runs the Ollama server as a system service.
 [Unit]
 Description=Ollama Service
 # Ordering only: this unit declares no Wants=/Requires= on
 # network-online.target, so the ordering takes effect only when something
 # else pulls that target into the same transaction.
 After=network-online.target
 [Service]
 # Start the server under the dedicated ollama user and group.
 ExecStart=/usr/bin/ollama serve
 User=ollama
 Group=ollama
 # Restart the server whenever it exits, waiting 3 seconds between attempts.
 Restart=always
 RestartSec=3
 [Install]
 WantedBy=default.target
--- a/ollama.spec
+++ b/ollama.spec
@ -0,0 +1,113 @@
 #
 # spec file for package ollama
 #
 # Copyright (c) 2024 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
 # upon. The license for this file, and modifications and additions to the
 # file, is the same license as for the pristine package itself (unless the
 # license for the pristine package is not an Open Source License, in which
 # case the license is the MIT License). An "Open Source License" is a
 # license that conforms to the Open Source Definition (Version 1.9)
 # published by the Open Source Initiative.
 # Please submit bugfixes or comments via https://bugs.opensuse.org/
 #
 Name:           ollama
 Version:        0.3.4
 Release:        0
 Summary:        Tool for running AI models on-premise
 License:        MIT
 URL:            https://ollama.com
 # Upstream source tree
 Source:         %{name}-%{version}.tar
 # Go module dependencies; unpacked into the source tree during prep and
 # selected at build time through -mod=vendor
 Source1:        vendor.tar.zstd
 # systemd unit installed into the unit directory
 Source2:        ollama.service
 # sysusers.d definition of the ollama account
 Source3:        %{name}-user.conf
 Patch0:         enable-lto.patch
 BuildRequires:  cmake >= 3.24
 BuildRequires:  git
 # Defines the unit directory path and the service_add/service_del scriptlet
 # macros this spec relies on; declared explicitly instead of depending on
 # the build root pulling it in implicitly
 BuildRequires:  systemd-rpm-macros
 BuildRequires:  sysuser-tools
 BuildRequires:  zstd
 BuildRequires:  golang(API) >= 1.22
 %sysusers_requires
 # Order this package after systemd when both are in one transaction, so the
 # service scriptlets find it in place (expands to nothing if undefined)
 %{?systemd_ordering}
 # sle_version 150600 needs the separately packaged GCC 12 toolchain;
 # everything else uses the default compiler, which must be at least 11.4.0
 %if 0%{?sle_version} == 150600
 BuildRequires:  gcc12-c++
 BuildRequires:  libstdc++6-gcc12
 %else
 BuildRequires:  gcc-c++ >= 11.4.0
 %endif
 # 32bit seems not to be supported anymore
 ExcludeArch:    %ix86 %arm
 %description
 Ollama is a tool for running AI models on one's own hardware.
 It offers a command-line interface and a RESTful API.
 New models can be created or existing ones modified in the
 Ollama library using the Modelfile syntax.
 Source model weights found on Hugging Face and similar sites
 can be imported.
 %prep
 # Unpack the main tarball, extract Source1 (the vendored Go modules) inside
 # the source tree, and apply the patches with strip level 1.
 %autosetup -a1 -p1
 %build
 # Generate ollama.pre from the sysusers file; the pre scriptlet further down
 # loads it with -f.
 %sysusers_generate_pre %{SOURCE3} %{name} %{name}-user.conf
 # Default flags: position-independent executable built from the vendored
 # modules. ppc64 is excluded, so GOFLAGS is not set here on that arch.
 %ifnarch ppc64
 export GOFLAGS="-buildmode=pie -mod=vendor"
 %endif
 %if 0%{?sle_version} == 150600
 # Use the GCC 12 compilers that the preamble requires for this target.
 export CXX=g++-12
 export CC=gcc-12
 # PIE does not work with gcc12 on Leap, so this replaces the GOFLAGS value
 # set above and keeps only vendor mode.
 export GOFLAGS="-mod=vendor"
 %endif
 # NOTE(review): presumably tells the generate step not to apply upstream's
 # own patches to the bundled sources -- confirm against the upstream scripts.
 export OLLAMA_SKIP_PATCHING=1
 # Run the code generators for every package, then compile the main binary
 # from the top-level package.
 go generate ./...
 go build -v .
 %install
 # Path macros such as the bindir and docdir are already absolute, so they
 # are appended to the buildroot directly, without an extra slash.
 # Main binary produced by the build section
 install -D -m 0755 %{name} %{buildroot}%{_bindir}/%{name}
 # systemd unit and sysusers.d account definition
 install -D -m 0644 %{SOURCE2} %{buildroot}%{_unitdir}/%{name}.service
 install -D -m 0644 %{SOURCE3} %{buildroot}%{_sysusersdir}/%{name}-user.conf
 # Empty state directory; the files list assigns it to the ollama user
 install -d %{buildroot}%{_localstatedir}/lib/%{name}
 # Copy the contents of upstream's docs directory into the package doc dir
 mkdir -p "%{buildroot}%{_docdir}/%{name}"
 cp -Ra docs/* "%{buildroot}%{_docdir}/%{name}"
 %check
 # Each spec section is run as its own shell script, so the compiler and
 # GOFLAGS settings from the build section are exported again here.
 %if 0%{?sle_version} == 150600
 export CXX=g++-12
 export CC=gcc-12
 # PIE does not work with gcc12 on Leap, so only vendor mode is requested
 export GOFLAGS="-mod=vendor"
 %endif
 # Run the Go tests of every package in the tree.
 # NOTE(review): GOFLAGS is only exported in the branch above; on all other
 # targets the tests run with the Go toolchain defaults -- confirm intended.
 go test ./...
 # Pre-install: the ollama.pre file generated in the build section is loaded
 # with -f, alongside the systemd service hook.
 %pre -f %{name}.pre
 %service_add_pre %{name}.service
 # Post-install service hook for ollama.service.
 %post
 %service_add_post %{name}.service
 # Pre-uninstall service hook for ollama.service.
 %preun
 %service_del_preun %{name}.service
 # Post-uninstall service hook for ollama.service.
 %postun
 %service_del_postun %{name}.service
 %files
 %doc README.md
 %license LICENSE
 # Listed without the dir directive on purpose: the entry covers the whole
 # tree copied from upstream's docs directory during install.
 %{_docdir}/%{name}
 %{_bindir}/%{name}
 %{_unitdir}/%{name}.service
 %{_sysusersdir}/%{name}-user.conf
 # State directory handed to the service account. The dir directive makes
 # this a bare directory entry, so nothing that might end up below it in the
 # buildroot is swept into the package.
 %dir %attr(-, ollama, ollama) %{_localstatedir}/lib/%{name}
 %changelog
--- a/vendor.tar.zstd
+++ b/vendor.tar.zstd
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:46b2ba6e402d762455dd00b701eab4dcc825133ad33d1028bebfd790a8872fa8
 size 5310383
--- a/ollama-user.conf
+++ b/ollama-user.conf
@ -0,0 +1,2 @
 #Type Name ID GECOS Home directory Shell
 u ollama - "Ollama" /var/lib/ollama -