- Update to version 0.3.12:

  * Llama 3.2: Meta's Llama 3.2 goes small with 1B and 3B
    models.
  * Qwen 2.5 Coder: The latest series of Code-Specific Qwen 
    models, with significant improvements in code generation, 
    code reasoning, and code fixing.
  * Ollama now supports ARM Windows machines
  * Fixed rare issue where Ollama would report a missing .dll
    file on Windows
  * Fixed performance issue for Windows without GPUs
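
For a quick smoke test after installing this update (an illustrative sketch, assuming the ollama service is running and the llama3.2 tags are available from the upstream model library):

    ollama pull llama3.2:1b    # fetch the 1B-parameter Llama 3.2 model
    ollama run llama3.2:3b     # interactive session with the 3B model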

OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/ollama?expand=0&rev=53
Eyad Issa 2024-09-29 21:30:54 +00:00 committed by Git OBS Bridge
commit 2808304cf4
20 changed files with 1048 additions and 0 deletions

.gitattributes vendored Normal file (25 lines)

@@ -0,0 +1,25 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
## Specific LFS patterns
vendor.tar.zstd filter=lfs diff=lfs merge=lfs -text
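
With these attributes, the large archives in this package (the *.obscpio snapshots and vendor.tar.zstd below) are stored as Git LFS pointer files rather than real blobs. A minimal sketch of working with them in a local clone, assuming git-lfs is installed:

    git lfs ls-files    # list the tracked archives and their object IDs
    git lfs pull        # download the actual blobs behind the pointer files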

.gitignore vendored Normal file (1 line)

@@ -0,0 +1 @@
.osc

_service Normal file (23 lines)

@@ -0,0 +1,23 @@
<services>
  <service name="format_spec_file" mode="manual" />
  <service name="obs_scm" mode="manual">
    <param name="url">https://github.com/ollama/ollama.git</param>
    <param name="scm">git</param>
    <param name="revision">v0.3.12</param>
    <param name="versionformat">@PARENT_TAG@</param>
    <param name="versionrewrite-pattern">v(.*)</param>
    <param name="changesgenerate">enable</param>
    <param name="submodules">enable</param>
    <param name="exclude">macapp</param>
    <param name="package-meta">yes</param>
  </service>
  <service name="go_modules" mode="manual">
    <param name="compression">zstd</param>
  </service>
  <service name="set_version" mode="manual" />
  <service name="tar" mode="buildtime">
    <param name="package-meta">yes</param>
  </service>
</services>
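
A rough sketch of how this configuration is exercised from a local checkout (assuming osc is installed and the package has been checked out): the mode="manual" services are run by hand when updating the package, while the mode="buildtime" tar service runs automatically during the build.

    osc service manualrun   # runs the mode="manual" services declared above
    osc status              # review the regenerated archives before committing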

_servicedata Normal file (4 lines)

@@ -0,0 +1,4 @@
<servicedata>
<service name="tar_scm">
<param name="url">https://github.com/ollama/ollama.git</param>
<param name="changesrevision">504a410f02e01a2ec948a92e4579a28295184898</param></service></servicedata>

enable-lto.patch Normal file (20 lines)

@@ -0,0 +1,20 @@
--- a/llm/generate/gen_linux.sh.orig 2024-09-17 12:52:41.511508050 +0200
+++ b/llm/generate/gen_linux.sh 2024-09-17 13:01:55.316347171 +0200
@@ -52,7 +52,7 @@
export CUDACXX=$(command -v nvcc)
fi
fi
-COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
+COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off -DGGML_LTO=on"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
@@ -95,7 +95,7 @@
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
- COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
+ COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off -DGGML_LTO=on"
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
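
The patch only appends -DGGML_LTO=on to the CMake defines that gen_linux.sh already passes, enabling link-time optimization for the vendored llama.cpp build. An illustrative stand-alone configure with the same flag (not how the package builds; gen_linux.sh drives CMake itself):

    cmake -B build -DBUILD_SHARED_LIBS=on -DGGML_NATIVE=off -DGGML_LTO=on
    cmake --build build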

ollama-0.1.45.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ecc23b875cd051a46ed9c9da0481bfd1a1b11e859b63ceb782d673a6534bda5e
size 189517838

ollama-0.2.6.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:391fad97bacee37e8fab00273fd5d5a0a20912fd47c51907131ee1f274c7d2bf
size 161902606

ollama-0.2.8.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1dfa7d3fc6d8dc35af4bd9a458a9f22ab613d07c1e5e48db2b2803ff7f77214
size 151425038

ollama-0.3.0.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:58ac37034b70dfa60b0be1114a82a00e407bd0fb18bff0ad7f4bce86a3c7373f
size 153287182

ollama-0.3.10.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb0a02c484b24248cd1f5f59dd80868f034fc6d202b2b8747ca148b84c3e8ada
size 187615246

ollama-0.3.11.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6cf4205d019475a2d77f2d8290761692ee28a46f5e68d3932e1e44e4f8da9aeb
size 186769422

ollama-0.3.12.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:566dc106905f615862ecf07199f44c5954b32ca1783f6b53ff37dd345b4e2f62
size 178936846

ollama-0.3.3.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80ced6af29997569d44f79381c5cd1d4e51edd3f97d9f5aae0207162b6de26ba
size 153645582

ollama-0.3.6.obscpio Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:96235e8d8cfdc20c732a699cc2036c907027c67482aa75fb0a7f3b6aa5089077
size 184983566

ollama-user.conf Normal file (2 lines)

@@ -0,0 +1,2 @@
#Type Name ID GECOS Home directory Shell
u ollama - "Ollama" /var/lib/ollama -
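
This sysusers.d snippet defines the unprivileged service account; the spec file below hooks it up through %sysusers_generate_pre. A minimal sketch of applying it by hand, assuming the file is installed under /usr/lib/sysusers.d/:

    systemd-sysusers /usr/lib/sysusers.d/ollama-user.conf
    getent passwd ollama    # home directory should be /var/lib/ollama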

ollama.changes Normal file (813 lines)

@@ -0,0 +1,813 @@
-------------------------------------------------------------------
Sat Sep 28 03:53:10 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
- Update to version 0.3.12:
* Llama 3.2: Meta's Llama 3.2 goes small with 1B and 3B
models.
* Qwen 2.5 Coder: The latest series of Code-Specific Qwen
models, with significant improvements in code generation,
code reasoning, and code fixing.
* Ollama now supports ARM Windows machines
* Fixed rare issue where Ollama would report a missing .dll
file on Windows
* Fixed performance issue for Windows without GPUs
-------------------------------------------------------------------
Fri Sep 20 08:29:30 UTC 2024 - adrian@suse.de
- Update to version 0.3.11:
* llm: add solar pro (preview) (#6846)
* server: add tool parsing support for nemotron-mini (#6849)
* make patches git am-able
* CI: dist directories no longer present (#6834)
* CI: clean up naming, fix tagging latest (#6832)
* CI: set platform build build_linux script to keep buildx happy (#6829)
* readme: add Agents-Flex to community integrations (#6788)
* fix typo in import docs (#6828)
* readme: add vim-intelligence-bridge to Terminal section (#6818)
* readme: add Obsidian Quiz Generator plugin to community integrations (#6789)
* Fix incremental builds on linux (#6780)
* Use GOARCH for build dirs (#6779)
* Optimize container images for startup (#6547)
* examples: updated requirements.txt for privategpt example
* examples: polish loganalyzer example (#6744)
* readme: add ollama_moe to community integrations (#6752)
* runner: Flush pending responses before returning
* add "stop" command (#6739)
* refactor show ouput
* readme: add QodeAssist to community integrations (#6754)
* Verify permissions for AMD GPU (#6736)
* add *_proxy for debugging
* docs: update examples to use llama3.1 (#6718)
* Quiet down dockers new lint warnings (#6716)
* catch when model vocab size is set correctly (#6714)
* readme: add crewAI to community integrations (#6699)
* readme: add crewAI with mesop to community integrations
-------------------------------------------------------------------
Tue Sep 17 10:48:34 UTC 2024 - adrian@suse.de
- Update to version 0.3.10:
* openai: align chat temperature and frequency_penalty options with completion (#6688)
* docs: improve linux install documentation (#6683)
* openai: don't scale temperature or frequency_penalty (#6514)
* readme: add Archyve to community integrations (#6680)
* readme: add Plasmoid Ollama Control to community integrations (#6681)
* Improve logging on GPU too small (#6666)
* openai: fix "presence_penalty" typo and add test (#6665)
* Fix gemma2 2b conversion (#6645)
* Document uninstall on windows (#6663)
* Revert "Detect running in a container (#6495)" (#6662)
* llm: make load time stall duration configurable via OLLAMA_LOAD_TIMEOUT
* Introduce GPU Overhead env var (#5922)
* Detect running in a container (#6495)
* readme: add AiLama to the list of community integrations (#4957)
* Update gpu.md: Add RTX 3050 Ti and RTX 3050 Ti (#5888)
* server: fix blob download when receiving a 200 response (#6656)
* readme: add Gentoo package manager entry to community integrations (#5714)
* Update install.sh: Replace "command -v" with encapsulated functionality (#6035)
* readme: include Enchanted for Apple Vision Pro (#4949)
* readme: add lsp-ai to community integrations (#5063)
* readme: add ollama-php library to community integrations (#6361)
* readme: add vnc-lm discord bot community integration (#6644)
* llm: use json.hpp from common (#6642)
* readme: add confichat to community integrations (#6378)
* docs: add group to manual Linux isntructions and verify service is running (#6430)
* readme: add gollm to the list of community libraries (#6099)
* readme: add Cherry Studio to community integrations (#6633)
* readme: add Go fun package (#6421)
* docs: fix spelling error (#6391)
* install.sh: update instructions to use WSL2 (#6450)
* readme: add claude-dev to community integrations (#6630)
* readme: add PyOllaMx project (#6624)
* llm: update llama.cpp commit to 8962422 (#6618)
* Use cuda v11 for driver 525 and older (#6620)
* Log system memory at info (#6617)
* readme: add Painting Droid community integration (#5514)
* readme: update Ollama4j link and add link to Ollama4j Web UI (#6608)
* Fix sprintf to snprintf (#5664)
* readme: add PartCAD tool to readme for generating 3D CAD models using Ollama (#6605)
* Reduce docker image size (#5847)
* readme: add OllamaFarm project (#6508)
* readme: add go-crew and Ollamaclient projects (#6583)
* docs: update faq.md for OLLAMA_MODELS env var permissions (#6587)
* fix(cmd): show info may have nil ModelInfo (#6579)
* docs: update GGUF examples and references (#6577)
* Add findutils to base images (#6581)
* remove any unneeded build artifacts
* doc: Add Nix and Flox to package manager listing (#6074)
* update the openai docs to explain how to set the context size (#6548)
* fix(test): do not clobber models directory
* add llama3.1 chat template (#6545)
* update deprecated warnings
* validate model path
* throw an error when encountering unsupport tensor sizes (#6538)
* Move ollama executable out of bin dir (#6535)
* update templates to use messages
* more tokenizer tests
* add safetensors to the modelfile docs (#6532)
* Fix import image width (#6528)
* Update manual instructions with discrete ROCm bundle (#6445)
* llm: fix typo in comment (#6530)
* adjust image sizes
* clean up convert tokenizer
* detect chat template from configs that contain lists
* update the import docs (#6104)
* server: clean up route names for consistency (#6524)
* Only enable numa on CPUs (#6484)
* gpu: Group GPU Library sets by variant (#6483)
* update faq
* passthrough OLLAMA_HOST path to client
* convert safetensor adapters into GGUF (#6327)
* gpu: Ensure driver version set before variant (#6480)
* llm: Align cmake define for cuda no peer copy (#6455)
* Fix embeddings memory corruption (#6467)
* llama3.1
* convert gemma2
* create bert models from cli
* bert
* Split rocm back out of bundle (#6432)
* CI: remove directories from dist dir before upload step (#6429)
* CI: handle directories during checksum (#6427)
* Fix overlapping artifact name on CI
* Review comments
* Adjust layout to bin+lib/ollama
* Remove Jetpack
* Add windows cuda v12 + v11 support
* Enable cuda v12 flags
* Add cuda v12 variant and selection logic
* Report GPU variant in log
* Add Jetson cuda variants for arm
* Wire up ccache and pigz in the docker based build
* Refactor linux packaging
* server: limit upload parts to 16 (#6411)
* Fix white space.
* Reset NumCtx.
* Override numParallel only if unset.
* fix: chmod new layer to 0o644 when creating it
* fix: Add tooltip to system tray icon
* only skip invalid json manifests
* skip invalid manifest files
* fix noprune
* add `CONTRIBUTING.md` (#6349)
* Fix typo and improve readability (#5964)
* server: reduce max connections used in download (#6347)
* update chatml template format to latest in docs (#6344)
* lint
* Update openai.md to remove extra checkbox (#6345)
* llama3.1 memory
-------------------------------------------------------------------
Thu Aug 15 18:59:48 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.3.6:
* Fixed issue where /api/embed would return an error instead of
loading the model when the input field was not provided.
* ollama create can now import Phi-3 models from Safetensors
* Added progress information to ollama create when importing GGUF
files
* Ollama will now import GGUF files faster by minimizing file
copies
- Update to version 0.3.5:
* Fixed issue where temporary files would not be cleaned up
* Fix rare error when Ollama would start up due to invalid model
data
-------------------------------------------------------------------
Sun Aug 11 02:40:06 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
- Update to version 0.3.4:
* New embedding models
- BGE-M3: a large embedding model from BAAI distinguished for
its versatility in Multi-Functionality, Multi-Linguality, and
Multi-Granularity.
- BGE-Large: a large embedding model trained in English.
- Paraphrase-Multilingual: A multilingual embedding model
trained on parallel data for 50+ languages.
* New embedding API with batch support
- Ollama now supports a new API endpoint /api/embed for
embedding generation:
* This API endpoint supports new features:
- Batches: generate embeddings for several documents in
one request
- Normalized embeddings: embeddings are now normalized,
improving similarity results
- Truncation: a new truncate parameter; when set to false, an
error is returned if an input exceeds the context length
- Metrics: responses include load_duration, total_duration and
prompt_eval_count metrics
-------------------------------------------------------------------
Sat Aug 03 09:41:56 UTC 2024 - eyadlorenzo@gmail.com
- Update to version 0.3.3:
* The /api/embed endpoint now returns statistics: total_duration,
load_duration, and prompt_eval_count
* Added usage metrics to the /v1/embeddings OpenAI compatibility
API
* Fixed issue where /api/generate would respond with an empty
string if provided a context
* Fixed issue where /api/generate would return an incorrect
value for context
* /show modelfile will now render MESSAGE commands correctly
- Update to version 0.3.2:
* Fixed issue where ollama pull would not resume download
progress
* Fixed issue where phi3 would report an error on older versions
-------------------------------------------------------------------
Tue Jul 30 07:08:37 UTC 2024 - Adrian Schröter <adrian@suse.de>
- Update to version 0.3.1:
* Added support for min_p sampling option
* Lowered number of requests required when downloading models
with ollama pull
* ollama create will now autodetect required stop parameters
when importing certain models
* Fixed issue where /save would cause parameters to be saved
incorrectly.
* OpenAI-compatible API will now return a finish_reason of
tool_calls if a tool call occurred.
-------------------------------------------------------------------
Mon Jul 29 09:59:58 UTC 2024 - Adrian Schröter <adrian@suse.de>
- fix build on leap 15.6
- exclude builds on 32bit due to build failures
-------------------------------------------------------------------
Sun Jul 28 11:32:19 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.3.0:
* Ollama now supports tool calling with popular models such
as Llama 3.1. This enables a model to answer a given prompt
using tool(s) it knows about, making it possible for models to
perform more complex tasks or interact with the outside world.
* New models:
~ Llama 3.1
~ Mistral Large 2
~ Firefunction v2
~ Llama-3-Groq-Tool-Use
* Fixed duplicate error message when running ollama create
-------------------------------------------------------------------
Wed Jul 24 14:28:08 UTC 2024 - adrian@suse.de
- Update to version 0.2.8:
* api embed docs (#5282)
* convert: capture `head_dim` for mistral (#5818)
* Update llama.cpp submodule commit to `d94c6e0c` (#5805)
* server: collect nested tool call objects when parsing (#5824)
* Remove no longer supported max vram var
* Refine error reporting for subprocess crash
* Remove out of space test temporarily (#5825)
* llm: consider `head_dim` in llama arch (#5817)
* Adjust windows ROCm discovery
* add patch for tekken (#5807)
* preserve last assistant message (#5802)
* Fix generate test flakyness (#5804)
* server: validate template (#5734)
* OpenAI: Function Based Testing (#5752)
* adjust openai chat msg processing (#5729)
* fix parsing tool calls
* server: check for empty tools array too (#5779)
* always provide content even if empty (#5778)
* server: only parse tool calls if tools are provided (#5771)
* Fix context exhaustion integration test for small gpus
* Refine scheduler unit tests for reliability
-------------------------------------------------------------------
Thu Jul 18 13:09:10 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Fixed issue with shared libraries
-------------------------------------------------------------------
Thu Jul 18 12:27:54 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Added %check section
- Use -v when building
- Update to version 0.2.6:
* New models: MathΣtral is a 7B model designed for math
reasoning and scientific discovery by Mistral AI.
* Fixed issue where uppercase roles such as USER would no longer
work in the chat endpoints
* Fixed issue where empty system message would be included in the
prompt
-------------------------------------------------------------------
Sun Jul 14 17:48:36 UTC 2024 - eyadlorenzo@gmail.com
- Update to version 0.2.5:
* Fixed issue where a model's SYSTEM message would not be applied
- Update to version 0.2.4:
* Fixed issue where context, load_duration and total_duration
fields would not be set in the /api/generate endpoint.
* Ollama will no longer error if loading models larger than
system memory if disk space is available
- Update to version 0.2.3:
* Fix issue where system prompt would not be applied
- Update to version 0.2.2:
* Fixed errors that occurred when using Ollama with Nvidia V100
GPUs
* glm4 models will no longer fail to load from out of memory
errors
* Fixed error that would occur when running deepseek-v2 and
deepseek-coder-v2 models
* Fixed a series of out of memory issues when using Nvidia
GPUs
* Fixed a series of errors that would occur when using multiple
Radeon GPUs
- Update to version 0.2.1:
* Fixed issue where setting OLLAMA_NUM_PARALLEL would cause
models to be reloaded after each request
- Update to version 0.2.0:
* Ollama 0.2.0 is now available with concurrency support.
This unlocks 2 specific features:
~ Ollama can now serve multiple requests at the same time
~ Ollama now supports loading different models at the same time
* New models: GLM-4: A strong multi-lingual general language
model with competitive performance to Llama 3.
* New models: CodeGeeX4: A versatile model for AI software
development scenarios, including code completion.
* New models: Gemma 2: Improved output quality and base text
generation models now available
* Ollama will now show a better error if a model architecture
isn't supported
* Improved handling of quotes and spaces in Modelfile FROM lines
* Ollama will now return an error if the system does not have
enough memory to run a model on Linux
-------------------------------------------------------------------
Sun Jul 07 19:18:11 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.48:
* Fixed issue where Gemma 2 would continuously output when
reaching context limits
* Fixed out of memory and core dump errors when running Gemma 2
* /show info will now show additional model information in
ollama run
* Fixed issue where ollama show would result in an error on
certain vision models
- Update to version 0.1.47:
* Added support for Google Gemma 2 models (9B and 27B)
* Fixed issues with ollama create when importing from Safetensors
-------------------------------------------------------------------
Mon Jun 24 10:11:17 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.46:
* Docs (#5149)
* fix: quantization with template
* Fix use_mmap parsing for modelfiles
* Refine mmap default logic on linux
* Bump latest fedora cuda repo to 39
-------------------------------------------------------------------
Sat Jun 22 10:08:00 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.45:
* New models: DeepSeek-Coder-V2: A 16B & 236B open-source
Mixture-of-Experts code language model that achieves
performance comparable to GPT4-Turbo in code-specific tasks.
* ollama show <model> will now show model information such as
context window size
* Model loading on Windows with CUDA GPUs is now faster
* Setting seed in the /v1/chat/completions OpenAI compatibility
endpoint no longer changes temperature
* Enhanced GPU discovery and multi-gpu support with concurrency
* Introduced a workaround for AMD Vega RX 56 SDMA support on
Linux
* Fix memory prediction for deepseek-v2 and deepseek-coder-v2
models
* api/show endpoint returns extensive model metadata
* GPU configuration variables are now reported in ollama serve
* Update Linux ROCm to v6.1.1
-------------------------------------------------------------------
Tue Jun 18 12:12:41 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Added documentation files to .spec
- Update to version 0.1.44:
* Fixed issue where unicode characters such as emojis would not
be loaded correctly when running ollama create
* Fixed certain cases where Nvidia GPUs would not be detected and
reported as compute capability 1.0 devices
- Update to version 0.1.43:
* New import.md guide for converting and importing models to
Ollama
* Fixed issue where embedding vectors resulting from
/api/embeddings would not be accurate
* JSON mode responses will no longer include invalid escape
characters
* Removing a model will no longer show incorrect File not found
errors
* Fixed issue where running ollama create would result in an
error on Windows with certain file formatting
- Update to version 0.1.42:
* New models: Qwen 2: a new series of large language models
from Alibaba group
* ollama pull is now faster if it detects a model is already
downloaded
* ollama create will now automatically detect prompt templates
for popular model architectures such as Llama, Gemma, Phi and
more.
* Ollama can now be accessed from local apps built with Electron
and Tauri, as well as in developing apps in local html files
* Update welcome prompt in Windows to llama3
* Fixed issues where /api/ps and /api/tags would show invalid
timestamps in responses
- Update to version 0.1.41:
* Fixed issue on Windows 10 and 11 with Intel CPUs with
integrated GPUs where Ollama would encounter an error
-------------------------------------------------------------------
Sat Jun 01 21:12:20 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.40:
* New model: Codestral: Codestral is Mistral AI's first-ever code
model designed for code generation tasks.
* New model: IBM Granite Code: now in 3B and 8B parameter sizes.
* New model: Deepseek V2: A Strong, Economical, and Efficient
Mixture-of-Experts Language Model
* Fixed out of memory and incorrect token issues when running
Codestral on 16GB Macs
* Fixed issue where full-width characters (e.g. Japanese,
Chinese, Russian) were deleted at end of the line when using
ollama run
-------------------------------------------------------------------
Wed May 29 11:38:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.39:
* New model: Cohere Aya 23: A new state-of-the-art, multilingual
LLM covering 23 different languages.
* New model: Mistral 7B 0.3: A new version of Mistral 7B with
initial support for function calling.
* New model: Phi-3 Medium: a 14B parameters, lightweight,
state-of-the-art open model by Microsoft.
* New model: Phi-3 Mini 128K and Phi-3 Medium 128K: versions of
the Phi-3 models that support a context window size of 128K
* New model: Granite code: A family of open foundation models by
IBM for Code Intelligence
* It is now possible to import and quantize Llama 3 and its
finetunes from Safetensors format to Ollama.
* Full changelog at
https://github.com/ollama/ollama/releases/tag/v0.1.39
-------------------------------------------------------------------
Wed May 22 18:05:30 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Added 15.6 build
-------------------------------------------------------------------
Thu May 16 19:55:51 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.38:
* New model: Falcon 2: A new 11B parameters causal decoder-only
model built by TII and trained over 5T tokens.
* New model: Yi 1.5: A new high-performing version of Yi, now
licensed as Apache 2.0. Available in 6B, 9B and 34B sizes.
* Added ollama ps command
* Added /clear command
* Fixed issue where switching loaded models on Windows would take
several seconds
* Running /save will no longer abort the chat session if an
incorrect name is provided
* The /api/tags API endpoint will now correctly return an empty
list [] instead of null if no models are provided
-------------------------------------------------------------------
Sun May 12 19:05:53 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.37:
* Fixed issue where models with uppercase characters in the name
would not show with ollama list
* Fixed usage string for ollama create
* Fix finish_reason being "" instead of null in the OpenAI-
compatible chat API.
-------------------------------------------------------------------
Sun May 12 15:20:28 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Use obs_scm service instead of the deprecated tar_scm
- Use zstd for vendor tarball compression
-------------------------------------------------------------------
Sun May 12 01:39:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.36:
* Fixed exit status 0xc0000005 error with AMD graphics cards on Windows
* Fixed rare out of memory errors when loading a model to run with CPU
- Update to version 0.1.35:
* New models: Llama 3 ChatQA: A model from NVIDIA based on Llama
3 that excels at conversational question answering (QA) and
retrieval-augmented generation (RAG).
* Quantization: ollama create can now quantize models when
importing them using the --quantize or -q flag
* Fixed issue where inference subprocesses wouldn't be cleaned up
on shutdown.
* Fixed a series of out of memory errors when loading models on
multi-GPU systems
* Ctrl+J characters will now properly add newlines in ollama run
* Fixed issues when running ollama show for vision models
* OPTIONS requests to the Ollama API will no longer result in
errors
* Fixed issue where partially downloaded files wouldn't be
cleaned up
* Added a new done_reason field in responses describing why
generation stopped responding
* Ollama will now more accurately estimate how much memory
is available on multi-GPU systems especially when running
different models one after another
- Update to version 0.1.34:
* New model: Llava Llama 3
* New model: Llava Phi 3
* New model: StarCoder2 15B Instruct
* New model: CodeGemma 1.1
* New model: StableLM2 12B
* New model: Moondream 2
* Fixed issues with LLaVa models where they would respond
incorrectly after the first request
* Fixed out of memory errors when running large models such as
Llama 3 70B
* Fixed various issues with Nvidia GPU discovery on Linux and
Windows
* Fixed a series of Modelfile errors when running ollama create
* Fixed no slots available error that occurred when cancelling a
request and then sending follow up requests
* Improved AMD GPU detection on Fedora
* Improved reliability when using the experimental
OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED flags
* ollama serve will now shut down quickly, even if a model is
loading
- Update to version 0.1.33:
* New model: Llama 3
* New model: Phi 3 Mini
* New model: Moondream
* New model: Llama 3 Gradient 1048K
* New model: Dolphin Llama 3
* New model: Qwen 110B
* Fixed issues where the model would not terminate, causing the
API to hang.
* Fixed a series of out of memory errors on Apple Silicon Macs
* Fixed out of memory errors when running Mixtral architecture
models
* Added experimental concurrency features:
~ OLLAMA_NUM_PARALLEL: Handle multiple requests simultaneously
for a single model
~ OLLAMA_MAX_LOADED_MODELS: Load multiple models simultaneously
-------------------------------------------------------------------
Tue Apr 23 02:26:34 UTC 2024 - rrahl0@disroot.org
- Update to version 0.1.32:
* scale graph based on gpu count
* Support unicode characters in model path (#3681)
* darwin: no partial offloading if required memory greater than system
* update llama.cpp submodule to `7593639` (#3665)
* fix padding in decode
* Revert "cmd: provide feedback if OLLAMA_MODELS is set on non-serve command (#3470)" (#3662)
* Added Solar example at README.md (#3610)
* Update langchainjs.md (#2030)
* Added MindsDB information (#3595)
* examples: add more Go examples using the API (#3599)
* Update modelfile.md
* Add llama2 / torch models for `ollama create` (#3607)
* Terminate subprocess if receiving `SIGINT` or `SIGTERM` signals while model is loading (#3653)
* app: gracefully shut down `ollama serve` on windows (#3641)
* types/model: add path helpers (#3619)
* update llama.cpp submodule to `4bd0f93` (#3627)
* types/model: make ParseName variants less confusing (#3617)
* types/model: remove (*Digest).Scan and Digest.Value (#3605)
* Fix rocm deps with new subprocess paths
* mixtral mem
* Revert "types/model: remove (*Digest).Scan and Digest.Value (#3589)"
* types/model: remove (*Digest).Scan and Digest.Value (#3589)
* types/model: remove DisplayLong (#3587)
* types/model: remove MarshalText/UnmarshalText from Digest (#3586)
* types/model: init with Name and Digest types (#3541)
* server: provide helpful workaround hint when stalling on pull (#3584)
* partial offloading
* refactor tensor query
* api: start adding documentation to package api (#2878)
* examples: start adding Go examples using api/ (#2879)
* Handle very slow model loads
* fix: rope
* Revert "build.go: introduce a friendlier way to build Ollama (#3548)" (#3564)
* build.go: introduce a friendlier way to build Ollama (#3548)
* update llama.cpp submodule to `1b67731` (#3561)
* ci: use go-version-file
* Correct directory reference in macapp/README (#3555)
* cgo quantize
* no blob create if already exists
* update generate scripts with new `LLAMA_CUDA` variable, set `HIP_PLATFORM` to avoid compiler errors (#3528)
* Docs: Remove wrong parameter for Chat Completion (#3515)
* no rope parameters
* add command-r graph estimate
* Fail fast if mingw missing on windows
* use an older version of the mac os sdk in release (#3484)
* Add test case for context exhaustion
* CI missing archive
* fix dll compress in windows building
* CI subprocess path fix
* Fix CI release glitches
* update graph size estimate
* Fix macOS builds on older SDKs (#3467)
* cmd: provide feedback if OLLAMA_MODELS is set on non-serve command (#3470)
* feat: add OLLAMA_DEBUG in ollama server help message (#3461)
* Revert options as a ref in the server
* default head_kv to 1
* fix metal gpu
* Bump to b2581
* Refined min memory from testing
* Release gpu discovery library after use
* Safeguard for noexec
* Detect too-old cuda driver
* Integration test improvements
* Apply 01-cache.diff
* Switch back to subprocessing for llama.cpp
* Simplify model conversion (#3422)
* fix generate output
* update memory calcualtions
* refactor model parsing
* Add chromem-go to community integrations (#3437)
* Update README.md (#3436)
* Community Integration: CRAG Ollama Chat (#3423)
* Update README.md (#3378)
* Community Integration: ChatOllama (#3400)
* Update 90_bug_report.yml
* Add gemma safetensors conversion (#3250)
* CI automation for tagging latest images
* Bump ROCm to 6.0.2 patch release
* CI windows gpu builds
* Update troubleshooting link
* fix: trim quotes on OLLAMA_ORIGINS
- add set_version to automatically switch over to the newer version
-------------------------------------------------------------------
Tue Apr 16 10:52:25 UTC 2024 - bwiedemann@suse.com
- Update to version 0.1.31:
* Backport MacOS SDK fix from main
* Apply 01-cache.diff
* fix: workflows
* stub stub
* mangle arch
* only generate on changes to llm subdirectory
* only generate cuda/rocm when changes to llm detected
* Detect arrow keys on windows (#3363)
* add license in file header for vendored llama.cpp code (#3351)
* remove need for `$VSINSTALLDIR` since build will fail if `ninja` cannot be found (#3350)
* change `github.com/jmorganca/ollama` to `github.com/ollama/ollama` (#3347)
* malformed markdown link (#3358)
* Switch runner for final release job
* Use Rocky Linux Vault to get GCC 10.2 installed
* Revert "Switch arm cuda base image to centos 7"
* Switch arm cuda base image to centos 7
* Bump llama.cpp to b2527
* Fix ROCm link in `development.md`
* adds ooo to community integrations (#1623)
* Add cliobot to ollama supported list (#1873)
* Add Dify.AI to community integrations (#1944)
* enh: add ollero.nvim to community applications (#1905)
* Add typechat-cli to Terminal apps (#2428)
* add new Web & Desktop link in readme for alpaca webui (#2881)
* Add LibreChat to Web & Desktop Apps (#2918)
* Add Community Integration: OllamaGUI (#2927)
* Add Community Integration: OpenAOE (#2946)
* Add Saddle (#3178)
* tlm added to README.md terminal section. (#3274)
* Update README.md (#3288)
* Update README.md (#3338)
* Integration tests conditionally pull
* add support for libcudart.so for CUDA devices (adds Jetson support)
* llm: prevent race appending to slice (#3320)
* Bump llama.cpp to b2510
* Add Testcontainers into Libraries section (#3291)
* Revamp go based integration tests
* rename `.gitattributes`
* Bump llama.cpp to b2474
* Add docs for GPU selection and nvidia uvm workaround
* doc: faq gpu compatibility (#3142)
* Update faq.md
* Better tmpdir cleanup
* Update faq.md
* update `faq.md`
* dyn global
* llama: remove server static assets (#3174)
* add `llm/ext_server` directory to `linguist-vendored` (#3173)
* Add Radeon gfx940-942 GPU support
* Wire up more complete CI for releases
* llm,readline: use errors.Is instead of simple == check (#3161)
* server: replace blob prefix separator from ':' to '-' (#3146)
* Add ROCm support to linux install script (#2966)
* .github: fix model and feature request yml (#3155)
* .github: add issue templates (#3143)
* fix: clip memory leak
* Update README.md
* add `OLLAMA_KEEP_ALIVE` to environment variable docs for `ollama serve` (#3127)
* Default Keep Alive environment variable (#3094)
* Use stdin for term discovery on windows
* Update ollama.iss
* restore locale patch (#3091)
* token repeat limit for prediction requests (#3080)
* Fix iGPU detection for linux
* add more docs on for the modelfile message command (#3087)
* warn when json format is expected but not mentioned in prompt (#3081)
* Adapt our build for imported server.cpp
* Import server.cpp as of b2356
* refactor readseeker
* Add docs explaining GPU selection env vars
* chore: fix typo (#3073)
* fix gpu_info_cuda.c compile warning (#3077)
* use `-trimpath` when building releases (#3069)
* relay load model errors to the client (#3065)
* Update troubleshooting.md
* update llama.cpp submodule to `ceca1ae` (#3064)
* convert: fix shape
* Avoid rocm runner and dependency clash
* fix `03-locale.diff`
* Harden for deps file being empty (or short)
* Add ollama executable peer dir for rocm
* patch: use default locale in wpm tokenizer (#3034)
* only copy deps for `amd64` in `build_linux.sh`
* Rename ROCm deps file to avoid confusion (#3025)
* add `macapp` to `.dockerignore`
* add `bundle_metal` and `cleanup_metal` funtions to `gen_darwin.sh`
* tidy cleanup logs
* update llama.cpp submodule to `77d1ac7` (#3030)
* disable gpu for certain model architectures and fix divide-by-zero on memory estimation
* Doc how to set up ROCm builds on windows
* Finish unwinding idempotent payload logic
* update llama.cpp submodule to `c2101a2` (#3020)
* separate out `isLocalIP`
* simplify host checks
* add additional allowed hosts
* Update docs `README.md` and table of contents
* add allowed host middleware and remove `workDir` middleware (#3018)
* decode ggla
* convert: fix default shape
* fix: allow importing a model from name reference (#3005)
* update llama.cpp submodule to `6cdabe6` (#2999)
* Update api.md
* Revert "adjust download and upload concurrency based on available bandwidth" (#2995)
* cmd: tighten up env var usage sections (#2962)
* default terminal width, height
* Refined ROCm troubleshooting docs
* Revamp ROCm support
* update go to 1.22 in other places (#2975)
* docs: Add LLM-X to Web Integration section (#2759)
* fix some typos (#2973)
* Convert Safetensors to an Ollama model (#2824)
* Allow setting max vram for workarounds
* cmd: document environment variables for serve command
* Add Odin Runes, a Feature-Rich Java UI for Ollama, to README (#2440)
* Update api.md
* Add NotesOllama to Community Integrations (#2909)
* Added community link for Ollama Copilot (#2582)
* use LimitGroup for uploads
* adjust group limit based on download speed
* add new LimitGroup for dynamic concurrency
* refactor download run
-------------------------------------------------------------------
Wed Mar 06 23:51:28 UTC 2024 - computersemiexpert@outlook.com
- Update to version 0.1.28:
* Fix embeddings load model behavior (#2848)
* Add Community Integration: NextChat (#2780)
* prepend image tags (#2789)
* fix: print usedMemory size right (#2827)
* bump submodule to `87c91c07663b707e831c59ec373b5e665ff9d64a` (#2828)
* Add ollama user to video group
* Add env var so podman will map cuda GPUs
-------------------------------------------------------------------
Tue Feb 27 08:33:15 UTC 2024 - Jan Engelhardt <jengelh@inai.de>
- Edit description: answer _what_ the package is, and use a nominal
phrase. (https://en.opensuse.org/openSUSE:Package_description_guidelines)
-------------------------------------------------------------------
Fri Feb 23 21:13:53 UTC 2024 - Loren Burkholder <computersemiexpert@outlook.com>
- Added the Ollama package
- Included a systemd service

ollama.obsinfo Normal file (4 lines)

@@ -0,0 +1,4 @@
name: ollama
version: 0.3.12
mtime: 1727216290
commit: e9e9bdb8d904f009e8b1e54af9f77624d481cfb2

ollama.service Normal file (13 lines)

@@ -0,0 +1,13 @@
[Unit]
Description=Ollama Service
After=network-online.target

[Service]
ExecStart=/usr/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3

[Install]
WantedBy=default.target
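
A minimal sketch of enabling and smoke-testing the packaged service, assuming the default API port of 11434:

    sudo systemctl enable --now ollama.service
    curl http://127.0.0.1:11434/api/version    # returns the running version as JSON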

ollama.spec Normal file (113 lines)

@@ -0,0 +1,113 @@
#
# spec file for package ollama
#
# Copyright (c) 2024 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#

Name:           ollama
Version:        0.3.12
Release:        0
Summary:        Tool for running AI models on-premise
License:        MIT
URL:            https://ollama.com
Source:         %{name}-%{version}.tar
Source1:        vendor.tar.zstd
Source2:        ollama.service
Source3:        %{name}-user.conf
Patch0:         enable-lto.patch
BuildRequires:  cmake >= 3.24
BuildRequires:  git
BuildRequires:  sysuser-tools
BuildRequires:  zstd
BuildRequires:  golang(API) >= 1.22
%sysusers_requires
%if 0%{?sle_version} == 150600
BuildRequires:  gcc12-c++
BuildRequires:  libstdc++6-gcc12
%else
BuildRequires:  gcc-c++ >= 11.4.0
%endif
# 32bit seems not to be supported anymore
ExcludeArch:    %ix86 %arm

%description
Ollama is a tool for running AI models on one's own hardware.
It offers a command-line interface and a RESTful API.
New models can be created or existing ones modified in the
Ollama library using the Modelfile syntax.
Source model weights found on Hugging Face and similar sites
can be imported.

%prep
%autosetup -a1 -p1

%build
%sysusers_generate_pre %{SOURCE3} %{name} %{name}-user.conf
%ifnarch ppc64
export GOFLAGS="-buildmode=pie -mod=vendor"
%endif
%if 0%{?sle_version} == 150600
export CXX=g++-12
export CC=gcc-12
# pie doesn't work with gcc12 on leap
export GOFLAGS="-mod=vendor"
%endif
export OLLAMA_SKIP_PATCHING=1
go generate ./...
go build -v .

%install
install -D -m 0755 %{name} %{buildroot}/%{_bindir}/%{name}
install -D -m 0644 %{SOURCE2} %{buildroot}%{_unitdir}/%{name}.service
install -D -m 0644 %{SOURCE3} %{buildroot}%{_sysusersdir}/%{name}-user.conf
install -d %{buildroot}%{_localstatedir}/lib/%{name}
mkdir -p "%{buildroot}/%{_docdir}/%{name}"
cp -Ra docs/* "%{buildroot}/%{_docdir}/%{name}"

%check
%if 0%{?sle_version} == 150600
export CXX=g++-12
export CC=gcc-12
# pie doesn't work with gcc12 on leap
export GOFLAGS="-mod=vendor"
%endif
go test ./...

%pre -f %{name}.pre
%service_add_pre %{name}.service

%post
%service_add_post %{name}.service

%preun
%service_del_preun %{name}.service

%postun
%service_del_postun %{name}.service

%files
%doc README.md
%license LICENSE
%{_docdir}/%{name}
%{_bindir}/%{name}
%{_unitdir}/%{name}.service
%{_sysusersdir}/%{name}-user.conf
%attr(-, ollama, ollama) %{_localstatedir}/lib/%{name}

%changelog
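
A rough sketch of rebuilding this spec locally with osc (repository and architecture names are examples and depend on the project configuration):

    osc build openSUSE_Tumbleweed x86_64 ollama.spec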

vendor.tar.zstd Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ee967a4178877fb84ad7affc4fa20a5a46a9519b1ea9bda2f99863cf2fa3498d
size 5312072