From c97461a42dfe4e08dc371f21dbae214a695b8094fa916e4a6a3beef47c7ee00e Mon Sep 17 00:00:00 2001 From: Eyad Issa Date: Thu, 19 Sep 2024 08:48:38 +0000 Subject: [PATCH] =?UTF-8?q?-=20Update=20to=20version=200.3.10:=20=20=20*?= =?UTF-8?q?=20openai:=20align=20chat=20temperature=20and=20frequency=5Fpen?= =?UTF-8?q?alty=20options=20with=20completion=20(#6688)=20=20=20*=20docs:?= =?UTF-8?q?=20improve=20linux=20install=20documentation=20(#6683)=20=20=20?= =?UTF-8?q?*=20openai:=20don't=20scale=20temperature=20or=20frequency=5Fpe?= =?UTF-8?q?nalty=20(#6514)=20=20=20*=20readme:=20add=20Archyve=20to=20comm?= =?UTF-8?q?unity=20integrations=20(#6680)=20=20=20*=20readme:=20add=20Plas?= =?UTF-8?q?moid=20Ollama=20Control=20to=20community=20integrations=20(#668?= =?UTF-8?q?1)=20=20=20*=20Improve=20logging=20on=20GPU=20too=20small=20(#6?= =?UTF-8?q?666)=20=20=20*=20openai:=20fix=20"presence=5Fpenalty"=20typo=20?= =?UTF-8?q?and=20add=20test=20(#6665)=20=20=20*=20Fix=20gemma2=202b=20conv?= =?UTF-8?q?ersion=20(#6645)=20=20=20*=20Document=20uninstall=20on=20window?= =?UTF-8?q?s=20(#6663)=20=20=20*=20Revert=20"Detect=20running=20in=20a=20c?= =?UTF-8?q?ontainer=20(#6495)"=20(#6662)=20=20=20*=20llm:=20make=20load=20?= =?UTF-8?q?time=20stall=20duration=20configurable=20via=20OLLAMA=5FLOAD=5F?= =?UTF-8?q?TIMEOUT=20=20=20*=20Introduce=20GPU=20Overhead=20env=20var=20(#?= =?UTF-8?q?5922)=20=20=20*=20Detect=20running=20in=20a=20container=20(#649?= =?UTF-8?q?5)=20=20=20*=20readme:=20add=20AiLama=20to=20the=20list=20of=20?= =?UTF-8?q?community=20integrations=20(#4957)=20=20=20*=20Update=20gpu.md:?= =?UTF-8?q?=20Add=20RTX=203050=20Ti=20and=20RTX=203050=20Ti=20(#5888)=20?= =?UTF-8?q?=20=20*=20server:=20fix=20blob=20download=20when=20receiving=20?= =?UTF-8?q?a=20200=20response=20=20(#6656)=20=20=20*=20readme:=20add=20Gen?= =?UTF-8?q?too=20package=20manager=20entry=20to=20community=20integrations?= =?UTF-8?q?=20(#5714)=20=20=20*=20Update=20install.sh=EF=BC=9AReplace=20"c?= =?UTF-8?q?ommand=20-v"=20with=20encapsulated=20functionality=20(#6035)=20?= =?UTF-8?q?=20=20*=20readme:=20include=20Enchanted=20for=20Apple=20Vision?= =?UTF-8?q?=20Pro=20(#4949)=20=20=20*=20readme:=20add=20lsp-ai=20to=20comm?= =?UTF-8?q?unity=20integrations=20(#5063)=20=20=20*=20readme:=20add=20olla?= =?UTF-8?q?ma-php=20library=20to=20community=20integrations=20(#6361)=20?= =?UTF-8?q?=20=20*=20readme:=20add=20vnc-lm=20discord=20bot=20community=20?= =?UTF-8?q?integration=20(#6644)=20=20=20*=20llm:=20use=20json.hpp=20from?= =?UTF-8?q?=20common=20(#6642)=20=20=20*=20readme:=20add=20confichat=20to?= =?UTF-8?q?=20community=20integrations=20(#6378)=20=20=20*=20docs:=20add?= =?UTF-8?q?=20group=20to=20manual=20Linux=20isntructions=20and=20verify=20?= =?UTF-8?q?service=20is=20running=20(#6430)=20=20=20*=20readme:=20add=20go?= =?UTF-8?q?llm=20to=20the=20list=20of=20community=20libraries=20(#6099)=20?= =?UTF-8?q?=20=20*=20readme:=20add=20Cherry=20Studio=20to=20community=20in?= =?UTF-8?q?tegrations=20(#6633)=20=20=20*=20readme:=20add=20Go=20fun=20pac?= =?UTF-8?q?kage=20(#6421)=20=20=20*=20docs:=20fix=20spelling=20error=20(#6?= =?UTF-8?q?391)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OBS-URL: https://build.opensuse.org/package/show/science:machinelearning/ollama?expand=0&rev=49 --- .gitattributes | 25 ++ .gitignore | 1 + _service | 23 ++ _servicedata | 4 + enable-lto.patch | 20 ++ ollama-0.1.45.obscpio | 3 + ollama-0.2.6.obscpio | 3 + ollama-0.2.8.obscpio | 3 + ollama-0.3.0.obscpio | 3 + ollama-0.3.10.obscpio | 3 + 
ollama-0.3.3.obscpio | 3 + ollama-0.3.6.obscpio | 3 + ollama-user.conf | 2 + ollama.changes | 767 ++++++++++++++++++++++++++++++++++++++++++ ollama.obsinfo | 4 + ollama.service | 13 + ollama.spec | 113 +++++++ vendor.tar.zstd | 3 + 18 files changed, 996 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 _service create mode 100644 _servicedata create mode 100644 enable-lto.patch create mode 100644 ollama-0.1.45.obscpio create mode 100644 ollama-0.2.6.obscpio create mode 100644 ollama-0.2.8.obscpio create mode 100644 ollama-0.3.0.obscpio create mode 100644 ollama-0.3.10.obscpio create mode 100644 ollama-0.3.3.obscpio create mode 100644 ollama-0.3.6.obscpio create mode 100644 ollama-user.conf create mode 100644 ollama.changes create mode 100644 ollama.obsinfo create mode 100644 ollama.service create mode 100644 ollama.spec create mode 100644 vendor.tar.zstd diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1076d31 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,25 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +## Specific LFS patterns +vendor.tar.zstd filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/_service b/_service new file mode 100644 index 0000000..20ec761 --- /dev/null +++ b/_service @@ -0,0 +1,23 @@ + + + + https://github.com/ollama/ollama.git + git + v0.3.10 + @PARENT_TAG@ + v(.*) + enable + enable + macapp + yes + + + zstd + + + + + yes + + + diff --git a/_servicedata b/_servicedata new file mode 100644 index 0000000..507c534 --- /dev/null +++ b/_servicedata @@ -0,0 +1,4 @@ + + + https://github.com/ollama/ollama.git + 06d4fba851b91eb55da892d23834e8fe75096ca7 \ No newline at end of file diff --git a/enable-lto.patch b/enable-lto.patch new file mode 100644 index 0000000..7d70831 --- /dev/null +++ b/enable-lto.patch @@ -0,0 +1,20 @@ +--- a/llm/generate/gen_linux.sh.orig 2024-09-17 12:52:41.511508050 +0200 ++++ b/llm/generate/gen_linux.sh 2024-09-17 13:01:55.316347171 +0200 +@@ -52,7 +52,7 @@ + export CUDACXX=$(command -v nvcc) + fi + fi +-COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off" ++COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off 
-DGGML_LTO=on" + source $(dirname $0)/gen_common.sh + init_vars + git_module_setup +@@ -95,7 +95,7 @@ + # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake + # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake + +- COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off" ++ COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off -DGGML_LTO=on" + if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then + # + # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) diff --git a/ollama-0.1.45.obscpio b/ollama-0.1.45.obscpio new file mode 100644 index 0000000..d08649d --- /dev/null +++ b/ollama-0.1.45.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc23b875cd051a46ed9c9da0481bfd1a1b11e859b63ceb782d673a6534bda5e +size 189517838 diff --git a/ollama-0.2.6.obscpio b/ollama-0.2.6.obscpio new file mode 100644 index 0000000..1266c77 --- /dev/null +++ b/ollama-0.2.6.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391fad97bacee37e8fab00273fd5d5a0a20912fd47c51907131ee1f274c7d2bf +size 161902606 diff --git a/ollama-0.2.8.obscpio b/ollama-0.2.8.obscpio new file mode 100644 index 0000000..586056d --- /dev/null +++ b/ollama-0.2.8.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1dfa7d3fc6d8dc35af4bd9a458a9f22ab613d07c1e5e48db2b2803ff7f77214 +size 151425038 diff --git a/ollama-0.3.0.obscpio b/ollama-0.3.0.obscpio new file mode 100644 index 0000000..ee11dee --- /dev/null +++ b/ollama-0.3.0.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ac37034b70dfa60b0be1114a82a00e407bd0fb18bff0ad7f4bce86a3c7373f +size 153287182 diff --git a/ollama-0.3.10.obscpio b/ollama-0.3.10.obscpio new file mode 100644 index 0000000..f42ac7a --- /dev/null +++ b/ollama-0.3.10.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb0a02c484b24248cd1f5f59dd80868f034fc6d202b2b8747ca148b84c3e8ada +size 187615246 diff --git a/ollama-0.3.3.obscpio b/ollama-0.3.3.obscpio new file mode 100644 index 0000000..be4529c --- /dev/null +++ b/ollama-0.3.3.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80ced6af29997569d44f79381c5cd1d4e51edd3f97d9f5aae0207162b6de26ba +size 153645582 diff --git a/ollama-0.3.6.obscpio b/ollama-0.3.6.obscpio new file mode 100644 index 0000000..bd96258 --- /dev/null +++ b/ollama-0.3.6.obscpio @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96235e8d8cfdc20c732a699cc2036c907027c67482aa75fb0a7f3b6aa5089077 +size 184983566 diff --git a/ollama-user.conf b/ollama-user.conf new file mode 100644 index 0000000..1487342 --- /dev/null +++ b/ollama-user.conf @@ -0,0 +1,2 @@ +#Type Name ID GECOS Home directory Shell +u ollama - "Ollama" /var/lib/ollama - diff --git a/ollama.changes b/ollama.changes new file mode 100644 index 0000000..eb7fa92 --- /dev/null +++ b/ollama.changes @@ -0,0 +1,767 @@ +------------------------------------------------------------------- +Tue Sep 17 10:48:34 UTC 2024 - adrian@suse.de + +- Update to version 0.3.10: + * openai: align chat temperature and frequency_penalty options with completion (#6688) + * docs: improve linux install documentation (#6683) + * openai: don't scale temperature or frequency_penalty (#6514) + * readme: add Archyve to community integrations (#6680) + * readme: add Plasmoid Ollama Control to community 
integrations (#6681) + * Improve logging on GPU too small (#6666) + * openai: fix "presence_penalty" typo and add test (#6665) + * Fix gemma2 2b conversion (#6645) + * Document uninstall on windows (#6663) + * Revert "Detect running in a container (#6495)" (#6662) + * llm: make load time stall duration configurable via OLLAMA_LOAD_TIMEOUT + * Introduce GPU Overhead env var (#5922) + * Detect running in a container (#6495) + * readme: add AiLama to the list of community integrations (#4957) + * Update gpu.md: Add RTX 3050 Ti and RTX 3050 Ti (#5888) + * server: fix blob download when receiving a 200 response (#6656) + * readme: add Gentoo package manager entry to community integrations (#5714) + * Update install.sh:Replace "command -v" with encapsulated functionality (#6035) + * readme: include Enchanted for Apple Vision Pro (#4949) + * readme: add lsp-ai to community integrations (#5063) + * readme: add ollama-php library to community integrations (#6361) + * readme: add vnc-lm discord bot community integration (#6644) + * llm: use json.hpp from common (#6642) + * readme: add confichat to community integrations (#6378) + * docs: add group to manual Linux isntructions and verify service is running (#6430) + * readme: add gollm to the list of community libraries (#6099) + * readme: add Cherry Studio to community integrations (#6633) + * readme: add Go fun package (#6421) + * docs: fix spelling error (#6391) + * install.sh: update instructions to use WSL2 (#6450) + * readme: add claude-dev to community integrations (#6630) + * readme: add PyOllaMx project (#6624) + * llm: update llama.cpp commit to 8962422 (#6618) + * Use cuda v11 for driver 525 and older (#6620) + * Log system memory at info (#6617) + * readme: add Painting Droid community integration (#5514) + * readme: update Ollama4j link and add link to Ollama4j Web UI (#6608) + * Fix sprintf to snprintf (#5664) + * readme: add PartCAD tool to readme for generating 3D CAD models using Ollama (#6605) + * Reduce docker image size (#5847) + * readme: add OllamaFarm project (#6508) + * readme: add go-crew and Ollamaclient projects (#6583) + * docs: update faq.md for OLLAMA_MODELS env var permissions (#6587) + * fix(cmd): show info may have nil ModelInfo (#6579) + * docs: update GGUF examples and references (#6577) + * Add findutils to base images (#6581) + * remove any unneeded build artifacts + * doc: Add Nix and Flox to package manager listing (#6074) + * update the openai docs to explain how to set the context size (#6548) + * fix(test): do not clobber models directory + * add llama3.1 chat template (#6545) + * update deprecated warnings + * validate model path + * throw an error when encountering unsupport tensor sizes (#6538) + * Move ollama executable out of bin dir (#6535) + * update templates to use messages + * more tokenizer tests + * add safetensors to the modelfile docs (#6532) + * Fix import image width (#6528) + * Update manual instructions with discrete ROCm bundle (#6445) + * llm: fix typo in comment (#6530) + * adjust image sizes + * clean up convert tokenizer + * detect chat template from configs that contain lists + * update the import docs (#6104) + * server: clean up route names for consistency (#6524) + * Only enable numa on CPUs (#6484) + * gpu: Group GPU Library sets by variant (#6483) + * update faq + * passthrough OLLAMA_HOST path to client + * convert safetensor adapters into GGUF (#6327) + * gpu: Ensure driver version set before variant (#6480) + * llm: Align cmake define for cuda no peer copy (#6455) + * Fix 
embeddings memory corruption (#6467) + * llama3.1 + * convert gemma2 + * create bert models from cli + * bert + * Split rocm back out of bundle (#6432) + * CI: remove directories from dist dir before upload step (#6429) + * CI: handle directories during checksum (#6427) + * Fix overlapping artifact name on CI + * Review comments + * Adjust layout to bin+lib/ollama + * Remove Jetpack + * Add windows cuda v12 + v11 support + * Enable cuda v12 flags + * Add cuda v12 variant and selection logic + * Report GPU variant in log + * Add Jetson cuda variants for arm + * Wire up ccache and pigz in the docker based build + * Refactor linux packaging + * server: limit upload parts to 16 (#6411) + * Fix white space. + * Reset NumCtx. + * Override numParallel only if unset. + * fix: chmod new layer to 0o644 when creating it + * fix: Add tooltip to system tray icon + * only skip invalid json manifests + * skip invalid manifest files + * fix noprune + * add `CONTRIBUTING.md` (#6349) + * Fix typo and improve readability (#5964) + * server: reduce max connections used in download (#6347) + * update chatml template format to latest in docs (#6344) + * lint + * Update openai.md to remove extra checkbox (#6345) + * llama3.1 memory + +------------------------------------------------------------------- +Thu Aug 15 18:59:48 UTC 2024 - Eyad Issa + +- Update to version 0.3.6: + * Fixed issue where /api/embed would return an error instead of + loading the model when the input field was not provided. + * ollama create can now import Phi-3 models from Safetensors + * Added progress information to ollama create when importing GGUF + files + * Ollama will now import GGUF files faster by minimizing file + copies + +- Update to version 0.3.6: + * Fixed issue where temporary files would not be cleaned up + * Fix rare error when Ollama would start up due to invalid model + data + +------------------------------------------------------------------- +Sun Aug 11 02:40:06 UTC 2024 - Alessandro de Oliveira Faria + +- Update to version 0.3.4: + * New embedding models + - BGE-M3: a large embedding model from BAAI distinguished for + its versatility in Multi-Functionality, Multi-Linguality, and + Multi-Granularity. + - BGE-Large: a large embedding model trained in english. + - Paraphrase-Multilingual: A multilingual embedding model + trained on parallel data for 50+ languages. 
+ * New embedding API with batch support + - Ollama now supports a new API endpoint /api/embed for + embedding generation: + * This API endpoint supports new features: + - Batches: generate embeddings for several documents in + one request + - Normalized embeddings: embeddings are now normalized, + improving similarity results + - Truncation: a new truncate parameter that will error if + set to false + - Metrics: responses include load_duration, total_duration and + prompt_eval_count metrics + +------------------------------------------------------------------- +Sat Aug 03 09:41:56 UTC 2024 - eyadlorenzo@gmail.com + +- Update to version 0.3.3: + * The /api/embed endpoint now returns statistics: total_duration, + load_duration, and prompt_eval_count + * Added usage metrics to the /v1/embeddings OpenAI compatibility + API + * Fixed issue where /api/generate would respond with an empty + string if provided a context + * Fixed issue where /api/generate would return an incorrect + value for context + * /show modelfile will now render MESSAGE commands correctly + +- Update to version 0.3.2: + * Fixed issue where ollama pull would not resume download + progress + * Fixed issue where phi3 would report an error on older versions + +------------------------------------------------------------------- +Tue Jul 30 07:08:37 UTC 2024 - Adrian Schröter + +- Update to version 0.3.1: + * Added support for min_p sampling option + * Lowered number of requests required when downloading models + with ollama pull + * ollama create will now autodetect required stop parameters + when importing certain models + * Fixed issue where /save would cause parameters to be saved + incorrectly. + * OpenAI-compatible API will now return a finish_reason of + tool_calls if a tool call occurred. + +------------------------------------------------------------------- +Mon Jul 29 09:59:58 UTC 2024 - Adrian Schröter + +- fix build on leap 15.6 +- exclude builds on 32bit due to build failures + +------------------------------------------------------------------- +Sun Jul 28 11:32:19 UTC 2024 - Eyad Issa + +- Update to version 0.3.0: + * Ollama now supports tool calling with popular models such + as Llama 3.1. This enables a model to answer a given prompt + using tool(s) it knows about, making it possible for models to + perform more complex tasks or interact with the outside world.
+ * New models: + ~ Llama 3.1 + ~ Mistral Large 2 + ~ Firefunction v2 + ~ Llama-3-Groq-Tool-Use + * Fixed duplicate error message when running ollama create + +------------------------------------------------------------------- +Wed Jul 24 14:28:08 UTC 2024 - adrian@suse.de + +- Update to version 0.2.8: + * api embed docs (#5282) + * convert: capture `head_dim` for mistral (#5818) + * Update llama.cpp submodule commit to `d94c6e0c` (#5805) + * server: collect nested tool call objects when parsing (#5824) + * Remove no longer supported max vram var + * Refine error reporting for subprocess crash + * Remove out of space test temporarily (#5825) + * llm: consider `head_dim` in llama arch (#5817) + * Adjust windows ROCm discovery + * add patch for tekken (#5807) + * preserve last assistant message (#5802) + * Fix generate test flakyness (#5804) + * server: validate template (#5734) + * OpenAI: Function Based Testing (#5752) + * adjust openai chat msg processing (#5729) + * fix parsing tool calls + * server: check for empty tools array too (#5779) + * always provide content even if empty (#5778) + * server: only parse tool calls if tools are provided (#5771) + * Fix context exhaustion integration test for small gpus + * Refine scheduler unit tests for reliability + +------------------------------------------------------------------- +Thu Jul 18 13:09:10 UTC 2024 - Eyad Issa + +- Fixed issue with shared libraries + +------------------------------------------------------------------- +Thu Jul 18 12:27:54 UTC 2024 - Eyad Issa + +- Added %check section +- Use -v when building + +- Update to version 0.2.6: + * New models: MathΣtral is a 7B model designed for math + reasoning and scientific discovery by Mistral AI. + * Fixed issue where uppercase roles such as USER would no longer + work in the chat endpoints + * Fixed issue where empty system message would be included in the + prompt + +------------------------------------------------------------------- +Sun Jul 14 17:48:36 UTC 2024 - eyadlorenzo@gmail.com + +- Update to version 0.2.5: + * Fixed issue where a model's SYSTEM message would not be applied + +- Update to version 0.2.4: + * Fixed issue where context, load_duration and total_duration + fields would not be set in the /api/generate endpoint. + * Ollama will no longer error if loading models larger than + system memory if disk space is available + +- Update to version 0.2.3: + * Fix issue where system prompt would not be applied + +- Update to version 0.2.2: + * Fixed errors that occurred when using Ollama with Nvidia V100 + GPUs + * glm4 models will no longer fail to load from out of memory + errors + * Fixed error that would occur when running deepseek-v2 and + deepseek-coder-v2 models + * Fixed a series of out of memory issues when using Nvidia + GPUs + * Fixed a series of errors that would occur when using multiple + Radeon GPUs + +- Update to version 0.2.1: + * Fixed issue where setting OLLAMA_NUM_PARALLEL would cause + models to be reloaded after each request + +- Update to version 0.2.0: + * Ollama 0.2.0 is now available with concurrency support. + This unlocks 2 specific features: + ~ Ollama can now serve multiple requests at the same time + ~ Ollama now supports loading different models at the same time + * New models: GLM-4: A strong multi-lingual general language + model with competitive performance to Llama 3. + * New models: CodeGeeX4: A versatile model for AI software + development scenarios, including code completion.
+ * New models: Gemma 2: Improved output quality and base text + generation models now available + * Ollama will now show a better error if a model architecture + isn't supported + * Improved handling of quotes and spaces in Modelfile FROM lines + * Ollama will now return an error if the system does not have + enough memory to run a model on Linux +------------------------------------------------------------------- +Sun Jul 07 19:18:11 UTC 2024 - Eyad Issa + +- Update to version 0.1.48: + * Fixed issue where Gemma 2 would continuously output when + reaching context limits + * Fixed out of memory and core dump errors when running Gemma 2 + * /show info will now show additional model information in + ollama run + * Fixed issue where ollama show would result in an error on + certain vision models + +- Update to version 0.1.48: + * Added support for Google Gemma 2 models (9B and 27B) + * Fixed issues with ollama create when importing from Safetensors + +------------------------------------------------------------------- +Mon Jun 24 10:11:17 UTC 2024 - Eyad Issa + +- Update to version 0.1.46: + * Docs (#5149) + * fix: quantization with template + * Fix use_mmap parsing for modelfiles + * Refine mmap default logic on linux + * Bump latest fedora cuda repo to 39 + +------------------------------------------------------------------- +Sat Jun 22 10:08:00 UTC 2024 - Eyad Issa + +- Update to version 0.1.45: + * New models: DeepSeek-Coder-V2: A 16B & 236B open-source + Mixture-of-Experts code language model that achieves + performance comparable to GPT4-Turbo in code-specific tasks. + * ollama show will now show model information such as + context window size + * Model loading on Windows with CUDA GPUs is now faster + * Setting seed in the /v1/chat/completions OpenAI compatibility + endpoint no longer changes temperature + * Enhanced GPU discovery and multi-gpu support with concurrency + * Introduced a workaround for AMD Vega RX 56 SDMA support on + Linux + * Fix memory prediction for deepseek-v2 and deepseek-coder-v2 + models + * api/show endpoint returns extensive model metadata + * GPU configuration variables are now reported in ollama serve + * Update Linux ROCm to v6.1.1 + +------------------------------------------------------------------- +Tue Jun 18 12:12:41 UTC 2024 - Eyad Issa + +- Added documentation files to .spec + +- Update to version 0.1.44: + * Fixed issue where unicode characters such as emojis would not + be loaded correctly when running ollama create + * Fixed certain cases where Nvidia GPUs would not be detected and + reported as compute capability 1.0 devices + +- Update to version 0.1.43: + * New import.md guide for converting and importing models to + Ollama + * Fixed issue where embedding vectors resulting from + /api/embeddings would not be accurate + * JSON mode responses will no longer include invalid escape + characters + * Removing a model will no longer show incorrect File not found + errors + * Fixed issue where running ollama create would result in an + error on Windows with certain file formatting + +- Update to version 0.1.42: + * New models: Qwen 2: a new series of large language models + from Alibaba group + * Qwen 2: a new series of large language models from Alibaba + group + * ollama pull is now faster if it detects a model is already + downloaded + * ollama create will now automatically detect prompt templates + for popular model architectures such as Llama, Gemma, Phi and + more. 
+ * Ollama can now be accessed from local apps built with Electron + and Tauri, as well as in developing apps in local html files + * Update welcome prompt in Windows to llama3 + * Fixed issues where /api/ps and /api/tags would show invalid + timestamps in responses + +- Update to version 0.1.41: + * Fixed issue on Windows 10 and 11 with Intel CPUs with + integrated GPUs where Ollama would encounter an error + +------------------------------------------------------------------- +Sat Jun 01 21:12:20 UTC 2024 - Eyad Issa + +- Update to version 0.1.40: + * New model: Codestral: Codestral is Mistral AI’s first-ever code + model designed for code generation tasks. + * New model: IBM Granite Code: now in 3B and 8B parameter sizes. + * New model: Deepseek V2: A Strong, Economical, and Efficient + Mixture-of-Experts Language Model + * Fixed out of memory and incorrect token issues when running + Codestral on 16GB Macs + * Fixed issue where full-width characters (e.g. Japanese, + Chinese, Russian) were deleted at end of the line when using + ollama run + +------------------------------------------------------------------- +Wed May 29 11:38:26 UTC 2024 - Eyad Issa + +- Update to version 0.1.39: + * New model: Cohere Aya 23: A new state-of-the-art, multilingual + LLM covering 23 different languages. + * New model: Mistral 7B 0.3: A new version of Mistral 7B with + initial support for function calling. + * New model: Phi-3 Medium: a 14B parameters, lightweight, + state-of-the-art open model by Microsoft. + * New model: Phi-3 Mini 128K and Phi-3 Medium 128K: versions of + the Phi-3 models that support a context window size of 128K + * New model: Granite code: A family of open foundation models by + IBM for Code Intelligence + * It is now possible to import and quantize Llama 3 and its + finetunes from Safetensors format to Ollama. + * Full changelog at + https://github.com/ollama/ollama/releases/tag/v0.1.39 + +------------------------------------------------------------------- +Wed May 22 18:05:30 UTC 2024 - Eyad Issa + +- Added 15.6 build + +------------------------------------------------------------------- +Thu May 16 19:55:51 UTC 2024 - Eyad Issa + +- Update to version 0.1.38: + * New model: Falcon 2: A new 11B parameters causal decoder-only + model built by TII and trained over 5T tokens. + * New model: Yi 1.5: A new high-performing version of Yi, now + licensed as Apache 2.0. Available in 6B, 9B and 34B sizes. + * Added ollama ps command + * Added /clear command + * Fixed issue where switching loaded models on Windows would take + several seconds + * Running /save will no longer abort the chat session if an + incorrect name is provided + * The /api/tags API endpoint will now correctly return an empty + list [] instead of null if no models are provided + +------------------------------------------------------------------- +Sun May 12 19:05:53 UTC 2024 - Eyad Issa + +- Update to version 0.1.37: + * Fixed issue where models with uppercase characters in the name + would not show with ollama list + * Fixed usage string for ollama create + * Fix finish_reason being "" instead of null in the Open-AI + compatible chat API. 
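Several entries in this changelog refer to Ollama's HTTP interfaces: the native /api/embed endpoint with batch input introduced in 0.3.4 above, and the OpenAI-compatible /v1/chat/completions endpoint whose finish_reason handling is fixed in the 0.1.37 entry just before this point. As a rough orientation for reviewers of this package, a local smoke test of those two endpoints could look like the sketch below once the server is listening on its default port 11434; the model names are illustrative only and have to be pulled first.

  # batch embeddings via the native endpoint (input accepts a list of strings)
  curl http://127.0.0.1:11434/api/embed -d '{
    "model": "all-minilm",
    "input": ["why is the sky blue?", "why is grass green?"]
  }'

  # chat completion through the OpenAI-compatible endpoint
  curl http://127.0.0.1:11434/v1/chat/completions -d '{
    "model": "llama3.1",
    "messages": [{"role": "user", "content": "Say hello"}]
  }'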
+ +------------------------------------------------------------------- +Sun May 12 15:20:28 UTC 2024 - Eyad Issa + +- Use obs_scm service instead of the deprecated tar_scm +- Use zstd for vendor tarball compression + +------------------------------------------------------------------- +Sun May 12 01:39:26 UTC 2024 - Eyad Issa + +- Update to version 0.1.36: + * Fixed exit status 0xc0000005 error with AMD graphics cards on Windows + * Fixed rare out of memory errors when loading a model to run with CPU + +- Update to version 0.1.35: + * New models: Llama 3 ChatQA: A model from NVIDIA based on Llama + 3 that excels at conversational question answering (QA) and + retrieval-augmented generation (RAG). + * Quantization: ollama create can now quantize models when + importing them using the --quantize or -q flag + * Fixed issue where inference subprocesses wouldn't be cleaned up + on shutdown. + * Fixed a series of out of memory errors when loading models on + multi-GPU systems + * Ctrl+J characters will now properly add newlines in ollama run + * Fixed issues when running ollama show for vision models + * OPTIONS requests to the Ollama API will no longer result in + errors + * Fixed issue where partially downloaded files wouldn't be + cleaned up + * Added a new done_reason field in responses describing why + generation stopped + * Ollama will now more accurately estimate how much memory + is available on multi-GPU systems especially when running + different models one after another + +- Update to version 0.1.34: + * New model: Llava Llama 3 + * New model: Llava Phi 3 + * New model: StarCoder2 15B Instruct + * New model: CodeGemma 1.1 + * New model: StableLM2 12B + * New model: Moondream 2 + * Fixed issues with LLaVa models where they would respond + incorrectly after the first request + * Fixed out of memory errors when running large models such as + Llama 3 70B + * Fixed various issues with Nvidia GPU discovery on Linux and + Windows + * Fixed a series of Modelfile errors when running ollama create + * Fixed no slots available error that occurred when cancelling a + request and then sending follow up requests + * Improved AMD GPU detection on Fedora + * Improved reliability when using the experimental + OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED flags + * ollama serve will now shut down quickly, even if a model is + loading + +- Update to version 0.1.33: + * New model: Llama 3 + * New model: Phi 3 Mini + * New model: Moondream + * New model: Llama 3 Gradient 1048K + * New model: Dolphin Llama 3 + * New model: Qwen 110B + * Fixed issues where the model would not terminate, causing the + API to hang.
+ * Fixed a series of out of memory errors on Apple Silicon Macs + * Fixed out of memory errors when running Mixtral architecture + models + * Aded experimental concurrency features: + ~ OLLAMA_NUM_PARALLEL: Handle multiple requests simultaneously + for a single model + ~ OLLAMA_MAX_LOADED_MODELS: Load multiple models simultaneously + +------------------------------------------------------------------- +Tue Apr 23 02:26:34 UTC 2024 - rrahl0@disroot.org + +- Update to version 0.1.32: + * scale graph based on gpu count + * Support unicode characters in model path (#3681) + * darwin: no partial offloading if required memory greater than system + * update llama.cpp submodule to `7593639` (#3665) + * fix padding in decode + * Revert "cmd: provide feedback if OLLAMA_MODELS is set on non-serve command (#3470)" (#3662) + * Added Solar example at README.md (#3610) + * Update langchainjs.md (#2030) + * Added MindsDB information (#3595) + * examples: add more Go examples using the API (#3599) + * Update modelfile.md + * Add llama2 / torch models for `ollama create` (#3607) + * Terminate subprocess if receiving `SIGINT` or `SIGTERM` signals while model is loading (#3653) + * app: gracefully shut down `ollama serve` on windows (#3641) + * types/model: add path helpers (#3619) + * update llama.cpp submodule to `4bd0f93` (#3627) + * types/model: make ParseName variants less confusing (#3617) + * types/model: remove (*Digest).Scan and Digest.Value (#3605) + * Fix rocm deps with new subprocess paths + * mixtral mem + * Revert "types/model: remove (*Digest).Scan and Digest.Value (#3589)" + * types/model: remove (*Digest).Scan and Digest.Value (#3589) + * types/model: remove DisplayLong (#3587) + * types/model: remove MarshalText/UnmarshalText from Digest (#3586) + * types/model: init with Name and Digest types (#3541) + * server: provide helpful workaround hint when stalling on pull (#3584) + * partial offloading + * refactor tensor query + * api: start adding documentation to package api (#2878) + * examples: start adding Go examples using api/ (#2879) + * Handle very slow model loads + * fix: rope + * Revert "build.go: introduce a friendlier way to build Ollama (#3548)" (#3564) + * build.go: introduce a friendlier way to build Ollama (#3548) + * update llama.cpp submodule to `1b67731` (#3561) + * ci: use go-version-file + * Correct directory reference in macapp/README (#3555) + * cgo quantize + * no blob create if already exists + * update generate scripts with new `LLAMA_CUDA` variable, set `HIP_PLATFORM` to avoid compiler errors (#3528) + * Docs: Remove wrong parameter for Chat Completion (#3515) + * no rope parameters + * add command-r graph estimate + * Fail fast if mingw missing on windows + * use an older version of the mac os sdk in release (#3484) + * Add test case for context exhaustion + * CI missing archive + * fix dll compress in windows building + * CI subprocess path fix + * Fix CI release glitches + * update graph size estimate + * Fix macOS builds on older SDKs (#3467) + * cmd: provide feedback if OLLAMA_MODELS is set on non-serve command (#3470) + * feat: add OLLAMA_DEBUG in ollama server help message (#3461) + * Revert options as a ref in the server + * default head_kv to 1 + * fix metal gpu + * Bump to b2581 + * Refined min memory from testing + * Release gpu discovery library after use + * Safeguard for noexec + * Detect too-old cuda driver + * Integration test improvements + * Apply 01-cache.diff + * Switch back to subprocessing for llama.cpp + * Simplify model conversion (#3422) + * 
fix generate output + * update memory calcualtions + * refactor model parsing + * Add chromem-go to community integrations (#3437) + * Update README.md (#3436) + * Community Integration: CRAG Ollama Chat (#3423) + * Update README.md (#3378) + * Community Integration: ChatOllama (#3400) + * Update 90_bug_report.yml + * Add gemma safetensors conversion (#3250) + * CI automation for tagging latest images + * Bump ROCm to 6.0.2 patch release + * CI windows gpu builds + * Update troubleshooting link + * fix: trim quotes on OLLAMA_ORIGINS + +- add set_version to automatically switch over to the newer version + +------------------------------------------------------------------- +Tue Apr 16 10:52:25 UTC 2024 - bwiedemann@suse.com + +- Update to version 0.1.31: + * Backport MacOS SDK fix from main + * Apply 01-cache.diff + * fix: workflows + * stub stub + * mangle arch + * only generate on changes to llm subdirectory + * only generate cuda/rocm when changes to llm detected + * Detect arrow keys on windows (#3363) + * add license in file header for vendored llama.cpp code (#3351) + * remove need for `$VSINSTALLDIR` since build will fail if `ninja` cannot be found (#3350) + * change `github.com/jmorganca/ollama` to `github.com/ollama/ollama` (#3347) + * malformed markdown link (#3358) + * Switch runner for final release job + * Use Rocky Linux Vault to get GCC 10.2 installed + * Revert "Switch arm cuda base image to centos 7" + * Switch arm cuda base image to centos 7 + * Bump llama.cpp to b2527 + * Fix ROCm link in `development.md` + * adds ooo to community integrations (#1623) + * Add cliobot to ollama supported list (#1873) + * Add Dify.AI to community integrations (#1944) + * enh: add ollero.nvim to community applications (#1905) + * Add typechat-cli to Terminal apps (#2428) + * add new Web & Desktop link in readme for alpaca webui (#2881) + * Add LibreChat to Web & Desktop Apps (#2918) + * Add Community Integration: OllamaGUI (#2927) + * Add Community Integration: OpenAOE (#2946) + * Add Saddle (#3178) + * tlm added to README.md terminal section. 
(#3274) + * Update README.md (#3288) + * Update README.md (#3338) + * Integration tests conditionally pull + * add support for libcudart.so for CUDA devices (adds Jetson support) + * llm: prevent race appending to slice (#3320) + * Bump llama.cpp to b2510 + * Add Testcontainers into Libraries section (#3291) + * Revamp go based integration tests + * rename `.gitattributes` + * Bump llama.cpp to b2474 + * Add docs for GPU selection and nvidia uvm workaround + * doc: faq gpu compatibility (#3142) + * Update faq.md + * Better tmpdir cleanup + * Update faq.md + * update `faq.md` + * dyn global + * llama: remove server static assets (#3174) + * add `llm/ext_server` directory to `linguist-vendored` (#3173) + * Add Radeon gfx940-942 GPU support + * Wire up more complete CI for releases + * llm,readline: use errors.Is instead of simple == check (#3161) + * server: replace blob prefix separator from ':' to '-' (#3146) + * Add ROCm support to linux install script (#2966) + * .github: fix model and feature request yml (#3155) + * .github: add issue templates (#3143) + * fix: clip memory leak + * Update README.md + * add `OLLAMA_KEEP_ALIVE` to environment variable docs for `ollama serve` (#3127) + * Default Keep Alive environment variable (#3094) + * Use stdin for term discovery on windows + * Update ollama.iss + * restore locale patch (#3091) + * token repeat limit for prediction requests (#3080) + * Fix iGPU detection for linux + * add more docs on for the modelfile message command (#3087) + * warn when json format is expected but not mentioned in prompt (#3081) + * Adapt our build for imported server.cpp + * Import server.cpp as of b2356 + * refactor readseeker + * Add docs explaining GPU selection env vars + * chore: fix typo (#3073) + * fix gpu_info_cuda.c compile warning (#3077) + * use `-trimpath` when building releases (#3069) + * relay load model errors to the client (#3065) + * Update troubleshooting.md + * update llama.cpp submodule to `ceca1ae` (#3064) + * convert: fix shape + * Avoid rocm runner and dependency clash + * fix `03-locale.diff` + * Harden for deps file being empty (or short) + * Add ollama executable peer dir for rocm + * patch: use default locale in wpm tokenizer (#3034) + * only copy deps for `amd64` in `build_linux.sh` + * Rename ROCm deps file to avoid confusion (#3025) + * add `macapp` to `.dockerignore` + * add `bundle_metal` and `cleanup_metal` funtions to `gen_darwin.sh` + * tidy cleanup logs + * update llama.cpp submodule to `77d1ac7` (#3030) + * disable gpu for certain model architectures and fix divide-by-zero on memory estimation + * Doc how to set up ROCm builds on windows + * Finish unwinding idempotent payload logic + * update llama.cpp submodule to `c2101a2` (#3020) + * separate out `isLocalIP` + * simplify host checks + * add additional allowed hosts + * Update docs `README.md` and table of contents + * add allowed host middleware and remove `workDir` middleware (#3018) + * decode ggla + * convert: fix default shape + * fix: allow importing a model from name reference (#3005) + * update llama.cpp submodule to `6cdabe6` (#2999) + * Update api.md + * Revert "adjust download and upload concurrency based on available bandwidth" (#2995) + * cmd: tighten up env var usage sections (#2962) + * default terminal width, height + * Refined ROCm troubleshooting docs + * Revamp ROCm support + * update go to 1.22 in other places (#2975) + * docs: Add LLM-X to Web Integration section (#2759) + * fix some typos (#2973) + * Convert Safetensors to an Ollama model (#2824) + * 
Allow setting max vram for workarounds + * cmd: document environment variables for serve command + * Add Odin Runes, a Feature-Rich Java UI for Ollama, to README (#2440) + * Update api.md + * Add NotesOllama to Community Integrations (#2909) + * Added community link for Ollama Copilot (#2582) + * use LimitGroup for uploads + * adjust group limit based on download speed + * add new LimitGroup for dynamic concurrency + * refactor download run + +------------------------------------------------------------------- +Wed Mar 06 23:51:28 UTC 2024 - computersemiexpert@outlook.com + +- Update to version 0.1.28: + * Fix embeddings load model behavior (#2848) + * Add Community Integration: NextChat (#2780) + * prepend image tags (#2789) + * fix: print usedMemory size right (#2827) + * bump submodule to `87c91c07663b707e831c59ec373b5e665ff9d64a` (#2828) + * Add ollama user to video group + * Add env var so podman will map cuda GPUs + +------------------------------------------------------------------- +Tue Feb 27 08:33:15 UTC 2024 - Jan Engelhardt + +- Edit description, answer _what_ the package is and use nominal + phrase. (https://en.opensuse.org/openSUSE:Package_description_guidelines) + +------------------------------------------------------------------- +Fri Feb 23 21:13:53 UTC 2024 - Loren Burkholder + +- Added the Ollama package +- Included a systemd service diff --git a/ollama.obsinfo b/ollama.obsinfo new file mode 100644 index 0000000..4829e3e --- /dev/null +++ b/ollama.obsinfo @@ -0,0 +1,4 @@ +name: ollama +version: 0.3.10 +mtime: 1725725288 +commit: 06d4fba851b91eb55da892d23834e8fe75096ca7 diff --git a/ollama.service b/ollama.service new file mode 100644 index 0000000..22828b1 --- /dev/null +++ b/ollama.service @@ -0,0 +1,13 @@ +[Unit] +Description=Ollama Service +After=network-online.target + +[Service] +ExecStart=/usr/bin/ollama serve +User=ollama +Group=ollama +Restart=always +RestartSec=3 + +[Install] +WantedBy=default.target diff --git a/ollama.spec b/ollama.spec new file mode 100644 index 0000000..19e234e --- /dev/null +++ b/ollama.spec @@ -0,0 +1,113 @@ +# +# spec file for package ollama +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +Name: ollama +Version: 0.3.10 +Release: 0 +Summary: Tool for running AI models on-premise +License: MIT +URL: https://ollama.com +Source: %{name}-%{version}.tar +Source1: vendor.tar.zstd +Source2: ollama.service +Source3: %{name}-user.conf +Patch0: enable-lto.patch +BuildRequires: cmake >= 3.24 +BuildRequires: git +BuildRequires: sysuser-tools +BuildRequires: zstd +BuildRequires: golang(API) >= 1.22 +%sysusers_requires +%if 0%{?sle_version} == 150600 +BuildRequires: gcc12-c++ +BuildRequires: libstdc++6-gcc12 +%else +BuildRequires: gcc-c++ >= 11.4.0 +%endif +# 32bit seems not to be supported anymore +ExcludeArch: %ix86 %arm + +%description +Ollama is a tool for running AI models on one's own hardware. 
+It offers a command-line interface and a RESTful API. +New models can be created or existing ones modified in the +Ollama library using the Modelfile syntax. +Source model weights found on Hugging Face and similar sites +can be imported. + +%prep +%autosetup -a1 -p1 + +%build +%sysusers_generate_pre %{SOURCE3} %{name} %{name}-user.conf + +%ifnarch ppc64 +export GOFLAGS="-buildmode=pie -mod=vendor" +%endif +%if 0%{?sle_version} == 150600 +export CXX=g++-12 +export CC=gcc-12 +# pie doesn't work with gcc12 on leap +export GOFLAGS="-mod=vendor" +%endif + +export OLLAMA_SKIP_PATCHING=1 + +go generate ./... +go build -v . + +%install +install -D -m 0755 %{name} %{buildroot}/%{_bindir}/%{name} +install -D -m 0644 %{SOURCE2} %{buildroot}%{_unitdir}/%{name}.service +install -D -m 0644 %{SOURCE3} %{buildroot}%{_sysusersdir}/%{name}-user.conf +install -d %{buildroot}%{_localstatedir}/lib/%{name} + +mkdir -p "%{buildroot}/%{_docdir}/%{name}" +cp -Ra docs/* "%{buildroot}/%{_docdir}/%{name}" + +%check +%if 0%{?sle_version} == 150600 +export CXX=g++-12 +export CC=gcc-12 +# pie doesn't work with gcc12 on leap +export GOFLAGS="-mod=vendor" +%endif +go test ./... + +%pre -f %{name}.pre +%service_add_pre %{name}.service + +%post +%service_add_post %{name}.service + +%preun +%service_del_preun %{name}.service + +%postun +%service_del_postun %{name}.service + +%files +%doc README.md +%license LICENSE +%{_docdir}/%{name} +%{_bindir}/%{name} +%{_unitdir}/%{name}.service +%{_sysusersdir}/%{name}-user.conf +%attr(-, ollama, ollama) %{_localstatedir}/lib/%{name} + +%changelog diff --git a/vendor.tar.zstd b/vendor.tar.zstd new file mode 100644 index 0000000..fb6b342 --- /dev/null +++ b/vendor.tar.zstd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc07ae239af7667ad257ce20adddb3d6271ef14d06ef632348d2fb6c83a49db +size 5355011
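The spec file relies on the OBS source services defined in _service to produce the ollama-%{version} archive and the vendored Go modules in vendor.tar.zstd, and it ships a systemd unit plus a sysusers.d entry for the dedicated ollama user. A rough local verification of a revision like this one could look as follows; whether runall or manualrun applies depends on the service modes, the repository and architecture passed to osc build are only examples, and the final check assumes the server's default listen address.

  # refresh the generated sources (obscpio and vendor tarball), then rebuild locally
  osc service runall
  osc build openSUSE_Tumbleweed x86_64 ollama.spec

  # after installing the resulting RPM: user, unit and API should be in place
  getent passwd ollama
  sudo systemctl enable --now ollama.service
  curl http://127.0.0.1:11434/api/tags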