Compare commits


No commits in common. "factory" and "devel" have entirely different histories.

16 changed files with 122 additions and 577 deletions

_service

@@ -3,7 +3,7 @@
 <service name="obs_scm" mode="manual">
 <param name="url">https://github.com/ollama/ollama.git</param>
 <param name="scm">git</param>
-<param name="revision">v0.5.7</param>
+<param name="revision">v0.3.6</param>
 <param name="versionformat">@PARENT_TAG@</param>
 <param name="versionrewrite-pattern">v(.*)</param>
 <param name="changesgenerate">enable</param>
@@ -19,5 +19,5 @@
 <service name="tar" mode="buildtime">
 <param name="package-meta">yes</param>
 </service>
 </services>
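
The obs_scm service above is what pins the packaged release: the <revision> parameter selects the upstream git tag, and versionformat/versionrewrite-pattern turn a tag such as v0.3.6 into the package version 0.3.6. A minimal sketch of refreshing the sources locally after changing the revision, assuming an osc checkout of this package (the exact osc subcommand may differ between osc versions):

  osc service manualrun   # run the mode="manual" obs_scm service: re-fetch the tag, regenerate the archive and ollama.obsinfo
  osc status              # review the regenerated files before committing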

_servicedata

@@ -1,4 +1,4 @@
 <servicedata>
 <service name="tar_scm">
 <param name="url">https://github.com/ollama/ollama.git</param>
-<param name="changesrevision">a420a453b4783841e3e79c248ef0fe9548df6914</param></service></servicedata>
+<param name="changesrevision">4c4fe3f87fe1858b35bd0d41e093a0039ec4cee4</param></service></servicedata>

enable-lto.patch (new file, 28 lines)

@@ -0,0 +1,28 @@
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index db2c6c3..8194cd9 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -52,6 +52,7 @@ if [ -z "${CUDACXX}" ]; then
fi
fi
COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
+COMMON_CMAKE_DEFS="${COMMON_CMAKE_DEFS} -DGGML_LTO=on -DCMAKE_BUILD_TYPE=Release"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
@@ -78,6 +79,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
init_vars
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
+ CMAKE_DEFS="${CMAKE_DEFS} -DGGML_LTO=on"
BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building custom CPU"
build
@@ -94,6 +96,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
+ COMMON_CPU_DEFS="${COMMON_CPU_DEFS} -DGGML_LTO=on -DCMAKE_BUILD_TYPE=Release"
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
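
The patch only appends two CMake defines, GGML_LTO and an explicit Release build type, to the define strings that gen_linux.sh already assembles. As a rough standalone equivalent, configuring the bundled llama.cpp/GGML sources by hand would look something like the sketch below (source and build directory names are illustrative, not taken from the package):

  # illustrative only: mirror the flags the patch adds on top of the existing defines
  cmake -S llm/llama.cpp -B build/linux/x86_64/cpu \
      -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on \
      -DGGML_LTO=on -DCMAKE_BUILD_TYPE=Release
  cmake --build build/linux/x86_64/cpu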

ollama-0.1.45.obscpio (Stored with Git LFS) Normal file: binary file not shown
ollama-0.2.6.obscpio (Stored with Git LFS) Normal file: binary file not shown
ollama-0.2.8.obscpio (Stored with Git LFS) Normal file: binary file not shown
ollama-0.3.0.obscpio (Stored with Git LFS) Normal file: binary file not shown
ollama-0.3.3.obscpio (Stored with Git LFS) Normal file: binary file not shown
ollama-0.3.6.obscpio (Stored with Git LFS) Normal file: binary file not shown

Deleted file (Git LFS pointer):

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:efb1f1510c40a71f933e8b1ad3b88acb499f017e1bfd271492d9ccf5b7a69d2c
-size 154515982
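
The .obscpio archives and the vendor tarball are tracked with Git LFS, so a plain clone of the packaging repository only contains pointer stubs like the one above. A short sketch for materialising the real archives, assuming git-lfs is installed:

  git lfs install   # one-time setup of the LFS filters
  git lfs pull      # replace the pointer files with the actual archives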

ollama.changes

@@ -1,438 +1,3 @@
-------------------------------------------------------------------
Mon Jan 27 14:21:42 UTC 2025 - Adrian Schröter <adrian@suse.de>
- Make ollama configurable by the admin via /etc/sysconfig/ollama
(boo#1236008)
- cleanup reproducible.patch
-------------------------------------------------------------------
Thu Jan 16 23:52:45 UTC 2025 - Eyad Issa <eyadlorenzo@gmail.com>
- Removed 01-build-verbose.patch: embedded GOFLAG into .spec file
- Disabled reproducible.patch: should be not needed, as .gz is
not produced anymore
- Update to version 0.5.7:
* Fixed issue where using two FROM commands in Modelfile
* Support importing Command R and Command R+ architectures
from safetensors
- Update to version 0.5.6:
* Fixed errors that would occur when running ollama create on
Windows and when using absolute paths
- Update to version 0.5.5:
* New models:
~ Phi-4
~ Command R7B
~ DeepSeek-V3
~ OLMo 2
~ Dolphin 3
~ SmallThinker:
~ Granite 3.1 Dense
~ Granite 3.1 MoE
* The /api/create API endpoint that powers ollama create has
been changed to improve conversion time and also accept a JSON
object.
* Fixed runtime error that would occur when filling the model's
context window
* Fixed crash that would occur when quotes were used in /save
* Fixed errors that would occur when sending x-stainless headers
from OpenAI clients
- Update to version 0.5.4:
* New model: Falcon3
* Fixed issue where providing null to format would result in
an error
- Update to version 0.5.3:
* Fixed runtime errors on older Intel Macs
* Fixed issue where setting the format field to "" would cause
an error
- Update to version 0.5.2:
* New model: EXAONE 3.5
* Fixed issue where whitespace would get trimmed from prompt
when images were provided
* Improved memory estimation when scheduling models
* OLLAMA_ORIGINS will now check hosts in a case insensitive
manner
-------------------------------------------------------------------
Thu Dec 12 14:00:56 UTC 2024 - Bernhard Wiedemann <bwiedemann@suse.com>
- Add reproducible.patch for deterministic .gz creation (boo#1047218)
-------------------------------------------------------------------
Sat Dec 07 18:24:04 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.5.1:
* Fixed issue where Ollama's API would generate JSON output when
specifying "format": null
* Fixed issue where passing --format json to ollama run would
cause an error
- Update to version 0.5.0:
* New models:
~ Llama 3.3: a new state of the art 70B model.
~ Snowflake Arctic Embed 2: Snowflake's frontier embedding
model.
* Ollama now supports structured outputs, making it possible to
constrain a model's output to a specific format defined by a
JSON schema. The Ollama Python and JavaScript libraries have
been updated to support structured outputs, together with
Ollama's OpenAI-compatible API endpoints.
* Fixed error importing model vocabulary files
* Experimental: new flag to set KV cache quantization to 4-bit
(q4_0), 8-bit (q8_0) or 16-bit (f16). This reduces VRAM
requirements for longer context windows.
- Update to version 0.4.7:
* Enable index tracking for tools - openai api support (#7888)
* llama: fix typo and formatting in readme (#7876)
* readme: add SpaceLlama, YouLama, and DualMind to community
integrations (#7216)
-------------------------------------------------------------------
Sat Nov 30 19:47:23 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.4.6:
* New model: QwQ: an experimental research model by the Qwen
team, focused on advancing AI reasoning capabilities.
* Tool calls will now be included in streaming responses
* Ollama will now provide an error when submitting SVG images
* Image tokens will no longer be counted in token counts when
running a text-only model
- Update to version 0.4.5:
* The Ollama Python Library has been updated
* Fixed issue where HTTPS_PROXY and HTTP_PROXY environment
variables would have no effect
* Ollama will now accept X-Stainless-Retry-Count used by many
OpenAI API clients
* Fix issue where importing certain GGUF files would result in
the incorrect quantization level
* ollama push will now print the uploaded model URL on
ollama.com
- Update to version 0.4.4:
* Marco-o1: An open large reasoning model for real-world
solutions by the Alibaba International Digital Commerce Group
(AIDC-AI).
* Fixed issue where Ollama would freeze when processing requests
in parallel (e.g. when using code completion tools)
* Redirecting output to a file no longer outputs progress bars
or spinners
- Update to version 0.4.3:
* New model: Tülu 3 is a leading instruction following model
family, offering fully open-source data, code, and recipes by
the The Allen Institute for AI.
* New model: Mistral Large: a new version of Mistral Large with
improved Long Context, Function Calling and System Prompt
support.
* Improved performance issues that occurred in Ollama versions
0.4.0-0.4.2
* Fixed issue that would cause granite3-dense to generate empty
responses
* Fixed crashes and hanging caused by KV cache management
-------------------------------------------------------------------
Sat Nov 16 16:07:38 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.4.2:
* runner.go: Propagate panics back to the user.
* runner.go: Increase survivability of main processing loop
* build: fix arm container image (#7674)
* add line numbers for parser errors (#7326)
* chore(deps): bump golang.org/x dependencies (#7655)
* runner.go: Don't trim whitespace from inputs
* runner.go: Enforce NUM_PARALLEL directly in the runner
* cmd: preserve exact bytes when displaying template/system layers (#7586)
* fix(mllama): sync backend between batches
* runner.go: Fix off-by-one for num predicted
* CI: give windows lint more time (#7635)
* Jetpack support for Go server (#7217)
* doc: capture numeric group requirement (#6941)
* docs: Capture docker cgroup workaround (#7519)
* runner.go: Make KV entry accounting more robust
* readme: add aichat terminal app to community integrations (#7418)
* api: fix typos in Go Doc comments (#7620)
* readme: add GoLamify to community integrations (#7521)
* readme: add browser extension that enables using Ollama for interacting with web pages (#5827)
* docs: add mentions of Llama 3.2 (#7517)
* api: fix typo in python ClientFromEnvironment docs (#7604)
* readme: add llama3.2-vision to model list (#7580)
-------------------------------------------------------------------
Mon Nov 11 13:57:46 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Add patch 01-build-verbose.patch to add the -v option
to go build
- Update to version 0.4.1:
* runner.go: Check for zero length images
* docs: update langchainpy.md with proper model name (#7527)
* Set macos min version for all architectures (#7579)
* win: remove preview title from installer (#7529)
* Workaround buggy P2P ROCm copy on windows (#7466)
* Debug logging for nvcuda init (#7532)
* Align rocm compiler flags (#7467)
* Be explicit for gpu library link dir (#7560)
* docs: OLLAMA_NEW_RUNNERS no longer exists
* runner.go: Remove unused arguments
* sched: Lift parallel restriction for multimodal models except mllama
-------------------------------------------------------------------
Thu Nov 07 12:06:09 UTC 2024 - adrian@suse.de
- Update to version 0.4.0:
* Update README.md (#7516)
* One corrupt manifest should not wedge model operations (#7515)
* prompt: Use a single token when estimating mllama context size
* readme: add Hexabot to the list of community integrations
* Quiet down debug log of image payload (#7454)
-------------------------------------------------------------------
Wed Nov 06 12:31:53 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.4.0-rc8:
* CI: Switch to v13 macos runner (#7498)
* CI: matrix strategy fix (#7496)
* Sign windows arm64 official binaries (#7493)
* readme: add TextCraft to community integrations (#7377)
* nvidia libs have inconsistent ordering (#7473)
* CI: omit unused tools for faster release builds (#7432)
* llama: Improve error handling
* runner.go: Only allocate 1 element embedding batches for mllama
* refactor kv estimation
* mllama cross attention
* Add basic mllama integration tests (#7455)
* runner.go: Don't set cross attention before sending embeddings
* Give unicode test more time to run (#7437)
-------------------------------------------------------------------
Fri Nov 01 02:18:50 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Remove enable-lto.patch
- Update to version 0.4.0-rc6:
* Refine default thread selection for NUMA systems (#7322)
* runner.go: Better abstract vision model integration
* Soften windows clang requirement (#7428)
* Remove submodule and shift to Go server - 0.4.0 (#7157)
* Move windows app out of preview (#7347)
* windows: Support alt install paths, fit and finish (#6967)
* add more tests for getting the optimal tiled canvas (#7411)
* Switch windows to clang (#7407)
* tests: Add test for Unicode processing
* runner.go: Better handle return NULL values from llama.cpp
* add mllama image processing to the generate handler (#7384)
* Bump to latest Go 1.22 patch (#7379)
* Fix deepseek deseret regex (#7369)
* Better support for AMD multi-GPU on linux (#7212)
* Fix unicode output on windows with redirect to file (#7358)
* Fix incremental build file deps (#7361)
* Improve dependency gathering logic (#7345)
* fix #7247 - invalid image input (#7249)
* integration: harden embedding test (#7306)
* default to "FROM ." if a Modelfile isn't present (#7250)
* Fix rocm windows build and clean up dependency gathering (#7305)
* runner.go: Merge partial unicode characters before sending
* readme: add Ollama for Swift to the community integrations (#7295)
* server: allow vscode-webview origin (#7273)
* image processing for llama3.2 (#6963)
* llama: Decouple patching script from submodule (#7139)
* llama: add compiler tags for cpu features (#7137)
-------------------------------------------------------------------
Wed Oct 30 01:47:37 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
- Update to version 0.3.14:
* New Models
+ Granite 3 MoE: The IBM Granite 1B and 3B models are the
first mixture of experts (MoE) Granite models from IBM
designed for low latency usage.
+ Granite 3 Dense: The IBM Granite 2B and 8B models are
designed to support tool-based use cases and support for
retrieval augmented generation (RAG), streamlining code
generation, translation and bug fixing.
-------------------------------------------------------------------
Sat Oct 12 20:55:18 UTC 2024 - eyadlorenzo@gmail.com
- Update to version 0.3.13:
* New safety models:
~ Llama Guard 3: a series of models by Meta, fine-tuned for
content safety classification of LLM inputs and responses.
~ ShieldGemma: ShieldGemma is set of instruction tuned models
from Google DeepMind for evaluating the safety of text
prompt input and text output responses against a set of
defined safety policies.
* Fixed issue where ollama pull would leave connections when
encountering an error
* ollama rm will now stop a model if it is running prior to
deleting it
-------------------------------------------------------------------
Sat Sep 28 03:53:10 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org>
- Update to version 0.3.12:
* Llama 3.2: Meta's Llama 3.2 goes small with 1B and 3B
models.
* Qwen 2.5 Coder: The latest series of Code-Specific Qwen
models, with significant improvements in code generation,
code reasoning, and code fixing.
* Ollama now supports ARM Windows machines
* Fixed rare issue where Ollama would report a missing .dll
file on Windows
* Fixed performance issue for Windows without GPUs
-------------------------------------------------------------------
Fri Sep 20 08:29:30 UTC 2024 - adrian@suse.de
- Update to version 0.3.11:
* llm: add solar pro (preview) (#6846)
* server: add tool parsing support for nemotron-mini (#6849)
* make patches git am-able
* CI: dist directories no longer present (#6834)
* CI: clean up naming, fix tagging latest (#6832)
* CI: set platform build build_linux script to keep buildx happy (#6829)
* readme: add Agents-Flex to community integrations (#6788)
* fix typo in import docs (#6828)
* readme: add vim-intelligence-bridge to Terminal section (#6818)
* readme: add Obsidian Quiz Generator plugin to community integrations (#6789)
* Fix incremental builds on linux (#6780)
* Use GOARCH for build dirs (#6779)
* Optimize container images for startup (#6547)
* examples: updated requirements.txt for privategpt example
* examples: polish loganalyzer example (#6744)
* readme: add ollama_moe to community integrations (#6752)
* runner: Flush pending responses before returning
* add "stop" command (#6739)
* refactor show ouput
* readme: add QodeAssist to community integrations (#6754)
* Verify permissions for AMD GPU (#6736)
* add *_proxy for debugging
* docs: update examples to use llama3.1 (#6718)
* Quiet down dockers new lint warnings (#6716)
* catch when model vocab size is set correctly (#6714)
* readme: add crewAI to community integrations (#6699)
* readme: add crewAI with mesop to community integrations
-------------------------------------------------------------------
Tue Sep 17 10:48:34 UTC 2024 - adrian@suse.de
- Update to version 0.3.10:
* openai: align chat temperature and frequency_penalty options with completion (#6688)
* docs: improve linux install documentation (#6683)
* openai: don't scale temperature or frequency_penalty (#6514)
* readme: add Archyve to community integrations (#6680)
* readme: add Plasmoid Ollama Control to community integrations (#6681)
* Improve logging on GPU too small (#6666)
* openai: fix "presence_penalty" typo and add test (#6665)
* Fix gemma2 2b conversion (#6645)
* Document uninstall on windows (#6663)
* Revert "Detect running in a container (#6495)" (#6662)
* llm: make load time stall duration configurable via OLLAMA_LOAD_TIMEOUT
* Introduce GPU Overhead env var (#5922)
* Detect running in a container (#6495)
* readme: add AiLama to the list of community integrations (#4957)
* Update gpu.md: Add RTX 3050 Ti and RTX 3050 Ti (#5888)
* server: fix blob download when receiving a 200 response (#6656)
* readme: add Gentoo package manager entry to community integrations (#5714)
* Update install.shReplace "command -v" with encapsulated functionality (#6035)
* readme: include Enchanted for Apple Vision Pro (#4949)
* readme: add lsp-ai to community integrations (#5063)
* readme: add ollama-php library to community integrations (#6361)
* readme: add vnc-lm discord bot community integration (#6644)
* llm: use json.hpp from common (#6642)
* readme: add confichat to community integrations (#6378)
* docs: add group to manual Linux isntructions and verify service is running (#6430)
* readme: add gollm to the list of community libraries (#6099)
* readme: add Cherry Studio to community integrations (#6633)
* readme: add Go fun package (#6421)
* docs: fix spelling error (#6391)
* install.sh: update instructions to use WSL2 (#6450)
* readme: add claude-dev to community integrations (#6630)
* readme: add PyOllaMx project (#6624)
* llm: update llama.cpp commit to 8962422 (#6618)
* Use cuda v11 for driver 525 and older (#6620)
* Log system memory at info (#6617)
* readme: add Painting Droid community integration (#5514)
* readme: update Ollama4j link and add link to Ollama4j Web UI (#6608)
* Fix sprintf to snprintf (#5664)
* readme: add PartCAD tool to readme for generating 3D CAD models using Ollama (#6605)
* Reduce docker image size (#5847)
* readme: add OllamaFarm project (#6508)
* readme: add go-crew and Ollamaclient projects (#6583)
* docs: update faq.md for OLLAMA_MODELS env var permissions (#6587)
* fix(cmd): show info may have nil ModelInfo (#6579)
* docs: update GGUF examples and references (#6577)
* Add findutils to base images (#6581)
* remove any unneeded build artifacts
* doc: Add Nix and Flox to package manager listing (#6074)
* update the openai docs to explain how to set the context size (#6548)
* fix(test): do not clobber models directory
* add llama3.1 chat template (#6545)
* update deprecated warnings
* validate model path
* throw an error when encountering unsupport tensor sizes (#6538)
* Move ollama executable out of bin dir (#6535)
* update templates to use messages
* more tokenizer tests
* add safetensors to the modelfile docs (#6532)
* Fix import image width (#6528)
* Update manual instructions with discrete ROCm bundle (#6445)
* llm: fix typo in comment (#6530)
* adjust image sizes
* clean up convert tokenizer
* detect chat template from configs that contain lists
* update the import docs (#6104)
* server: clean up route names for consistency (#6524)
* Only enable numa on CPUs (#6484)
* gpu: Group GPU Library sets by variant (#6483)
* update faq
* passthrough OLLAMA_HOST path to client
* convert safetensor adapters into GGUF (#6327)
* gpu: Ensure driver version set before variant (#6480)
* llm: Align cmake define for cuda no peer copy (#6455)
* Fix embeddings memory corruption (#6467)
* llama3.1
* convert gemma2
* create bert models from cli
* bert
* Split rocm back out of bundle (#6432)
* CI: remove directories from dist dir before upload step (#6429)
* CI: handle directories during checksum (#6427)
* Fix overlapping artifact name on CI
* Review comments
* Adjust layout to bin+lib/ollama
* Remove Jetpack
* Add windows cuda v12 + v11 support
* Enable cuda v12 flags
* Add cuda v12 variant and selection logic
* Report GPU variant in log
* Add Jetson cuda variants for arm
* Wire up ccache and pigz in the docker based build
* Refactor linux packaging
* server: limit upload parts to 16 (#6411)
* Fix white space.
* Reset NumCtx.
* Override numParallel only if unset.
* fix: chmod new layer to 0o644 when creating it
* fix: Add tooltip to system tray icon
* only skip invalid json manifests
* skip invalid manifest files
* fix noprune
* add `CONTRIBUTING.md` (#6349)
* Fix typo and improve readability (#5964)
* server: reduce max connections used in download (#6347)
* update chatml template format to latest in docs (#6344)
* lint
* Update openai.md to remove extra checkbox (#6345)
* llama3.1 memory
-------------------------------------------------------------------
Thu Aug 15 18:59:48 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
@@ -455,23 +20,23 @@ Sun Aug 11 02:40:06 UTC 2024 - Alessandro de Oliveira Faria <cabelo@opensuse.org
- Update to version 0.3.4:
* New embedding models
- BGE-M3: a large embedding model from BAAI distinguished for
its versatility in Multi-Functionality, Multi-Linguality, and
Multi-Granularity.
- BGE-Large: a large embedding model trained in english.
- Paraphrase-Multilingual: A multilingual embedding model
trained on parallel data for 50+ languages.
* New embedding API with batch support
- Ollama now supports a new API endpoint /api/embed for
embedding generation:
* This API endpoint supports new features:
- Batches: generate embeddings for several documents in
one request
- Normalized embeddings: embeddings are now normalized,
improving similarity results
- Truncation: a new truncate parameter that will error if
set to false
- Metrics: responses include load_duration, total_duration and
prompt_eval_count metrics
-------------------------------------------------------------------
@@ -482,17 +47,17 @@ Sat Aug 03 09:41:56 UTC 2024 - eyadlorenzo@gmail.com
load_duration, and prompt_eval_count
* Added usage metrics to the /v1/embeddings OpenAI compatibility
API
* Fixed issue where /api/generate would respond with an empty
string if provided a context
* Fixed issue where /api/generate would return an incorrect
value for context
* /show modefile will now render MESSAGE commands correctly
- Update to version 0.3.2:
* Fixed issue where ollama pull would not resume download
progress
* Fixed issue where phi3 would report an error on older versions
-------------------------------------------------------------------
Tue Jul 30 07:08:37 UTC 2024 - Adrian Schröter <adrian@suse.de>
@@ -557,16 +122,16 @@ Wed Jul 24 14:28:08 UTC 2024 - adrian@suse.de
-------------------------------------------------------------------
Thu Jul 18 13:09:10 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Fixed issue with shared libraries
-------------------------------------------------------------------
Thu Jul 18 12:27:54 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Added %check section
- Use -v when building
- Update to version 0.2.6:
* New models: MathΣtral is a 7B model designed for math
reasoning and scientific discovery by Mistral AI.
* Fixed issue where uppercase roles such as USER would no longer
work in the chat endpoints
@@ -580,62 +145,62 @@ Sun Jul 14 17:48:36 UTC 2024 - eyadlorenzo@gmail.com
* Fixed issue where a model's SYSTEM message not be applied
- Update to version 0.2.4:
* Fixed issue where context, load_duration and total_duration
fields would not be set in the /api/generate endpoint.
* Ollama will no longer error if loading models larger than
system memory if disk space is available
- Update to version 0.2.3:
* Fix issue where system prompt would not be applied
- Update to version 0.2.2:
* Fixed errors that occurred when using Ollama with Nvidia V100
GPUs
* glm4 models will no longer fail to load from out of memory
errors
* Fixed error that would occur when running deepseek-v2 and
deepseek-coder-v2 models
* Fixed a series of out of memory issues when using Nvidia
GPUs
* Fixed a series of errors that would occur when using multiple
Radeon GPUs
- Update to version 0.2.1:
* Fixed issue where setting OLLAMA_NUM_PARALLEL would cause
models to be reloaded after each request
- Update to version 0.2.0:
* Ollama 0.2.0 is now available with concurrency support.
This unlocks 2 specific features:
~ Ollama can now serve multiple requests at the same time
~ Ollama now supports loading different models at the same time
* New models: GLM-4: A strong multi-lingual general language
model with competitive performance to Llama 3.
* New models: CodeGeeX4: A versatile model for AI software
development scenarios, including code completion.
* New models: Gemma 2: Improved output quality and base text
generation models now available
* Ollama will now show a better error if a model architecture
isn't supported
* Improved handling of quotes and spaces in Modelfile FROM lines
* Ollama will now return an error if the system does not have
enough memory to run a model on Linux
-------------------------------------------------------------------
Sun Jul 07 19:18:11 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.48:
* Fixed issue where Gemma 2 would continuously output when
reaching context limits
* Fixed out of memory and core dump errors when running Gemma 2
* /show info will now show additional model information in
ollama run
* Fixed issue where ollama show would result in an error on
certain vision models
- Update to version 0.1.48:
* Added support for Google Gemma 2 models (9B and 27B)
* Fixed issues with ollama create when importing from Safetensors
-------------------------------------------------------------------
Mon Jun 24 10:11:17 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
@@ -670,44 +235,44 @@ Sat Jun 22 10:08:00 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
-------------------------------------------------------------------
Tue Jun 18 12:12:41 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Added documentation files to .spec
- Update to version 0.1.44:
* Fixed issue where unicode characters such as emojis would not
be loaded correctly when running ollama create
* Fixed certain cases where Nvidia GPUs would not be detected and
reported as compute capability 1.0 devices
- Update to version 0.1.43:
* New import.md guide for converting and importing models to
Ollama
* Fixed issue where embedding vectors resulting from
/api/embeddings would not be accurate
* JSON mode responses will no longer include invalid escape
characters
* Removing a model will no longer show incorrect File not found
errors
* Fixed issue where running ollama create would result in an
error on Windows with certain file formatting
- Update to version 0.1.42:
* New models: Qwen 2: a new series of large language models
from Alibaba group
* Qwen 2: a new series of large language models from Alibaba
group
* ollama pull is now faster if it detects a model is already
downloaded
* ollama create will now automatically detect prompt templates
for popular model architectures such as Llama, Gemma, Phi and
more.
* Ollama can now be accessed from local apps built with Electron
and Tauri, as well as in developing apps in local html files
* Update welcome prompt in Windows to llama3
* Fixed issues where /api/ps and /api/tags would show invalid
timestamps in responses
- Update to version 0.1.41:
* Fixed issue on Windows 10 and 11 with Intel CPUs with
integrated GPUs where Ollama would encounter an error
-------------------------------------------------------------------
@@ -717,12 +282,12 @@ Sat Jun 01 21:12:20 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
* New model: Codestral: Codestral is Mistral AIs first-ever code
model designed for code generation tasks.
* New model: IBM Granite Code: now in 3B and 8B parameter sizes.
* New model: Deepseek V2: A Strong, Economical, and Efficient
Mixture-of-Experts Language Model
* Fixed out of memory and incorrect token issues when running
Codestral on 16GB Macs
* Fixed issue where full-width characters (e.g. Japanese,
Chinese, Russian) were deleted at end of the line when using
ollama run
-------------------------------------------------------------------
@@ -731,9 +296,9 @@ Wed May 29 11:38:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.39:
* New model: Cohere Aya 23: A new state-of-the-art, multilingual
LLM covering 23 different languages.
* New model: Mistral 7B 0.3: A new version of Mistral 7B with
initial support for function calling.
* New model: Phi-3 Medium: a 14B parameters, lightweight,
state-of-the-art open model by Microsoft.
* New model: Phi-3 Mini 128K and Phi-3 Medium 128K: versions of
the Phi-3 models that support a context window size of 128K
@@ -741,7 +306,7 @@ Wed May 29 11:38:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
IBM for Code Intelligence
* It is now possible to import and quantize Llama 3 and its
finetunes from Safetensors format to Ollama.
* Full changelog at
https://github.com/ollama/ollama/releases/tag/v0.1.39
-------------------------------------------------------------------
@@ -755,7 +320,7 @@ Thu May 16 19:55:51 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Update to version 0.1.38:
* New model: Falcon 2: A new 11B parameters causal decoder-only
model built by TII and trained over 5T tokens.
* New model: Yi 1.5: A new high-performing version of Yi, now
licensed as Apache 2.0. Available in 6B, 9B and 34B sizes.
* Added ollama ps command
* Added /clear command
@@ -780,7 +345,7 @@ Sun May 12 19:05:53 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
Sun May 12 15:20:28 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
- Use obs_scm service instead of the deprecated tar_scm
- Use zstd for vendor tarball compression
-------------------------------------------------------------------
Sun May 12 01:39:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
@@ -818,11 +383,11 @@ Sun May 12 01:39:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
* New model: CodeGemma 1.1
* New model: StableLM2 12B
* New model: Moondream 2
* Fixed issues with LLaVa models where they would respond
incorrectly after the first request
* Fixed out of memory errors when running large models such as
Llama 3 70B
* Fixed various issues with Nvidia GPU discovery on Linux and
Windows
* Fixed a series of Modelfile errors when running ollama create
* Fixed no slots available error that occurred when cancelling a
@@ -840,13 +405,13 @@ Sun May 12 01:39:26 UTC 2024 - Eyad Issa <eyadlorenzo@gmail.com>
* New model: Llama 3 Gradient 1048K
* New model: Dolphin Llama 3
* New model: Qwen 110B
* Fixed issues where the model would not terminate, causing the
API to hang.
* Fixed a series of out of memory errors on Apple Silicon Macs
* Fixed out of memory errors when running Mixtral architecture
models
* Aded experimental concurrency features:
~ OLLAMA_NUM_PARALLEL: Handle multiple requests simultaneously
for a single model
~ OLLAMA_MAX_LOADED_MODELS: Load multiple models simultaneously

ollama.obsinfo

@@ -1,4 +1,4 @@
 name: ollama
-version: 0.5.7
-mtime: 1737018844
-commit: a420a453b4783841e3e79c248ef0fe9548df6914
+version: 0.3.6
+mtime: 1723575229
+commit: 4c4fe3f87fe1858b35bd0d41e093a0039ec4cee4

ollama.service

@@ -8,7 +8,6 @@ User=ollama
 Group=ollama
 Restart=always
 RestartSec=3
-EnvironmentFile=-/etc/sysconfig/ollama
 [Install]
 WantedBy=default.target

ollama.spec

@@ -1,7 +1,7 @@
 #
 # spec file for package ollama
 #
-# Copyright (c) 2025 SUSE LLC
+# Copyright (c) 2024 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -17,7 +17,7 @@
 Name: ollama
-Version: 0.5.7
+Version: 0.3.6
 Release: 0
 Summary: Tool for running AI models on-premise
 License: MIT
@@ -26,14 +26,12 @@ Source: %{name}-%{version}.tar
 Source1: vendor.tar.zstd
 Source2: ollama.service
 Source3: %{name}-user.conf
-Source4: sysconfig.ollama
+Patch0: enable-lto.patch
 BuildRequires: cmake >= 3.24
 BuildRequires: git
 BuildRequires: sysuser-tools
 BuildRequires: zstd
 BuildRequires: golang(API) >= 1.22
-# 32bit seems not to be supported anymore
-ExcludeArch: %{ix86} %{arm}
 %sysusers_requires
 %if 0%{?sle_version} == 150600
 BuildRequires: gcc12-c++
@@ -41,7 +39,8 @@ BuildRequires: libstdc++6-gcc12
 %else
 BuildRequires: gcc-c++ >= 11.4.0
 %endif
-Requires(pre): %{fillup_prereq}
+# 32bit seems not to be supported anymore
+ExcludeArch: %ix86 %arm
 %description
 Ollama is a tool for running AI models on one's own hardware.
@@ -67,16 +66,15 @@ export CC=gcc-12
 export GOFLAGS="-mod=vendor"
 %endif
-export GOFLAGS="${GOFLAGS} -v"
-%make_build
+export OLLAMA_SKIP_PATCHING=1
+go generate ./...
+go build -v .
 %install
 install -D -m 0755 %{name} %{buildroot}/%{_bindir}/%{name}
 install -D -m 0644 %{SOURCE2} %{buildroot}%{_unitdir}/%{name}.service
 install -D -m 0644 %{SOURCE3} %{buildroot}%{_sysusersdir}/%{name}-user.conf
-install -D -m 0644 %{SOURCE4} %{buildroot}%{_fillupdir}/sysconfig.%name
 install -d %{buildroot}%{_localstatedir}/lib/%{name}
 mkdir -p "%{buildroot}/%{_docdir}/%{name}"
@@ -89,14 +87,13 @@ export CC=gcc-12
 # pie doesn't work with gcc12 on leap
 export GOFLAGS="-mod=vendor"
 %endif
-go test -v ./...
+go test ./...
 %pre -f %{name}.pre
 %service_add_pre %{name}.service
 %post
 %service_add_post %{name}.service
-%fillup_only
 %preun
 %service_del_preun %{name}.service
@@ -111,7 +108,6 @@ go test -v ./...
 %{_bindir}/%{name}
 %{_unitdir}/%{name}.service
 %{_sysusersdir}/%{name}-user.conf
-%{_fillupdir}/sysconfig.%name
 %attr(-, ollama, ollama) %{_localstatedir}/lib/%{name}
 %changelog
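
The %build hunk above is the main functional difference between the two branches: the factory side builds through %make_build with verbose GOFLAGS, while the devel side drives Go directly and skips ollama's own patching step (the packaging carries enable-lto.patch separately). A rough sketch of the devel-side steps outside of OBS, assuming the vendored sources and a Go >= 1.22 toolchain as in the BuildRequires:

  export GOFLAGS="-mod=vendor"    # build against the vendored modules from vendor.tar.zstd
  export OLLAMA_SKIP_PATCHING=1   # do not let go generate apply upstream's llama.cpp patches
  go generate ./...               # builds the bundled llama.cpp runners (gen_linux.sh)
  go build -v .
  go test ./...                   # what the %check section runs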

sysconfig.ollama (deleted file)

@@ -1,58 +0,0 @@
## Path: Network/Ollama
## Description: Ollama server access
## Type: string
## Default: "http://127.0.0.1:11434"
## ServiceRestart: ollama
#
# set it to 0.0.0.0 for global network access
#
OLLAMA_HOST="http://127.0.0.1:11434"
## Type: string
## Description: Ollama default quantization type for the K/V cache
## Default: "f16"
## ServiceRestart: ollama
OLLAMA_KV_CACHE_TYPE=f16
## Type: string
## Description: Ollama default quantization type for the K/V cache
## Default: "f16"
## ServiceRestart: ollama
OLLAMA_KEEP_ALIVE=
## Type: string
## Description: Parallel processes
## Default: ""
## ServiceRestart: ollama
OLLAMA_NUM_PARALLEL=
## Type: string
## Description: Maxmimal memory to be used
## Default: ""
## ServiceRestart: ollama
OLLAMA_MAX_VRAM=
## Type: string
## Description: Ollama runner directory
## Default: ""
## ServiceRestart: ollama
OLLAMA_RUNNERS_DIR=
## Type: string
## Description: Ollama temporary directory
## Default: ""
## ServiceRestart: ollama
OLLAMA_TMPDIR=
## Type: string
## Description: Models to be loaded by default
## Default: ""
## ServiceRestart: ollama
OLLAMA_MODELS=
## Type: string
## Description: List of allowed remote hosts
## Default: ""
## ServiceRestart: ollama
OLLAMA_ORIGINS=
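
On the factory side this file is installed into %{_fillupdir} and read by the unit through EnvironmentFile=-/etc/sysconfig/ollama (the leading "-" tells systemd to ignore the file if it is absent); this is the admin configuration mechanism referenced by the boo#1236008 changelog entry. A sketch of the resulting workflow, with an illustrative value:

  # e.g. open the daemon to the network, then restart so the unit rereads the file
  sed -i 's|^OLLAMA_HOST=.*|OLLAMA_HOST="http://0.0.0.0:11434"|' /etc/sysconfig/ollama
  systemctl restart ollama.service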

vendor.tar.zstd (Stored with Git LFS): binary file not shown