commit b6df46618425bdb4602a42209ca5a20644ad8e4019926ff60ca6df70d82ae6ca Author: Andrea Manzini Date: Wed Oct 30 06:52:04 2024 +0000 - update to 9.7.3 * OPTIONS file to be loaded by remote worker is now preserved so that it does not get purged by the primary host. - update to 9.7.2 * Fixed a bug for surfacing write unix time: Iterator::GetProperty("rocksdb.iterator.write-time") for non-L0 files. - update to 9.7.1 * Fix for Several DB option settings could be lost through GetOptionsFromString(), possibly elsewhere as well. * Fix under counting of allocated memory in the compressed secondary cache * Skip insertion of compressed blocks in the secondary cache if the lowest_used_cache_tier DB option is kVolatileTier. - update to 9.7.0 * New Features: + Make Cache a customizable class that can be instantiated by the object registry. + Add new option prefix_seek_opt_in_only that makes iterators generally safer + Add a new table property "rocksdb.key.largest.seqno" which records the largest sequence number of all keys in file. * Behavior Changes + Changed the semantics of the BlobDB configuration option blob_garbage_collection_force_threshold + Set write_dbid_to_manifest=true by default. This means DB ID will now be preserved through backups, checkpoints, etc. by default. Also add write_identity_file option + In FIFO compaction, compactions for changing file temperature (configured by option file_temperature_age_thresholds) will compact one file at a time + Support ingesting db generated files using hard link + Add a new file ingestion option IngestExternalFileOptions::link_files to hard link input files and preserve original files links after ingestion. 
OBS-URL: https://build.opensuse.org/package/show/server:database/rocksdb?expand=0&rev=55 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/_constraints b/_constraints new file mode 100644 index 0000000..5dddfdf --- /dev/null +++ b/_constraints @@ -0,0 +1,10 @@ + + + + 4 + + + 4 + + + diff --git a/rocksdb-8.0.0-reproducible.patch b/rocksdb-8.0.0-reproducible.patch new file mode 100644 index 0000000..1432929 --- /dev/null +++ b/rocksdb-8.0.0-reproducible.patch @@ -0,0 +1,20 @@ +Index: rocksdb-9.7.3/Makefile +=================================================================== +--- rocksdb-9.7.3.orig/Makefile ++++ rocksdb-9.7.3/Makefile +@@ -816,9 +816,12 @@ ROCKSDB_PATCH = $(shell grep -E "ROCKSDB + # the file needs to already exist or else the build will fail + ifndef 
NO_UPDATE_BUILD_VERSION + +-# By default, use the current date-time as the date. If there are no changes, +-# we will use the last commit date instead. +-build_date := $(shell date "+%Y-%m-%d %T") ++DATE_FMT = %Y-%m-%d ++ifdef SOURCE_DATE_EPOCH ++ build_date ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "+$(DATE_FMT)" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "+$(DATE_FMT)" 2>/dev/null || date -u "+$(DATE_FMT)") ++else ++ build_date ?= $(shell date "+$(DATE_FMT)") ++endif + + ifdef FORCE_GIT_SHA + git_sha := $(FORCE_GIT_SHA) diff --git a/rocksdb-8.0.0-rpath.patch b/rocksdb-8.0.0-rpath.patch new file mode 100644 index 0000000..d02eb13 --- /dev/null +++ b/rocksdb-8.0.0-rpath.patch @@ -0,0 +1,10 @@ +Index: rocksdb-9.7.3/tools/CMakeLists.txt +=================================================================== +--- rocksdb-9.7.3.orig/tools/CMakeLists.txt ++++ rocksdb-9.7.3/tools/CMakeLists.txt +@@ -1,3 +1,5 @@ ++set(CMAKE_SKIP_BUILD_RPATH TRUE) ++ + set(CORE_TOOLS + sst_dump.cc + ldb.cc) diff --git a/rocksdb-8.0.0-shared-liburing.patch b/rocksdb-8.0.0-shared-liburing.patch new file mode 100644 index 0000000..4414536 --- /dev/null +++ b/rocksdb-8.0.0-shared-liburing.patch @@ -0,0 +1,13 @@ +Index: rocksdb-9.7.3/cmake/modules/Finduring.cmake +=================================================================== +--- rocksdb-9.7.3.orig/cmake/modules/Finduring.cmake ++++ rocksdb-9.7.3/cmake/modules/Finduring.cmake +@@ -7,7 +7,7 @@ + find_path(uring_INCLUDE_DIR + NAMES liburing.h) + find_library(uring_LIBRARIES +- NAMES liburing.a liburing) ++ NAMES liburing.so liburing) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(uring diff --git a/rocksdb-9.3.1.tar.gz b/rocksdb-9.3.1.tar.gz new file mode 100644 index 0000000..eb4d8bb --- /dev/null +++ b/rocksdb-9.3.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63f1be162998c0f49a538a7fe3fcac0e40cad77ee47d5592a65bca50f7c4620 +size 13512627 diff --git 
a/rocksdb-9.4.0.tar.gz b/rocksdb-9.4.0.tar.gz new file mode 100644 index 0000000..bde5040 --- /dev/null +++ b/rocksdb-9.4.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f829976aa24b8ba432e156f52c9e0f0bd89c46dc0cc5a9a628ea70571c1551c +size 13550643 diff --git a/rocksdb-9.5.2.tar.gz b/rocksdb-9.5.2.tar.gz new file mode 100644 index 0000000..9265979 --- /dev/null +++ b/rocksdb-9.5.2.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b20780586d3df4a3c5bcbde341a2c1946b03d18237960bda5bc5e9538f42af40 +size 13565856 diff --git a/rocksdb-9.6.1.tar.gz b/rocksdb-9.6.1.tar.gz new file mode 100644 index 0000000..101f4ff --- /dev/null +++ b/rocksdb-9.6.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98cf497c1d6d0a927142d2002a0b6b4816a0998c74fda9ae7b1bdaf6b784e895 +size 13589860 diff --git a/rocksdb-9.7.3.tar.gz b/rocksdb-9.7.3.tar.gz new file mode 100644 index 0000000..618ae6c --- /dev/null +++ b/rocksdb-9.7.3.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acfabb989cbfb5b5c4d23214819b059638193ec33dad2d88373c46448d16d38b +size 13614392 diff --git a/rocksdb.changes b/rocksdb.changes new file mode 100644 index 0000000..c931217 --- /dev/null +++ b/rocksdb.changes @@ -0,0 +1,607 @@ +------------------------------------------------------------------- +Sun Oct 27 14:35:46 UTC 2024 - Andrea Manzini + +- update to 9.7.3 + * OPTIONS file to be loaded by remote worker is now preserved so that it does + not get purged by the primary host. + +- update to 9.7.2 + * Fixed a bug for surfacing write unix time: + Iterator::GetProperty("rocksdb.iterator.write-time") for non-L0 files. + +- update to 9.7.1 + * Fix for Several DB option settings could be lost through + GetOptionsFromString(), possibly elsewhere as well. 
+ * Fix under counting of allocated memory in the compressed secondary cache + * Skip insertion of compressed blocks in the secondary cache if the + lowest_used_cache_tier DB option is kVolatileTier. + +- update to 9.7.0 + * New Features: + + Make Cache a customizable class that can be instantiated by the object + registry. + + Add new option prefix_seek_opt_in_only that makes iterators generally safer + + Add a new table property "rocksdb.key.largest.seqno" which records the + largest sequence number of all keys in file. + * Behavior Changes + + Changed the semantics of the BlobDB configuration option + blob_garbage_collection_force_threshold + + Set write_dbid_to_manifest=true by default. This means DB ID will now be + preserved through backups, checkpoints, etc. by default. Also add + write_identity_file option + + In FIFO compaction, compactions for changing file temperature (configured + by option file_temperature_age_thresholds) will compact one file at a time + + Support ingesting db generated files using hard link + + Add a new file ingestion option IngestExternalFileOptions::link_files + to hard link input files and preserve original files links after ingestion. + + DB::Close now untracks files in SstFileManager, making available any space + used by them + * Bug Fixes + + Fix a bug in CompactRange() where result files may not be compacted in any + future compaction. + + Fix handling of dynamic change of prefix_extractor with memtable prefix + filter. + + Fix a bug with manual_wal_flush and auto error recovery from WAL failure + that may cause CFs to be inconsistent + +------------------------------------------------------------------- +Thu Sep 12 13:22:35 UTC 2024 - Andrea Manzini + +- update to 9.6.1: + * Fix correctness of MultiGet across column families with user timestamp.
+ +- update to 9.6.0: + - New Features + * Best efforts recovery supports recovering to incomplete Version with a + clean seqno cut that presents a valid point in time view from the user's + perspective, if versioning history doesn't include atomic flush. + * New option BlockBasedTableOptions::decouple_partitioned_filters should + improve efficiency in serving read queries because filter and index + partitions can consistently target the configured metadata_block_size. + This option is currently opt-in. + * Introduce a new mutable CF option paranoid_memory_checks. It enables + additional validation on data integrity during reads/scanning. Currently, + skip list based memtable will validate key ordering during look up and scans. + - Public API Changes + * Add ticker stats to count file read retries due to checksum mismatch + * Adds optional installation callback function for remote compaction + - Behavior Changes + * There may be less intra-L0 compaction triggered by total L0 size being too + small. We now use compensated file size (tombstones are assigned some value + size) when calculating L0 size and reduce the threshold for L0 size limit. + This is to avoid accumulating too much data/tombstones in L0. + - Bug Fixes + * Make DestroyDB supports slow deletion when it's configured in SstFileManager. + The slow deletion is subject to the configured rate_bytes_per_sec, but not + subject to the max_trash_db_ratio. + * Fixed a bug where we set unprep_seqs_ even when WriteImpl() fails. This was + caught by stress test write fault injection in WriteImpl(). This may have + incorrectly caused iteration creation failure for unvalidated writes or + returned wrong result for WriteUnpreparedTxn::GetUnpreparedSequenceNumbers(). + * Fixed a bug where successful write right after error recovery for last failed + write finishes causes duplicate WAL entries + * Fixed a data race involving the background error status in unordered_write + mode. 
+ * Fix a bug where file snapshot functions like backup, checkpoint may attempt + to copy a non-existing manifest file. #12882 + * Fix a bug where per kv checksum corruption may be ignored in MultiGet(). + * Fix a race condition in pessimistic transactions that could allow multiple + transactions with the same name to be registered simultaneously, resulting + in a crash or other unpredictable behavior. + +------------------------------------------------------------------- +Wed Aug 28 07:28:34 UTC 2024 - Andrea Manzini + +- update to 9.5.2: + * Fix a race condition in pessimistic transactions that could allow + multiple transactions with the same name to be registered simultaneously, + resulting in a crash or other unpredictable behavior. + * Add ticker stats to count file read retries due to checksum mismatch + +- update to 9.5.1: + * Make DestroyDB supports slow deletion when it's configured in + SstFileManager. The slow deletion is subject to the configured + rate_bytes_per_sec, but not subject to the max_trash_db_ratio. + +- update to 9.5.0: + * Introduced new C API function rocksdb_writebatch_iterate_cf for column + family-aware iteration over the contents of a WriteBatch + * Add support to ingest SST files generated by a DB instead of SstFileWriter. + This can be enabled with experimental option + IngestExternalFileOptions::allow_db_generated_files. + * When calculating total log size for the log_size_for_flush argument + in CreateCheckpoint API, the size of the archived log will not be + included to avoid unnecessary flush + * Fix a major bug in which an iterator using prefix filtering and SeekForPrev + might miss data when the DB is using whole_key_filtering=false and + partition_filters=true. + * Fixed a bug where OnErrorRecoveryBegin() is not called before auto + recovery starts. + * Fixed a bug where event listener reads ErrorHandler's bg_error_ member + without holding db mutex(#12803). 
+ * Fixed a bug in handling MANIFEST write error that caused the latest valid + MANIFEST file to get deleted, resulting in the DB being unopenable. + * Fixed a race between error recovery due to manifest sync or write failure + and external SST file ingestion. Both attempt to write a new manifest file, + which causes an assertion failure. + * Fix an issue where compactions were opening table files and reading table + properties while holding db mutex_. + * Reduce unnecessary filesystem queries and DB mutex acquires in creating + backups and checkpoints. + +------------------------------------------------------------------- +Sat Jul 13 13:21:25 UTC 2024 - Andreas Stieger + +- update to 9.4.0: + * Added a CompactForTieringCollectorFactory to auto trigger + compaction for tiering use case. + * Optimistic transactions and pessimistic transactions with the + WriteCommitted policy now support the GetEntityForUpdate API. + * Added a new "count" command to the ldb repl shell. By default, + it prints a count of keys in the database from start to end. + The options --from= and/or --to= can be specified to limit the + range. + * Deprecated names LogFile and VectorLogPtr in favor of new names + WalFile and VectorWalPtr. + * Introduce a new universal compaction option + CompactionOptionsUniversal::max_read_amp which allows user to + define the limit on the number of sorted runs separately from + the trigger for compaction (level0_file_num_compaction_trigger) + * Inactive WALs are immediately closed upon being fully sync-ed + rather than in a background thread. + * Bug Fixes + +------------------------------------------------------------------- +Sat Jun 29 13:24:27 UTC 2024 - Andreas Stieger + +- update to 9.3.1: + * Optimistic transactions and pessimistic transactions with the + WriteCommitted policy now support the GetEntity API. 
+ * Added new Iterator property, "rocksdb.iterator.is-value-pinned", + for checking whether the Slice returned by Iterator::value() + can be used until the Iterator is destroyed. + * Optimistic transactions and WriteCommitted pessimistic + transactions now support the MultiGetEntity API. + * Optimistic transactions and pessimistic transactions with the + WriteCommitted policy now support the PutEntity API. Support + for read APIs and other write policies (WritePrepared, + WriteUnprepared) will be added later. + * Exposed block based metadata cache options via C API + * Exposed compaction pri via c api. + * Add a kAdmPolicyAllowAll option to TieredAdmissionPolicy that + admits all blocks evicted from the primary block cache into + the compressed secondary cache. + * CompactRange() with change_level=true on a CF with FIFO + compaction will return Status::NotSupported(). + * External file ingestion with FIFO compaction will always ingest + to L0. + * bug fixes + +------------------------------------------------------------------- +Thu May 23 20:13:16 UTC 2024 - Andreas Stieger + +- update to 9.2.1: + * Added two options deadline and max_size_bytes for CacheDumper + to exit early + * API for wide-column point lookups with read-your-own-writes + consistency, and a batched versions of the same + * API to support programmatically read a SST file as a raw table + file + * API to wait for background purge to complete + * DeleteRange() will return NotSupported() if row_cache is + configured since they don't work together in some cases + * Deprecated CompactionOptions::compression + * Using OptionChangeMigration() to migrate from non-FIFO to FIFO + compaction with Options::compaction_options_fifo. 
+ max_table_files_size > 0 can cause the whole DB to be dropped + right after migration if the migrated data is larger than + max_table_files_size + * Various behavior changes, and changes of defaults + * On distributed file systems that support file system level + checksum verification and reconstruction reads, RocksDB will + now retry a file read if the initial read fails RocksDB block + level or record level checksum verification. This applies to + MANIFEST file reads when the DB is opened, and to SST file + reads at all times. + * Bug fixes + +------------------------------------------------------------------- +Mon Apr 22 20:56:46 UTC 2024 - Andreas Stieger + +- update to 9.1.1: + * Added an option GetMergeOperandsOptions::continue_cb to give + users the ability to end GetMergeOperands()'s lookup process + before all merge operands were found. + * Add sanity checks for ingesting external files that currently + checks if the user key comparator used to create the file is + compatible with the column family's user key comparator. + * Support ingesting external files for column family that has + user-defined timestamps in memtable only enabled + * On file systems that support storage level data checksum and + reconstruction, retry SST block reads for point lookups, scans, + and flush and compaction if there's a checksum mismatch on the + initial read. + * Some enhancements and fixes to experimental Temperature handling + features, including new default_write_temperature CF option and + opening an SstFileWriter with a temperature. + * WriteBatchWithIndex now supports wide-column point lookups via + the GetEntityFromBatch API. + * Implement experimental features: + API Iterator::GetProperty("rocksdb.iterator.write-time") to + allow users to get data's approximate write unix time and write + data with a specific write time via WriteBatch::TimedPut API.
+- drop rocksdb-9.0.0-Fix-zstd-typo-in-cmake.patch, upstream + +------------------------------------------------------------------- +Thu Apr 18 06:08:39 UTC 2024 - Andreas Stieger + +- update to 9.0.1: + * Fix CMake Javadoc and source jar builds + * Fix Java SstFileMetaData to prevent throwing + java.lang.NoSuchMethodError + +------------------------------------------------------------------- +Tue Mar 19 20:47:00 UTC 2024 - Andreas Stieger + +- update to 9.0.0: + * Provide support for FSBuffer for point lookups. Also added + support for scans and compactions that don't go through prefetching. + * Make SstFileWriter create SST files without persisting user + defined timestamps when the + Option.persist_user_defined_timestamps flag is set to false. + * Add support for user-defined timestamps in APIs + DeleteFilesInRanges and GetPropertiesOfTablesInRange. + * Mark wal_compression feature as production-ready. Currently + only compatible with ZSTD compression. + * Public API Changes, including incompatible changes + * format_version=6 is the new default setting in + BlockBasedTableOptions, for more robust data integrity + checking. DBs and SST files written with this setting cannot be + read by RocksDB versions before 8.6.0. + * Compactions can be scheduled in parallel in an additional + scenario: multiple files are marked for compaction within a + single column family + * For leveled compaction, RocksDB will try to do intra-L0 + compaction if the total L0 size is small compared to Lbase. + * Users with atomic_flush=true are more likely to see the impact + of this change. + * Bug Fixes +- add rocksdb-9.0.0-Fix-zstd-typo-in-cmake.patch + +------------------------------------------------------------------- +Wed Feb 28 15:18:46 UTC 2024 - Andrea Manzini + +- update to 8.11.3: + * Bug Fixes + + Fix a bug where older data of an ingested key can be returned for read when universal compaction is used + + Apply appropriate rate limiting and priorities in more places.
+ +- update to 8.11.0: + * New Features + + Add new statistics: rocksdb.sst.write.micros measures time of each write to SST file + + * Public API Changes + + Added another enumerator kVerify to enum class FileOperationType in listener.h. + Update your switch statements as needed. + + Add CompressionOptions to the CompressedSecondaryCacheOptions structure to allow users to specify + library specific options when creating the compressed secondary cache. + + Deprecated several options: level_compaction_dynamic_file_size, ignore_max_compaction_bytes_for_input, + + check_flush_compaction_key_order, flush_verify_memtable_count, compaction_verify_record_count, + fail_if_options_file_error, and enforce_single_del_contracts + + Exposed options ttl via c api. + + * Behavior Changes + + rocksdb.blobdb.blob.file.write.micros expands to also measure time writing the header and footer. + Therefore the COUNT may be higher and values may be smaller than before. For stacked BlobDB, + it no longer measures the time of explicitly flushing blob file. + + Files will be compacted to the next level if the data age exceeds periodic_compaction_seconds + except for the last level. + + Reduced the compaction debt ratio trigger for scheduling parallel compactions + + For leveled compaction with default compaction pri (kMinOverlappingRatio), + files marked for compaction will be prioritized over files not marked when picking a file + from a level for compaction. + + * Bug Fixes + + Fix bug in auto_readahead_size that combined with IndexType::kBinarySearchWithFirstKey + fails + or iterator lands at a wrong key + + Fixed some cases in which DB file corruption was detected but ignored on creating a backup with BackupEngine. + + Fix bugs where rocksdb.blobdb.blob.file.synced includes blob files failed to get synced + and rocksdb.blobdb.blob.file.bytes.written includes blob bytes failed to get written.
+ + Fixed a possible memory leak or crash on a failure (such as I/O error) + in automatic atomic flush of multiple column families. + + Fixed some cases of in-memory data corruption using mmap reads with BackupEngine, sst_dump, or ldb. + + Fixed issues with experimental preclude_last_level_data_seconds option that could interfere + with expected data tiering. + + Fixed the handling of the edge case when all existing blob files become unreferenced. + Such files are now correctly deleted. + +------------------------------------------------------------------- +Wed Feb 21 18:53:31 UTC 2024 - Andreas Stieger + +- update to 8.10.2: + * Fix bug in auto_readahead_size that combined with + IndexType::kBinarySearchWithFirstKey + fails or iterator lands + at a wrong key + +------------------------------------------------------------------- +Thu Jan 11 19:45:23 UTC 2024 - Andreas Stieger + +- update to 8.10.0: + * Provide support for async_io to trim readahead_size by doing + block cache lookup + * Added initial wide-column support in WriteBatchWithIndex. This + includes the PutEntity API and support for wide columns in the + existing read APIs (GetFromBatch, GetFromBatchAndDB, + MultiGetFromBatchAndDB, and BaseDeltaIterator). + * Custom implementations of TablePropertiesCollectorFactory may + now return a nullptr collector to decline processing a file, + reducing callback overheads in such cases. + * Make ReadOptions.auto_readahead_size default true + * Bug fixes and performance improvements + +------------------------------------------------------------------- +Tue Dec 12 20:57:08 UTC 2023 - Andreas Stieger + +- update to 8.9.1: + * Add GetEntity() and PutEntity() API implementation for + Attribute Group support. Through the use of Column Families, + AttributeGroup enables users to logically group wide-column + entities. + * Make the EnableFileDeletion API not default to force enabling. 
+ For users that rely on this default behavior and still want to + continue to use force enabling, they need to explicitly pass a + true to EnableFileDeletion. + * During off-peak hours defined by daily_offpeak_time_utc, the + compaction picker will select a larger number of files for + periodic compaction. This selection will include files that are + projected to expire by the next off-peak start time, ensuring + that these files are not chosen for periodic compaction outside + of off-peak hours. + * If an error occurs when writing to a trace file after + DB::StartTrace(), the subsequent trace writes are skipped to + avoid writing to a file that has previously seen error. In this + case, DB::EndTrace() will also return a non-ok status with info + about the error occurred previously in its status message. + * Deleting stale files upon recovery is delegated to SstFileManager + if available so they can be rate limited. + * Make RocksDB only call TablePropertiesCollector::Finish() once. + * When WAL_ttl_seconds > 0, we now process archived WALs for + deletion at least every WAL_ttl_seconds / 2 seconds. Previously + it could be less frequent in case of small WAL_ttl_seconds values + when size-based expiration (WAL_size_limit_MB > 0 ) was + simultaneously enabled. + * Fix a crash or assertion failure bug in experimental new + HyperClockCache variant, especially when running with a + SecondaryCache. + * Fix a race between flush error recovery and db destruction that + can lead to db crashing. + * Fix some bugs in the index builder/reader path for user- + defined timestamps in Memtable only feature. + +------------------------------------------------------------------- +Fri Nov 24 14:40:42 UTC 2023 - Andrea Manzini + +- update to 8.8.1 + * Bug fixes: + - Make the cache memory reservation accounting in Tiered cache + (primary and compressed secondary cache) more accurate to avoid over/under charging the secondary cache.
+ - Allow increasing the compressed_secondary_ratio in the Tiered cache after setting it to 0 to disable. + +- update to 8.8.0 + * New features: + - Introduce AttributeGroup by adding the first AttributeGroup support API, MultiGetEntity(). + - Added new tickers rocksdb.fifo.{max.size|ttl}.compactions to count FIFO compactions + that drop files for different reasons + - Add an experimental offpeak duration awareness by setting DBOptions::daily_offpeak_time_utc in "HH:mm-HH:mm" format. + - Users can now change the max bytes granted in a single refill period (i.e., burst) during runtime + by SetSingleBurstBytes() for RocksDB rate limiter + * Public API Changes: + - The default value of DBOptions::fail_if_options_file_error changed from false to true. + - Add new Cache APIs GetSecondaryCacheCapacity() and GetSecondaryCachePinnedUsage() + to return the configured capacity, and cache reservation charged to the secondary cache. + * Behavior Changes: + - For non direct IO, eliminate the file system prefetching attempt for compaction read + when Options::compaction_readahead_size is 0 + - During a write stop, writes now block on in-progress recovery attempts + - Deleting stale files upon recovery is delegated to SstFileManager if available so they can be rate limited. + * Bug Fixes + - Fix a bug in auto_readahead_size where first_internal_key of index blocks wasn't copied properly + resulting in corruption error when first_internal_key was used for comparison. + - Fixed a bug where compaction read under non direct IO still falls back to RocksDB internal prefetching + after file system's prefetching returns non-OK status other than Status::NotSupported() + - Add bounds check in WBWIIteratorImpl and make BaseDeltaIterator, WriteUnpreparedTxn and WritePreparedTxn + respect the upper bound and lower bound in ReadOption. + - Fixed the handling of wide-column base values in the max_successive_merges logic.
+ - Fixed a rare race bug involving a concurrent combination of Create/DropColumnFamily and/or Set(DB)Options + that could lead to inconsistency between (a) the DB's reported options state, (b) the DB options in effect, + and (c) the latest persisted OPTIONS file. + - Fixed a possible underflow when computing the compressed secondary cache share of memory reservations + while updating the compressed secondary to total block cache ratio. + * Performance Improvements + - Improved the I/O efficiency of DB::Open a new DB with create_missing_column_families=true and many column families. + +- update to 8.7.3 + * Behavior Changes: + - Deleting stale files upon recovery is delegated to SstFileManager if available so they can be rate limited. + * Public API Changes: + - Add new Cache APIs GetSecondaryCacheCapacity() and GetSecondaryCachePinnedUsage() to return + the configured capacity, and cache reservation charged to the secondary cache. + * Bug Fixes: + - Fixed a possible underflow when computing the compressed secondary cache share of memory reservations + while updating the compressed secondary to total block cache ratio. + - Fix an assertion failure when UpdateTieredCache() is called in an idempotent manner.
+ + * see more details at https://github.com/facebook/rocksdb/releases/tag/v8.7.3 + +------------------------------------------------------------------- +Sat Oct 28 19:10:49 UTC 2023 - Andreas Stieger + +- update constraints for reliable building + +------------------------------------------------------------------- +Mon Oct 9 13:51:07 UTC 2023 - Andrea Manzini + +- update to 8.6.7 + * Fixed a bug where compaction read under non direct IO still falls back to RocksDB internal prefetching after file system's prefetching returns non-OK status other than Status::NotSupported() + * For non direct IO, eliminate the file system prefetching attempt for compaction read when Options::compaction_readahead_size is 0 + +- update to 8.6.6 + * Fix a bug with atomic_flush=true that can cause DB to get stuck after a flush fails + * Fix a bug where RocksDB (with atomic_flush=false) can delete output SST files of pending flushes when a previous concurrent flush fails + * When the compressed secondary cache capacity is reduced to 0, it should be completely disabled. Before this fix, inserts and lookups would still go to the backing LRUCache + +- update to 8.6.5 + * Fixed a bug where rocksdb.file.read.verify.file.checksums.micros is not populated. + +- update to 8.6.4 + * Public API change: Add a column family option default_temperature that is used for file reading accounting purpose, such as io statistics, for files that don't have an explicitly set temperature. + +- update to 8.6.3 + * Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. + * Fix a bug where iterator may return incorrect result for DeleteRange() users if there was an error reading from a file.
+ +- update to 8.6.2 + * Fix for async_io where during seek, when reading a block for seeking a target key in a file without any readahead, the iterator aligned the read on a page boundary and read more than necessary. This increased the storage read bandwidth usage. + +- update to 8.6.1 + * Options::compaction_readahead_size's default value is changed from 0 to 2MB. + * Compaction read performance will regress when Options::compaction_readahead_size is explicitly set to 0 + +- update to 8.6.0 + * Added enhanced data integrity checking on SST files with new format_version=6. + * Add a new feature to trim readahead_size during scans up to upper_bound when iterate_upper_bound is specified. + * RocksDB will compare the number of input keys to the number of keys processed after each compaction. + * Add a CF option bottommost_file_compaction_delay to allow specifying the delay of bottommost level single-file compactions. + * Add support to allow enabling / disabling user-defined timestamps feature for an existing column family in combination with the in-Memtable only feature. + * Implement a new admission policy for the compressed secondary cache that admits blocks evicted from the primary cache with the hit bit set. + * Add a column family option memtable_max_range_deletions that limits the number of range deletions in a memtable. + * Add PutEntity API in sst_file_writer + * Add timeout in microsecond option to WaitForCompactOptions + * New statistics rocksdb.file.read.{get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros measure read time of block-based SST tables or blob files during db open, Get(), MultiGet(), using db iterator, VerifyFileChecksums() and VerifyChecksum(). They require stats level greater than StatsLevel::kExceptDetailedTimers. + * Add close_db option to WaitForCompactOptions to call Close() after waiting is done.
+ * Add a new compression option CompressionOptions::checksum for enabling ZSTD's checksum feature to detect corruption during decompression. + * Mark Options::access_hint_on_compaction_start related APIs as deprecated. + * Statistics rocksdb.sst.read.micros now includes time spent on multi read and async read into the file + * For Universal Compaction users, periodic compaction (option periodic_compaction_seconds) will be set to 30 days by default if block based table is used. + * Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." + + +------------------------------------------------------------------- +Wed Sep 27 16:56:46 UTC 2023 - Andreas Stieger + +- update to 8.5.4: + * Fix a bug where compaction read under non direct IO still falls + back to RocksDB internal prefetching after file system's + prefetching returns non-OK status other than + Status::NotSupported() + * For non direct IO, eliminate the file system prefetching + attempt for compaction read when + Options::compaction_readahead_size is 0 + +------------------------------------------------------------------- +Wed Sep 6 06:44:11 UTC 2023 - Andrea Manzini + +- update to 8.5.3 + * Fixed a race condition in GenericRateLimiter that could cause it to stop granting requests + +- update to 8.5.2 + * Fix a bug where iterator may return incorrect result for DeleteRange() users if there was an error reading from a file. + +- update to 8.5.1 + * Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. + +- update to 8.5.0 + * Public API Changes: + Removed recently added APIs GeneralCache and MakeSharedGeneralCache() as our plan changed to stop exposing a general-purpose cache interface. 
The old forms of these APIs, Cache and NewLRUCache(), are still available, although general-purpose caching support will be dropped eventually. + * Behavior Changes + Option periodic_compaction_seconds no longer supports FIFO compaction: setting it has no effect on FIFO compactions. FIFO compaction users should only set option ttl instead. + Move prefetching responsibility to page cache for compaction read for non directIO use case + * Performance Improvements + In case of direct_io, if the buffer passed by the callee is already aligned, RandomAccessFileRead::Read will avoid reallocating a new buffer, reducing memcpy and using the already passed aligned buffer. + Small efficiency improvement to HyperClockCache by reducing chance of compiler-generated heap allocations + * Bug Fixes + Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when the underlying FS passes its own buffer instead of using RocksDB scratch in FSReadRequest. Right now it's an experimental feature. + Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." 
+ +- see more on HISTORY.md (https://github.com/facebook/rocksdb/blob/main/HISTORY.md) + + +------------------------------------------------------------------- +Thu Jul 27 13:10:54 UTC 2023 - Andrea Manzini + +- update to 8.3.2 + * Bug Fixes: + Reduced cases of illegally using Env::Default() during static destruction by never destroying the internal PosixEnv itself +- update to 8.3.1 + * Performance Improvements: + Fixed higher read QPS during DB::Open() reading files created prior to #11406. +- update to 8.3.0 + * New Features: + Introduced a new option block_protection_bytes_per_key, which can be used to enable per key-value integrity protection for in-memory blocks in block cache + Improve the operational safety of publishing a DB or SST files to many hosts by using different block cache hash seeds on different hosts. + Introduced a new option CompactionOptionsFIFO::file_temperature_age_thresholds that allows FIFO compaction to compact files to different temperatures based on key age + Added a new ticker stat to count how many times RocksDB detected a corruption while verifying a block checksum: BLOCK_CHECKSUM_MISMATCH_COUNT. + New statistics rocksdb.file.read.db.open.micros that measures read time of block-based SST tables or blob files during db open. + New statistics tickers for various iterator seek behaviors and relevant filtering, as *_LEVEL_SEEK_*. (#11460) + * Public API Changes: + EXPERIMENTAL: Add new API DB::ClipColumnFamily to clip the key in CF to a certain range. It will physically delete all keys outside the range including tombstones. + Add MakeSharedCache() construction functions to various cache Options objects, and deprecated the NewWhateverCache() functions with long parameter lists. + Changed the meaning of various Bloom filter stats (prefix vs. whole key), with iterator-related filtering only being tracked in the new *_LEVEL_SEEK_*. stats. 
(#11460) + * Behavior changes: + For x86, CPU features are no longer detected at runtime nor in build scripts, but in source code using common preprocessor defines. + This will likely unlock some small performance improvements on some newer hardware, but could hurt performance of the kCRC32c checksum, + which is no longer the default, on some "portable" builds. See PR #11419 for details. + * Bug Fixes: + Delete an empty WAL file on DB open if the log number is less than the min log number to keep + Delete temp OPTIONS file on DB open if there is a failure to write it out or rename it + * Performance Improvements: + Improved the I/O efficiency of prefetching SST metadata by recording more information in the DB manifest. + +------------------------------------------------------------------- +Fri Apr 21 12:55:27 UTC 2023 - Andreas Stieger + +- RocksDB 8.1.1 + * Compaction output file cutting logic now considers range + tombstone start keys. For example, SST partitioner now may + receive ParitionRequest for range tombstone start keys. + * If the async_io ReadOption is specified for MultiGet or + NewIterator on a platform that doesn't support IO uring, the + option is ignored and synchronous IO is used. 
+ * Add support for SecondaryCache with HyperClockCache + * New statistics and performance counters + * bug fixes + * API changes: Changed various functions and features in Cache + that are mostly relevant to custom implementations or wrappers +- build with shared gflags libs + +------------------------------------------------------------------- +Sat Apr 15 13:57:16 UTC 2023 - Andreas Stieger + +- add hardware constraints for reliable builds + +------------------------------------------------------------------- +Sat Mar 25 07:37:14 UTC 2023 - Andreas Stieger + +- Disable usage of jemalloc for gh#jemalloc/jemalloc#1237 to avoid + "TLS error: cannot allocate memory in static TLS block" + +------------------------------------------------------------------- +Wed Mar 22 21:56:01 UTC 2023 - Andreas Stieger + +- RocksDB 8.0.0 +- add rocksdb-8.0.0-reproducible.patch - make build reproducible +- add rocksdb-8.0.0-shared-liburing.patch - liburing dynamic linking +- add rocksdb-8.0.0-rpath.patch + +------------------------------------------------------------------- +Wed Oct 28 21:56:34 UTC 2020 - Adam Mizerski + +- update to 6.13.3 + * see packaged HISTORY.md +- refreshed patch gtest.patch + +------------------------------------------------------------------- +Sat Feb 2 13:44:05 UTC 2019 - Adam Mizerski + +- new package; version 5.17.2 +- added patch gtest.patch diff --git a/rocksdb.spec b/rocksdb.spec new file mode 100644 index 0000000..ebae431 --- /dev/null +++ b/rocksdb.spec @@ -0,0 +1,147 @@ +# +# spec file for package rocksdb +# +# Copyright (c) 2024 SUSE LLC +# Copyright (c) 2024 Andreas Stieger +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. 
The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define lib_name librocksdb9 +%bcond_with jemalloc +Name: rocksdb +Version: 9.7.3 +Release: 0 +Summary: Library for embeddable, persistent and fast key-value store +License: (Apache-2.0 OR GPL-2.0-only) AND BSD-2-Clause +URL: https://rocksdb.org/ +Source: https://github.com/facebook/rocksdb/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz +Patch0: rocksdb-8.0.0-reproducible.patch +Patch1: rocksdb-8.0.0-shared-liburing.patch +Patch2: rocksdb-8.0.0-rpath.patch +BuildRequires: c++_compiler +BuildRequires: cmake +BuildRequires: pkgconfig +BuildRequires: cmake(Snappy) +BuildRequires: pkgconfig(bzip2) +BuildRequires: pkgconfig(liblz4) +BuildRequires: pkgconfig(liburing) +BuildRequires: pkgconfig(libzstd) +BuildRequires: pkgconfig(zlib) +%if %{with jemalloc} +BuildRequires: pkgconfig(jemalloc) +%endif +# see SR#1075555 for gflags linking failure +%if 0%{?suse_version} > 1500 +BuildRequires: pkgconfig(gflags) +%else +BuildRequires: gflags-devel-static +%endif + +%description +RocksDB is a high performance embedded database for key-value data. +It is a fork of LevelDB which was then optimized to exploit many +central processing unit (CPU) cores, and make efficient use of fast +storage, such as solid-state drives (SSD), for input/output (I/O) +bound workloads. It is based on a log-structured merge-tree (LSM tree) +data structure. + +%package -n %{lib_name} +Summary: Shared library from rocksdb + +%description -n %{lib_name} +RocksDB is a high performance embedded database for key-value data. 
+It is a fork of LevelDB which was then optimized to exploit many +central processing unit (CPU) cores, and make efficient use of fast +storage, such as solid-state drives (SSD), for input/output (I/O) +bound workloads. It is based on a log-structured merge-tree (LSM tree) +data structure. + +This package holds the shared library of rocksdb. + +%package tools +Summary: Utility tools for RocksDB +# MariaDB ships /usr/bin/sst_dump - MDEV-14918 +Conflicts: mariadb + +%description tools +RocksDB is a high performance embedded database for key-value data. +This package contains utility tools for RocksDB. + +%package devel +Summary: Development package for RocksDB +Requires: %{lib_name} = %{version} + +%description devel +RocksDB is a high performance embedded database for key-value data. +It is a fork of LevelDB which was then optimized to exploit many +central processing unit (CPU) cores, and make efficient use of fast +storage, such as solid-state drives (SSD), for input/output (I/O) +bound workloads. It is based on a log-structured merge-tree (LSM tree) +data structure. + +This package contains the files needed to compile programs that use +the RocksDB library. + +%prep +%autosetup -p1 + +%build +# building tests is disabled, because they require additional instrumentation, +# which is built into the library in debug mode and adds some overhead. 
+# Warnings: https://github.com/facebook/rocksdb/issues/11043 +%cmake \ + -DPORTABLE=1 \ + -DFAIL_ON_WARNINGS=OFF \ +%if !%{with jemalloc} + -DWITH_JEMALLOC=0 \ +%endif + -DWITH_SNAPPY=ON \ + -DWITH_LZ4=ON \ + -DWITH_ZLIB=ON \ + -DWITH_ZSTD=ON \ + -DWITH_BZ2=ON \ + -DWITH_TESTS=OFF \ + -DWITH_TOOLS=OFF \ + -DWITH_BENCHMARK_TOOLS=OFF \ + %{nil} + +%cmake_build + +%install +%cmake_install +find %{buildroot}%{_libdir} -type f -name "*.a" -print -delete +install -dD -m 755 %{buildroot}/%{_bindir} +install -m 755 build/tools/ldb %{buildroot}/%{_bindir}/ldb +install -m 755 build/tools/sst_dump %{buildroot}/%{_bindir}/sst_dump + +%ldconfig_scriptlets -n %{lib_name} + +%files -n %{lib_name} +%license COPYING LICENSE.Apache LICENSE.leveldb +%{_libdir}/librocksdb.so.* + +%files tools +%license COPYING LICENSE.Apache LICENSE.leveldb +%{_bindir}/ldb +%{_bindir}/sst_dump + +%files devel +%license COPYING LICENSE.Apache LICENSE.leveldb +%doc README.md HISTORY.md LANGUAGE-BINDINGS.md +%{_includedir}/rocksdb +%{_libdir}/librocksdb.so +%{_libdir}/pkgconfig/rocksdb.pc +%{_libdir}/cmake/rocksdb + +%changelog