diff --git a/fdupes-1.6.1.tar.gz b/fdupes-1.6.1.tar.gz deleted file mode 100644 index 1b4ae27..0000000 --- a/fdupes-1.6.1.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d6b6fdb0b8419815b4df3bdfd0aebc135b8276c90bbbe78ebe6af0b88ba49ea -size 20869 diff --git a/fdupes-2.3.1.tar.gz b/fdupes-2.3.1.tar.gz new file mode 100644 index 0000000..37e286b --- /dev/null +++ b/fdupes-2.3.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2482b4b8c931bd17cea21f4c27fa4747b877523029d57f794a2b48e6c378db17 +size 155094 diff --git a/fdupes-makefile.patch b/fdupes-makefile.patch deleted file mode 100644 index 1884f2d..0000000 --- a/fdupes-makefile.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: Makefile -=================================================================== ---- Makefile.orig 2016-08-21 06:54:46.000000000 +0200 -+++ Makefile 2016-11-04 13:46:21.037423459 +0100 -@@ -11,7 +11,7 @@ - # determination of the actual installation directories. - # Suggested values are "/usr/local", "/usr", "/pkgs/fdupes-$(VERSION)" - # --PREFIX = /usr/local -+PREFIX = /usr - - # - # When compiling for 32-bit systems, FILEOFFSET_64BIT must be enabled diff --git a/fdupes.changes b/fdupes.changes index 04694a6..5381a28 100644 --- a/fdupes.changes +++ b/fdupes.changes @@ -1,3 +1,120 @@ +------------------------------------------------------------------- +Fri Oct 24 15:06:13 UTC 2025 - Pablo Suárez Hernández + +- Only build package on RHEL environments + +------------------------------------------------------------------- +Sat Jun 29 20:11:57 UTC 2024 - Dirk Müller + +- update to 2.3.1: + * Fix buffer overflow bug in getrealpath() function. + +------------------------------------------------------------------- +Wed Mar 20 07:25:33 UTC 2024 - Dominique Leuenberger + +- Do not use sqlite, as this pulls sqlite into Ring0 at no real + benefit performance wise: the cache is not reused between runs. 
+ + Drop sqlite-devel BuildRequires + + Pass --without-sqlite to configure + +------------------------------------------------------------------- +Mon Mar 18 09:36:27 UTC 2024 - ming li + +- Update to 2.3.0: + * Add --cache option to speed up file comparisons. + * Use nanosecond precision for file times, if available. + * Fix compilation issue on OpenBSD. + * Other changes like fixing typos, wording, etc. + +------------------------------------------------------------------- +Sun Oct 2 16:21:27 UTC 2022 - Andrea Manzini + +- update to 2.2.1: + * Fix bug in code meant to skip over the current log file when --log option is given. + * Updates to copyright notices in source code. + * Add --deferconfirmation option. + * Check that files marked as duplicates haven't changed during program execution before deleting them. + * Update documentation to indicate units for SIZE in command-line options. + * Move some configuration settings to configure.ac file. + +------------------------------------------------------------------- +Fri Apr 1 19:50:32 UTC 2022 - Stefan Brüns + +- Fixes for the new wrapper: + * Order duplicates by name, to get a reproducible file set + (boo#1197484). + * Remove redundant order parameter from fdupes invocation. + * Modernize code, significantly reduce allocations. + * Exit immediately when mandatory parameters are missing. 
+ * Remove obsolete buildroot parameter + * Add some tests for the wrapper + +------------------------------------------------------------------- +Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow + +- A more correct approach to creating symlinks (old bug actually): + Do not link the files as given by fdupes, but turn them into + relative links (it works by chance if given a buildroot, but + fails if running on a subdirectory) +- Support multiple directories given (as glob to the macro) + +------------------------------------------------------------------- +Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow + +- Handle symlinks (-s argument) correctly + +------------------------------------------------------------------- +Sat Mar 12 08:17:37 UTC 2022 - Stephan Kulow + +- Simplify macros.fdupes with a call to a C++ program that does + the same within a fraction of a second what the shell loop did + in many seconds (bsc#1195709) + +------------------------------------------------------------------- +Sun Aug 16 16:59:45 UTC 2020 - Dirk Mueller + +- update to 2.1.2: + * Do not enter ncurses mode when --immediate option given. + * Fix logging/memory corruption bug when using --log with --immediate. + * Break mtime ties using ctime when sorting by time. + * Reduce number of calls to stat(), for speed. + * Clear last command status when new command is entered. + * Rename cs command ("clear all selections") from cs to csel. + * Rename igs command ("invert selections") from igs to isel. + * Add "prune" command as synonym for DELETE key. + * Clear selections after deleting files via prune/DELETE. + * Fix dependency issues when fdupes is configured to not use ncurses. +- build without ncurses for now until buildcycles can be solved + +------------------------------------------------------------------- +Fri Jun 5 23:42:10 UTC 2020 - Jan Engelhardt + +- Use noun phrase in summary. Drop old specfile constructs. 
+ +------------------------------------------------------------------- +Fri May 22 08:27:11 UTC 2020 - Paolo Stivanin + +- Update to v2.0.0 + * Add ncurses mode for interactive file deletion (plain mode still available via --plain or ./configure). + * Add --minsize option. + * Add --maxsize option. + * Add --time option. + * Add --order=ctime option. + * Add --log option. + * Use configure script for installation (Autotools/Automake). +- Remove fdupes-makefile.patch + +------------------------------------------------------------------- +Thu Apr 16 21:07:45 UTC 2020 - Matej Cepl + +- Make package building even on platforms, where _rpmmacrodir + is not defined. + +------------------------------------------------------------------- +Wed May 8 09:37:54 UTC 2019 - Dominique Leuenberger + +- Move RPM macros to %_rpmmacrodir. + ------------------------------------------------------------------- Fri Dec 16 12:40:20 UTC 2016 - psimons@suse.com diff --git a/fdupes.spec b/fdupes.spec index 66b3daa..b062090 100644 --- a/fdupes.spec +++ b/fdupes.spec @@ -1,7 +1,7 @@ # # spec file for package fdupes # -# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -12,42 +12,43 @@ # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. -# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ # -# Upstream calls this version 1.6.1, but that version number is *lower* than -# previously released ones, like 1.51, so we mangle the number to keep -# continuity: https://github.com/adrianlopezroche/fdupes/issues/74. 
-%global upstream_version 1.6.1 + +%{?!_rpmmacrodir:%define _rpmmacrodir /usr/lib/rpm/macros.d} Name: fdupes -Version: 1.61 +Version: 2.3.1 Release: 0 -Summary: Identifying or deleting duplicate files +Summary: Tool to identify or delete duplicate files License: MIT Group: Productivity/Archiving/Compression -Url: https://github.com/adrianlopezroche/fdupes -Source0: https://github.com/adrianlopezroche/fdupes/archive/v%{upstream_version}.tar.gz#/%{name}-%{upstream_version}.tar.gz +URL: https://github.com/adrianlopezroche/fdupes +Source0: https://github.com/adrianlopezroche/fdupes/releases/download/v%{version}/fdupes-%{version}.tar.gz Source1: macros.fdupes -#PATCH-FIX-SUSE: fix patch according distro's needs -Patch0: fdupes-makefile.patch -BuildRoot: %{_tmppath}/%{name}-%{version}-build +Source2: fdupes_wrapper.cpp +BuildRequires: gcc-c++ +%if ! 0%{?rhel} +ExclusiveArch: do_not_build +%endif %description FDUPES is a program for identifying or deleting duplicate files residing within specified directories. 
%prep -%setup -q -n %{name}-%{upstream_version} -%patch0 +%autosetup -p1 %build -make %{?_smp_mflags} COMPILER_OPTIONS="%{optflags}" +%configure --without-ncurses --without-sqlite +%make_build +g++ $RPM_OPT_FLAGS %{S:2} -o fdupes_wrapper %install -install -D -m755 %{name} %{buildroot}%{_bindir}/%{name} -install -D -m644 %{name}.1 %{buildroot}%{_mandir}/man1/%{name}.1 -install -D -m644 %{SOURCE1} %{buildroot}%{_sysconfdir}/rpm/macros.%{name} +%make_install +install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name} +install -D -m755 fdupes_wrapper %{buildroot}/usr/lib/rpm/fdupes_wrapper %check ./%{name} testdir @@ -55,11 +56,31 @@ install -D -m644 %{SOURCE1} %{buildroot}%{_sysconfdir}/rpm/macros.%{name} ./%{name} --recurse testdir ./%{name} --size testdir +# Check wrapper +PATH=`pwd`:$PATH +(cd testdir; md5sum ./* ./*/* > ../testdir.md5 || true) +for operation in '-n' '-s' ' '; do + cp -R testdir "testdir${operation}" + ./fdupes_wrapper ${operation} "testdir${operation}" + (cd "testdir${operation}"; md5sum --check ../testdir.md5) +done +# Check order does not depend on creation order - x should be target +mkdir testdir_order +for t in "a b x" "x a b" "a x b"; do + pushd testdir_order + for f in $t ; do cp ../testdir.md5 $f; done + ../fdupes_wrapper -s ./ + test -h ./a + test -h ./b + rm * + popd +done + %files -%defattr(-, root, root) %doc CHANGES %{_bindir}/%{name} %{_mandir}/man1/%{name}.1* -%config %{_sysconfdir}/rpm/macros.%{name} +%{_rpmmacrodir}/macros.%{name} +/usr/lib/rpm/fdupes_wrapper %changelog
diff --git a/fdupes_wrapper.cpp b/fdupes_wrapper.cpp
new file mode 100644
index 0000000..7bd8216
--- /dev/null
+++ b/fdupes_wrapper.cpp
@@ -0,0 +1,291 @@
+/*
+ * A little helper to wrap around fdupes and create hard/soft links of the
+ * dups found. Used in openSUSE rpm.
+ *
+ * Copyright 2022 Jiri Slaby
+ *           2022 Stephan Kulow
+ *           2022 Stefan Brüns
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+using namespace std;
+
+struct file_entry
+{
+    ino_t inode;
+    nlink_t link_count;
+    string path;
+
+    file_entry(ino_t i, nlink_t n, string&& p)
+        : inode(i), link_count(n), path(move(p)) {}
+};
+using dup_set = vector<file_entry>;
+
+enum class Operation {
+    Symlink,
+    Hardlink,
+    DryRun,
+};
+
+// Split a '/'-separated path into its components, resolving "." and ".."
+// lexically. Empty components are kept here and dropped by merge_paths().
+vector<string> split_paths(const string& path)
+{
+    string token;
+    vector<string> paths;
+    stringstream ss(path);
+    while (getline(ss, token, '/')) {
+        if (token == "..") {
+            // pop_back() on an empty vector is undefined behaviour
+            if (!paths.empty())
+                paths.pop_back();
+        } else if (token != "." || ss.eof()) {
+            paths.push_back(token);
+        }
+    }
+    return paths;
+}
+
+// Join the non-empty components with '/'. The result never starts with '/'.
+string merge_paths(const vector<string>& paths)
+{
+    string path;
+    for (const auto& s : paths) {
+        if (s.empty())
+            continue;
+        if (!path.empty())
+            path += "/";
+        path += s;
+    }
+
+    return path;
+}
+
+// Compute the path of p2 relative to the directory that contains p1, i.e. the
+// string to store in a symlink created at p1 that should point to p2.
+string relative(const string& p1, const string& p2)
+{
+    vector<string> paths1 = split_paths(p1);
+    // drop the file name, only the containing directory matters
+    if (!paths1.empty())
+        paths1.pop_back();
+    vector<string> paths2 = split_paths(p2);
+    vector<string> paths;
+    vector<string>::const_iterator it1 = paths1.begin();
+    vector<string>::const_iterator it2 = paths2.begin();
+    // first remove the common parts; stop at the end of either path so that
+    // it2 is never dereferenced past the end
+    while (it1 != paths1.end() && it2 != paths2.end() && *it1 == *it2) {
+        ++it1;
+        ++it2;
+    }
+    for (; it1 != paths1.end(); ++it1) {
+        paths.push_back("..");
+    }
+    for (; it2 != paths2.end(); ++it2) {
+        paths.push_back(*it2);
+    }
+
+    return merge_paths(paths);
+}
+
+// Replace file by a symlink or hardlink to target; only report in dry-run mode.
+void link_file(const std::string& file, const std::string& target, Operation op)
+{
+    std::cout << "Linking " << file << " -> " << target << std::endl;
+    if (op == Operation::DryRun)
+        return;
+
+    if (unlink(file.c_str())) {
+        std::cerr << "Removing '" << file << "' failed." << std::endl;
+        exit(1);
+    }
+    int ret;
+    if (op == Operation::Symlink) {
+        ret = ::symlink(target.c_str(), file.c_str());
+    } else {
+        ret = link(target.c_str(), file.c_str());
+    }
+    if (ret) {
+        std::cerr << "Linking '" << file << "' failed." << std::endl;
+        exit(1);
+    }
+}
+
+// Hardlinks use the target path as is; symlinks get a path relative to file.
+std::string target_for_link(string target, const std::string &file, Operation op)
+{
+    if (op == Operation::Hardlink) // hardlinks don't care
+        return target;
+
+    return relative(file, target);
+}
+
+// Link all members of one set of duplicates to a single target file.
+void handle_dups(dup_set& dups, Operation op)
+{
+    // nothing to link; also keeps dups[0] below from reading an empty vector
+    if (dups.size() < 2)
+        return;
+
+    // calculate number of hardlinked duplicates found, for each file
+    // this may be different than the st_nlink value
+    std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) {
+        return a.inode < b.inode;
+    });
+    auto first = dups.begin();
+    while (first != dups.end()) {
+        auto r = equal_range(first, dups.end(), *first, [](const file_entry& a, const file_entry& b) {
+            return a.inode < b.inode;
+        });
+        for (auto i = r.first; i != r.second; ++i) {
+            i->link_count = std::distance(r.first, r.second);
+        }
+        first = r.second;
+    }
+
+    // use the file with most hardlinks as target
+    // in case of ties, sort by name to get a stable order for reproducible builds
+    std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) {
+        if (a.link_count == b.link_count)
+            return a.path > b.path;
+        return a.link_count > b.link_count;
+    });
+
+    const string& target = dups[0].path;
+
+    for (const file_entry& e : dups) {
+        // skip duplicates hardlinked to first entry
+        if (e.inode == dups[0].inode)
+            continue;
+
+        link_file(e.path, target_for_link(target, e.path, op), op);
+    }
+}
+
+// Quote s so that a POSIX shell passes it on as one literal word.
+static string shell_quote(const string& s)
+{
+    string quoted = "'";
+    for (char c : s) {
+        if (c == '\'')
+            quoted += "'\\''";
+        else
+            quoted += c;
+    }
+    quoted += "'";
+    return quoted;
+}
+
+int main(int argc, char** argv)
+{
+    Operation op = Operation::Hardlink;
+    std::vector<std::string> roots;
+    while (1) {
+        int result = getopt(argc, argv, "sn");
+        if (result == -1)
+            break; /* end of list */
+        switch (result) {
+        case 's':
+            op = Operation::Symlink;
+            break;
+        case 'n':
+            op = Operation::DryRun;
+            break;
+        default: /* unknown */
+            break;
+        }
+    }
+    while (optind < argc) {
+        std::string root = argv[optind++];
+        if (root.empty() || root.front() != '/') {
+            char buffer[PATH_MAX];
+            // getcwd() returns NULL on failure, which must not reach std::string
+            if (!getcwd(buffer, PATH_MAX)) {
+                std::cerr << "Cannot determine current directory." << std::endl;
+                return 1;
+            }
+            root = std::string(buffer) + '/' + root;
+        }
+        roots.push_back(root);
+    }
+
+    if (roots.empty()) {
+        std::cerr << "Missing directory argument." << std::endl;
+        return 1;
+    }
+    /* fdupes options used:
+       -q: hide progress indicator
+       -p: don't consider files with different owner/group or permission bits as duplicates
+       -n: exclude zero-length files from consideration
+       -r: follow subdirectories
+       -H: also report hard links as duplicates
+    */
+    std::string command = "fdupes -q -p -r -n";
+    if (op != Operation::Symlink) {
+        /* if we create symlinks, avoid looking at hard links being duplicated. This way
+           fdupes is faster and won't break them up anyway */
+        command += " -H";
+    }
+    for (const std::string& root : roots) {
+        // quote, so that a "'" in a path cannot break out of the shell word
+        command += ' ' + shell_quote(root);
+    }
+    FILE* pipe = popen(command.c_str(), "r");
+    if (!pipe) {
+        std::cerr << "popen() failed!" << std::endl;
+        return 1;
+    }
+    std::vector<char> buffer;
+    buffer.resize(MAXPATHLEN);
+
+    dup_set dups;
+    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe) != nullptr) {
+        std::string line = buffer.data();
+        if (line.length() < 2) {
+            handle_dups(dups, op);
+            dups.clear();
+            continue;
+        }
+        if (line.back() != '\n') {
+            std::cerr << "Too long lines? '" << line << "'" << std::endl;
+            pclose(pipe);
+            return 1;
+        }
+        line.pop_back();
+
+        struct stat sb;
+        if (stat(line.c_str(), &sb)) {
+            std::cerr << "Stat on '" << line << "' failed" << std::endl;
+            pclose(pipe);
+            return 1;
+        }
+        dups.emplace_back(sb.st_ino, 0, std::move(line));
+    }
+    // handle a final set that was not terminated by an empty line
+    handle_dups(dups, op);
+
+    // a non-zero status means that fdupes (or the shell) failed
+    if (pclose(pipe) != 0) {
+        std::cerr << "fdupes failed." << std::endl;
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/macros.fdupes b/macros.fdupes index fc9b3ad..be95009 100644 --- a/macros.fdupes +++ b/macros.fdupes @@ -1,21 +1 @@ -%fdupes(s) \ - _target=""; \ - _symlinks=0; \ - %{-s:_symlinks=1;} \ - fdupes -q -p -n -H -o name -r %1 | \ - while read _file; do \ - if test -z "$_target" ; then \ - _target="$_file"; \ - else \ - if test -z "$_file" ; then \ - _target=""; \ - continue ; \ - fi ; \ - if test "$_symlinks" = 1; then \ - ln -sf "${_target#%{buildroot}}" "$_file"; \ - else \ - ln -f "$_target" "$_file"; \ - fi ;\ - fi ; \ - done \ -%{nil} +%fdupes /usr/lib/rpm/fdupes_wrapper