From cf0e83b6ef38cc16ad1087549857c82951e6fbb4ad00019ca17dfa1ca8ac6ba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Fri, 3 May 2024 12:26:10 +0200 Subject: [PATCH] Sync from SUSE:SLFO:Main fdupes revision a8e4ad5136b3b82a5b3c1a9ea22d5fd6 --- .gitattributes | 23 ++++ fdupes-2.2.1.tar.gz | 3 + fdupes.changes | 260 ++++++++++++++++++++++++++++++++++++++++++++ fdupes.spec | 83 ++++++++++++++ fdupes_wrapper.cpp | 239 ++++++++++++++++++++++++++++++++++++++++ macros.fdupes | 1 + 6 files changed, 609 insertions(+) create mode 100644 .gitattributes create mode 100644 fdupes-2.2.1.tar.gz create mode 100644 fdupes.changes create mode 100644 fdupes.spec create mode 100644 fdupes_wrapper.cpp create mode 100644 macros.fdupes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/fdupes-2.2.1.tar.gz b/fdupes-2.2.1.tar.gz new file mode 100644 index 0000000..66730a0 --- /dev/null +++ b/fdupes-2.2.1.tar.gz @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846bb79ca3f0157856aa93ed50b49217feb68e1b35226193b6bc578be0c5698d +size 144719 diff --git a/fdupes.changes b/fdupes.changes new file mode 100644 index 0000000..b0af349 --- /dev/null +++ b/fdupes.changes @@ -0,0 +1,260 @@ +------------------------------------------------------------------- +Sun Oct 2 16:21:27 UTC 2022 - Andrea Manzini + +- update to 2.2.1: + * Fix bug in code meant to skip over the current log file when --log option is given. + * Updates to copyright notices in source code. + * Add --deferconfirmation option. + * Check that files marked as duplicates haven't changed during program execution before deleting them. + * Update documentation to indicate units for SIZE in command-line options. + * Move some configuration settings to configure.ac file. + +------------------------------------------------------------------- +Fri Apr 1 19:50:32 UTC 2022 - Stefan Brüns + +- Fixes for the new wrapper: + * Order duplicates by name, to get a reproducible file set + (boo#1197484). + * Remove redundant order parameter from fdupes invocation. + * Modernize code, significantly reduce allocations. + * Exit immediately when mandatory parameters are missing. 
+ * Remove obsolete buildroot parameter + * Add some tests for the wrapper + +------------------------------------------------------------------- +Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow + +- A more correct approach to creating symlinks (old bug actually): + Do not link the files as given by fdupes, but turn them into + relative links (it works by chance if given a buildroot, but + fails if running on a subdirectory) +- Support multiple directories given (as glob to the macro) + +------------------------------------------------------------------- +Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow + +- Handle symlinks (-s argument) correctly + +------------------------------------------------------------------- +Sat Mar 12 08:17:37 UTC 2022 - Stephan Kulow + +- Simplify macros.fdupes with a call to a C++ program that does + the same within a fraction of a second what the shell loop did + in many seconds (bsc#1195709) + +------------------------------------------------------------------- +Sun Aug 16 16:59:45 UTC 2020 - Dirk Mueller + +- update to 2.1.2: + * Do not enter ncurses mode when --immediate option given. + * Fix logging/memory corruption bug when using --log with --immediate. + * Break mtime ties using ctime when sorting by time. + * Reduce number of calls to stat(), for speed. + * Clear last command status when new command is entered. + * Rename cs command ("clear all selections") from cs to csel. + * Rename igs command ("invert selections") from igs to isel. + * Add "prune" command as synonym for DELETE key. + * Clear selections after deleting files via prune/DELETE. + * Fix dependency issues when fdupes is configured to not use ncurses. +- build without ncurses for now until buildcycles can be solved + +------------------------------------------------------------------- +Fri Jun 5 23:42:10 UTC 2020 - Jan Engelhardt + +- Use noun phrase in summary. Drop old specfile constructs. 
+ +------------------------------------------------------------------- +Fri May 22 08:27:11 UTC 2020 - Paolo Stivanin + +- Update to v2.0.0 + * Add ncurses mode for interactive file deletion (plain mode still available via --plain or ./configure). + * Add --minsize option. + * Add --maxsize option. + * Add --time option. + * Add --order=ctime option. + * Add --log option. + * Use configure script for installation (Autotools/Automake). +- Remove fdupes-makefile.patch + +------------------------------------------------------------------- +Thu Apr 16 21:07:45 UTC 2020 - Matej Cepl + +- Make package building even on platforms, where _rpmmacrodir + is not defined. + +------------------------------------------------------------------- +Wed May 8 09:37:54 UTC 2019 - Dominique Leuenberger + +- Move RPM macros to %_rpmmacrodir. + +------------------------------------------------------------------- +Fri Dec 16 12:40:20 UTC 2016 - psimons@suse.com + +- We cannot update from fdupes 1.51 to 1.6.1. That "downgrade" + works okay'ish for Tumbleweed because we can replace the old + package with the new one, but in SLE this is not possible. We + asked upstream to please release a "2.0" version to remedy these + issues (https://github.com/adrianlopezroche/fdupes/issues/74), + but he does not respond. Therefore, we'll call this version 1.61, + ignoring upstreams change in the versioning scheme. + +------------------------------------------------------------------- +Mon Dec 5 13:54:08 UTC 2016 - psimons@suse.com + +- Upstream has changed their versioning scheme after version 1.51. + Unfortunately, the new version 1.6.x won't be recognized as + "newer" by zypper. This commit adds appropriate "provides" and + "obsoletes" attributes to the spec file to remedy that issue. + +------------------------------------------------------------------- +Fri Nov 4 14:33:59 UTC 2016 - psimons@suse.com + +- Drop 50_bts284274_hardlinkreplace.dpatch. 
The --linkhard option + added by this patch has an implementation bug that can cause data + loss. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=677419 + has more details. + +------------------------------------------------------------------- +Fri Nov 4 13:47:27 UTC 2016 - psimons@suse.com + +- Update to version 1.6.1. The following patches have been applied + upstream and were dropped: + * 0001-restore-pristine-code.patch + * 0002-Added-to-escape-minus-signs-in-manpage-lintian-warni.patch + * 0003-Fix-a-typo-in-a-manpage-bts353789.patch + * 0005-add-summarize-to-manpage-bts481809.patch + * 0006-add-nohidden-support-bts511702.patch + * 0007-Disambiguate-the-options-recurse-and-recurse-bts5371.patch + * 0008-speedup-the-file-compare.patch + * 0009-glibc-endianness-check-in-md5.patch + * 0010-add-permissions-mode.patch + * 0011-add-an-option-to-sort-duplicate-files-by-name.patch +- 50_bts284274_hardlinkreplace.dpatch had to be refreshed. + +------------------------------------------------------------------- +Tue Aug 4 13:01:47 UTC 2015 - tchvatal@suse.com + +- By default relink hardlinks too, should fix bnc#940296 + +------------------------------------------------------------------- +Mon Aug 3 19:26:55 UTC 2015 - tchvatal@suse.com + +- Update to upstream git repo on github +- Refresh patches: + * fdupes-makefile.patch + * 0008-speedup-the-file-compare.patch + * 0010-add-permissions-mode.patch + * 0011-add-an-option-to-sort-duplicate-files-by-name.patch + * 50_bts284274_hardlinkreplace.dpatch +- Upstreamed patch: + * 0004-Large-file-support-for-2GB-files-bts447601.patch +- Remove whitespace from fdupes.macros file +- Cleanup with spec-cleaner + - Obey rpm-opt-flags + - run test phase + +------------------------------------------------------------------- +Sun Dec 21 19:58:41 UTC 2014 - bwiedemann@suse.com + +- add -L (--linkhard) option + add 50_bts284274_hardlinkreplace.dpatch + +------------------------------------------------------------------- +Tue Apr 29 
16:08:34 UTC 2014 - stefan.bruens@rwth-aachen.de + +- sort the output of fdupes by filename to make it deterministic + for parallel builds + * 0011-add-an-option-to-sort-duplicate-files-by-name.patch + +------------------------------------------------------------------- +Tue Oct 16 11:44:08 UTC 2012 - mvyskocil@suse.com + +- update to 1.5.0-PR2 + * new "--summarize" option + * new "--recurse:" selective recursion option + * new "--noprompt" option for totally automated deletion of + duplicate files. + * sorts duplicates (old to new) for consistent order when + listing or deleting duplicate files. + * tests for early matching of files, which should help speed up + the matching process when large files are involved. + * warns whenever a file cannot be deleted. + * bugfixes (proper file closing, zero-length files, ...) +- drop the fdupes-sort-output.diff (upstream uses mtime based) +- rename and rebase fdupes-speedup.patch to 0008-speedup-the-compare.patch +- rename and rebase fdupes-endianness.patch to + 0009-glibc-endianness-check-in-md5.patch +- add -p/--permissions switch so files with different permissions or uid/gid + are not considered as duplicates (bnc#784670) + * this mode is a default one for fdupes macro + 0010-add-permissions-mode.patch +- imported several fixes from Debian + * 0001-restore-pristine-code.patch - some common code fixes, partly obsoletes + speedup patch + * manual page fixes + 0002-Added-to-escape-minus-signs-in-manpage-lintian-warni.patch + 0003-Fix-a-typo-in-a-manpage-bts353789.patch + 0005-add-summarize-to-manpage-bts481809.patch + 0006-add-nohidden-support-bts511702.patch + 0007-Disambiguate-the-options-recurse-and-recurse-bts5371.patch + * 0004-Large-file-support-for-2GB-files-bts447601.patch - large file support + +------------------------------------------------------------------- +Sun Mar 25 22:13:12 UTC 2012 - behrisch@users.sf.net + +- added "which" requirement for red hat distros +
+------------------------------------------------------------------- +Mon Feb 13 10:46:03 UTC 2012 - coolo@suse.com + +- patch license to follow spdx.org standard + +------------------------------------------------------------------- +Wed Oct 5 15:14:32 UTC 2011 - uli@suse.com + +- cross-build workaround: fake gcc script to work around build + system not honoring CC + +------------------------------------------------------------------- +Sun Sep 18 17:17:12 UTC 2011 - jengelh@medozas.de + +- Apply packaging guidelines (remove redundant/obsolete + tags/sections from specfile, etc.) + +------------------------------------------------------------------- +Mon Feb 15 15:43:34 UTC 2010 - mvyskocil@suse.cz + +- fix bnc#406825: speed up fdupes + * fdupes-speedup.patch fixes some performance gaps in code + * fdupes-endianness.patch speeds up the built-in md5 on little-endian machines + +------------------------------------------------------------------- +Wed Aug 26 12:53:54 CEST 2009 - mls@suse.de + +- make patch0 usage consistent + +------------------------------------------------------------------- +Thu Jan 15 17:05:36 CET 2009 - coolo@suse.de + +- sort the output of fdupes to make it deterministic + +------------------------------------------------------------------- +Thu Sep 6 18:41:37 CEST 2007 - mls@suse.de + +- do not hardlink empty files in %fdupes macro + +------------------------------------------------------------------- +Wed Sep 5 15:44:52 CEST 2007 - nadvornik@suse.cz + +- support filenames with spaces in %fdupes macro [#307727] + +------------------------------------------------------------------- +Tue May 15 22:53:03 CEST 2007 - coolo@suse.de + +- add an RPM macro to make use of it in spec files + +------------------------------------------------------------------- +Thu Nov 16 13:16:07 CET 2006 - dmueller@suse.de + +- Initial package (1.40) + diff --git a/fdupes.spec b/fdupes.spec new file mode 100644 index 0000000..2e1bd9f --- /dev/null +++ b/fdupes.spec @@
-0,0 +1,83 @@ +# +# spec file for package fdupes +# +# Copyright (c) 2022 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%{?!_rpmmacrodir:%define _rpmmacrodir /usr/lib/rpm/macros.d} + +Name: fdupes +Version: 2.2.1 +Release: 0 +Summary: Tool to identify or delete duplicate files +License: MIT +Group: Productivity/Archiving/Compression +URL: https://github.com/adrianlopezroche/fdupes +Source0: https://github.com/adrianlopezroche/fdupes/releases/download/v%{version}/fdupes-%{version}.tar.gz +Source1: macros.fdupes +Source2: fdupes_wrapper.cpp +BuildRequires: gcc-c++ + +%description +FDUPES is a program for identifying or deleting duplicate files +residing within specified directories. 
+ +%prep +%autosetup -p1 + +%build +%configure --without-ncurses +%make_build +g++ $RPM_OPT_FLAGS %{S:2} -o fdupes_wrapper + +%install +%make_install +install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name} +install -D -m755 fdupes_wrapper %{buildroot}/usr/lib/rpm/fdupes_wrapper + +%check +./%{name} testdir +./%{name} --omitfirst testdir +./%{name} --recurse testdir +./%{name} --size testdir + +# Check wrapper +PATH=`pwd`:$PATH +(cd testdir; md5sum ./* ./*/* > ../testdir.md5 || true) +for operation in '-n' '-s' ' '; do + cp -R testdir "testdir${operation}" + ./fdupes_wrapper ${operation} "testdir${operation}" + (cd "testdir${operation}"; md5sum --check ../testdir.md5) +done +# Check order does not depend on creation order - x should be target +mkdir testdir_order +for t in "a b x" "x a b" "a x b"; do + pushd testdir_order + for f in $t ; do cp ../testdir.md5 $f; done + ../fdupes_wrapper -s ./ + test -h ./a + test -h ./b + rm * + popd +done + +%files +%doc CHANGES +%{_bindir}/%{name} +%{_mandir}/man1/%{name}.1* +%{_rpmmacrodir}/macros.%{name} +/usr/lib/rpm/fdupes_wrapper + +%changelog diff --git a/fdupes_wrapper.cpp b/fdupes_wrapper.cpp new file mode 100644 index 0000000..7bd8216 --- /dev/null +++ b/fdupes_wrapper.cpp @@ -0,0 +1,239 @@ +/* + * A little helper to wrap around fdupes and create hard/soft links of the + * dups found. Used in openSUSE rpm. 
+ * + * Copyright 2022 Jiri Slaby + * 2022 Stephan Kulow + * 2022 Stefan Brüns + * + * SPDX-License-Identifier: MIT + */ +/* NOTE(review): in this copy of the patch the header names after #include and the template arguments of vector are missing, presumably lost during text extraction. TODO restore them from the original fdupes_wrapper.cpp. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +/* One path reported by fdupes: the inode number that main() reads with stat(), a link counter that main() creates as 0 and handle_dups() fills in, and the path itself. */ +struct file_entry +{ + ino_t inode; + nlink_t link_count; + string path; + + file_entry(ino_t i, nlink_t n, string&& p) + : inode(i), link_count(n), path(move(p)) {} +}; +using dup_set = vector;/* presumably a vector of file_entry, the element type is not visible here; TODO confirm */ +/* What to do with each duplicate: replace it with a symbolic link, replace it with a hard link, or only print the link that would be made. */ +enum class Operation { + Symlink, + Hardlink, + DryRun, +}; +/* Splits path at every slash into its components. A component equal to .. removes the component collected before it, and a component equal to . is dropped unless the path ends with it. Empty components, as produced by a leading or doubled slash, are kept. NOTE(review): a .. with nothing collected before it calls pop_back() on an empty vector, which is undefined behaviour; presumably callers only pass absolute, normalised paths. TODO confirm. */ +vector split_paths(const string& path) +{ + string token; + vector paths; + stringstream ss(path); + while (getline(ss, token, '/')) { + if (token == "..") { + paths.pop_back(); + } else if (token != "." || ss.eof()) { + paths.push_back(token); + } + } + return paths; +} +/* Joins the non-empty components with a slash between them. No leading or trailing slash is written. */ +string merge_paths(const vector& paths) +{ + string path; + for (const auto& s : paths) { + if (s.empty()) + continue; + if (!path.empty()) + path += "/"; + path += s; + } + + return path; +} +/* Builds the path of p2 as seen from the directory that contains p1: the last component of p1 is dropped, the leading components that both paths share are skipped, every remaining component on the p1 side becomes one .. step, and the remaining components of p2 are appended. NOTE(review): the comparison loop dereferences it2 without checking it against paths2.end(); this looks like it relies on the two paths diverging before p2 runs out. TODO confirm. */ +string relative(const string& p1, const string& p2) +{ + vector paths1 = split_paths(p1); + paths1.pop_back(); + vector paths2 = split_paths(p2); + vector paths; + vector::const_iterator it1 = paths1.begin(); + vector::const_iterator it2 = paths2.begin(); + // first remove the common parts + while (it1 != paths1.end() && *it1 == *it2) { + it1++; + it2++; + } + for (; it1 != paths1.end(); ++it1) { + paths.push_back(".."); + } + for (; it2 != paths2.end(); ++it2) { + paths.push_back(*it2); + } + + return merge_paths(paths); +} +/* Prints the planned link and, unless op is DryRun, removes file and creates it again as a symbolic link (Symlink) or as a hard link (any other op) pointing to target. Terminates the process with exit status 1 when removing the file or creating the link fails. */ +void link_file(const std::string& file, const std::string& target, Operation op) +{ + std::cout << "Linking " << file << " -> " << target << std::endl; + if (op == Operation::DryRun) + return; + + if (unlink(file.c_str())) { + std::cerr << "Removing '" << file << "' failed."
<< std::endl; + exit(1); + } + int ret; + if (op == Operation::Symlink) { + ret = ::symlink(target.c_str(), file.c_str()); + } else {/* DryRun returned earlier, so only Hardlink reaches this branch */ + ret = link(target.c_str(), file.c_str()); + } + if (ret) { + std::cerr << "Linking '" << file << "' failed." << std::endl; + exit(1); + } +} +/* Returns the link text to use for file: target unchanged for Hardlink, otherwise target expressed relative to the directory of file. DryRun takes the relative branch as well. */ +std::string target_for_link(string target, const std::string &file, Operation op) +{ + if (op == Operation::Hardlink) // hardlinks don't care + return target; + + return relative(file, target); +} +/* Processes one set of identical files. It counts how many entries of the set share each inode, picks as link target the entry whose inode occurs most often in the set (on a tie, the entry with the greatest path string), and calls link_file() for every entry whose inode differs from that of the target. The order of dups is changed. NOTE(review): dups[0] is read unconditionally, so an empty set is an out-of-range access; main() calls this for every short line it reads. TODO confirm that fdupes never emits a leading blank line or two blank lines in a row. */ +void handle_dups(dup_set& dups, Operation op) +{ + // calculate number of hardlinked duplicates found, for each file + // this may be different than the st_nlink value + std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) { + return a.inode < b.inode; + }); + auto first = dups.begin(); + while (first != dups.end()) { + auto r = equal_range(first, dups.end(), *first, [](const file_entry& a, const file_entry& b) { + return a.inode < b.inode; + }); + for (auto i = r.first; i != r.second; ++i) { + i->link_count = std::distance(r.first, r.second); + } + first = r.second; + } + + // use the file with most hardlinks as target + // in case of ties, sort by name to get a stable order for reproducible builds + std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) { + if (a.link_count == b.link_count) + return a.path > b.path; + return a.link_count > b.link_count; + }); + + const string& target = dups[0].path; + + for (const file_entry& e : dups) { + // skip duplicates hardlinked to first entry + if (e.inode == dups[0].inode) + continue; + + link_file(e.path, target_for_link(target, e.path, op), op); + } +} +/* Entry point. Option -s selects symbolic links and -n selects a dry run; without either, hard links are made. Both options store into the same variable, so the one given last wins, and unknown options are ignored. Every remaining argument is taken as a directory, and one that does not start with a slash is prefixed with the current working directory. The directories are passed to fdupes through popen(); each output line is a path that is added to the current set after stat(), and a line shorter than two characters hands the set to handle_dups() and starts a new one. Returns 1 when no directory is given, when a line does not end in a line break, or when stat() fails; otherwise 0. */ +int main(int argc, char** argv) +{ + Operation op = Operation::Hardlink; + std::vector roots; + while (1) { + int result = getopt(argc, argv, "sn"); + if (result == -1) + break; /* end of list */ + switch (result) { + case 's': + op = Operation::Symlink; + break; + case 'n': + op = Operation::DryRun; + break; + default: /* unknown */ + break; +
} + } + while (optind < argc) { + std::string root = argv[optind++]; + if (root.front() != '/') { + char buffer[PATH_MAX]; + root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root;/* NOTE(review): the getcwd() result is used without a check for a null return */ + } + roots.push_back(root); + } + + if (roots.empty()) { + std::cerr << "Missing directory argument.";/* NOTE(review): no line break is written after this message */ + return 1; + } + /* fdupes options used: + -q: hide progress indicator + -p: don't consider files with different owner/group or permission bits as duplicates + -n: exclude zero-length files from consideration + -r: follow subdirectories + -H: also report hard links as duplicates + */ + std::string command = "fdupes -q -p -r -n"; + if (op != Operation::Symlink) { + /* if we create symlinks, avoid looking at hard links being duplicated. This way + fdupes is faster and won't break them up anyway */ + command += " -H"; + } + for (auto it = roots.begin(); it != roots.end(); ++it) { + command += " '" + *it + "'";/* NOTE(review): the directory is wrapped in single quotes for the shell, but a single quote inside the name is not escaped */ + } + FILE* pipe = popen(command.c_str(), "r"); + if (!pipe) { + throw std::runtime_error("popen() failed!");/* NOTE(review): nothing in this function catches the exception */ + } + std::vector buffer; + buffer.resize(MAXPATHLEN); + + dup_set dups; + while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) { + std::string line = buffer.data(); + if (line.length() < 2) {/* a line holding at most the line break ends the current set */ + handle_dups(dups, op); + dups.clear(); + continue; + } + if (line.back() != '\n') {/* fgets() stopped before a line break, so the line did not fit into the buffer or the output ended without one */ + std::cerr << "Too long lines? '" << line << "'" << std::endl; + return 1;/* NOTE(review): this return and the one below leave without calling pclose() */ + } + line.pop_back(); + + struct stat sb; + if (stat(line.c_str(), &sb)) { + std::cerr << "Stat on '" << buffer.data() << "' failed" << std::endl; + return 1; + } + dups.emplace_back(sb.st_ino, 0, std::move(line)); + } + pclose(pipe);/* NOTE(review): the exit status of fdupes is not inspected, and paths still held in dups at this point are not passed to handle_dups() */ + + return 0; +} diff --git a/macros.fdupes b/macros.fdupes new file mode 100644 index 0000000..be95009 --- /dev/null +++ b/macros.fdupes @@ -0,0 +1 @@ +%fdupes /usr/lib/rpm/fdupes_wrapper