Sync from SUSE:SLFO:Main fdupes revision a8e4ad5136b3b82a5b3c1a9ea22d5fd6

This commit is contained in:
Adrian Schröter 2024-05-03 12:26:10 +02:00
commit cf0e83b6ef
6 changed files with 609 additions and 0 deletions

23
.gitattributes vendored Normal file
View File

@ -0,0 +1,23 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

BIN
fdupes-2.2.1.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

260
fdupes.changes Normal file
View File

@ -0,0 +1,260 @@
-------------------------------------------------------------------
Sun Oct 2 16:21:27 UTC 2022 - Andrea Manzini <andrea.manzini@suse.com>
- update to 2.2.1:
* Fix bug in code meant to skip over the current log file when --log option is given.
* Updates to copyright notices in source code.
* Add --deferconfirmation option.
* Check that files marked as duplicates haven't changed during program execution before deleting them.
* Update documentation to indicate units for SIZE in command-line options.
* Move some configuration settings to configure.ac file.
-------------------------------------------------------------------
Fri Apr 1 19:50:32 UTC 2022 - Stefan Brüns <stefan.bruens@rwth-aachen.de>
- Fixes for the new wrapper:
* Order duplicates by name, to get a reproducible file set
(boo#1197484).
* Remove redundant order parameter from fdupes invocation.
* Modernize code, significantly reduce allocations.
* Exit immediately when mandatory parameters are missing.
* Remove obsolete buildroot parameter
* Add some tests for the wrapper
-------------------------------------------------------------------
Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow <coolo@suse.com>
- A more correct approach to creating symlinks (old bug actually):
Do not link the files as given by fdupes, but turn them into
relative links (it works by chance if given a buildroot, but
fails if running on a subdirectory)
- Support multiple directories given (as glob to the macro)
-------------------------------------------------------------------
Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <coolo@suse.com>
- Handle symlinks (-s argument) correctly
-------------------------------------------------------------------
Sat Mar 12 08:17:37 UTC 2022 - Stephan Kulow <coolo@suse.com>
- Simplify macros.fdupes with a call to a C++ program that does
in a fraction of a second what the shell loop took many
seconds to do (bsc#1195709)
-------------------------------------------------------------------
Sun Aug 16 16:59:45 UTC 2020 - Dirk Mueller <dmueller@suse.com>
- update to 2.1.2:
* Do not enter ncurses mode when --immediate option given.
* Fix logging/memory corruption bug when using --log with --immediate.
* Break mtime ties using ctime when sorting by time.
* Reduce number of calls to stat(), for speed.
* Clear last command status when new command is entered.
* Rename cs command ("clear all selections") from cs to csel.
* Rename igs command ("invert selections") from igs to isel.
* Add "prune" command as synonym for DELETE key.
* Clear selections after deleting files via prune/DELETE.
* Fix dependency issues when fdupes is configured to not use ncurses.
- build without ncurses for now until buildcycles can be solved
-------------------------------------------------------------------
Fri Jun 5 23:42:10 UTC 2020 - Jan Engelhardt <jengelh@inai.de>
- Use noun phrase in summary. Drop old specfile constructs.
-------------------------------------------------------------------
Fri May 22 08:27:11 UTC 2020 - Paolo Stivanin <info@paolostivanin.com>
- Update to v2.0.0
* Add ncurses mode for interactive file deletion (plain mode still available via --plain or ./configure).
* Add --minsize option.
* Add --maxsize option.
* Add --time option.
* Add --order=ctime option.
* Add --log option.
* Use configure script for installation (Autotools/Automake).
- Remove fdupes-makefile.patch
-------------------------------------------------------------------
Thu Apr 16 21:07:45 UTC 2020 - Matej Cepl <mcepl@suse.com>
- Make the package build even on platforms where _rpmmacrodir
is not defined.
-------------------------------------------------------------------
Wed May 8 09:37:54 UTC 2019 - Dominique Leuenberger <dimstar@opensuse.org>
- Move RPM macros to %_rpmmacrodir.
-------------------------------------------------------------------
Fri Dec 16 12:40:20 UTC 2016 - psimons@suse.com
- We cannot update from fdupes 1.51 to 1.6.1. That "downgrade"
works okay'ish for Tumbleweed because we can replace the old
package with the new one, but in SLE this is not possible. We
asked upstream to please release a "2.0" version to remedy these
issues (https://github.com/adrianlopezroche/fdupes/issues/74),
but he does not respond. Therefore, we'll call this version 1.61,
ignoring upstream's change in the versioning scheme.
-------------------------------------------------------------------
Mon Dec 5 13:54:08 UTC 2016 - psimons@suse.com
- Upstream has changed their versioning scheme after version 1.51.
Unfortunately, the new version 1.6.x won't be recognized as
"newer" by zypper. This commit adds appropriate "provides" and
"obsoletes" attributes to the spec file to remedy that issue.
-------------------------------------------------------------------
Fri Nov 4 14:33:59 UTC 2016 - psimons@suse.com
- Drop 50_bts284274_hardlinkreplace.dpatch. The --linkhard option
added by this patch has an implementation bug that can cause data
loss. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=677419
has more details.
-------------------------------------------------------------------
Fri Nov 4 13:47:27 UTC 2016 - psimons@suse.com
- Update to version 1.6.1. The following patches have been applied
upstream and were dropped:
* 0001-restore-pristine-code.patch
* 0002-Added-to-escape-minus-signs-in-manpage-lintian-warni.patch
* 0003-Fix-a-typo-in-a-manpage-bts353789.patch
* 0005-add-summarize-to-manpage-bts481809.patch
* 0006-add-nohidden-support-bts511702.patch
* 0007-Disambiguate-the-options-recurse-and-recurse-bts5371.patch
* 0008-speedup-the-file-compare.patch
* 0009-glibc-endianness-check-in-md5.patch
* 0010-add-permissions-mode.patch
* 0011-add-an-option-to-sort-duplicate-files-by-name.patch
- 50_bts284274_hardlinkreplace.dpatch had to be refreshed.
-------------------------------------------------------------------
Tue Aug 4 13:01:47 UTC 2015 - tchvatal@suse.com
- By default relink hardlinks too, should fix bnc#940296
-------------------------------------------------------------------
Mon Aug 3 19:26:55 UTC 2015 - tchvatal@suse.com
- Update to upstream git repo on github
- Refresh patches:
* fdupes-makefile.patch
* 0008-speedup-the-file-compare.patch
* 0010-add-permissions-mode.patch
* 0011-add-an-option-to-sort-duplicate-files-by-name.patch
* 50_bts284274_hardlinkreplace.dpatch
- Upstreamed patch:
* 0004-Large-file-support-for-2GB-files-bts447601.patch
- Remove whitespace from fdupes.macros file
- Cleanup with spec-cleaner
- Obey rpm-opt-flags
- run test phase
-------------------------------------------------------------------
Sun Dec 21 19:58:41 UTC 2014 - bwiedemann@suse.com
- add -L (--linkhard) option
add 50_bts284274_hardlinkreplace.dpatch
-------------------------------------------------------------------
Tue Apr 29 16:08:34 UTC 2014 - stefan.bruens@rwth-aachen.de
- sort the output of fdupes by filename to make it deterministic
for parallel builds
* 0011-add-an-option-to-sort-duplicate-files-by-name.patch
-------------------------------------------------------------------
Tue Oct 16 11:44:08 UTC 2012 - mvyskocil@suse.com
- update to 1.5.0-PR2
* new "--summarize" option
* new "--recurse:" selective recursion option
* new "--noprompt" option for totally automated deletion of
duplicate files.
* sorts duplicates (old to new) for consistent order when
listing or deleting duplicate files.
* tests for early matching of files, which should help speed up
the matching process when large files are involved.
* warns whenever a file cannot be deleted.
* bugfixes (proper file closing, zero-length files, ...)
- drop the fdupes-sort-output.diff (upstream uses mtime based)
- rename and rebase fdupes-speedup.patch to 0008-speedup-the-compare.patch
- rename and rebase fdupes-endianness.patch to
0009-glibc-endianness-check-in-md5.patch
- add -p/--permissions switch so files with different permissions or uid/gid
are not considered as duplicates (bnc#784670)
* this mode is a default one for fdupes macro
0010-add-permissions-mode.patch
- imported several fixes from Debian
* 0001-restore-pristine-code.patch - some common code fixes, partly obsoletes
speedup patch
* manual page fixes
0002-Added-to-escape-minus-signs-in-manpage-lintian-warni.patch
0003-Fix-a-typo-in-a-manpage-bts353789.patch
0005-add-summarize-to-manpage-bts481809.patch
0006-add-nohidden-support-bts511702.patch
0007-Disambiguate-the-options-recurse-and-recurse-bts5371.patch
* 0004-Large-file-support-for-2GB-files-bts447601.patch - large file support
-------------------------------------------------------------------
Sun Mar 25 22:13:12 UTC 2012 - behrisch@users.sf.net
- added "which" requirement for red hat distros
-------------------------------------------------------------------
Mon Feb 13 10:46:03 UTC 2012 - coolo@suse.com
- patch license to follow spdx.org standard
-------------------------------------------------------------------
Wed Oct 5 15:14:32 UTC 2011 - uli@suse.com
- cross-build workaround: fake gcc script to work around build
system not honoring CC
-------------------------------------------------------------------
Sun Sep 18 17:17:12 UTC 2011 - jengelh@medozas.de
- Apply packaging guidelines (remove redundant/obsolete
tags/sections from specfile, etc.)
-------------------------------------------------------------------
Mon Feb 15 15:43:34 UTC 2010 - mvyskocil@suse.cz
- fix bnc#406825: speedup fdupes
* fdupes-speedup.patch fixes some performance gaps in code
* fdupes-endianness.patch speeds up the built-in md5 on little-endian machines
-------------------------------------------------------------------
Wed Aug 26 12:53:54 CEST 2009 - mls@suse.de
- make patch0 usage consistent
-------------------------------------------------------------------
Thu Jan 15 17:05:36 CET 2009 - coolo@suse.de
- sort the output of fdupes to make it deterministic
-------------------------------------------------------------------
Thu Sep 6 18:41:37 CEST 2007 - mls@suse.de
- do not hardlink empty files in %fdupes macro
-------------------------------------------------------------------
Wed Sep 5 15:44:52 CEST 2007 - nadvornik@suse.cz
- support filenames with spaces in %fdupes macro [#307727]
-------------------------------------------------------------------
Tue May 15 22:53:03 CEST 2007 - coolo@suse.de
- add an RPM macro to make use of it in spec files
-------------------------------------------------------------------
Thu Nov 16 13:16:07 CET 2006 - dmueller@suse.de
- Initial package (1.40)

83
fdupes.spec Normal file
View File

@ -0,0 +1,83 @@
#
# spec file for package fdupes
#
# Copyright (c) 2022 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
# Fall back to the traditional macro directory where _rpmmacrodir is not defined
%{?!_rpmmacrodir:%define _rpmmacrodir /usr/lib/rpm/macros.d}
Name: fdupes
Version: 2.2.1
Release: 0
Summary: Tool to identify or delete duplicate files
License: MIT
Group: Productivity/Archiving/Compression
URL: https://github.com/adrianlopezroche/fdupes
Source0: https://github.com/adrianlopezroche/fdupes/releases/download/v%{version}/fdupes-%{version}.tar.gz
# RPM macro file, installed as macros.fdupes
Source1: macros.fdupes
# Helper that runs fdupes and links the duplicates it reports
Source2: fdupes_wrapper.cpp
# Needed to compile the wrapper
BuildRequires: gcc-c++
%description
FDUPES is a program for identifying or deleting duplicate files
residing within specified directories.
%prep
%autosetup -p1
%build
# Build fdupes itself with ncurses support switched off
%configure --without-ncurses
%make_build
# Compile the wrapper (Source2) with the distribution's optimisation flags
g++ $RPM_OPT_FLAGS %{S:2} -o fdupes_wrapper
%install
%make_install
# Install the macro file into the RPM macro directory
install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name}
# Install the wrapper at the path the macro file refers to
install -D -m755 fdupes_wrapper %{buildroot}/usr/lib/rpm/fdupes_wrapper
%check
# Smoke-test the freshly built fdupes binary with a few option combinations
./%{name} testdir
./%{name} --omitfirst testdir
./%{name} --recurse testdir
./%{name} --size testdir
# Check wrapper
# Put the build directory first in PATH so the wrapper picks up the fdupes built here
PATH=`pwd`:$PATH
# Record checksums of the untouched test files
(cd testdir; md5sum ./* ./*/* > ../testdir.md5 || true)
# Run the wrapper in dry-run (-n), symlink (-s) and default hard-link mode,
# each on its own copy, and verify the file contents still match the checksums
for operation in '-n' '-s' ' '; do
cp -R testdir "testdir${operation}"
./fdupes_wrapper ${operation} "testdir${operation}"
(cd "testdir${operation}"; md5sum --check ../testdir.md5)
done
# Check order does not depend on creation order - x should be target
mkdir testdir_order
for t in "a b x" "x a b" "a x b"; do
pushd testdir_order
# Create three identical files in the order given by t
for f in $t ; do cp ../testdir.md5 $f; done
../fdupes_wrapper -s ./
# a and b must have been replaced by symlinks
test -h ./a
test -h ./b
rm *
popd
done
%files
%doc CHANGES
%{_bindir}/%{name}
%{_mandir}/man1/%{name}.1*
%{_rpmmacrodir}/macros.%{name}
# Wrapper binary referenced by the macro file
/usr/lib/rpm/fdupes_wrapper
%changelog

239
fdupes_wrapper.cpp Normal file
View File

@ -0,0 +1,239 @@
/*
* A little helper to wrap around fdupes and create hard/soft links of the
* dups found. Used in openSUSE rpm.
*
* Copyright 2022 Jiri Slaby <jslaby@suse.cz>
* 2022 Stephan Kulow <coolo@suse.de>
* 2022 Stefan Brüns <stefan.bruens@rwth-aachen.de>
*
* SPDX-License-Identifier: MIT
*/
#include <sys/param.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// One file that fdupes reported as a member of a duplicate set.
struct file_entry
{
    ino_t inode;        // inode number of the file
    nlink_t link_count; // how many entries of the same set share this inode
    std::string path;   // path of the file

    // Builds an entry; the path string is moved out of the argument.
    file_entry(ino_t ino, nlink_t links, std::string&& file_path)
        : inode(ino)
        , link_count(links)
        , path(std::move(file_path))
    {
    }
};
// A group of files that were reported together as duplicates of each other.
using dup_set = vector<file_entry>;
// What to do with each duplicate that was found.
enum class Operation
{
    Symlink,  // replace the duplicate with a symbolic link to the target
    Hardlink, // replace the duplicate with a hard link to the target
    DryRun    // only print what would be linked, change nothing
};
/*
 * Split a '/'-separated path into its components, resolving "." and ".."
 * purely lexically (the filesystem is not consulted).
 *
 *  - A leading '/' produces an empty first component.
 *  - "." components are dropped, unless "." is the last component.
 *  - ".." removes the most recently collected component. If there is
 *    nothing left to remove (the path climbs above its start), the ".."
 *    is ignored; popping from an empty vector would be undefined behaviour.
 *
 * Returns the remaining components in order.
 */
vector<string> split_paths(const string& path)
{
    vector<string> paths;
    stringstream ss(path);
    string token;
    while (getline(ss, token, '/')) {
        if (token == "..") {
            if (!paths.empty())
                paths.pop_back();
        } else if (token != "." || ss.eof()) {
            // A trailing "." is kept - presumably so that callers which
            // strip the final component always have one to strip.
            paths.push_back(token);
        }
    }
    return paths;
}
/*
 * Join path components with '/' between them. Empty components are
 * skipped, so the result never begins with a '/'.
 */
string merge_paths(const vector<string>& paths)
{
    string joined;
    for (const string& component : paths) {
        if (!component.empty()) {
            if (!joined.empty())
                joined.append("/");
            joined.append(component);
        }
    }
    return joined;
}
/*
 * Compute the path of p2 relative to the directory that contains p1.
 *
 * p1 is treated as a file path: its last component is dropped before the
 * comparison. Example: relative("/a/b/file1", "/a/c/file2") yields
 * "../c/file2".
 *
 * Both paths are normalised with split_paths() first, so the result is a
 * purely lexical computation.
 */
string relative(const string& p1, const string& p2)
{
    vector<string> from = split_paths(p1);
    if (!from.empty())
        from.pop_back(); // drop the file name, keep its directory
    const vector<string> to = split_paths(p2);

    // Skip the leading components both paths have in common. Both
    // iterators are bounds-checked: p2 may be a prefix of p1's directory.
    vector<string>::const_iterator it1 = from.begin();
    vector<string>::const_iterator it2 = to.begin();
    while (it1 != from.end() && it2 != to.end() && *it1 == *it2) {
        ++it1;
        ++it2;
    }

    vector<string> paths;
    // one ".." for every directory level left in p1 ...
    for (; it1 != from.end(); ++it1) {
        paths.push_back("..");
    }
    // ... followed by what is left of p2
    for (; it2 != to.end(); ++it2) {
        paths.push_back(*it2);
    }
    return merge_paths(paths);
}
/*
 * Replace `file` with a link pointing at `target`.
 *
 * The action is always announced on stdout. For Operation::DryRun nothing
 * else happens. Otherwise `file` is removed and re-created as a symbolic
 * link (Operation::Symlink) or as a hard link (any other operation).
 * The process exits with status 1 if removing or linking fails.
 *
 * NOTE(review): the file is unlinked before the new link exists, so a
 * failing link()/symlink() leaves the path missing.
 */
void link_file(const std::string& file, const std::string& target, Operation op)
{
    std::cout << "Linking " << file << " -> " << target << std::endl;
    if (op == Operation::DryRun)
        return;

    const char* const link_path = file.c_str();
    const char* const link_target = target.c_str();

    if (unlink(link_path) != 0) {
        std::cerr << "Removing '" << file << "' failed." << std::endl;
        exit(1);
    }

    const int status = (op == Operation::Symlink)
        ? ::symlink(link_target, link_path)
        : link(link_target, link_path);
    if (status != 0) {
        std::cerr << "Linking '" << file << "' failed." << std::endl;
        exit(1);
    }
}
/*
 * Return the string to store as link target when `file` is replaced by a
 * link to `target`. Hard links get the target path unchanged; for every
 * other operation the target is expressed relative to the directory
 * containing `file`.
 */
std::string target_for_link(string target, const std::string &file, Operation op)
{
    if (op != Operation::Hardlink)
        return relative(file, target);
    // hardlinks don't care how the target is spelled
    return target;
}
/*
 * Process one set of duplicate files: pick a target and link every other
 * file of the set to it (or just report it, depending on `op`).
 *
 * The set is reordered in place and the link_count of every entry is
 * overwritten. Sets with fewer than two entries are left alone.
 *
 * NOTE(review): entries are grouped by inode number only; the device is
 * not part of file_entry, so equal inode numbers on different filesystems
 * would be treated as the same file.
 */
void handle_dups(dup_set& dups, Operation op)
{
    // Nothing to link with fewer than two files. This also keeps the
    // front() access below away from an empty set.
    if (dups.size() < 2)
        return;

    // calculate number of hardlinked duplicates found, for each file
    // this may be different than the st_nlink value
    std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) {
        return a.inode < b.inode;
    });
    for (auto run = dups.begin(); run != dups.end();) {
        const ino_t ino = run->inode;
        // entries with equal inode are adjacent after the sort
        const auto run_end = std::find_if(run, dups.end(), [ino](const file_entry& e) {
            return e.inode != ino;
        });
        const auto count = static_cast<nlink_t>(std::distance(run, run_end));
        for (auto i = run; i != run_end; ++i) {
            i->link_count = count;
        }
        run = run_end;
    }

    // use the file with most hardlinks as target
    // in case of ties, sort by name to get a stable order for reproducible builds
    std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) {
        if (a.link_count != b.link_count)
            return a.link_count > b.link_count;
        return a.path > b.path;
    });

    const file_entry& keep = dups.front();
    for (const file_entry& e : dups) {
        // skip duplicates hardlinked to first entry
        if (e.inode == keep.inode)
            continue;
        link_file(e.path, target_for_link(keep.path, e.path, op), op);
    }
}
int main(int argc, char** argv)
{
Operation op = Operation::Hardlink;
std::vector<std::string> roots;
while (1) {
int result = getopt(argc, argv, "sn");
if (result == -1)
break; /* end of list */
switch (result) {
case 's':
op = Operation::Symlink;
break;
case 'n':
op = Operation::DryRun;
break;
default: /* unknown */
break;
}
}
while (optind < argc) {
std::string root = argv[optind++];
if (root.front() != '/') {
char buffer[PATH_MAX];
root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root;
}
roots.push_back(root);
}
if (roots.empty()) {
std::cerr << "Missing directory argument.";
return 1;
}
/* fdupes options used:
-q: hide progress indicator
-p: don't consider files with different owner/group or permission bits as duplicates
-n: exclude zero-length files from consideration
-r: follow subdirectories
-H: also report hard links as duplicates
*/
std::string command = "fdupes -q -p -r -n";
if (op != Operation::Symlink) {
/* if we create symlinks, avoid looking at hard links being duplicated. This way
fdupes is faster and won't break them up anyway */
command += " -H";
}
for (auto it = roots.begin(); it != roots.end(); ++it) {
command += " '" + *it + "'";
}
FILE* pipe = popen(command.c_str(), "r");
if (!pipe) {
throw std::runtime_error("popen() failed!");
}
std::vector<char> buffer;
buffer.resize(MAXPATHLEN);
dup_set dups;
while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) {
std::string line = buffer.data();
if (line.length() < 2) {
handle_dups(dups, op);
dups.clear();
continue;
}
if (line.back() != '\n') {
std::cerr << "Too long lines? '" << line << "'" << std::endl;
return 1;
}
line.pop_back();
struct stat sb;
if (stat(line.c_str(), &sb)) {
std::cerr << "Stat on '" << buffer.data() << "' failed" << std::endl;
return 1;
}
dups.emplace_back(sb.st_ino, 0, std::move(line));
}
pclose(pipe);
return 0;
}

1
macros.fdupes Normal file
View File

@ -0,0 +1 @@
# Expands to the path of the fdupes wrapper installed by this package
%fdupes /usr/lib/rpm/fdupes_wrapper