Sync from SUSE:SLFO:Main fdupes revision a8e4ad5136b3b82a5b3c1a9ea22d5fd6
This commit is contained in:
commit
cf0e83b6ef
23
.gitattributes
vendored
Normal file
23
.gitattributes
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
## Default LFS
|
||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bsp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gem filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lzma filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.obscpio filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.oxt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.png filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rpm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ttf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.txz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.whl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
BIN
fdupes-2.2.1.tar.gz
(Stored with Git LFS)
Normal file
BIN
fdupes-2.2.1.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
260
fdupes.changes
Normal file
260
fdupes.changes
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Oct 2 16:21:27 UTC 2022 - Andrea Manzini <andrea.manzini@suse.com>
|
||||||
|
|
||||||
|
- update to 2.2.1:
|
||||||
|
* Fix bug in code meant to skip over the current log file when --log option is given.
|
||||||
|
* Updates to copyright notices in source code.
|
||||||
|
* Add --deferconfirmation option.
|
||||||
|
* Check that files marked as duplicates haven't changed during program execution before deleting them.
|
||||||
|
* Update documentation to indicate units for SIZE in command-line options.
|
||||||
|
* Move some configuration settings to configure.ac file.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Apr 1 19:50:32 UTC 2022 - Stefan Brüns <stefan.bruens@rwth-aachen.de>
|
||||||
|
|
||||||
|
- Fixes for the new wrapper:
|
||||||
|
* Order duplicates by name, to get a reproducible file set
|
||||||
|
(boo#1197484).
|
||||||
|
* Remove redundant order parameter from fdupes invocation.
|
||||||
|
* Modernize code, significantly reduce allocations.
|
||||||
|
* Exit immediately when mandatory parameters are missing.
|
||||||
|
* Remove obsolete buildroot parameter
|
||||||
|
* Add some tests for the wrapper
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||||
|
|
||||||
|
- A more correct approach to creating symlinks (old bug actually):
|
||||||
|
Do not link the files as given by fdupes, but turn them into
|
||||||
|
relative links (it works by chance if given a buildroot, but
|
||||||
|
fails if running on a subdirectory)
|
||||||
|
- Support multiple directories given (as glob to the macro)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||||
|
|
||||||
|
- Handle symlinks (-s argument) correctly
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Mar 12 08:17:37 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||||
|
|
||||||
|
- Simplify macros.fdupes with a call to a C++ program that does
|
||||||
|
the same within a fraction of a second what the shell loop did
|
||||||
|
in many seconds (bsc#1195709)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Aug 16 16:59:45 UTC 2020 - Dirk Mueller <dmueller@suse.com>
|
||||||
|
|
||||||
|
- update to 2.1.2:
|
||||||
|
* Do not enter ncurses mode when --immediate option given.
|
||||||
|
* Fix logging/memory corruption bug when using --log with --immediate.
|
||||||
|
* Break mtime ties using ctime when sorting by time.
|
||||||
|
* Reduce number of calls to stat(), for speed.
|
||||||
|
* Clear last command status when new command is entered.
|
||||||
|
* Rename cs command ("clear all selections") from cs to csel.
|
||||||
|
* Rename igs command ("invert selections") from igs to isel.
|
||||||
|
* Add "prune" command as synonym for DELETE key.
|
||||||
|
* Clear selections after deleting files via prune/DELETE.
|
||||||
|
* Fix dependency issues when fdupes is configured to not use ncurses.
|
||||||
|
- build without ncurses for now until buildcycles can be solved
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Jun 5 23:42:10 UTC 2020 - Jan Engelhardt <jengelh@inai.de>
|
||||||
|
|
||||||
|
- Use noun phrase in summary. Drop old specfile constructs.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri May 22 08:27:11 UTC 2020 - Paolo Stivanin <info@paolostivanin.com>
|
||||||
|
|
||||||
|
- Update to v2.0.0
|
||||||
|
* Add ncurses mode for interactive file deletion (plain mode still available via --plain or ./configure).
|
||||||
|
* Add --minsize option.
|
||||||
|
* Add --maxsize option.
|
||||||
|
* Add --time option.
|
||||||
|
* Add --order=ctime option.
|
||||||
|
* Add --log option.
|
||||||
|
* Use configure script for installation (Autotools/Automake).
|
||||||
|
- Remove fdupes-makefile.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Apr 16 21:07:45 UTC 2020 - Matej Cepl <mcepl@suse.com>
|
||||||
|
|
||||||
|
- Make the package build even on platforms where _rpmmacrodir
|
||||||
|
is not defined.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed May 8 09:37:54 UTC 2019 - Dominique Leuenberger <dimstar@opensuse.org>
|
||||||
|
|
||||||
|
- Move RPM macros to %_rpmmacrodir.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Dec 16 12:40:20 UTC 2016 - psimons@suse.com
|
||||||
|
|
||||||
|
- We cannot update from fdupes 1.51 to 1.6.1. That "downgrade"
|
||||||
|
works okay'ish for Tumbleweed because we can replace the old
|
||||||
|
package with the new one, but in SLE this is not possible. We
|
||||||
|
asked upstream to please release a "2.0" version to remedy these
|
||||||
|
issues (https://github.com/adrianlopezroche/fdupes/issues/74),
|
||||||
|
but he does not respond. Therefore, we'll call this version 1.61,
|
||||||
|
ignoring upstream's change in the versioning scheme.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Dec 5 13:54:08 UTC 2016 - psimons@suse.com
|
||||||
|
|
||||||
|
- Upstream has changed their versioning scheme after version 1.51.
|
||||||
|
Unfortunately, the new version 1.6.x won't be recognized as
|
||||||
|
"newer" by zypper. This commit adds appropriate "provides" and
|
||||||
|
"obsoletes" attributes to the spec file to remedy that issue.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Nov 4 14:33:59 UTC 2016 - psimons@suse.com
|
||||||
|
|
||||||
|
- Drop 50_bts284274_hardlinkreplace.dpatch. The --linkhard option
|
||||||
|
added by this patch has an implementation bug that can cause data
|
||||||
|
loss. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=677419
|
||||||
|
has more details.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Nov 4 13:47:27 UTC 2016 - psimons@suse.com
|
||||||
|
|
||||||
|
- Update to version 1.6.1. The following patches have been applied
|
||||||
|
upstream and were dropped:
|
||||||
|
* 0001-restore-pristine-code.patch
|
||||||
|
* 0002-Added-to-escape-minus-signs-in-manpage-lintian-warni.patch
|
||||||
|
* 0003-Fix-a-typo-in-a-manpage-bts353789.patch
|
||||||
|
* 0005-add-summarize-to-manpage-bts481809.patch
|
||||||
|
* 0006-add-nohidden-support-bts511702.patch
|
||||||
|
* 0007-Disambiguate-the-options-recurse-and-recurse-bts5371.patch
|
||||||
|
* 0008-speedup-the-file-compare.patch
|
||||||
|
* 0009-glibc-endianness-check-in-md5.patch
|
||||||
|
* 0010-add-permissions-mode.patch
|
||||||
|
* 0011-add-an-option-to-sort-duplicate-files-by-name.patch
|
||||||
|
- 50_bts284274_hardlinkreplace.dpatch had to be refreshed.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Aug 4 13:01:47 UTC 2015 - tchvatal@suse.com
|
||||||
|
|
||||||
|
- By default relink hardlinks too, should fix bnc#940296
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Aug 3 19:26:55 UTC 2015 - tchvatal@suse.com
|
||||||
|
|
||||||
|
- Update to upstream git repo on github
|
||||||
|
- Refresh patches:
|
||||||
|
* fdupes-makefile.patch
|
||||||
|
* 0008-speedup-the-file-compare.patch
|
||||||
|
* 0010-add-permissions-mode.patch
|
||||||
|
* 0011-add-an-option-to-sort-duplicate-files-by-name.patch
|
||||||
|
* 50_bts284274_hardlinkreplace.dpatch
|
||||||
|
- Upstreamed patch:
|
||||||
|
* 0004-Large-file-support-for-2GB-files-bts447601.patch
|
||||||
|
- Remove whitespace from fdupes.macros file
|
||||||
|
- Cleanup with spec-cleaner
|
||||||
|
- Obey rpm-opt-flags
|
||||||
|
- run test phase
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Dec 21 19:58:41 UTC 2014 - bwiedemann@suse.com
|
||||||
|
|
||||||
|
- add -L (--linkhard) option
|
||||||
|
add 50_bts284274_hardlinkreplace.dpatch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Apr 29 16:08:34 UTC 2014 - stefan.bruens@rwth-aachen.de
|
||||||
|
|
||||||
|
- sort the output of fdupes by filename to make it deterministic
|
||||||
|
for parallel builds
|
||||||
|
* 0011-add-an-option-to-sort-duplicate-files-by-name.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Oct 16 11:44:08 UTC 2012 - mvyskocil@suse.com
|
||||||
|
|
||||||
|
- update to 1.5.0-PR2
|
||||||
|
* new "--summarize" option
|
||||||
|
* new "--recurse:" selective recursion option
|
||||||
|
* new "--noprompt" option for totally automated deletion of
|
||||||
|
duplicate files.
|
||||||
|
* sorts duplicates (old to new) for consistent order when
|
||||||
|
listing or deleting duplicate files.
|
||||||
|
* tests for early matching of files, which should help speed up
|
||||||
|
the matching process when large files are involved.
|
||||||
|
* warns whenever a file cannot be deleted.
|
||||||
|
* bugfixes (proper file closing, zero-length files, ...)
|
||||||
|
- drop the fdupes-sort-output.diff (upstream uses mtime based)
|
||||||
|
- rename and rebase fdupes-speedup.patch to 0008-speedup-the-compare.patch
|
||||||
|
- rename and rebase fdupes-endianness.patch to
|
||||||
|
0009-glibc-endianness-check-in-md5.patch
|
||||||
|
- add -p/--permissions switch so files with different permissions or uid/gid
|
||||||
|
are not considered as duplicates (bnc#784670)
|
||||||
|
* this mode is a default one for fdupes macro
|
||||||
|
0010-add-permissions-mode.patch
|
||||||
|
- imported several fixes from Debian
|
||||||
|
* 0001-restore-pristine-code.patch - some common code fixes, partly obsoletes
|
||||||
|
speedup patch
|
||||||
|
* manual page fixes
|
||||||
|
0002-Added-to-escape-minus-signs-in-manpage-lintian-warni.patch
|
||||||
|
0003-Fix-a-typo-in-a-manpage-bts353789.patch
|
||||||
|
0005-add-summarize-to-manpage-bts481809.patch
|
||||||
|
0006-add-nohidden-support-bts511702.patch
|
||||||
|
0007-Disambiguate-the-options-recurse-and-recurse-bts5371.patch
|
||||||
|
* 0004-Large-file-support-for-2GB-files-bts447601.patch - large file support
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Mar 25 22:13:12 UTC 2012 - behrisch@users.sf.net
|
||||||
|
|
||||||
|
- added "which" requirement for red hat distros
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Feb 13 10:46:03 UTC 2012 - coolo@suse.com
|
||||||
|
|
||||||
|
- patch license to follow spdx.org standard
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Oct 5 15:14:32 UTC 2011 - uli@suse.com
|
||||||
|
|
||||||
|
- cross-build workaround: fake gcc script to work around build
|
||||||
|
system not honoring CC
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Sep 18 17:17:12 UTC 2011 - jengelh@medozas.de
|
||||||
|
|
||||||
|
- Apply packaging guidelines (remove redundant/obsolete
|
||||||
|
tags/sections from specfile, etc.)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Feb 15 15:43:34 UTC 2010 - mvyskocil@suse.cz
|
||||||
|
|
||||||
|
- fix bnc#406825: speedup fdupes
|
||||||
|
* fdupes-speedup.patch fixes some performance gaps in code
|
||||||
|
* fdupes-endianness.patch speeds up the built-in md5 on little endian machines
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Aug 26 12:53:54 CEST 2009 - mls@suse.de
|
||||||
|
|
||||||
|
- make patch0 usage consistent
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jan 15 17:05:36 CET 2009 - coolo@suse.de
|
||||||
|
|
||||||
|
- sort the output of fdupes to make it deterministic
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Sep 6 18:41:37 CEST 2007 - mls@suse.de
|
||||||
|
|
||||||
|
- do not hardlink empty files in %fdupes macro
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Sep 5 15:44:52 CEST 2007 - nadvornik@suse.cz
|
||||||
|
|
||||||
|
- support filenames with spaces in %fdupes macro [#307727]
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue May 15 22:53:03 CEST 2007 - coolo@suse.de
|
||||||
|
|
||||||
|
- add an RPM macro to make use of it in spec files
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Nov 16 13:16:07 CET 2006 - dmueller@suse.de
|
||||||
|
|
||||||
|
- Initial package (1.40)
|
||||||
|
|
83
fdupes.spec
Normal file
83
fdupes.spec
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
#
|
||||||
|
# spec file for package fdupes
|
||||||
|
#
|
||||||
|
# Copyright (c) 2022 SUSE LLC
|
||||||
|
#
|
||||||
|
# All modifications and additions to the file contributed by third parties
|
||||||
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
|
# upon. The license for this file, and modifications and additions to the
|
||||||
|
# file, is the same license as for the pristine package itself (unless the
|
||||||
|
# license for the pristine package is not an Open Source License, in which
|
||||||
|
# case the license is the MIT License). An "Open Source License" is a
|
||||||
|
# license that conforms to the Open Source Definition (Version 1.9)
|
||||||
|
# published by the Open Source Initiative.
|
||||||
|
|
||||||
|
# Please submit bugfixes or comments via https://bugs.opensuse.org/
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
%{?!_rpmmacrodir:%define _rpmmacrodir /usr/lib/rpm/macros.d}
|
||||||
|
|
||||||
|
Name: fdupes
|
||||||
|
Version: 2.2.1
|
||||||
|
Release: 0
|
||||||
|
Summary: Tool to identify or delete duplicate files
|
||||||
|
License: MIT
|
||||||
|
Group: Productivity/Archiving/Compression
|
||||||
|
URL: https://github.com/adrianlopezroche/fdupes
|
||||||
|
Source0: https://github.com/adrianlopezroche/fdupes/releases/download/v%{version}/fdupes-%{version}.tar.gz
|
||||||
|
Source1: macros.fdupes
|
||||||
|
Source2: fdupes_wrapper.cpp
|
||||||
|
BuildRequires: gcc-c++
|
||||||
|
|
||||||
|
%description
|
||||||
|
FDUPES is a program for identifying or deleting duplicate files
|
||||||
|
residing within specified directories.
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%autosetup -p1
|
||||||
|
|
||||||
|
%build
|
||||||
|
%configure --without-ncurses
|
||||||
|
%make_build
|
||||||
|
g++ $RPM_OPT_FLAGS %{S:2} -o fdupes_wrapper
|
||||||
|
|
||||||
|
%install
|
||||||
|
%make_install
|
||||||
|
install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name}
|
||||||
|
install -D -m755 fdupes_wrapper %{buildroot}/usr/lib/rpm/fdupes_wrapper
|
||||||
|
|
||||||
|
%check
|
||||||
|
./%{name} testdir
|
||||||
|
./%{name} --omitfirst testdir
|
||||||
|
./%{name} --recurse testdir
|
||||||
|
./%{name} --size testdir
|
||||||
|
|
||||||
|
# Check wrapper
|
||||||
|
PATH=`pwd`:$PATH
|
||||||
|
(cd testdir; md5sum ./* ./*/* > ../testdir.md5 || true)
|
||||||
|
for operation in '-n' '-s' ' '; do
|
||||||
|
cp -R testdir "testdir${operation}"
|
||||||
|
./fdupes_wrapper ${operation} "testdir${operation}"
|
||||||
|
(cd "testdir${operation}"; md5sum --check ../testdir.md5)
|
||||||
|
done
|
||||||
|
# Check order does not depend on creation order - x should be target
|
||||||
|
mkdir testdir_order
|
||||||
|
for t in "a b x" "x a b" "a x b"; do
|
||||||
|
pushd testdir_order
|
||||||
|
for f in $t ; do cp ../testdir.md5 $f; done
|
||||||
|
../fdupes_wrapper -s ./
|
||||||
|
test -h ./a
|
||||||
|
test -h ./b
|
||||||
|
rm *
|
||||||
|
popd
|
||||||
|
done
|
||||||
|
|
||||||
|
%files
|
||||||
|
%doc CHANGES
|
||||||
|
%{_bindir}/%{name}
|
||||||
|
%{_mandir}/man1/%{name}.1*
|
||||||
|
%{_rpmmacrodir}/macros.%{name}
|
||||||
|
/usr/lib/rpm/fdupes_wrapper
|
||||||
|
|
||||||
|
%changelog
|
239
fdupes_wrapper.cpp
Normal file
239
fdupes_wrapper.cpp
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
/*
|
||||||
|
* A little helper to wrap around fdupes and create hard/soft links of the
|
||||||
|
* dups found. Used in openSUSE rpm.
|
||||||
|
*
|
||||||
|
* Copyright 2022 Jiri Slaby <jslaby@suse.cz>
|
||||||
|
* 2022 Stephan Kulow <coolo@suse.de>
|
||||||
|
* 2022 Stefan Brüns <stefan.bruens@rwth-aachen.de>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <sys/param.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/// One path reported by fdupes, together with the inode data used to choose a link target.
struct file_entry
{
    ino_t inode;        ///< inode number of the file (callers pass st_ino)
    nlink_t link_count; ///< how many entries of the same duplicate set share this inode
    std::string path;   ///< path of the file

    /// Stores the inode number and link count as given and takes ownership of @p file_path.
    file_entry(ino_t inode_number, nlink_t links, std::string&& file_path)
        : inode{inode_number}, link_count{links}, path(std::move(file_path))
    {
    }
};

/// A group of files that were reported together as duplicates of each other.
using dup_set = std::vector<file_entry>;
|
||||||
|
|
||||||
|
/// What the wrapper does with every duplicate it is told about.
enum class Operation {
    Symlink = 0,  ///< replace duplicates by symbolic links to the chosen target
    Hardlink = 1, ///< replace duplicates by hard links to the chosen target
    DryRun = 2,   ///< only report what would be linked
};
|
||||||
|
|
||||||
|
/**
 * Split a '/'-separated path into its components, resolving "." and "..".
 *
 * - A "." component is dropped unless it is the very last token of the path.
 * - A ".." component removes the component before it. When there is nothing
 *   left to remove (the path starts with "..", or only ".." components
 *   precede it) the ".." is kept, instead of popping from an empty vector,
 *   which would be undefined behaviour.
 * - Empty components are preserved, so an absolute path yields a leading ""
 *   element and "a//b" yields an empty element in the middle.
 *
 * @param path the path to split; an empty path gives an empty vector
 * @return the remaining components in order
 */
std::vector<std::string> split_paths(const std::string& path)
{
    std::vector<std::string> parts;
    std::stringstream stream(path);
    std::string token;

    while (std::getline(stream, token, '/')) {
        if (token == "..") {
            if (parts.empty() || parts.back() == "..")
                parts.push_back(token); // nothing to cancel: keep the ".."
            else
                parts.pop_back();
        } else if (token != "." || stream.eof()) {
            // eof() is set when the token was not terminated by '/',
            // i.e. this keeps a trailing "." component
            parts.push_back(token);
        }
    }

    return parts;
}
|
||||||
|
|
||||||
|
/**
 * Join path components with '/' into a single string.
 *
 * Empty components are ignored, so the result never starts with '/' and
 * never contains "//".
 *
 * @param paths the components to join
 * @return the joined path, or an empty string if there is nothing to join
 */
std::string merge_paths(const std::vector<std::string>& paths)
{
    std::string joined;

    for (auto part = paths.cbegin(); part != paths.cend(); ++part) {
        if (!part->empty()) {
            // separator only between components, never in front of the first
            if (!joined.empty())
                joined.push_back('/');
            joined.append(*part);
        }
    }

    return joined;
}
|
||||||
|
|
||||||
|
string relative(const string& p1, const string& p2)
|
||||||
|
{
|
||||||
|
vector<string> paths1 = split_paths(p1);
|
||||||
|
paths1.pop_back();
|
||||||
|
vector<string> paths2 = split_paths(p2);
|
||||||
|
vector<string> paths;
|
||||||
|
vector<string>::const_iterator it1 = paths1.begin();
|
||||||
|
vector<string>::const_iterator it2 = paths2.begin();
|
||||||
|
// first remove the common parts
|
||||||
|
while (it1 != paths1.end() && *it1 == *it2) {
|
||||||
|
it1++;
|
||||||
|
it2++;
|
||||||
|
}
|
||||||
|
for (; it1 != paths1.end(); ++it1) {
|
||||||
|
paths.push_back("..");
|
||||||
|
}
|
||||||
|
for (; it2 != paths2.end(); ++it2) {
|
||||||
|
paths.push_back(*it2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return merge_paths(paths);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Replace @p file by a link to @p target.
 *
 * The action is always announced on stdout. In dry-run mode nothing else
 * happens. Otherwise @p file is removed and re-created as a symbolic link
 * (Operation::Symlink) or as a hard link (any other operation). If either
 * step fails, a message is written to stderr and the process exits with
 * status 1.
 *
 * @param file   path that gets replaced
 * @param target what the new link refers to
 * @param op     selects dry run, symbolic link or hard link
 */
void link_file(const std::string& file, const std::string& target, Operation op)
{
    std::cout << "Linking " << file << " -> " << target << std::endl;

    if (op != Operation::DryRun) {
        const char* const link_path = file.c_str();

        // the old file has to go before a link of the same name can be made
        if (unlink(link_path) != 0) {
            std::cerr << "Removing '" << file << "' failed." << std::endl;
            exit(1);
        }

        const int status = (op == Operation::Symlink)
            ? ::symlink(target.c_str(), link_path)
            : ::link(target.c_str(), link_path);
        if (status != 0) {
            std::cerr << "Linking '" << file << "' failed." << std::endl;
            exit(1);
        }
    }
}
|
||||||
|
|
||||||
|
std::string target_for_link(string target, const std::string &file, Operation op)
|
||||||
|
{
|
||||||
|
if (op == Operation::Hardlink) // hardlinks don't care
|
||||||
|
return target;
|
||||||
|
|
||||||
|
return relative(file, target);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Link all files of one duplicate set to a single target.
 *
 * The entries are reordered in place and their link_count is overwritten
 * with the number of entries in the set that share the same inode. The file
 * whose inode occurs most often in the set becomes the target; every entry
 * with a different inode is handed to link_file().
 *
 * @param dups the duplicate set; sets with fewer than two entries are left
 *             untouched because there is nothing to link
 * @param op   the operation passed on to link_file()
 */
void handle_dups(dup_set& dups, Operation op)
{
    // nothing to link, and dups.front() below must not be used on an empty set
    if (dups.size() < 2)
        return;

    const auto by_inode = [](const file_entry& a, const file_entry& b) {
        return a.inode < b.inode;
    };

    // calculate number of hardlinked duplicates found, for each file
    // this may be different than the st_nlink value
    std::sort(dups.begin(), dups.end(), by_inode);
    for (auto group = dups.begin(); group != dups.end();) {
        const auto group_end = std::upper_bound(group, dups.end(), *group, by_inode);
        const auto members = static_cast<nlink_t>(std::distance(group, group_end));
        for (; group != group_end; ++group)
            group->link_count = members;
    }

    // use the file with most hardlinks as target
    // in case of ties, sort by name (descending) to get a stable order for
    // reproducible builds
    std::sort(dups.begin(), dups.end(), [](const file_entry& a, const file_entry& b) {
        if (a.link_count == b.link_count)
            return a.path > b.path;
        return a.link_count > b.link_count;
    });

    const file_entry& keeper = dups.front();

    for (const file_entry& e : dups) {
        // skip the target itself and duplicates already hardlinked to it
        if (e.inode == keeper.inode)
            continue;

        link_file(e.path, target_for_link(keeper.path, e.path, op), op);
    }
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
Operation op = Operation::Hardlink;
|
||||||
|
std::vector<std::string> roots;
|
||||||
|
while (1) {
|
||||||
|
int result = getopt(argc, argv, "sn");
|
||||||
|
if (result == -1)
|
||||||
|
break; /* end of list */
|
||||||
|
switch (result) {
|
||||||
|
case 's':
|
||||||
|
op = Operation::Symlink;
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
op = Operation::DryRun;
|
||||||
|
break;
|
||||||
|
default: /* unknown */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (optind < argc) {
|
||||||
|
std::string root = argv[optind++];
|
||||||
|
if (root.front() != '/') {
|
||||||
|
char buffer[PATH_MAX];
|
||||||
|
root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root;
|
||||||
|
}
|
||||||
|
roots.push_back(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (roots.empty()) {
|
||||||
|
std::cerr << "Missing directory argument.";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
/* fdupes options used:
|
||||||
|
-q: hide progress indicator
|
||||||
|
-p: don't consider files with different owner/group or permission bits as duplicates
|
||||||
|
-n: exclude zero-length files from consideration
|
||||||
|
-r: follow subdirectories
|
||||||
|
-H: also report hard links as duplicates
|
||||||
|
*/
|
||||||
|
std::string command = "fdupes -q -p -r -n";
|
||||||
|
if (op != Operation::Symlink) {
|
||||||
|
/* if we create symlinks, avoid looking at hard links being duplicated. This way
|
||||||
|
fdupes is faster and won't break them up anyway */
|
||||||
|
command += " -H";
|
||||||
|
}
|
||||||
|
for (auto it = roots.begin(); it != roots.end(); ++it) {
|
||||||
|
command += " '" + *it + "'";
|
||||||
|
}
|
||||||
|
FILE* pipe = popen(command.c_str(), "r");
|
||||||
|
if (!pipe) {
|
||||||
|
throw std::runtime_error("popen() failed!");
|
||||||
|
}
|
||||||
|
std::vector<char> buffer;
|
||||||
|
buffer.resize(MAXPATHLEN);
|
||||||
|
|
||||||
|
dup_set dups;
|
||||||
|
while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) {
|
||||||
|
std::string line = buffer.data();
|
||||||
|
if (line.length() < 2) {
|
||||||
|
handle_dups(dups, op);
|
||||||
|
dups.clear();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (line.back() != '\n') {
|
||||||
|
std::cerr << "Too long lines? '" << line << "'" << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
line.pop_back();
|
||||||
|
|
||||||
|
struct stat sb;
|
||||||
|
if (stat(line.c_str(), &sb)) {
|
||||||
|
std::cerr << "Stat on '" << buffer.data() << "' failed" << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
dups.emplace_back(sb.st_ino, 0, std::move(line));
|
||||||
|
}
|
||||||
|
pclose(pipe);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
1
macros.fdupes
Normal file
1
macros.fdupes
Normal file
@ -0,0 +1 @@
|
|||||||
|
%fdupes /usr/lib/rpm/fdupes_wrapper
|
Loading…
Reference in New Issue
Block a user