SHA256
3
0
forked from pool/fdupes

Accepting request 961567 from home:coolo:branches:utilities

- Simplify macros.fdupes by calling a C++ program that does in a
  fraction of a second what the shell loop took many seconds to do
  (bsc#1195709)

OBS-URL: https://build.opensuse.org/request/show/961567
OBS-URL: https://build.opensuse.org/package/show/utilities/fdupes?expand=0&rev=23
This commit is contained in:
Peter Simons 2022-03-14 09:15:34 +00:00 committed by Git OBS Bridge
parent c440bc5122
commit da1a4eb97b
4 changed files with 162 additions and 24 deletions

View File

@ -1,3 +1,10 @@
-------------------------------------------------------------------
Sat Mar 12 08:17:37 UTC 2022 - Stephan Kulow <coolo@suse.com>
- Simplify macros.fdupes by calling a C++ program that does in a
fraction of a second what the shell loop took many seconds to do
(bsc#1195709)
-------------------------------------------------------------------
Sun Aug 16 16:59:45 UTC 2020 - Dirk Mueller <dmueller@suse.com>

View File

@ -1,7 +1,7 @@
#
# spec file for package fdupes
#
# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany.
# Copyright (c) 2022 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@ -12,9 +12,10 @@
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via http://bugs.opensuse.org/
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
%{?!_rpmmacrodir:%define _rpmmacrodir /usr/lib/rpm/macros.d}
Name: fdupes
@ -23,9 +24,11 @@ Release: 0
Summary: Tool to identify or delete duplicate files
License: MIT
Group: Productivity/Archiving/Compression
Url: https://github.com/adrianlopezroche/fdupes
URL: https://github.com/adrianlopezroche/fdupes
Source0: https://github.com/adrianlopezroche/fdupes/releases/download/v%{version}/fdupes-%{version}.tar.gz
Source1: macros.fdupes
Source2: fdupes_wrapper.cpp
BuildRequires: gcc-c++
%description
FDUPES is a program for identifying or deleting duplicate files
@ -37,10 +40,12 @@ residing within specified directories.
%build
%configure --without-ncurses
%make_build
g++ $RPM_OPT_FLAGS %{S:2} -o fdupes_wrapper
%install
%make_install
install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name}
install -D -m755 fdupes_wrapper %{buildroot}/usr/lib/rpm/fdupes_wrapper
%check
./%{name} testdir
@ -53,5 +58,6 @@ install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name}
%{_bindir}/%{name}
%{_mandir}/man1/%{name}.1*
%{_rpmmacrodir}/macros.%{name}
/usr/lib/rpm/fdupes_wrapper
%changelog

145
fdupes_wrapper.cpp Normal file
View File

@ -0,0 +1,145 @@
/*
* A little helper to wrap around fdupes and create hard/soft links of the
* dups found. Used in openSUSE rpm.
*
* Copyright 2022 Jiri Slaby <jslaby@suse.cz>
* 2022 Stephan Kulow <coolo@suse.de>
*
* SPDX-License-Identifier: MIT
*/
#include <algorithm>
#include <array>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <list>
#include <map>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <sys/param.h>
#include <sys/stat.h>
#include <unistd.h>
// Paths grouped by inode number: each vector holds the paths filed under
// that inode.
using dups_map = std::map<ino_t, std::vector<std::string>>;

// An inode number paired with a count (used for the number of paths that
// were collected for that inode).
using nlink_pair = std::pair<ino_t, size_t>;

/*
 * Ordering predicate for nlink_pair: true when `a` carries a strictly larger
 * count than `b`, so sorting with it yields descending counts.
 */
bool cmp_nlink(const nlink_pair& a, const nlink_pair& b)
{
    return b.second < a.second;
}
void sort_by_count(const dups_map& in, std::vector<ino_t>& out)
{
out.clear();
std::list<nlink_pair> nlinks;
for (auto it = in.cbegin(); it != in.cend(); ++it) {
nlinks.push_back(std::make_pair(it->first, it->second.size()));
}
nlinks.sort(cmp_nlink);
for (auto it = nlinks.cbegin(); it != nlinks.cend(); ++it) {
out.push_back(it->first);
}
}
/*
 * Replace `file` with a link to `target`.
 *
 * `file` is removed first and then recreated, either as a symbolic link whose
 * content is `target` (when `symlink` is true) or as a hard link to `target`.
 * Progress is printed to stdout. If removing or linking fails, the reason
 * reported by the OS is printed to stderr and the process exits with
 * status 1.
 *
 * NOTE(review): in symlink mode the link content is `target` exactly as
 * passed in. The shell macro this program replaces stripped the buildroot
 * prefix from the target before calling `ln -s` -- confirm whether callers
 * still expect that.
 */
void link_file(const std::string& file, const std::string& target, bool symlink)
{
    std::cout << "Linking " << file << " -> " << target << std::endl;

    if (unlink(file.c_str())) {
        const int err = errno; // save it: the stream output below may overwrite errno
        std::cerr << "Removing '" << file << "' failed: " << std::strerror(err) << std::endl;
        std::exit(1);
    }

    // The parameter `symlink` hides the function of the same name, hence `::`.
    const int ret = symlink ? ::symlink(target.c_str(), file.c_str())
                            : link(target.c_str(), file.c_str());
    if (ret) {
        const int err = errno;
        std::cerr << "Linking '" << file << "' failed: " << std::strerror(err) << std::endl;
        std::exit(1);
    }
}
/*
 * Merge one set of duplicate files.
 *
 * `dups` maps inode number -> paths for a single duplicate set. The first
 * path of the inode that has the most paths is chosen as the link target;
 * every path belonging to any other inode is replaced by a link to that
 * target through link_file() (symbolic when `symlink` is true, hard
 * otherwise).
 */
void handle_dups(const dups_map& dups, bool symlink)
{
    // Fewer than two inodes: every path already refers to the same data.
    if (dups.size() < 2)
        return;

    std::vector<ino_t> sorted;
    sort_by_count(dups, sorted);

    // `dups` is not modified below, so references into it stay valid.
    const std::string& target = dups.at(sorted.front()).front();
    for (auto inode = sorted.begin() + 1; inode != sorted.end(); ++inode) {
        // Bind by reference: no need to copy the whole path list per inode.
        for (const std::string& file : dups.at(*inode)) {
            link_file(file, target, symlink);
        }
    }
}
/*
 * Wrap `arg` in single quotes for use as one word of a /bin/sh command line.
 * An embedded single quote is emitted as '\'' (close quote, escaped quote,
 * reopen quote) so the argument cannot terminate the quoting early.
 */
static std::string shell_quote(const std::string& arg)
{
    std::string quoted = "'";
    for (const char c : arg) {
        if (c == '\'')
            quoted += "'\\''";
        else
            quoted += c;
    }
    quoted += '\'';
    return quoted;
}

/*
 * Usage: fdupes_wrapper [-s] <directory>
 *
 * Runs fdupes on <directory>, reads the duplicate sets it prints (one path
 * per line, sets separated by an empty line) and hands each set to
 * handle_dups(). With -s duplicates are replaced by symbolic links,
 * otherwise by hard links.
 *
 * Returns 0 on success and 1 on a usage error, when fdupes cannot be run or
 * reports failure, or when a reported path is too long or cannot be stat'ed.
 */
int main(int argc, char** argv)
{
    bool symlink = false;
    int opt;
    while ((opt = getopt(argc, argv, "s")) != -1) {
        switch (opt) {
        case 's':
            symlink = true;
            break;
        default: /* unknown option; getopt() has already printed a diagnostic */
            std::cerr << "Usage: " << argv[0] << " [-s] <directory>" << std::endl;
            return 1;
        }
    }

    if (optind >= argc) {
        std::cerr << "Missing directory argument." << std::endl;
        return 1;
    }
    const std::string root = argv[optind++];
    if (optind < argc) {
        std::cerr << "Too many arguments." << std::endl;
        return 1;
    }

    /* fdupes options used:
       -q: hide progress indicator
       -p: don't consider files with different owner/group or permission bits as duplicates
       -n: exclude zero-length files from consideration
       -o name: output order of duplicates
       -r: follow subdirectories
       -H: also report hard links as duplicates
    */
    const std::string command = "fdupes -q -p -n -o name -r -H " + shell_quote(root);
    FILE* fdupes_out = popen(command.c_str(), "r");
    if (!fdupes_out) {
        std::cerr << "popen() failed!" << std::endl;
        return 1;
    }

    std::array<char, MAXPATHLEN> buffer;
    dups_map dups;
    while (fgets(buffer.data(), buffer.size(), fdupes_out) != nullptr) {
        std::string line = buffer.data();
        // An empty line (just the newline) terminates the current set.
        if (line.length() < 2) {
            handle_dups(dups, symlink);
            dups.clear();
            continue;
        }
        // No trailing newline means the path did not fit into the buffer.
        if (line.back() != '\n') {
            std::cerr << "Too long lines? '" << line << "'" << std::endl;
            pclose(fdupes_out);
            return 1;
        }
        line.pop_back();

        struct stat sb;
        if (stat(line.c_str(), &sb)) {
            std::cerr << "Stat on '" << line << "' failed" << std::endl;
            pclose(fdupes_out);
            return 1;
        }
        dups[sb.st_ino].push_back(line);
    }
    // Process a final set that was not followed by an empty line; this is a
    // no-op when the last set was already handled and cleared above.
    handle_dups(dups, symlink);

    // pclose() yields the command's wait status; non-zero (or -1) means the
    // shell could not run fdupes or fdupes itself reported failure.
    if (pclose(fdupes_out) != 0) {
        std::cerr << "Running fdupes failed." << std::endl;
        return 1;
    }
    return 0;
}

View File

@ -1,21 +1 @@
%fdupes(s) \
_target=""; \
_symlinks=0; \
%{-s:_symlinks=1;} \
fdupes -q -p -n -H -o name -r %1 | \
while read _file; do \
if test -z "$_target" ; then \
_target="$_file"; \
else \
if test -z "$_file" ; then \
_target=""; \
continue ; \
fi ; \
if test "$_symlinks" = 1; then \
ln -sf "${_target#%{buildroot}}" "$_file"; \
else \
ln -f "$_target" "$_file"; \
fi ;\
fi ; \
done \
%{nil}
%fdupes /usr/lib/rpm/fdupes_wrapper