SHA256
1
0
forked from pool/fdupes

Accepting request 961811 from home:coolo:branches:utilities

This time I branched all staging failures to make sure it's the last one

- A more correct approach to creating symlinks (old bug actually):
  Do not link the files as given by fdupes, but turn them into
  relative links (it works by chance if given a buildroot, but
  fails if running on a subdirectory)
- Support multiple directories given (as glob to the macro)

OBS-URL: https://build.opensuse.org/request/show/961811
OBS-URL: https://build.opensuse.org/package/show/utilities/fdupes?expand=0&rev=25
This commit is contained in:
Peter Simons 2022-03-15 08:17:48 +00:00 committed by Git OBS Bridge
parent 760afc07b9
commit 34728dc6e5
3 changed files with 87 additions and 26 deletions

View File

@ -1,3 +1,12 @@
-------------------------------------------------------------------
Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow <coolo@suse.com>
- A more correct approach to creating symlinks (old bug actually):
Do not link the files as given by fdupes, but turn them into
relative links (it works by chance if given a buildroot, but
fails if running on a subdirectory)
- Support multiple directories given (as glob to the macro)
-------------------------------------------------------------------
Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <coolo@suse.com>

View File

@ -20,10 +20,65 @@
#include <unistd.h>
#include <utility>
#include <vector>
#include <sstream>
using namespace std;
typedef std::map<ino_t, std::vector<std::string>> dups_map;
typedef std::pair<ino_t, size_t> nlink_pair;
// Split a '/'-separated path into its components, resolving "." and ".."
// purely lexically (the filesystem is never consulted).
//
// - An absolute path yields a leading empty component (the text in front of
//   the first '/'); it serves as the root marker.
// - "." components are dropped, except when "." is the very last token of
//   the input (presumably so that a caller which strips the final component
//   still has one to strip).
// - ".." cancels the previously collected component. At the root of an
//   absolute path it is ignored ("/.." is still "/"). If a relative path has
//   nothing left to cancel, the ".." is kept so the result keeps pointing at
//   the same place instead of popping from an empty vector.
// - A trailing '/' does not produce a trailing empty component.
//
// Returns the components in order; an empty input yields an empty vector.
std::vector<std::string> split_paths(const std::string& path)
{
    std::string token;
    std::vector<std::string> paths;
    std::stringstream ss(path);
    while (std::getline(ss, token, '/')) {
        if (token == "..") {
            // Only the root marker collected so far: ".." cannot go above "/".
            const bool at_root = paths.size() == 1 && paths.front().empty();
            if (paths.empty() || paths.back() == "..") {
                // Nothing to cancel in a relative path: remember the step up.
                paths.push_back(token);
            } else if (!at_root) {
                paths.pop_back();
            }
        } else if (token != "." || ss.eof()) {
            // eof() is set when the token was terminated by the end of the
            // input rather than by a '/', i.e. this is the final token.
            paths.push_back(token);
        }
    }
    return paths;
}
// Join path components into one string, separated by '/'.
// Empty components are skipped entirely, so they contribute neither a
// leading nor a doubled separator (e.g. {"", "a", "b"} becomes "a/b").
std::string merge_paths(std::vector<std::string> paths)
{
    std::string joined;
    for (auto part = paths.begin(); part != paths.end(); ++part) {
        if (!part->empty()) {
            // A separator is only needed once something has been written.
            if (!joined.empty()) {
                joined.append("/");
            }
            joined.append(*part);
        }
    }
    return joined;
}
// Express p2 relative to the directory that contains p1, i.e. produce the
// text a symlink located at p1 needs in order to point at p2.
//
// p1: path of the link itself; its last component is treated as the file
//     name and does not take part in the comparison.
// p2: path of the link target.
//
// The computation is lexical: both paths are split into components, the
// shared leading components are dropped, one ".." is emitted for every
// remaining directory of p1, and the remaining components of p2 follow.
// NOTE: the result is only meaningful when both paths are given relative to
// the same base (for instance, both absolute).
//
// Returns the relative path; it is empty when nothing of p2 remains after
// removing the common part and no ".." is required.
std::string relative(const std::string& p1, const std::string& p2)
{
    std::vector<std::string> paths1 = split_paths(p1);
    // Drop the file name; guard against an empty p1, which has no components.
    if (!paths1.empty()) {
        paths1.pop_back();
    }
    const std::vector<std::string> paths2 = split_paths(p2);

    std::vector<std::string>::const_iterator it1 = paths1.begin();
    std::vector<std::string>::const_iterator it2 = paths2.begin();
    // Skip the common leading components. Both iterators must be checked:
    // p2 may be shorter than (a prefix of) the directory of p1.
    while (it1 != paths1.end() && it2 != paths2.end() && *it1 == *it2) {
        ++it1;
        ++it2;
    }

    std::vector<std::string> paths;
    // Climb out of every directory of p1 that is not shared with p2 ...
    for (; it1 != paths1.end(); ++it1) {
        paths.push_back("..");
    }
    // ... then descend into what is left of p2.
    for (; it2 != paths2.end(); ++it2) {
        paths.push_back(*it2);
    }
    return merge_paths(paths);
}
bool cmp_nlink(const nlink_pair& a, const nlink_pair& b)
{
return a.second > b.second;
@ -61,6 +116,14 @@ void link_file(const std::string& file, const std::string& target, bool symlink)
}
}
// Pick the target string to use when linking `file` to `target`.
//
// target:  path of the file that is kept.
// file:    path of the duplicate that is going to be replaced by a link.
// symlink: true when a symbolic link is created, false for a hard link.
//
// Returns `target` unchanged for hard links, and `target` rewritten relative
// to the location of `file` for symbolic links.
std::string target_for_link(std::string target, const std::string &file, bool symlink)
{
    if (symlink) {
        // The link text is computed relative to where the link itself lives.
        return relative(file, target);
    }
    // Hard links do not depend on how the target path is spelled.
    return target;
}
void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlink)
{
// all are hardlinks to the same data
@ -70,14 +133,11 @@ void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlin
sort_by_count(dups, sorted);
auto inodes = sorted.begin();
std::string target = dups.at(*inodes).front();
if (symlink) {
target.replace(0, buildroot.length(), "");
}
for (++inodes; inodes != sorted.end(); ++inodes) {
const std::vector<std::string> files = dups.at(*inodes);
for (auto it = files.begin(); it != files.end(); ++it) {
link_file(*it, target, symlink);
link_file(*it, target_for_link(target, *it, symlink), symlink);
}
}
}
@ -85,7 +145,7 @@ void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlin
int main(int argc, char** argv)
{
bool symlink = false;
std::string root;
std::vector<std::string> roots;
std::string buildroot;
while (1) {
int result = getopt(argc, argv, "sb:");
@ -95,32 +155,22 @@ int main(int argc, char** argv)
case 's':
symlink = true;
break;
case 'b':
buildroot = optarg;
break;
default: /* unknown */
break;
}
}
if (buildroot.empty()) {
if (symlink) {
std::cerr << "Missing -b argument to remove bootroot from symlink targets";
return 1;
while (optind < argc) {
std::string root = argv[optind++];
if (root.front() != '/') {
char buffer[PATH_MAX];
root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root;
}
// eliminate final slash from directory argument
if (buildroot.back() == '/') {
buildroot.pop_back();
roots.push_back(root);
}
}
if (optind < argc) {
root = argv[optind++];
} else {
if (roots.empty()) {
std::cerr << "Missing directory argument.";
}
if (optind < argc) {
std::cerr << "Too many arguments.";
return 1;
}
/* fdupes options used:
-q: hide progress indicator
-p: don't consider files with different owner/group or permission bits as duplicates
@ -129,13 +179,15 @@ int main(int argc, char** argv)
-r: follow subdirectories
-H: also report hard links as duplicates
*/
std::string command = "fdupes -q -p -n -o name";
std::string command = "fdupes -q -p -r -n -o name";
if (!symlink) {
/* if we create symlinks, avoid looking at hard links being duplicated. This way
fdupes is faster and won't break them up anyway */
command += " -H";
}
command += " -r '" + root + "'";
for (auto it = roots.begin(); it != roots.end(); ++it) {
command += " '" + *it + "'";
}
FILE* pipe = popen(command.c_str(), "r");
if (!pipe) {
throw std::runtime_error("popen() failed!");

View File

@ -1 +1 @@
%fdupes /usr/lib/rpm/fdupes_wrapper -b %{buildroot}
%fdupes /usr/lib/rpm/fdupes_wrapper