forked from pool/fdupes
Accepting request 961811 from home:coolo:branches:utilities
This time I branched all staging failures to make sure it's the last one - A more correct approach to creating symlinks (old bug actually): Do not link the files as given by fdupes, but turn them into relative links (it works by chance if given a buildroot, but fails if running on a subdirectory) - Support multiple directories given (as glob to the macro) OBS-URL: https://build.opensuse.org/request/show/961811 OBS-URL: https://build.opensuse.org/package/show/utilities/fdupes?expand=0&rev=25
This commit is contained in:
parent
760afc07b9
commit
34728dc6e5
@ -1,3 +1,12 @@
|
||||
-------------------------------------------------------------------
|
||||
Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||
|
||||
- A more correct approach to creating symlinks (old bug actually):
|
||||
Do not link the files as given by fdupes, but turn them into
|
||||
relative links (it works by chance if given a buildroot, but
|
||||
fails if running on a subdirectory)
|
||||
- Support multiple directories given (as glob to the macro)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||
|
||||
|
@ -20,10 +20,65 @@
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
typedef std::map<ino_t, std::vector<std::string>> dups_map;
|
||||
typedef std::pair<ino_t, size_t> nlink_pair;
|
||||
|
||||
/**
 * Split a slash-separated path into its components, resolving "." and ".."
 * lexically (no filesystem access).
 *
 * - A leading '/' is represented by an empty first component so that absolute
 *   and relative paths stay distinguishable.
 * - Repeated slashes in the middle of the path are collapsed.
 * - "." components are dropped, except when "." is the very last component
 *   (not followed by a slash); it is kept so callers that strip the final
 *   component remove the "." rather than a real directory.
 * - ".." removes the previous component. It is preserved when there is
 *   nothing to remove in a relative path, and ignored at the root of an
 *   absolute path.
 *
 * @param path the path to split
 * @return the normalized list of components
 */
std::vector<std::string> split_paths(const std::string& path)
{
    std::vector<std::string> paths;
    std::stringstream ss(path);
    std::string token;
    bool first = true;

    while (std::getline(ss, token, '/')) {
        const bool is_first = first;
        first = false;

        if (token.empty()) {
            // Only the leading empty token (the root marker) is meaningful;
            // others come from duplicated slashes.
            if (is_first)
                paths.push_back(token);
            continue;
        }

        if (token == "..") {
            if (paths.empty() || paths.back() == "..") {
                // Nothing to go up from in a relative path: keep the "..".
                paths.push_back(token);
            } else if (!paths.back().empty()) {
                paths.pop_back();
            }
            // Otherwise we are at the root marker: "/.." is still "/".
        } else if (token != "." || ss.eof()) {
            paths.push_back(token);
        }
    }
    return paths;
}
|
||||
|
||||
/**
 * Join path components with '/' into a single string.
 *
 * Empty components are skipped, so the result never starts with a slash and
 * never contains "//". An empty input (or one holding only empty components)
 * yields an empty string.
 *
 * @param paths the components to join (taken by reference to avoid a copy)
 * @return the joined path
 */
std::string merge_paths(const std::vector<std::string>& paths)
{
    std::string path;
    for (const auto& s : paths) {
        if (s.empty())
            continue;
        if (!path.empty())
            path += '/';
        path += s;
    }
    return path;
}
|
||||
|
||||
string relative(const string& p1, const string& p2)
|
||||
{
|
||||
vector<string> paths1 = split_paths(p1);
|
||||
paths1.pop_back();
|
||||
vector<string> paths2 = split_paths(p2);
|
||||
vector<string> paths;
|
||||
vector<string>::const_iterator it1 = paths1.begin();
|
||||
vector<string>::const_iterator it2 = paths2.begin();
|
||||
// first remove the common parts
|
||||
while (it1 != paths1.end() && *it1 == *it2) {
|
||||
it1++;
|
||||
it2++;
|
||||
}
|
||||
for (; it1 != paths1.end(); ++it1) {
|
||||
paths.push_back("..");
|
||||
}
|
||||
for (; it2 != paths2.end(); ++it2) {
|
||||
paths.push_back(*it2);
|
||||
}
|
||||
|
||||
return merge_paths(paths);
|
||||
}
|
||||
|
||||
bool cmp_nlink(const nlink_pair& a, const nlink_pair& b)
|
||||
{
|
||||
return a.second > b.second;
|
||||
@ -61,6 +116,14 @@ void link_file(const std::string& file, const std::string& target, bool symlink)
|
||||
}
|
||||
}
|
||||
|
||||
std::string target_for_link(string target, const std::string &file, bool symlink)
|
||||
{
|
||||
if (!symlink) // hardlinks don't care
|
||||
return target;
|
||||
|
||||
return relative(file, target);
|
||||
}
|
||||
|
||||
void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlink)
|
||||
{
|
||||
// all are hardlinks to the same data
|
||||
@ -70,14 +133,11 @@ void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlin
|
||||
sort_by_count(dups, sorted);
|
||||
auto inodes = sorted.begin();
|
||||
std::string target = dups.at(*inodes).front();
|
||||
if (symlink) {
|
||||
target.replace(0, buildroot.length(), "");
|
||||
}
|
||||
|
||||
for (++inodes; inodes != sorted.end(); ++inodes) {
|
||||
const std::vector<std::string> files = dups.at(*inodes);
|
||||
for (auto it = files.begin(); it != files.end(); ++it) {
|
||||
link_file(*it, target, symlink);
|
||||
link_file(*it, target_for_link(target, *it, symlink), symlink);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -85,7 +145,7 @@ void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlin
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
bool symlink = false;
|
||||
std::string root;
|
||||
std::vector<std::string> roots;
|
||||
std::string buildroot;
|
||||
while (1) {
|
||||
int result = getopt(argc, argv, "sb:");
|
||||
@ -95,32 +155,22 @@ int main(int argc, char** argv)
|
||||
case 's':
|
||||
symlink = true;
|
||||
break;
|
||||
case 'b':
|
||||
buildroot = optarg;
|
||||
break;
|
||||
default: /* unknown */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (buildroot.empty()) {
|
||||
if (symlink) {
|
||||
std::cerr << "Missing -b argument to remove bootroot from symlink targets";
|
||||
return 1;
|
||||
while (optind < argc) {
|
||||
std::string root = argv[optind++];
|
||||
if (root.front() != '/') {
|
||||
char buffer[PATH_MAX];
|
||||
root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root;
|
||||
}
|
||||
// eliminate final slash from directory argument
|
||||
if (buildroot.back() == '/') {
|
||||
buildroot.pop_back();
|
||||
roots.push_back(root);
|
||||
}
|
||||
}
|
||||
if (optind < argc) {
|
||||
root = argv[optind++];
|
||||
} else {
|
||||
|
||||
if (roots.empty()) {
|
||||
std::cerr << "Missing directory argument.";
|
||||
}
|
||||
if (optind < argc) {
|
||||
std::cerr << "Too many arguments.";
|
||||
return 1;
|
||||
}
|
||||
/* fdupes options used:
|
||||
-q: hide progress indicator
|
||||
-p: don't consider files with different owner/group or permission bits as duplicates
|
||||
@ -129,13 +179,15 @@ int main(int argc, char** argv)
|
||||
-r: follow subdirectories
|
||||
-H: also report hard links as duplicates
|
||||
*/
|
||||
std::string command = "fdupes -q -p -n -o name";
|
||||
std::string command = "fdupes -q -p -r -n -o name";
|
||||
if (!symlink) {
|
||||
/* if we create symlinks, avoid looking at hard links being duplicated. This way
|
||||
fdupes is faster and won't break them up anyway */
|
||||
command += " -H";
|
||||
}
|
||||
command += " -r '" + root + "'";
|
||||
for (auto it = roots.begin(); it != roots.end(); ++it) {
|
||||
command += " '" + *it + "'";
|
||||
}
|
||||
FILE* pipe = popen(command.c_str(), "r");
|
||||
if (!pipe) {
|
||||
throw std::runtime_error("popen() failed!");
|
||||
|
@ -1 +1 @@
|
||||
%fdupes /usr/lib/rpm/fdupes_wrapper -b %{buildroot}
|
||||
%fdupes /usr/lib/rpm/fdupes_wrapper
|
||||
|
Loading…
Reference in New Issue
Block a user