forked from pool/fdupes
Accepting request 961811 from home:coolo:branches:utilities
This time I branched all staging failures to make sure it's the last one.
- A more correct approach to creating symlinks (old bug actually): do not link the files as given by fdupes, but turn them into relative links (it works by chance if given a buildroot, but fails if running on a subdirectory)
- Support multiple directories given (as glob to the macro)
OBS-URL: https://build.opensuse.org/request/show/961811
OBS-URL: https://build.opensuse.org/package/show/utilities/fdupes?expand=0&rev=25
This commit is contained in:
parent
760afc07b9
commit
34728dc6e5
@ -1,3 +1,12 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||||
|
|
||||||
|
- A more correct approach to creating symlinks (old bug actually):
|
||||||
|
Do not link the files as given by fdupes, but turn them into
|
||||||
|
relative links (it works by chance if given a buildroot, but
|
||||||
|
fails if running on a subdirectory)
|
||||||
|
- Support multiple directories given (as glob to the macro)
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <coolo@suse.com>
|
||||||
|
|
||||||
|
@ -20,10 +20,65 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
typedef std::map<ino_t, std::vector<std::string>> dups_map;
|
typedef std::map<ino_t, std::vector<std::string>> dups_map;
|
||||||
typedef std::pair<ino_t, size_t> nlink_pair;
|
typedef std::pair<ino_t, size_t> nlink_pair;
|
||||||
|
|
||||||
|
/**
 * Split a '/'-separated path into its components, normalising on the way.
 *
 * Rules applied to each component:
 *  - ".." removes the most recently collected component. When there is
 *    nothing left to remove, the ".." is ignored (calling pop_back() on an
 *    empty vector would be undefined behaviour).
 *  - "." is dropped, unless it is the final component of a path that does
 *    not end in '/', in which case it is kept.
 *  - Everything else is kept verbatim, including the empty component
 *    produced by a leading '/' or by "//".
 *
 * @param path  the path to split
 * @return the collected components, in order
 */
std::vector<std::string> split_paths(const std::string& path)
{
    std::vector<std::string> paths;
    std::stringstream ss(path);
    std::string token;
    while (std::getline(ss, token, '/')) {
        if (token == "..") {
            // Guard: a ".." with no preceding component must not pop.
            if (!paths.empty()) {
                paths.pop_back();
            }
        } else if (token != "." || ss.eof()) {
            // eof() is only set once the last, undelimited token was read.
            paths.push_back(token);
        }
    }
    return paths;
}
|
||||||
|
|
||||||
|
/**
 * Join path components with '/' into a single string.
 *
 * Empty components are skipped, so the result never starts with '/' and
 * never contains "//".
 *
 * @param paths  the components to join (taken by const reference; the
 *               function only reads them, so no copy is needed)
 * @return the joined path, or an empty string if there is no non-empty
 *         component
 */
std::string merge_paths(const std::vector<std::string>& paths)
{
    std::string path;
    for (const auto& s : paths) {
        if (s.empty()) {
            continue;
        }
        if (!path.empty()) {
            path += '/';
        }
        path += s;
    }
    return path;
}
|
||||||
|
|
||||||
|
string relative(const string& p1, const string& p2)
|
||||||
|
{
|
||||||
|
vector<string> paths1 = split_paths(p1);
|
||||||
|
paths1.pop_back();
|
||||||
|
vector<string> paths2 = split_paths(p2);
|
||||||
|
vector<string> paths;
|
||||||
|
vector<string>::const_iterator it1 = paths1.begin();
|
||||||
|
vector<string>::const_iterator it2 = paths2.begin();
|
||||||
|
// first remove the common parts
|
||||||
|
while (it1 != paths1.end() && *it1 == *it2) {
|
||||||
|
it1++;
|
||||||
|
it2++;
|
||||||
|
}
|
||||||
|
for (; it1 != paths1.end(); ++it1) {
|
||||||
|
paths.push_back("..");
|
||||||
|
}
|
||||||
|
for (; it2 != paths2.end(); ++it2) {
|
||||||
|
paths.push_back(*it2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return merge_paths(paths);
|
||||||
|
}
|
||||||
|
|
||||||
bool cmp_nlink(const nlink_pair& a, const nlink_pair& b)
|
bool cmp_nlink(const nlink_pair& a, const nlink_pair& b)
|
||||||
{
|
{
|
||||||
return a.second > b.second;
|
return a.second > b.second;
|
||||||
@ -61,6 +116,14 @@ void link_file(const std::string& file, const std::string& target, bool symlink)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string target_for_link(string target, const std::string &file, bool symlink)
|
||||||
|
{
|
||||||
|
if (!symlink) // hardlinks don't care
|
||||||
|
return target;
|
||||||
|
|
||||||
|
return relative(file, target);
|
||||||
|
}
|
||||||
|
|
||||||
void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlink)
|
void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlink)
|
||||||
{
|
{
|
||||||
// all are hardlinks to the same data
|
// all are hardlinks to the same data
|
||||||
@ -70,14 +133,11 @@ void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlin
|
|||||||
sort_by_count(dups, sorted);
|
sort_by_count(dups, sorted);
|
||||||
auto inodes = sorted.begin();
|
auto inodes = sorted.begin();
|
||||||
std::string target = dups.at(*inodes).front();
|
std::string target = dups.at(*inodes).front();
|
||||||
if (symlink) {
|
|
||||||
target.replace(0, buildroot.length(), "");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (++inodes; inodes != sorted.end(); ++inodes) {
|
for (++inodes; inodes != sorted.end(); ++inodes) {
|
||||||
const std::vector<std::string> files = dups.at(*inodes);
|
const std::vector<std::string> files = dups.at(*inodes);
|
||||||
for (auto it = files.begin(); it != files.end(); ++it) {
|
for (auto it = files.begin(); it != files.end(); ++it) {
|
||||||
link_file(*it, target, symlink);
|
link_file(*it, target_for_link(target, *it, symlink), symlink);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -85,7 +145,7 @@ void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlin
|
|||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
bool symlink = false;
|
bool symlink = false;
|
||||||
std::string root;
|
std::vector<std::string> roots;
|
||||||
std::string buildroot;
|
std::string buildroot;
|
||||||
while (1) {
|
while (1) {
|
||||||
int result = getopt(argc, argv, "sb:");
|
int result = getopt(argc, argv, "sb:");
|
||||||
@ -95,32 +155,22 @@ int main(int argc, char** argv)
|
|||||||
case 's':
|
case 's':
|
||||||
symlink = true;
|
symlink = true;
|
||||||
break;
|
break;
|
||||||
case 'b':
|
|
||||||
buildroot = optarg;
|
|
||||||
break;
|
|
||||||
default: /* unknown */
|
default: /* unknown */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (buildroot.empty()) {
|
while (optind < argc) {
|
||||||
if (symlink) {
|
std::string root = argv[optind++];
|
||||||
std::cerr << "Missing -b argument to remove bootroot from symlink targets";
|
if (root.front() != '/') {
|
||||||
return 1;
|
char buffer[PATH_MAX];
|
||||||
}
|
root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root;
|
||||||
// eliminate final slash from directory argument
|
|
||||||
if (buildroot.back() == '/') {
|
|
||||||
buildroot.pop_back();
|
|
||||||
}
|
}
|
||||||
|
roots.push_back(root);
|
||||||
}
|
}
|
||||||
if (optind < argc) {
|
|
||||||
root = argv[optind++];
|
if (roots.empty()) {
|
||||||
} else {
|
|
||||||
std::cerr << "Missing directory argument.";
|
std::cerr << "Missing directory argument.";
|
||||||
}
|
}
|
||||||
if (optind < argc) {
|
|
||||||
std::cerr << "Too many arguments.";
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
/* fdupes options used:
|
/* fdupes options used:
|
||||||
-q: hide progress indicator
|
-q: hide progress indicator
|
||||||
-p: don't consider files with different owner/group or permission bits as duplicates
|
-p: don't consider files with different owner/group or permission bits as duplicates
|
||||||
@ -129,13 +179,15 @@ int main(int argc, char** argv)
|
|||||||
-r: follow subdirectories
|
-r: follow subdirectories
|
||||||
-H: also report hard links as duplicates
|
-H: also report hard links as duplicates
|
||||||
*/
|
*/
|
||||||
std::string command = "fdupes -q -p -n -o name";
|
std::string command = "fdupes -q -p -r -n -o name";
|
||||||
if (!symlink) {
|
if (!symlink) {
|
||||||
/* if we create symlinks, avoid looking at hard links being duplicated. This way
|
/* if we create symlinks, avoid looking at hard links being duplicated. This way
|
||||||
fdupes is faster and won't break them up anyway */
|
fdupes is faster and won't break them up anyway */
|
||||||
command += " -H";
|
command += " -H";
|
||||||
}
|
}
|
||||||
command += " -r '" + root + "'";
|
for (auto it = roots.begin(); it != roots.end(); ++it) {
|
||||||
|
command += " '" + *it + "'";
|
||||||
|
}
|
||||||
FILE* pipe = popen(command.c_str(), "r");
|
FILE* pipe = popen(command.c_str(), "r");
|
||||||
if (!pipe) {
|
if (!pipe) {
|
||||||
throw std::runtime_error("popen() failed!");
|
throw std::runtime_error("popen() failed!");
|
||||||
|
@ -1 +1 @@
|
|||||||
%fdupes /usr/lib/rpm/fdupes_wrapper -b %{buildroot}
|
%fdupes /usr/lib/rpm/fdupes_wrapper
|
||||||
|
Loading…
Reference in New Issue
Block a user