/*
 * A little helper to wrap around jdupes and create hard/soft links of the
 * dups found. Used in openSUSE rpm.
 *
 * Copyright 2022 Jiri Slaby
 *           2022 Stephan Kulow
 *
 * SPDX-License-Identifier: MIT
 */

#include <algorithm>
#include <array>
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

// One set of duplicates reported by jdupes, grouped by inode number: paths
// stored under the same key already share their data through hard links.
typedef std::map<ino_t, std::vector<std::string>> dups_map;
// (inode number, count of paths recorded for that inode)
typedef std::pair<ino_t, std::size_t> nlink_pair;

// Orders inode entries so that the one with the most paths comes first.
bool cmp_nlink(const nlink_pair& a, const nlink_pair& b)
{
    return a.second > b.second;
}

// Fills `out` with the inode numbers of `in`, ordered by descending number of
// paths. Any previous content of `out` is discarded. Entries with equal counts
// keep the iteration order of the map (the sort is stable).
void sort_by_count(const dups_map& in, std::vector<ino_t>& out)
{
    std::vector<nlink_pair> nlinks;
    nlinks.reserve(in.size());
    for (const auto& entry : in) {
        nlinks.emplace_back(entry.first, entry.second.size());
    }
    std::stable_sort(nlinks.begin(), nlinks.end(), cmp_nlink);

    out.clear();
    out.reserve(nlinks.size());
    for (const auto& entry : nlinks) {
        out.push_back(entry.first);
    }
}

// Replaces `file` by a link to `target`: a symbolic link when `symlink` is
// true, a hard link otherwise. The existing `file` is removed first. Any
// failure is reported on stderr and terminates the process with status 1.
void link_file(const std::string& file, const std::string& target, bool symlink)
{
    // std::endl flushes, keeping this progress line ordered relative to stderr.
    std::cout << "Linking " << file << " -> " << target << std::endl;

    if (unlink(file.c_str())) {
        const int err = errno; // save before any further library call
        std::cerr << "Removing '" << file << "' failed: " << std::strerror(err) << std::endl;
        std::exit(1);
    }

    // The parameter shadows the POSIX function, hence the explicit `::`.
    const int ret = symlink ? ::symlink(target.c_str(), file.c_str())
                            : link(target.c_str(), file.c_str());
    if (ret) {
        const int err = errno;
        std::cerr << "Linking '" << file << "' failed: " << std::strerror(err) << std::endl;
        std::exit(1);
    }
}

// Processes one set of duplicates. The first path of the inode with the most
// paths becomes the link target; every path of every other inode is replaced
// by a link to it. In symlink mode a leading `buildroot` is removed from the
// target so that the link content does not contain the build root.
void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlink)
{
    // all are hardlinks to the same data
    if (dups.size() < 2)
        return;

    std::vector<ino_t> sorted;
    sort_by_count(dups, sorted);

    auto inode = sorted.cbegin();
    std::string target = dups.at(*inode).front();
    // Only strip the prefix when it really is one; blindly cutting
    // buildroot.length() characters would corrupt an unrelated path.
    if (symlink && !buildroot.empty()
        && target.compare(0, buildroot.length(), buildroot) == 0) {
        target.erase(0, buildroot.length());
    }

    for (++inode; inode != sorted.cend(); ++inode) {
        for (const std::string& file : dups.at(*inode)) {
            link_file(file, target, symlink);
        }
    }
}

// Closes a stream obtained from popen() when the owning pointer goes away.
struct pipe_closer {
    void operator()(FILE* stream) const { pclose(stream); }
};

// Wraps `arg` in single quotes for /bin/sh, escaping embedded single quotes.
static std::string shell_quote(const std::string& arg)
{
    std::string quoted = "'";
    for (const char c : arg) {
        if (c == '\'') {
            quoted += "'\\''"; // close quote, escaped quote, reopen quote
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
}

// Reads the next line of `stream` into `line`, without its trailing newline
// and regardless of its length. Returns false once nothing more can be read.
static bool read_line(FILE* stream, std::string& line)
{
    std::array<char, 4096> chunk;
    line.clear();
    while (fgets(chunk.data(), static_cast<int>(chunk.size()), stream) != nullptr) {
        line += chunk.data();
        if (!line.empty() && line.back() == '\n') {
            line.pop_back();
            return true;
        }
    }
    // A last line that is not newline-terminated is still a line.
    return !line.empty();
}

// Usage: [-s] [-b buildroot] directory
//   -s            create symbolic links instead of hard links (requires -b)
//   -b buildroot  prefix removed from symlink targets
// Runs jdupes on `directory` and links the duplicates of every reported set.
// Returns 0 on success and 1 on usage or I/O errors.
int main(int argc, char** argv)
{
    bool symlink = false;
    std::string root;
    std::string buildroot;

    while (true) {
        const int result = getopt(argc, argv, "sb:");
        if (result == -1)
            break; /* end of list */
        switch (result) {
        case 's':
            symlink = true;
            break;
        case 'b':
            buildroot = optarg;
            break;
        default: /* unknown */
            break;
        }
    }

    if (buildroot.empty()) {
        if (symlink) {
            std::cerr << "Missing -b argument to remove buildroot from symlink targets" << std::endl;
            return 1;
        }
    } else if (buildroot.back() == '/') {
        // eliminate final slash from directory argument
        buildroot.pop_back();
    }

    if (optind >= argc) {
        std::cerr << "Missing directory argument." << std::endl;
        return 1;
    }
    root = argv[optind++];

    if (optind < argc) {
        std::cerr << "Too many arguments." << std::endl;
        return 1;
    }

    /* jdupes options used:
       -q: hide progress indicator
       -p: don't consider files with different owner/group or permission bits as duplicates
       -o name: output order of duplicates
       -r: follow subdirectories
       -H: also report hard links as duplicates
     */
    std::string command = "jdupes -q -p -o name";
    if (!symlink) {
        /* if we create symlinks, avoid looking at hard links being duplicated.
           This way jdupes is faster and won't break them up anyway */
        command += " -H";
    }
    command += " -r " + shell_quote(root);

    // The owning pointer guarantees pclose() on every return path below. The
    // exit status of jdupes is not interpreted.
    std::unique_ptr<FILE, pipe_closer> pipe(popen(command.c_str(), "r"));
    if (!pipe) {
        std::cerr << "popen() failed!" << std::endl;
        return 1;
    }

    dups_map dups;
    std::string line;
    while (read_line(pipe.get(), line)) {
        // An empty line terminates the current set of duplicates.
        if (line.empty()) {
            handle_dups(dups, buildroot, symlink);
            dups.clear();
            continue;
        }

        struct stat sb;
        if (stat(line.c_str(), &sb)) {
            std::cerr << "Stat on '" << line << "' failed" << std::endl;
            return 1;
        }
        dups[sb.st_ino].push_back(line);
    }
    // Process a final set that was not followed by an empty line.
    handle_dups(dups, buildroot, symlink);

    return 0;
}