From 52cc4d7b52c7759c1f8c5f4229e3530f6aeef1f4 Mon Sep 17 00:00:00 2001 From: Emmanuele Bassi Date: Fri, 12 Apr 2024 15:26:22 +0100 Subject: [PATCH] Remove unused cmph files The cmph utility tool is not built, and the licensing of wingetopt.[ch] is dubious at best. --- girepository/cmph/main.c | 342 ---------------------------------- girepository/cmph/wingetopt.c | 179 ------------------ girepository/cmph/wingetopt.h | 25 --- 3 files changed, 546 deletions(-) delete mode 100644 girepository/cmph/main.c delete mode 100644 girepository/cmph/wingetopt.c delete mode 100644 girepository/cmph/wingetopt.h diff --git a/girepository/cmph/main.c b/girepository/cmph/main.c deleted file mode 100644 index 7ebac7171..000000000 --- a/girepository/cmph/main.c +++ /dev/null @@ -1,342 +0,0 @@ -#ifdef WIN32 -#include "wingetopt.h" -#else -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include "cmph.h" -#include "hash.h" - -#ifdef WIN32 -#define VERSION "0.8" -#else -#include "config.h" -#endif - - -void usage(const char *prg) -{ - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); -} -void usage_long(const char *prg) -{ - cmph_uint32 i; - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); - fprintf(stderr, "Minimum perfect hashing tool\n\n"); - fprintf(stderr, " -h\t print this help message\n"); - fprintf(stderr, " -c\t c value determines:\n"); - fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n"); - fprintf(stderr, " \t * the number of bits per key required in the FCH algorithm\n"); - fprintf(stderr, " \t * the load factor in the CHD_PH algorithm\n"); - fprintf(stderr, " -a\t algorithm - valid values are\n"); - for (i = 0; i < CMPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_names[i]); - fprintf(stderr, " -f\t hash function (may be used multiple times) - valid values are\n"); - for (i = 0; i < CMPH_HASH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_hash_names[i]); - fprintf(stderr, " -V\t print version number and exit\n"); - fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n"); - fprintf(stderr, " -k\t number of keys\n"); - fprintf(stderr, " -g\t generation mode\n"); - fprintf(stderr, " -s\t random seed\n"); - fprintf(stderr, " -m\t minimum perfect hash function file \n"); - fprintf(stderr, " -M\t main memory availability (in MB) used in BRZ algorithm \n"); - fprintf(stderr, " -d\t temporary directory used in BRZ algorithm \n"); - fprintf(stderr, " -b\t the meaning of this parameter depends on the algorithm selected in the -a option:\n"); - fprintf(stderr, " \t * For BRZ it is used to make the maximal number of keys in a bucket lower than 256.\n"); - fprintf(stderr, " \t In this case its value should be an integer in the range [64,175]. Default is 128.\n\n"); - fprintf(stderr, " \t * For BDZ it is used to determine the size of some precomputed rank\n"); - fprintf(stderr, " \t information and its value should be an integer in the range [3,10]. Default\n"); - fprintf(stderr, " \t is 7. The larger is this value, the more compact are the resulting functions\n"); - fprintf(stderr, " \t and the slower are them at evaluation time.\n\n"); - fprintf(stderr, " \t * For CHD and CHD_PH it is used to set the average number of keys per bucket\n"); - fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n"); - fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n"); - fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n"); - fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n"); - fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n"); - fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n"); - fprintf(stderr, " \t range [1,128]. Default is 1\n"); - fprintf(stderr, " keysfile\t line separated file with keys\n"); -} - -int main(int argc, char **argv) -{ - cmph_uint32 verbosity = 0; - char generate = 0; - char *mphf_file = NULL; - FILE *mphf_fd = stdout; - const char *keys_file = NULL; - FILE *keys_fd; - cmph_uint32 nkeys = UINT_MAX; - cmph_uint32 seed = UINT_MAX; - CMPH_HASH *hashes = NULL; - cmph_uint32 nhashes = 0; - cmph_uint32 i; - CMPH_ALGO mph_algo = CMPH_CHM; - double c = 0; - cmph_config_t *config = NULL; - cmph_t *mphf = NULL; - char * tmp_dir = NULL; - cmph_io_adapter_t *source; - cmph_uint32 memory_availability = 0; - cmph_uint32 b = 0; - cmph_uint32 keys_per_bin = 1; - while (1) - { - char ch = (char)getopt(argc, argv, "hVvgc:k:a:M:b:t:f:m:d:s:"); - if (ch == -1) break; - switch (ch) - { - case 's': - { - char *cptr; - seed = (cmph_uint32)strtoul(optarg, &cptr, 10); - if(*cptr != 0) { - fprintf(stderr, "Invalid seed %s\n", optarg); - exit(1); - } - } - break; - case 'c': - { - char *endptr; - c = strtod(optarg, &endptr); - if(*endptr != 0) { - fprintf(stderr, "Invalid c value %s\n", optarg); - exit(1); - } - } - break; - case 'g': - generate = 1; - break; - case 'k': - { - char *endptr; - nkeys = (cmph_uint32)strtoul(optarg, &endptr, 10); - if(*endptr != 0) { - fprintf(stderr, "Invalid number of keys %s\n", optarg); - exit(1); - } - } - break; - case 'm': - mphf_file = strdup(optarg); - break; - case 'd': - tmp_dir = strdup(optarg); - break; - case 'M': - { - char *cptr; - memory_availability = (cmph_uint32)strtoul(optarg, &cptr, 10); - if(*cptr != 0) { - fprintf(stderr, "Invalid memory availability %s\n", optarg); - exit(1); - } - } - break; - case 'b': - { - char *cptr; - b = (cmph_uint32)strtoul(optarg, &cptr, 10); - if(*cptr != 0) { - fprintf(stderr, "Parameter b was not found: %s\n", optarg); - exit(1); - } - } - break; - case 't': - { - char *cptr; - keys_per_bin = (cmph_uint32)strtoul(optarg, &cptr, 10); - if(*cptr != 0) { - fprintf(stderr, "Parameter t was not found: %s\n", optarg); - exit(1); - } - } - break; - case 'v': - ++verbosity; - break; - case 'V': - printf("%s\n", VERSION); - return 0; - case 'h': - usage_long(argv[0]); - return 0; - case 'a': - { - char valid = 0; - for (i = 0; i < CMPH_COUNT; ++i) - { - if (strcmp(cmph_names[i], optarg) == 0) - { - mph_algo = i; - valid = 1; - break; - } - } - if (!valid) - { - fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION); - return -1; - } - } - break; - case 'f': - { - char valid = 0; - for (i = 0; i < CMPH_HASH_COUNT; ++i) - { - if (strcmp(cmph_hash_names[i], optarg) == 0) - { - hashes = (CMPH_HASH *)realloc(hashes, sizeof(CMPH_HASH) * ( nhashes + 2 )); - hashes[nhashes] = i; - hashes[nhashes + 1] = CMPH_HASH_COUNT; - ++nhashes; - valid = 1; - break; - } - } - if (!valid) - { - fprintf(stderr, "Invalid hash function: %s\n", optarg); - return -1; - } - } - break; - default: - usage(argv[0]); - return 1; - } - } - - if (optind != argc - 1) - { - usage(argv[0]); - return 1; - } - keys_file = argv[optind]; - - if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL); - srand(seed); - int ret = 0; - if (mphf_file == NULL) - { - mphf_file = (char *)malloc(strlen(keys_file) + 5); - memcpy(mphf_file, keys_file, strlen(keys_file)); - memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5); - } - - keys_fd = fopen(keys_file, "r"); - - if (keys_fd == NULL) - { - fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno)); - return -1; - } - - if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL); - if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd); - else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys); - if (generate) - { - //Create mphf - mphf_fd = fopen(mphf_file, "w"); - config = cmph_config_new(source); - cmph_config_set_algo(config, mph_algo); - if (nhashes) cmph_config_set_hashfuncs(config, hashes); - cmph_config_set_verbosity(config, verbosity); - cmph_config_set_tmp_dir(config, (cmph_uint8 *) tmp_dir); - cmph_config_set_mphf_fd(config, mphf_fd); - cmph_config_set_memory_availability(config, memory_availability); - cmph_config_set_b(config, b); - cmph_config_set_keys_per_bin(config, keys_per_bin); - - //if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15; - if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15; - if (c != 0) cmph_config_set_graphsize(config, c); - mphf = cmph_new(config); - - cmph_config_destroy(config); - if (mphf == NULL) - { - fprintf(stderr, "Unable to create minimum perfect hashing function\n"); - //cmph_config_destroy(config); - free(mphf_file); - return -1; - } - - if (mphf_fd == NULL) - { - fprintf(stderr, "Unable to open output file %s: %s\n", mphf_file, strerror(errno)); - free(mphf_file); - return -1; - } - cmph_dump(mphf, mphf_fd); - cmph_destroy(mphf); - fclose(mphf_fd); - } - else - { - cmph_uint8 * hashtable = NULL; - mphf_fd = fopen(mphf_file, "r"); - if (mphf_fd == NULL) - { - fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno)); - free(mphf_file); - return -1; - } - mphf = cmph_load(mphf_fd); - fclose(mphf_fd); - if (!mphf) - { - fprintf(stderr, "Unable to parser input file %s\n", mphf_file); - free(mphf_file); - return -1; - } - cmph_uint32 siz = cmph_size(mphf); - hashtable = (cmph_uint8*)calloc(siz, sizeof(cmph_uint8)); - memset(hashtable, 0,(size_t) siz); - //check all keys - for (i = 0; i < source->nkeys; ++i) - { - cmph_uint32 h; - char *buf; - cmph_uint32 buflen = 0; - source->read(source->data, &buf, &buflen); - h = cmph_search(mphf, buf, buflen); - if (!(h < siz)) - { - fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf); - ret = 1; - } else if(hashtable[h] >= keys_per_bin) - { - fprintf(stderr, "More than %u keys were mapped to bin %u\n", keys_per_bin, h); - fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf); - ret = 1; - } else hashtable[h]++; - - if (verbosity) - { - printf("%s -> %u\n", buf, h); - } - source->dispose(source->data, buf, buflen); - } - - cmph_destroy(mphf); - free(hashtable); - } - fclose(keys_fd); - free(mphf_file); - free(tmp_dir); - cmph_io_nlfile_adapter_destroy(source); - return ret; - -} diff --git a/girepository/cmph/wingetopt.c b/girepository/cmph/wingetopt.c deleted file mode 100644 index c981d0f04..000000000 --- a/girepository/cmph/wingetopt.c +++ /dev/null @@ -1,179 +0,0 @@ -#ifdef WIN32 -/***************************************************************************** - * - * MODULE NAME : GETOPT.C - * - * COPYRIGHTS: - * This module contains code made available by IBM - * Corporation on an AS IS basis. Any one receiving the - * module is considered to be licensed under IBM copyrights - * to use the IBM-provided source code in any way he or she - * deems fit, including copying it, compiling it, modifying - * it, and redistributing it, with or without - * modifications. No license under any IBM patents or - * patent applications is to be implied from this copyright - * license. - * - * A user of the module should understand that IBM cannot - * provide technical support for the module and will not be - * responsible for any consequences of use of the program. - * - * Any notices, including this one, are not to be removed - * from the module without the prior written consent of - * IBM. - * - * AUTHOR: Original author: - * G. R. Blair (BOBBLAIR at AUSVM1) - * Internet: bobblair@bobblair.austin.ibm.com - * - * Extensively revised by: - * John Q. Walker II, Ph.D. (JOHHQ at RALVM6) - * Internet: johnq@ralvm6.vnet.ibm.com - * - *****************************************************************************/ - -/****************************************************************************** - * getopt() - * - * The getopt() function is a command line parser. It returns the next - * option character in argv that matches an option character in opstring. - * - * The argv argument points to an array of argc+1 elements containing argc - * pointers to character strings followed by a null pointer. - * - * The opstring argument points to a string of option characters; if an - * option character is followed by a colon, the option is expected to have - * an argument that may or may not be separated from it by white space. - * The external variable optarg is set to point to the start of the option - * argument on return from getopt(). - * - * The getopt() function places in optind the argv index of the next argument - * to be processed. The system initializes the external variable optind to - * 1 before the first call to getopt(). - * - * When all options have been processed (that is, up to the first nonoption - * argument), getopt() returns EOF. The special option "--" may be used to - * delimit the end of the options; EOF will be returned, and "--" will be - * skipped. - * - * The getopt() function returns a question mark (?) when it encounters an - * option character not included in opstring. This error message can be - * disabled by setting opterr to zero. Otherwise, it returns the option - * character that was detected. - * - * If the special option "--" is detected, or all options have been - * processed, EOF is returned. - * - * Options are marked by either a minus sign (-) or a slash (/). - * - * No errors are defined. - *****************************************************************************/ - -#include /* for EOF */ -#include /* for strchr() */ - -/* static (global) variables that are specified as exported by getopt() */ -extern char *optarg; /* pointer to the start of the option argument */ -extern int optind; /* number of the next argv[] to be evaluated */ -extern int opterr; /* non-zero if a question mark should be returned - when a non-valid option character is detected */ - -/* handle possible future character set concerns by putting this in a macro */ -#define _next_char(string) (char)(*(string+1)) - -int getopt(int argc, char *argv[], char *opstring) -{ - static char *pIndexPosition = NULL; /* place inside current argv string */ - char *pArgString = NULL; /* where to start from next */ - char *pOptString; /* the string in our program */ - - - if (pIndexPosition != NULL) { - /* we last left off inside an argv string */ - if (*(++pIndexPosition)) { - /* there is more to come in the most recent argv */ - pArgString = pIndexPosition; - } - } - - if (pArgString == NULL) { - /* we didn't leave off in the middle of an argv string */ - if (optind >= argc) { - /* more command-line arguments than the argument count */ - pIndexPosition = NULL; /* not in the middle of anything */ - return EOF; /* used up all command-line arguments */ - } - - /*--------------------------------------------------------------------- - * If the next argv[] is not an option, there can be no more options. - *-------------------------------------------------------------------*/ - pArgString = argv[optind++]; /* set this to the next argument ptr */ - - if (('/' != *pArgString) && /* doesn't start with a slash or a dash? */ - ('-' != *pArgString)) { - --optind; /* point to current arg once we're done */ - optarg = NULL; /* no argument follows the option */ - pIndexPosition = NULL; /* not in the middle of anything */ - return EOF; /* used up all the command-line flags */ - } - - /* check for special end-of-flags markers */ - if ((strcmp(pArgString, "-") == 0) || - (strcmp(pArgString, "--") == 0)) { - optarg = NULL; /* no argument follows the option */ - pIndexPosition = NULL; /* not in the middle of anything */ - return EOF; /* encountered the special flag */ - } - - pArgString++; /* look past the / or - */ - } - - if (':' == *pArgString) { /* is it a colon? */ - /*--------------------------------------------------------------------- - * Rare case: if opterr is non-zero, return a question mark; - * otherwise, just return the colon we're on. - *-------------------------------------------------------------------*/ - return (opterr ? (int)'?' : (int)':'); - } - else if ((pOptString = strchr(opstring, *pArgString)) == 0) { - /*--------------------------------------------------------------------- - * The letter on the command-line wasn't any good. - *-------------------------------------------------------------------*/ - optarg = NULL; /* no argument follows the option */ - pIndexPosition = NULL; /* not in the middle of anything */ - return (opterr ? (int)'?' : (int)*pArgString); - } - else { - /*--------------------------------------------------------------------- - * The letter on the command-line matches one we expect to see - *-------------------------------------------------------------------*/ - if (':' == _next_char(pOptString)) { /* is the next letter a colon? */ - /* It is a colon. Look for an argument string. */ - if ('\0' != _next_char(pArgString)) { /* argument in this argv? */ - optarg = &pArgString[1]; /* Yes, it is */ - } - else { - /*------------------------------------------------------------- - * The argument string must be in the next argv. - * But, what if there is none (bad input from the user)? - * In that case, return the letter, and optarg as NULL. - *-----------------------------------------------------------*/ - if (optind < argc) - optarg = argv[optind++]; - else { - optarg = NULL; - return (opterr ? (int)'?' : (int)*pArgString); - } - } - pIndexPosition = NULL; /* not in the middle of anything */ - } - else { - /* it's not a colon, so just return the letter */ - optarg = NULL; /* no argument follows the option */ - pIndexPosition = pArgString; /* point to the letter we're on */ - } - return (int)*pArgString; /* return the letter that matched */ - } -} - -#endif //WIN32 diff --git a/girepository/cmph/wingetopt.h b/girepository/cmph/wingetopt.h deleted file mode 100644 index 9596853d9..000000000 --- a/girepository/cmph/wingetopt.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef WIN32 - #include -#else - #ifndef _GETOPT_ - #define _GETOPT_ - - #include /* for EOF */ - #include /* for strchr() */ - - char *optarg = NULL; /* pointer to the start of the option argument */ - int optind = 1; /* number of the next argv[] to be evaluated */ - int opterr = 1; /* non-zero if a question mark should be returned */ - - int getopt(int argc, char *argv[], char *opstring); - #endif //_GETOPT_ -#endif //WIN32 - -#ifdef __cplusplus -} -#endif -