Merge branch 'ebassi/drop-unused-cmph-files' into 'main'

Remove unused cmph files

See merge request GNOME/glib!4003
This commit is contained in:
Philip Withnall 2024-04-12 14:42:52 +00:00
commit 4607dd77a1
3 changed files with 0 additions and 546 deletions

View File

@ -1,342 +0,0 @@
#ifdef WIN32
#include "wingetopt.h"
#else
#include <getopt.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <assert.h>
#include "cmph.h"
#include "hash.h"
#ifdef WIN32
#define VERSION "0.8"
#else
#include "config.h"
#endif
void usage(const char *prg)
{
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
}
void usage_long(const char *prg)
{
cmph_uint32 i;
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, " -h\t print this help message\n");
fprintf(stderr, " -c\t c value determines:\n");
fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n");
fprintf(stderr, " \t * the number of bits per key required in the FCH algorithm\n");
fprintf(stderr, " \t * the load factor in the CHD_PH algorithm\n");
fprintf(stderr, " -a\t algorithm - valid values are\n");
for (i = 0; i < CMPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_names[i]);
fprintf(stderr, " -f\t hash function (may be used multiple times) - valid values are\n");
for (i = 0; i < CMPH_HASH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_hash_names[i]);
fprintf(stderr, " -V\t print version number and exit\n");
fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
fprintf(stderr, " -k\t number of keys\n");
fprintf(stderr, " -g\t generation mode\n");
fprintf(stderr, " -s\t random seed\n");
fprintf(stderr, " -m\t minimum perfect hash function file \n");
fprintf(stderr, " -M\t main memory availability (in MB) used in BRZ algorithm \n");
fprintf(stderr, " -d\t temporary directory used in BRZ algorithm \n");
fprintf(stderr, " -b\t the meaning of this parameter depends on the algorithm selected in the -a option:\n");
fprintf(stderr, " \t * For BRZ it is used to make the maximal number of keys in a bucket lower than 256.\n");
fprintf(stderr, " \t In this case its value should be an integer in the range [64,175]. Default is 128.\n\n");
fprintf(stderr, " \t * For BDZ it is used to determine the size of some precomputed rank\n");
fprintf(stderr, " \t information and its value should be an integer in the range [3,10]. Default\n");
fprintf(stderr, " \t is 7. The larger is this value, the more compact are the resulting functions\n");
fprintf(stderr, " \t and the slower are them at evaluation time.\n\n");
fprintf(stderr, " \t * For CHD and CHD_PH it is used to set the average number of keys per bucket\n");
fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n");
fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n");
fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n");
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n");
fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n");
fprintf(stderr, " \t range [1,128]. Default is 1\n");
fprintf(stderr, " keysfile\t line separated file with keys\n");
}
int main(int argc, char **argv)
{
cmph_uint32 verbosity = 0;
char generate = 0;
char *mphf_file = NULL;
FILE *mphf_fd = stdout;
const char *keys_file = NULL;
FILE *keys_fd;
cmph_uint32 nkeys = UINT_MAX;
cmph_uint32 seed = UINT_MAX;
CMPH_HASH *hashes = NULL;
cmph_uint32 nhashes = 0;
cmph_uint32 i;
CMPH_ALGO mph_algo = CMPH_CHM;
double c = 0;
cmph_config_t *config = NULL;
cmph_t *mphf = NULL;
char * tmp_dir = NULL;
cmph_io_adapter_t *source;
cmph_uint32 memory_availability = 0;
cmph_uint32 b = 0;
cmph_uint32 keys_per_bin = 1;
while (1)
{
char ch = (char)getopt(argc, argv, "hVvgc:k:a:M:b:t:f:m:d:s:");
if (ch == -1) break;
switch (ch)
{
case 's':
{
char *cptr;
seed = (cmph_uint32)strtoul(optarg, &cptr, 10);
if(*cptr != 0) {
fprintf(stderr, "Invalid seed %s\n", optarg);
exit(1);
}
}
break;
case 'c':
{
char *endptr;
c = strtod(optarg, &endptr);
if(*endptr != 0) {
fprintf(stderr, "Invalid c value %s\n", optarg);
exit(1);
}
}
break;
case 'g':
generate = 1;
break;
case 'k':
{
char *endptr;
nkeys = (cmph_uint32)strtoul(optarg, &endptr, 10);
if(*endptr != 0) {
fprintf(stderr, "Invalid number of keys %s\n", optarg);
exit(1);
}
}
break;
case 'm':
mphf_file = strdup(optarg);
break;
case 'd':
tmp_dir = strdup(optarg);
break;
case 'M':
{
char *cptr;
memory_availability = (cmph_uint32)strtoul(optarg, &cptr, 10);
if(*cptr != 0) {
fprintf(stderr, "Invalid memory availability %s\n", optarg);
exit(1);
}
}
break;
case 'b':
{
char *cptr;
b = (cmph_uint32)strtoul(optarg, &cptr, 10);
if(*cptr != 0) {
fprintf(stderr, "Parameter b was not found: %s\n", optarg);
exit(1);
}
}
break;
case 't':
{
char *cptr;
keys_per_bin = (cmph_uint32)strtoul(optarg, &cptr, 10);
if(*cptr != 0) {
fprintf(stderr, "Parameter t was not found: %s\n", optarg);
exit(1);
}
}
break;
case 'v':
++verbosity;
break;
case 'V':
printf("%s\n", VERSION);
return 0;
case 'h':
usage_long(argv[0]);
return 0;
case 'a':
{
char valid = 0;
for (i = 0; i < CMPH_COUNT; ++i)
{
if (strcmp(cmph_names[i], optarg) == 0)
{
mph_algo = i;
valid = 1;
break;
}
}
if (!valid)
{
fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
return -1;
}
}
break;
case 'f':
{
char valid = 0;
for (i = 0; i < CMPH_HASH_COUNT; ++i)
{
if (strcmp(cmph_hash_names[i], optarg) == 0)
{
hashes = (CMPH_HASH *)realloc(hashes, sizeof(CMPH_HASH) * ( nhashes + 2 ));
hashes[nhashes] = i;
hashes[nhashes + 1] = CMPH_HASH_COUNT;
++nhashes;
valid = 1;
break;
}
}
if (!valid)
{
fprintf(stderr, "Invalid hash function: %s\n", optarg);
return -1;
}
}
break;
default:
usage(argv[0]);
return 1;
}
}
if (optind != argc - 1)
{
usage(argv[0]);
return 1;
}
keys_file = argv[optind];
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
srand(seed);
int ret = 0;
if (mphf_file == NULL)
{
mphf_file = (char *)malloc(strlen(keys_file) + 5);
memcpy(mphf_file, keys_file, strlen(keys_file));
memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
}
keys_fd = fopen(keys_file, "r");
if (keys_fd == NULL)
{
fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
return -1;
}
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
if (generate)
{
//Create mphf
mphf_fd = fopen(mphf_file, "w");
config = cmph_config_new(source);
cmph_config_set_algo(config, mph_algo);
if (nhashes) cmph_config_set_hashfuncs(config, hashes);
cmph_config_set_verbosity(config, verbosity);
cmph_config_set_tmp_dir(config, (cmph_uint8 *) tmp_dir);
cmph_config_set_mphf_fd(config, mphf_fd);
cmph_config_set_memory_availability(config, memory_availability);
cmph_config_set_b(config, b);
cmph_config_set_keys_per_bin(config, keys_per_bin);
//if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c);
mphf = cmph_new(config);
cmph_config_destroy(config);
if (mphf == NULL)
{
fprintf(stderr, "Unable to create minimum perfect hashing function\n");
//cmph_config_destroy(config);
free(mphf_file);
return -1;
}
if (mphf_fd == NULL)
{
fprintf(stderr, "Unable to open output file %s: %s\n", mphf_file, strerror(errno));
free(mphf_file);
return -1;
}
cmph_dump(mphf, mphf_fd);
cmph_destroy(mphf);
fclose(mphf_fd);
}
else
{
cmph_uint8 * hashtable = NULL;
mphf_fd = fopen(mphf_file, "r");
if (mphf_fd == NULL)
{
fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
free(mphf_file);
return -1;
}
mphf = cmph_load(mphf_fd);
fclose(mphf_fd);
if (!mphf)
{
fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
free(mphf_file);
return -1;
}
cmph_uint32 siz = cmph_size(mphf);
hashtable = (cmph_uint8*)calloc(siz, sizeof(cmph_uint8));
memset(hashtable, 0,(size_t) siz);
//check all keys
for (i = 0; i < source->nkeys; ++i)
{
cmph_uint32 h;
char *buf;
cmph_uint32 buflen = 0;
source->read(source->data, &buf, &buflen);
h = cmph_search(mphf, buf, buflen);
if (!(h < siz))
{
fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
ret = 1;
} else if(hashtable[h] >= keys_per_bin)
{
fprintf(stderr, "More than %u keys were mapped to bin %u\n", keys_per_bin, h);
fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
ret = 1;
} else hashtable[h]++;
if (verbosity)
{
printf("%s -> %u\n", buf, h);
}
source->dispose(source->data, buf, buflen);
}
cmph_destroy(mphf);
free(hashtable);
}
fclose(keys_fd);
free(mphf_file);
free(tmp_dir);
cmph_io_nlfile_adapter_destroy(source);
return ret;
}

View File

@ -1,179 +0,0 @@
#ifdef WIN32
/*****************************************************************************
*
* MODULE NAME : GETOPT.C
*
* COPYRIGHTS:
* This module contains code made available by IBM
* Corporation on an AS IS basis. Any one receiving the
* module is considered to be licensed under IBM copyrights
* to use the IBM-provided source code in any way he or she
* deems fit, including copying it, compiling it, modifying
* it, and redistributing it, with or without
* modifications. No license under any IBM patents or
* patent applications is to be implied from this copyright
* license.
*
* A user of the module should understand that IBM cannot
* provide technical support for the module and will not be
* responsible for any consequences of use of the program.
*
* Any notices, including this one, are not to be removed
* from the module without the prior written consent of
* IBM.
*
* AUTHOR: Original author:
* G. R. Blair (BOBBLAIR at AUSVM1)
* Internet: bobblair@bobblair.austin.ibm.com
*
* Extensively revised by:
* John Q. Walker II, Ph.D. (JOHHQ at RALVM6)
* Internet: johnq@ralvm6.vnet.ibm.com
*
*****************************************************************************/
/******************************************************************************
* getopt()
*
* The getopt() function is a command line parser. It returns the next
* option character in argv that matches an option character in opstring.
*
* The argv argument points to an array of argc+1 elements containing argc
* pointers to character strings followed by a null pointer.
*
* The opstring argument points to a string of option characters; if an
* option character is followed by a colon, the option is expected to have
* an argument that may or may not be separated from it by white space.
* The external variable optarg is set to point to the start of the option
* argument on return from getopt().
*
* The getopt() function places in optind the argv index of the next argument
* to be processed. The system initializes the external variable optind to
* 1 before the first call to getopt().
*
* When all options have been processed (that is, up to the first nonoption
* argument), getopt() returns EOF. The special option "--" may be used to
* delimit the end of the options; EOF will be returned, and "--" will be
* skipped.
*
* The getopt() function returns a question mark (?) when it encounters an
* option character not included in opstring. This error message can be
* disabled by setting opterr to zero. Otherwise, it returns the option
* character that was detected.
*
* If the special option "--" is detected, or all options have been
* processed, EOF is returned.
*
* Options are marked by either a minus sign (-) or a slash (/).
*
* No errors are defined.
*****************************************************************************/
#include <stdio.h> /* for EOF */
#include <string.h> /* for strchr() */
/* static (global) variables that are specified as exported by getopt() */
extern char *optarg; /* pointer to the start of the option argument */
extern int optind; /* number of the next argv[] to be evaluated */
extern int opterr; /* non-zero if a question mark should be returned
when a non-valid option character is detected */
/* handle possible future character set concerns by putting this in a macro */
#define _next_char(string) (char)(*(string+1))
int getopt(int argc, char *argv[], char *opstring)
{
static char *pIndexPosition = NULL; /* place inside current argv string */
char *pArgString = NULL; /* where to start from next */
char *pOptString; /* the string in our program */
if (pIndexPosition != NULL) {
/* we last left off inside an argv string */
if (*(++pIndexPosition)) {
/* there is more to come in the most recent argv */
pArgString = pIndexPosition;
}
}
if (pArgString == NULL) {
/* we didn't leave off in the middle of an argv string */
if (optind >= argc) {
/* more command-line arguments than the argument count */
pIndexPosition = NULL; /* not in the middle of anything */
return EOF; /* used up all command-line arguments */
}
/*---------------------------------------------------------------------
* If the next argv[] is not an option, there can be no more options.
*-------------------------------------------------------------------*/
pArgString = argv[optind++]; /* set this to the next argument ptr */
if (('/' != *pArgString) && /* doesn't start with a slash or a dash? */
('-' != *pArgString)) {
--optind; /* point to current arg once we're done */
optarg = NULL; /* no argument follows the option */
pIndexPosition = NULL; /* not in the middle of anything */
return EOF; /* used up all the command-line flags */
}
/* check for special end-of-flags markers */
if ((strcmp(pArgString, "-") == 0) ||
(strcmp(pArgString, "--") == 0)) {
optarg = NULL; /* no argument follows the option */
pIndexPosition = NULL; /* not in the middle of anything */
return EOF; /* encountered the special flag */
}
pArgString++; /* look past the / or - */
}
if (':' == *pArgString) { /* is it a colon? */
/*---------------------------------------------------------------------
* Rare case: if opterr is non-zero, return a question mark;
* otherwise, just return the colon we're on.
*-------------------------------------------------------------------*/
return (opterr ? (int)'?' : (int)':');
}
else if ((pOptString = strchr(opstring, *pArgString)) == 0) {
/*---------------------------------------------------------------------
* The letter on the command-line wasn't any good.
*-------------------------------------------------------------------*/
optarg = NULL; /* no argument follows the option */
pIndexPosition = NULL; /* not in the middle of anything */
return (opterr ? (int)'?' : (int)*pArgString);
}
else {
/*---------------------------------------------------------------------
* The letter on the command-line matches one we expect to see
*-------------------------------------------------------------------*/
if (':' == _next_char(pOptString)) { /* is the next letter a colon? */
/* It is a colon. Look for an argument string. */
if ('\0' != _next_char(pArgString)) { /* argument in this argv? */
optarg = &pArgString[1]; /* Yes, it is */
}
else {
/*-------------------------------------------------------------
* The argument string must be in the next argv.
* But, what if there is none (bad input from the user)?
* In that case, return the letter, and optarg as NULL.
*-----------------------------------------------------------*/
if (optind < argc)
optarg = argv[optind++];
else {
optarg = NULL;
return (opterr ? (int)'?' : (int)*pArgString);
}
}
pIndexPosition = NULL; /* not in the middle of anything */
}
else {
/* it's not a colon, so just return the letter */
optarg = NULL; /* no argument follows the option */
pIndexPosition = pArgString; /* point to the letter we're on */
}
return (int)*pArgString; /* return the letter that matched */
}
}
#endif //WIN32

View File

@ -1,25 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
#ifndef WIN32
#include <getopt.h>
#else
#ifndef _GETOPT_
#define _GETOPT_
#include <stdio.h> /* for EOF */
#include <string.h> /* for strchr() */
char *optarg = NULL; /* pointer to the start of the option argument */
int optind = 1; /* number of the next argv[] to be evaluated */
int opterr = 1; /* non-zero if a question mark should be returned */
int getopt(int argc, char *argv[], char *opstring);
#endif //_GETOPT_
#endif //WIN32
#ifdef __cplusplus
}
#endif