mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-24 13:06:14 +01:00
Import CMPH 1.0
This will be used for typelib indexing. See README-CMPH-IMPORT.txt for more information.
This commit is contained in:
parent
ff33cc0791
commit
6178293a83
5
cmph/README-CMPH-IMPORT.txt
Normal file
5
cmph/README-CMPH-IMPORT.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
This import of CMPH was made from revision bfdcc3a3a18dfb9 of
|
||||||
|
git://cmph.git.sourceforge.net/gitroot/cmph/cmph
|
||||||
|
|
||||||
|
Only the following files were taken, and everything else deleted:
|
||||||
|
COPYING src/*.[ch]
|
703
cmph/bdz.c
Executable file
703
cmph/bdz.c
Executable file
@ -0,0 +1,703 @@
|
|||||||
|
#include "bdz.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "bdz_structs.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
#define UNASSIGNED 3U
|
||||||
|
#define NULL_EDGE 0xffffffff
|
||||||
|
|
||||||
|
//cmph_uint32 ngrafos = 0;
|
||||||
|
//cmph_uint32 ngrafos_aciclicos = 0;
|
||||||
|
// table used for looking up the number of assigned vertices a 8-bit integer
|
||||||
|
const cmph_uint8 bdz_lookup_table[] =
|
||||||
|
{
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
cmph_uint32 vertices[3];
|
||||||
|
cmph_uint32 next_edges[3];
|
||||||
|
}bdz_edge_t;
|
||||||
|
|
||||||
|
/* Queue of edge indices, held as a plain heap array of cmph_uint32. */
typedef cmph_uint32 * bdz_queue_t;
|
||||||
|
|
||||||
|
/*
 * Allocates room for nedges edge indices and stores the array in *queuep.
 * The result of malloc is stored as-is; callers see NULL on failure.
 */
static void bdz_alloc_queue(bdz_queue_t * queuep, cmph_uint32 nedges)
{
	bdz_queue_t storage = malloc(nedges * sizeof(cmph_uint32));

	*queuep = storage;
}
|
||||||
|
/* Releases the array held in *queue. The pointer itself is not reset. */
static void bdz_free_queue(bdz_queue_t * queue)
{
	bdz_queue_t storage = *queue;

	free(storage);
}
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
cmph_uint32 nedges;
|
||||||
|
bdz_edge_t * edges;
|
||||||
|
cmph_uint32 * first_edge;
|
||||||
|
cmph_uint8 * vert_degree;
|
||||||
|
}bdz_graph3_t;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Allocates the three arrays of graph3: nedges edges, and one list head and
 * one degree counter per vertex. Contents are left uninitialised (see
 * bdz_init_graph3); allocation results are not checked here.
 */
static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
	const size_t edge_bytes = nedges * sizeof(bdz_edge_t);
	const size_t head_bytes = nvertices * sizeof(cmph_uint32);
	const size_t degree_bytes = (size_t)nvertices;

	graph3->edges = malloc(edge_bytes);
	graph3->first_edge = malloc(head_bytes);
	graph3->vert_degree = malloc(degree_bytes);
}
|
||||||
|
/*
 * Resets graph3 to an empty graph over nvertices vertices: every list head
 * becomes 0xffffffff (NULL_EDGE), every degree becomes 0 and the edge count
 * is cleared. The nedges argument is not used.
 */
static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
	graph3->nedges = 0;
	/* Filling with 0xff bytes makes each 32-bit head equal to NULL_EDGE. */
	memset(graph3->first_edge, 0xff, nvertices * sizeof(cmph_uint32));
	memset(graph3->vert_degree, 0, (size_t)nvertices);
}
|
||||||
|
/* Frees all three arrays owned by graph3. Pointers are not reset. */
static void bdz_free_graph3(bdz_graph3_t *graph3)
{
	bdz_edge_t *edge_storage = graph3->edges;
	cmph_uint32 *heads = graph3->first_edge;
	cmph_uint8 *degrees = graph3->vert_degree;

	free(edge_storage);
	free(heads);
	free(degrees);
}
|
||||||
|
|
||||||
|
/*
 * Frees only the per-vertex arrays (list heads and degrees) and sets them to
 * NULL so a later bdz_free_graph3 is harmless for them. The edge array is
 * kept.
 */
static void bdz_partial_free_graph3(bdz_graph3_t *graph3)
{
	free(graph3->first_edge);
	graph3->first_edge = NULL;

	free(graph3->vert_degree);
	graph3->vert_degree = NULL;
}
|
||||||
|
|
||||||
|
/*
 * Appends the edge (v0, v1, v2) to graph3 and pushes it onto the front of the
 * incidence list of each of its three vertices, bumping their degrees.
 */
static void bdz_add_edge(bdz_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2)
{
	const cmph_uint32 id = graph3->nedges;
	bdz_edge_t *edge = &graph3->edges[id];

	edge->vertices[0] = v0;
	edge->vertices[1] = v1;
	edge->vertices[2] = v2;

	/* Capture all three old list heads before any head is overwritten. */
	edge->next_edges[0] = graph3->first_edge[v0];
	edge->next_edges[1] = graph3->first_edge[v1];
	edge->next_edges[2] = graph3->first_edge[v2];

	/* Same write order as a right-to-left chained assignment: v2, v1, v0. */
	graph3->first_edge[v2] = id;
	graph3->first_edge[v1] = id;
	graph3->first_edge[v0] = id;

	graph3->vert_degree[v0]++;
	graph3->vert_degree[v1]++;
	graph3->vert_degree[v2]++;

	graph3->nedges = id + 1;
}
|
||||||
|
|
||||||
|
/*
 * Prints the first nedges edges (vertices and next-edge links) and the list
 * head of the first nvertices vertices to stdout. Diagnostic aid only.
 *
 * All printed values are cmph_uint32, so they are formatted with %u and the
 * loop index is unsigned as well; NULL_EDGE therefore prints as 4294967295
 * rather than as a negative number.
 */
static void bdz_dump_graph(bdz_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
	cmph_uint32 i;

	for (i = 0; i < nedges; i++) {
		const bdz_edge_t *edge = &graph3->edges[i];

		printf("\nedge %u %u %u %u ", i, edge->vertices[0],
		       edge->vertices[1], edge->vertices[2]);
		printf(" nexts %u %u %u", edge->next_edges[0],
		       edge->next_edges[1], edge->next_edges[2]);
	}

	for (i = 0; i < nvertices; i++) {
		printf("\nfirst for vertice %u %u ", i, graph3->first_edge[i]);
	}
}
|
||||||
|
|
||||||
|
/*
 * Unlinks curr_edge from the incidence list of each of its three vertices
 * and decrements their degrees. The edge record itself is left untouched.
 *
 * If the edge is not found in one of the lists, an error message and a graph
 * dump are printed and the process exits with status -1.
 */
static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
{
	cmph_uint32 slot;
	cmph_uint32 prev_slot = 0; /* slot of `vert` inside the predecessor edge */

	for (slot = 0; slot < 3; slot++) {
		const cmph_uint32 vert = graph3->edges[curr_edge].vertices[slot];
		cmph_uint32 prev = NULL_EDGE;
		cmph_uint32 cur = graph3->first_edge[vert];

		/* Walk vert's list until curr_edge is found or the list ends. */
		while (cur != NULL_EDGE && cur != curr_edge) {
			const bdz_edge_t *visited = &graph3->edges[cur];

			prev = cur;
			if (visited->vertices[0] == vert)
				prev_slot = 0;
			else if (visited->vertices[1] == vert)
				prev_slot = 1;
			else
				prev_slot = 2;
			cur = visited->next_edges[prev_slot];
		}

		if (cur == NULL_EDGE) {
			printf("\nerror remove edge %d dump graph",curr_edge);
			/* The graph does not record its vertex count, so the dump
			 * uses nedges + nedges/4 as the number of vertices to print. */
			bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
			exit(-1);
		}

		/* Splice curr_edge out: either from the head or after `prev`. */
		if (prev == NULL_EDGE)
			graph3->first_edge[vert] = graph3->edges[cur].next_edges[slot];
		else
			graph3->edges[prev].next_edges[prev_slot] =
				graph3->edges[cur].next_edges[slot];

		graph3->vert_degree[vert]--;
	}
}
|
||||||
|
|
||||||
|
/*
 * Builds the edge removal order used by the assigning step.
 *
 * Every edge that has at least one vertex of degree 1 is queued; queued edges
 * are then removed from the graph one by one, and any edge that becomes the
 * only edge of a vertex is queued in turn. A bitmap ensures each edge is
 * queued at most once.
 *
 * nedges    number of edges in graph3
 * nvertices unused
 * queue     output array with room for nedges entries
 * graph3    graph to peel; its lists and degrees are modified
 *
 * Returns 0 when all nedges edges were queued, and a negative number
 * otherwise (some edges could not be peeled, or the bitmap could not be
 * allocated).
 */
static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
{
	cmph_uint32 i, k;
	cmph_uint32 queue_head = 0, queue_tail = 0;
	/* One bit per edge, zero-initialised: bit set == edge already queued. */
	cmph_uint8 *marked_edge = calloc((size_t)(nedges >> 3) + 1, sizeof(cmph_uint8));

	(void)nvertices;
	if (marked_edge == NULL)
		return -1; /* report as failure instead of writing through NULL */

	/* Seed the queue with every edge that touches a degree-1 vertex. */
	for (i = 0; i < nedges; i++) {
		const cmph_uint32 *v = graph3->edges[i].vertices;

		if (graph3->vert_degree[v[0]] == 1 ||
		    graph3->vert_degree[v[1]] == 1 ||
		    graph3->vert_degree[v[2]] == 1) {
			if (!GETBIT(marked_edge, i)) {
				queue[queue_head++] = i;
				SETBIT(marked_edge, i);
			}
		}
	}

	/* Peel: removing an edge may leave a neighbour as a vertex's only edge. */
	while (queue_tail != queue_head) {
		const cmph_uint32 curr_edge = queue[queue_tail++];

		bdz_remove_edge(graph3, curr_edge);
		for (k = 0; k < 3; k++) {
			const cmph_uint32 vert = graph3->edges[curr_edge].vertices[k];

			if (graph3->vert_degree[vert] == 1) {
				/* With degree 1 the list head is the single remaining edge. */
				const cmph_uint32 tmp_edge = graph3->first_edge[vert];

				if (!GETBIT(marked_edge, tmp_edge)) {
					queue[queue_head++] = tmp_edge;
					SETBIT(marked_edge, tmp_edge);
				}
			}
		}
	}

	free(marked_edge);
	return (int)(queue_head - nedges); /* returns 0 if successful, otherwise a negative number */
}
|
||||||
|
|
||||||
|
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue);
|
||||||
|
static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue);
|
||||||
|
static void ranking(bdz_config_data_t *bdz);
|
||||||
|
static cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex);
|
||||||
|
|
||||||
|
bdz_config_data_t *bdz_config_new()
|
||||||
|
{
|
||||||
|
bdz_config_data_t *bdz;
|
||||||
|
bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
|
||||||
|
assert(bdz);
|
||||||
|
memset(bdz, 0, sizeof(bdz_config_data_t));
|
||||||
|
bdz->hashfunc = CMPH_HASH_JENKINS;
|
||||||
|
bdz->g = NULL;
|
||||||
|
bdz->hl = NULL;
|
||||||
|
bdz->k = 0; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
|
||||||
|
bdz->b = 7; // number of bits of k
|
||||||
|
bdz->ranktablesize = 0; //number of entries in ranktable, $n/k +1$
|
||||||
|
bdz->ranktable = NULL; // rank table
|
||||||
|
return bdz;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Frees the BDZ-specific configuration stored in mph->data. */
void bdz_config_destroy(cmph_config_t *mph)
{
	bdz_config_data_t *cfg = (bdz_config_data_t *)mph->data;

	DEBUGP("Destroying algorithm dependent data\n");
	free(cfg);
}
|
||||||
|
|
||||||
|
/*
 * Sets parameter b of the BDZ configuration. Values outside 3..10 are
 * replaced by the default 7.
 */
void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
	bdz_config_data_t *cfg = (bdz_config_data_t *)mph->data;
	const int accepted = (b > 2 && b <= 10);

	if (!accepted)
		b = 7; /* validating restrictions over parameter b. */
	cfg->b = (cmph_uint8)b;
	DEBUGP("b: %u\n", b);
}
|
||||||
|
|
||||||
|
/*
 * Selects the hash function for BDZ from a CMPH_HASH_COUNT-terminated list.
 * Only the first entry is used (bdz only uses one linear hash function); an
 * empty list leaves the current setting unchanged.
 */
void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	bdz_config_data_t *cfg = (bdz_config_data_t *)mph->data;

	if (*hashfuncs == CMPH_HASH_COUNT)
		return;
	cfg->hashfunc = *hashfuncs;
}
|
||||||
|
|
||||||
|
/*
 * Builds a BDZ minimal perfect hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data must be a bdz_config_data_t
 * c    ratio used to size the graph (n is about c * nkeys); 0 selects 1.23
 *
 * Steps: size the graph, repeatedly pick a new hash state and try to map the
 * keys onto a peelable 3-graph (at most 1000 attempts), then run the
 * assigning and ranking steps and move the resulting tables into a new
 * cmph_t.
 *
 * Returns the new function, or NULL when no suitable graph was found or a
 * buffer could not be allocated. On the NULL paths handled here, everything
 * this function allocated is released again.
 */
cmph_t *bdz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	bdz_data_t *bdzf = NULL;
	cmph_uint32 iterations;
	bdz_queue_t edges;
	bdz_graph3_t graph3;
	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

	if (c == 0) c = 1.23; // validating restrictions over parameter c.
	DEBUGP("c: %f\n", c);
	bdz->m = mph->key_source->nkeys;
	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
	if ((bdz->r % 2) == 0) bdz->r+=1; // r is always made odd
	bdz->n = 3*bdz->r;

	bdz->k = (1U << bdz->b);
	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);

	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);

	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
	bdz_alloc_queue(&edges,bdz->m);
	/*
	 * The allocators do not report failure, so inspect their results here.
	 * The per-edge buffers are only required to be non-NULL when there is at
	 * least one key, because malloc(0) may legitimately return NULL.
	 */
	if (graph3.first_edge == NULL || graph3.vert_degree == NULL ||
	    (bdz->m != 0 && (graph3.edges == NULL || edges == NULL)))
	{
		bdz_free_queue(&edges);
		bdz_free_graph3(&graph3);
		return NULL;
	}
	DEBUGP("Created hypergraph\n");

	DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);

	// Mapping step
	iterations = 1000;
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	while(1)
	{
		int ok;
		DEBUGP("linear hash function \n");
		bdz->hl = hash_state_new(bdz->hashfunc, 15);

		ok = bdz_mapping(mph, &graph3, edges);
		if (ok) break;

		// This hash state did not work: drop it and try another one.
		--iterations;
		hash_state_destroy(bdz->hl);
		bdz->hl = NULL;
		DEBUGP("%u iterations remaining\n", iterations);
		if (mph->verbosity)
		{
			fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
		}
		if (iterations == 0) break;
	}

	if (iterations == 0)
	{
		bdz_free_queue(&edges);
		bdz_free_graph3(&graph3);
		return NULL;
	}
	// The per-vertex arrays are no longer needed; the edge array still is.
	bdz_partial_free_graph3(&graph3);

	// Assigning step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	assigning(bdz, &graph3, edges);

	bdz_free_queue(&edges);
	bdz_free_graph3(&graph3);

	// Ranking step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	ranking(bdz);
#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
#endif

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
	if (mphf == NULL || bdzf == NULL)
	{
		// Cannot hand the tables over: release them along with the hash state.
		free(mphf);
		free(bdzf);
		free(bdz->g);
		bdz->g = NULL;
		free(bdz->ranktable);
		bdz->ranktable = NULL;
		hash_state_destroy(bdz->hl);
		bdz->hl = NULL;
		return NULL;
	}

	mphf->algo = mph->algo;
	bdzf->g = bdz->g;
	bdz->g = NULL; //transfer memory ownership
	bdzf->hl = bdz->hl;
	bdz->hl = NULL; //transfer memory ownership
	bdzf->ranktable = bdz->ranktable;
	bdz->ranktable = NULL; //transfer memory ownership
	bdzf->ranktablesize = bdz->ranktablesize;
	bdzf->k = bdz->k;
	bdzf->b = bdz->b;
	bdzf->n = bdz->n;
	bdzf->m = bdz->m;
	bdzf->r = bdz->r;
	mphf->data = bdzf;
	mphf->size = bdz->m;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

#ifdef CMPH_TIMING
	register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
	register cmph_uint32 keys_per_bucket = 1;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
#endif

	return mphf;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Mapping step: resets graph3, turns every key of the key source into one
 * edge using the current hash state bdz->hl, then tries to order all edges
 * into queue.
 *
 * Each key yields three hash values; value i is reduced modulo r and shifted
 * by i * r, so the three vertices fall into [0, r), [r, 2r) and [2r, 3r).
 *
 * Returns 1 when bdz_generate_queue reported success (0), and 0 otherwise.
 */
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
{
	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
	cmph_uint32 hashes[3];
	cmph_uint32 key_idx;

	bdz_init_graph3(graph3, bdz->m, bdz->n);
	mph->key_source->rewind(mph->key_source->data);

	for (key_idx = 0; key_idx < mph->key_source->nkeys; ++key_idx)
	{
		char *key = NULL;
		cmph_uint32 keylen;
		cmph_uint32 v0, v1, v2;

		mph->key_source->read(mph->key_source->data, &key, &keylen);
		hash_vector(bdz->hl, key, keylen, hashes);
		v0 = hashes[0] % bdz->r;
		v1 = hashes[1] % bdz->r + bdz->r;
		v2 = hashes[2] % bdz->r + (bdz->r << 1);
		/* The key is only needed for hashing; release it before linking. */
		mph->key_source->dispose(mph->key_source->data, key, keylen);
		bdz_add_edge(graph3, v0, v1, v2);
	}

	return bdz_generate_queue(bdz->m, bdz->n, queue, graph3) == 0;
}
|
||||||
|
|
||||||
|
/*
 * Assigning step: allocates bdz->g (one 2-bit value per vertex, initially
 * all ones) and fills it by visiting the queued edges from the last entry of
 * queue to the first.
 *
 * For each edge, the first of its vertices (in order v0, v1, v2) that has
 * not been visited yet receives a value in 0..2 computed from the other two;
 * any other unvisited vertex of the edge is written as UNASSIGNED. Visited
 * vertices are tracked in a temporary bitmap.
 *
 * Allocation results are not checked.
 */
static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue)
{
	const size_t mark_bytes = (size_t)(bdz->n >> 3) + 1;
	cmph_uint8 * marked_vertices = malloc(mark_bytes);
	const cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
	cmph_uint32 pos;
	cmph_uint32 v0, v1, v2;

	bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
	memset(marked_vertices, 0, mark_bytes);
	memset(bdz->g, 0xff, (size_t)(sizeg));

	/* Walk the queue backwards: pos runs from nedges-1 down to 0. */
	for (pos = graph3->nedges; pos-- > 0; )
	{
		const bdz_edge_t *edge = &graph3->edges[queue[pos]];

		v0 = edge->vertices[0];
		v1 = edge->vertices[1];
		v2 = edge->vertices[2];
		DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));

		if (!GETBIT(marked_vertices, v0))
		{
			/* v0 is free: it gets the value; v1/v2 are fixed first if new. */
			if (!GETBIT(marked_vertices, v1))
			{
				SETVALUE1(bdz->g, v1, UNASSIGNED);
				SETBIT(marked_vertices, v1);
			}
			if (!GETBIT(marked_vertices, v2))
			{
				SETVALUE1(bdz->g, v2, UNASSIGNED);
				SETBIT(marked_vertices, v2);
			}
			SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
			SETBIT(marked_vertices, v0);
		}
		else if (!GETBIT(marked_vertices, v1))
		{
			/* v0 already visited, v1 is free. */
			if (!GETBIT(marked_vertices, v2))
			{
				SETVALUE1(bdz->g, v2, UNASSIGNED);
				SETBIT(marked_vertices, v2);
			}
			SETVALUE1(bdz->g, v1, (7-(GETVALUE(bdz->g, v0)+GETVALUE(bdz->g, v2)))%3);
			SETBIT(marked_vertices, v1);
		}
		else
		{
			/* Only v2 is left to carry the value for this edge. */
			SETVALUE1(bdz->g, v2, (8-(GETVALUE(bdz->g,v0)+GETVALUE(bdz->g, v1)))%3);
			SETBIT(marked_vertices, v2);
		}
		DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
	}

	free(marked_vertices);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Ranking step: allocates bdz->ranktable with bdz->ranktablesize entries.
 * Entry 0 is 0; entry i holds the running total of bdz_lookup_table values
 * over the first i blocks of g, where a block is k/4 bytes (k vertices).
 *
 * The allocation result is not checked.
 */
static void ranking(bdz_config_data_t *bdz)
{
	const cmph_uint32 block_bytes = (bdz->k >> 2U);
	cmph_uint32 remaining = (cmph_uint32)ceil(bdz->n/4.0); /* bytes of g left */
	cmph_uint32 offset = 0U, count = 0U;
	cmph_uint32 i, j, nbytes;

	bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
	// ranktable computation
	bdz->ranktable[0] = 0;

	for (i = 1; i != bdz->ranktablesize; i++)
	{
		const cmph_uint8 *block = bdz->g + offset;

		nbytes = (block_bytes < remaining) ? block_bytes : remaining;
		for (j = 0; j < nbytes; j++)
		{
			count += bdz_lookup_table[block[j]];
		}
		bdz->ranktable[i] = count;
		offset += nbytes;
		remaining -= block_bytes;
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Writes one object of `size` bytes to fd.
 * Returns 1 on success and 0 on a short write; an empty object counts as
 * success because fwrite reports 0 items for it.
 */
static int bdz_write_block(const void *src, size_t size, FILE *fd)
{
	return size == 0 || fwrite(src, size, (size_t)1, fd) == 1;
}

/*
 * Serialises a BDZ function to fd.
 *
 * After the generic header written by __cmph_dump, the layout is: hash state
 * length and bytes, n, m, r, the g array (ceil(n/4) bytes), k, b,
 * ranktablesize and the rank table.
 *
 * Returns 1 when every field was written and 0 as soon as one write fails;
 * in that case the remaining fields are not written.
 */
int bdz_dump(cmph_t *mphf, FILE *fd)
{
	char *buf = NULL;
	cmph_uint32 buflen;
	cmph_uint32 sizeg;
	int ok;
	bdz_data_t *data = (bdz_data_t *)mphf->data;

	__cmph_dump(mphf, fd);

	hash_state_dump(data->hl, &buf, &buflen);
	DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
	ok = bdz_write_block(&buflen, sizeof(cmph_uint32), fd)
	  && bdz_write_block(buf, (size_t)buflen, fd);
	free(buf); /* released whether or not the writes succeeded */

	sizeg = (cmph_uint32)ceil(data->n/4.0);
	ok = ok
	  && bdz_write_block(&(data->n), sizeof(cmph_uint32), fd)
	  && bdz_write_block(&(data->m), sizeof(cmph_uint32), fd)
	  && bdz_write_block(&(data->r), sizeof(cmph_uint32), fd)
	  && bdz_write_block(data->g, sizeof(cmph_uint8)*sizeg, fd)
	  && bdz_write_block(&(data->k), sizeof(cmph_uint32), fd)
	  && bdz_write_block(&(data->b), sizeof(cmph_uint8), fd)
	  && bdz_write_block(&(data->ranktablesize), sizeof(cmph_uint32), fd)
	  && bdz_write_block(data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize), fd);

#ifdef DEBUG
	{
		cmph_uint32 i;
		fprintf(stderr, "G: ");
		for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i));
		fprintf(stderr, "\n");
	}
#endif
	return ok ? 1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * Reads a BDZ function from f in the layout produced by bdz_dump and stores
 * it in mphf->data: hash state length and bytes, n, m, r, g (ceil(n/4)
 * bytes), k, b, ranktablesize and the rank table.
 *
 * NOTE(review): neither allocation nor read results are checked, and the
 * function has no way to report failure to its caller.
 */
void bdz_load(FILE *f, cmph_t *mphf)
{
	bdz_data_t *loaded = (bdz_data_t *)malloc(sizeof(bdz_data_t));
	char *state = NULL;
	cmph_uint32 state_len, sizeg;
	register size_t nbytes;

	DEBUGP("Loading bdz mphf\n");
	mphf->data = loaded;

	/* Hash state: length prefix followed by the raw bytes. */
	nbytes = fread(&state_len, sizeof(cmph_uint32), (size_t)1, f);
	DEBUGP("Hash state has %u bytes\n", state_len);
	state = (char *)malloc((size_t)state_len);
	nbytes = fread(state, (size_t)state_len, (size_t)1, f);
	loaded->hl = hash_state_load(state, state_len);
	free(state);

	DEBUGP("Reading m and n\n");
	nbytes = fread(&(loaded->n), sizeof(cmph_uint32), (size_t)1, f);
	nbytes = fread(&(loaded->m), sizeof(cmph_uint32), (size_t)1, f);
	nbytes = fread(&(loaded->r), sizeof(cmph_uint32), (size_t)1, f);

	/* g holds four 2-bit values per byte. */
	sizeg = (cmph_uint32)ceil(loaded->n/4.0);
	loaded->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
	nbytes = fread(loaded->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);

	nbytes = fread(&(loaded->k), sizeof(cmph_uint32), (size_t)1, f);
	nbytes = fread(&(loaded->b), sizeof(cmph_uint8), (size_t)1, f);
	nbytes = fread(&(loaded->ranktablesize), sizeof(cmph_uint32), (size_t)1, f);

	loaded->ranktable = (cmph_uint32 *)calloc((size_t)loaded->ranktablesize, sizeof(cmph_uint32));
	nbytes = fread(loaded->ranktable, sizeof(cmph_uint32)*(loaded->ranktablesize), (size_t)1, f);

#ifdef DEBUG
	{
		cmph_uint32 i = 0;
		fprintf(stderr, "G: ");
		for (i = 0; i < loaded->n; ++i) fprintf(stderr, "%u ", GETVALUE(loaded->g,i));
		fprintf(stderr, "\n");
	}
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Looks up key and returns the selected vertex itself, without applying
 * rank().
 *
 * The key's three hash values are mapped to one vertex in each of
 * [0, r), [r, 2r) and [2r, 3r); the sum of their g values modulo 3 picks
 * which of the three vertices is returned.
 */
cmph_uint32 bdz_search_ph(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	bdz_data_t *bdz = mphf->data;
	cmph_uint32 hl[3];
	cmph_uint32 choice;

	hash_vector(bdz->hl, key, keylen, hl);
	hl[0] = hl[0] % bdz->r;
	hl[1] = hl[1] % bdz->r + bdz->r;
	hl[2] = hl[2] % bdz->r + (bdz->r << 1);

	choice = (GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3;
	return hl[choice];
}
|
||||||
|
|
||||||
|
/*
 * Returns the number of entries of g before `vertex` whose value is not
 * UNASSIGNED.
 *
 * b          block size exponent: one rank table entry per 2^b vertices
 * ranktable  ranktable[i] is the count for all vertices before block i
 * g          packed 2-bit values, four per byte
 * vertex     vertex whose rank is wanted
 *
 * The count starts from the table entry of vertex's block, then adds whole
 * bytes via bdz_lookup_table, then the leftover values one at a time.
 */
static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
{
	const cmph_uint32 block = vertex >> b;
	const cmph_uint32 last_byte = vertex >> 2; /* byte that contains vertex */
	cmph_uint32 total = ranktable[block];
	cmph_uint32 byte_idx;
	cmph_uint32 v;

	/* Whole bytes between the start of the block and vertex's byte. */
	for (byte_idx = (block << b) >> 2; byte_idx < last_byte; byte_idx++)
	{
		total += bdz_lookup_table[g[byte_idx]];
	}

	/* Remaining values inside the final, partially covered byte. */
	for (v = byte_idx << 2; v < vertex; v++)
	{
		if (GETVALUE(g, v) != UNASSIGNED) total++;
	}

	return total;
}
|
||||||
|
|
||||||
|
/*
 * Looks up key in the BDZ function and returns rank() of the vertex selected
 * for it.
 *
 * The key's three hash values are mapped to one vertex in each of
 * [0, r), [r, 2r) and [2r, 3r); the sum of their g values modulo 3 picks
 * the vertex that is then ranked.
 */
cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	bdz_data_t *bdz = mphf->data;
	cmph_uint32 hl[3];
	cmph_uint32 chosen;

	hash_vector(bdz->hl, key, keylen, hl);
	hl[0] = hl[0] % bdz->r;
	hl[1] = hl[1] % bdz->r + bdz->r;
	hl[2] = hl[2] % bdz->r + (bdz->r << 1);

	chosen = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
	return rank(bdz->b, bdz->ranktable, bdz->g, chosen);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Destroys a BDZ function: frees g, the hash state, the rank table, the
 * bdz_data_t itself and finally mphf.
 */
void bdz_destroy(cmph_t *mphf)
{
	bdz_data_t *bdz = (bdz_data_t *)mphf->data;

	free(bdz->g);
	hash_state_destroy(bdz->hl);
	free(bdz->ranktable);

	free(bdz);
	free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bdz_data_t *data = (bdz_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing hl type
|
||||||
|
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||||
|
*((cmph_uint32 *) ptr) = hl_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing hl
|
||||||
|
hash_state_pack(data->hl, ptr);
|
||||||
|
ptr += hash_state_packed_size(hl_type);
|
||||||
|
|
||||||
|
// packing r
|
||||||
|
*((cmph_uint32 *) ptr) = data->r;
|
||||||
|
ptr += sizeof(data->r);
|
||||||
|
|
||||||
|
// packing ranktablesize
|
||||||
|
*((cmph_uint32 *) ptr) = data->ranktablesize;
|
||||||
|
ptr += sizeof(data->ranktablesize);
|
||||||
|
|
||||||
|
// packing ranktable
|
||||||
|
memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
|
||||||
|
ptr += sizeof(cmph_uint32)*(data->ranktablesize);
|
||||||
|
|
||||||
|
// packing b
|
||||||
|
*ptr++ = data->b;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0);
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint8)*sizeg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bdz_data_t *data = (bdz_data_t *)mphf->data;
|
||||||
|
|
||||||
|
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||||
|
|
||||||
|
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
|
||||||
|
register cmph_uint32 vertex;
|
||||||
|
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
|
||||||
|
|
||||||
|
register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
|
||||||
|
|
||||||
|
register cmph_uint32 r = *ranktable++;
|
||||||
|
register cmph_uint32 ranktablesize = *ranktable++;
|
||||||
|
register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
|
||||||
|
register cmph_uint8 b = *g++;
|
||||||
|
|
||||||
|
cmph_uint32 hl[3];
|
||||||
|
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
|
||||||
|
hl[0] = hl[0] % r;
|
||||||
|
hl[1] = hl[1] % r + r;
|
||||||
|
hl[2] = hl[2] % r + (r << 1);
|
||||||
|
vertex = hl[(GETVALUE(g, hl[0]) + GETVALUE(g, hl[1]) + GETVALUE(g, hl[2])) % 3];
|
||||||
|
return rank(b, ranktable, g, vertex);
|
||||||
|
}
|
43
cmph/bdz.h
Executable file
43
cmph/bdz.h
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#ifndef __CMPH_BDZ_H__
#define __CMPH_BDZ_H__

#include "cmph.h"

/* Opaque types; this header only declares them. */
typedef struct __bdz_data_t bdz_data_t;
typedef struct __bdz_config_data_t bdz_config_data_t;

/* Configuration handling. */
bdz_config_data_t *bdz_config_new(void);
void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void bdz_config_destroy(cmph_config_t *mph);
void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b);

/* Construction; c is the graph sizing ratio. */
cmph_t *bdz_new(cmph_config_t *mph, double c);

/* Serialisation, destruction and lookup. */
void bdz_load(FILE *f, cmph_t *mphf);
int bdz_dump(cmph_t *mphf, FILE *f);
void bdz_destroy(cmph_t *mphf);
cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);

/** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
 *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
 *  \param mphf pointer to the resulting mphf
 *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
 */
void bdz_pack(cmph_t *mphf, void *packed_mphf);

/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
 *  \brief Return the amount of space needed to pack mphf.
 *  \param mphf pointer to a mphf
 *  \return the size of the packed function or zero for failures
 */
cmph_uint32 bdz_packed_size(cmph_t *mphf);

/** \fn cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
 *  \brief Use the packed mphf to do a search.
 *  \param  packed_mphf pointer to the packed mphf
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return The mphf value
 */
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);

#endif
|
33
cmph/bdz_gen_lookup_table.c
Executable file
33
cmph/bdz_gen_lookup_table.c
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
/* Prints the usage line for program `prname` to stderr and exits with status 1. */
void help(char * prname)
{
	static const char usage_format[] = "USE: %s <n><wordsizeinbits>\n";

	fprintf(stderr, usage_format, prname);
	exit(1);
}
|
||||||
|
|
||||||
|
/*
 * Generator for a rank lookup table.
 *
 * argv[1] is the number of table entries n, argv[2] the word size in bits.
 * For every value 0..n-1 the program looks at the lowest wordsize/2 two-bit
 * fields and counts those that are not 3. The counts are written to stderr,
 * comma separated, 16 per line.
 */
int main(int argc, char ** argv)
{
	int entries, fields, value;

	if (argc != 3)
		help(argv[0]);

	entries = atoi(argv[1]);
	fields = (atoi(argv[2]) >> 1); /* two bits per field */

	for (value = 0; value < entries; value++)
	{
		int bits = value;
		int n_assigned = 0;
		int f;

		for (f = 0; f < fields; f++, bits = bits >> 2)
		{
			if ((bits & 0x0003) != 3)
				n_assigned++;
		}

		if (value % 16 == 0)
			fprintf(stderr, "\n");
		fprintf(stderr, "%d, ", n_assigned);
	}
	fprintf(stderr, "\n");
	return 0;
}
|
621
cmph/bdz_ph.c
Executable file
621
cmph/bdz_ph.c
Executable file
@ -0,0 +1,621 @@
|
|||||||
|
#include "bdz_ph.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "bdz_structs_ph.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
#define UNASSIGNED 3
|
||||||
|
#define NULL_EDGE 0xffffffff
|
||||||
|
|
||||||
|
|
||||||
|
/* Powers of three 3^0 .. 3^4: the weight of each of the five base-3 digits
 * stored in one byte.  Read-only, hence const. */
static const cmph_uint8 pow3_table[5] = {1,3,9,27,81};
|
||||||
|
static cmph_uint8 lookup_table[5][256] = {
|
||||||
|
{0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
|
||||||
|
{0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
cmph_uint32 vertices[3];
|
||||||
|
cmph_uint32 next_edges[3];
|
||||||
|
}bdz_ph_edge_t;
|
||||||
|
|
||||||
|
/* A queue of edge indices, stored as a plain array of cmph_uint32. */
typedef cmph_uint32 * bdz_ph_queue_t;
|
||||||
|
|
||||||
|
/* Allocate storage for nedges edge indices and store the pointer in *queuep.
 * The malloc result is stored as is; it is not checked here. */
static void bdz_ph_alloc_queue(bdz_ph_queue_t * queuep, cmph_uint32 nedges)
{
    const size_t queue_bytes = nedges * sizeof(cmph_uint32);

    *queuep = malloc(queue_bytes);
}
|
||||||
|
/* Release the array that *queue points to.  *queue itself is not reset. */
static void bdz_ph_free_queue(bdz_ph_queue_t * queue)
{
    bdz_ph_queue_t storage = *queue;

    free(storage);
}
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
cmph_uint32 nedges;
|
||||||
|
bdz_ph_edge_t * edges;
|
||||||
|
cmph_uint32 * first_edge;
|
||||||
|
cmph_uint8 * vert_degree;
|
||||||
|
}bdz_ph_graph3_t;
|
||||||
|
|
||||||
|
|
||||||
|
/* Allocate the three arrays of graph3: nedges edge records, and one list head
 * and one degree counter for each of the nvertices vertices.  The arrays are
 * left uninitialized and the malloc results are not checked here. */
static void bdz_ph_alloc_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
    const size_t edge_bytes = nedges * sizeof(bdz_ph_edge_t);
    const size_t head_bytes = nvertices * sizeof(cmph_uint32);
    const size_t degree_bytes = (size_t)nvertices;

    graph3->edges = malloc(edge_bytes);
    graph3->first_edge = malloc(head_bytes);
    graph3->vert_degree = malloc(degree_bytes);
}
|
||||||
|
/* Reset graph3 to the empty graph: no edges, every degree 0 and every list
 * head set to all-ones bytes (i.e. NULL_EDGE).  nedges is not used. */
static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
    graph3->nedges = 0;
    memset(graph3->vert_degree, 0, (size_t)nvertices);
    memset(graph3->first_edge, 0xff, nvertices * sizeof(cmph_uint32));
}
|
||||||
|
/* Free all three arrays owned by graph3.  The pointers are not reset. */
static void bdz_ph_free_graph3(bdz_ph_graph3_t *graph3)
{
    free(graph3->edges);
    free(graph3->first_edge);
    free(graph3->vert_degree);
}
|
||||||
|
|
||||||
|
/* Free the per-vertex arrays of graph3 but keep the edge array.  The freed
 * pointers are set to NULL, so a later bdz_ph_free_graph3() is harmless. */
static void bdz_ph_partial_free_graph3(bdz_ph_graph3_t *graph3)
{
    free(graph3->first_edge);
    graph3->first_edge = NULL;

    free(graph3->vert_degree);
    graph3->vert_degree = NULL;
}
|
||||||
|
|
||||||
|
/* Append the edge (v0, v1, v2) to graph3.  The new edge becomes the head of
 * the incidence list of each of its vertices and each vertex degree grows by
 * one. */
static void bdz_ph_add_edge(bdz_ph_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2)
{
    const cmph_uint32 new_edge = graph3->nedges;
    const cmph_uint32 endpoints[3] = { v0, v1, v2 };
    bdz_ph_edge_t *slot = &graph3->edges[new_edge];
    cmph_uint32 k;

    /* record the vertices and link to the previous list heads ... */
    for (k = 0; k < 3; k++)
    {
        slot->vertices[k] = endpoints[k];
    }
    for (k = 0; k < 3; k++)
    {
        slot->next_edges[k] = graph3->first_edge[endpoints[k]];
    }
    /* ... and only then make the new edge the head of every list */
    for (k = 0; k < 3; k++)
    {
        graph3->first_edge[endpoints[k]] = new_edge;
    }
    for (k = 0; k < 3; k++)
    {
        graph3->vert_degree[endpoints[k]]++;
    }
    graph3->nedges++;
}
|
||||||
|
|
||||||
|
/* Print the first nedges edges (vertices and list links) and the list head
 * of the first nvertices vertices to stdout.  Used for diagnostics only.
 *
 * All printed quantities are unsigned 32-bit values, so they are printed with
 * %u; in particular NULL_EDGE shows up as 4294967295. */
static void bdz_ph_dump_graph(bdz_ph_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
    cmph_uint32 i;

    for (i = 0; i < nedges; i++)
    {
        const bdz_ph_edge_t *edge = &graph3->edges[i];

        printf("\nedge %u %u %u %u ", i, edge->vertices[0],
               edge->vertices[1], edge->vertices[2]);
        printf(" nexts %u %u %u", edge->next_edges[0],
               edge->next_edges[1], edge->next_edges[2]);
    }

    for (i = 0; i < nvertices; i++)
    {
        printf("\nfirst for vertice %u %u ", i, graph3->first_edge[i]);
    }
}
|
||||||
|
|
||||||
|
/* Unlink curr_edge from the incidence list of each of its three vertices and
 * decrement the degree of those vertices.  The edge record itself is kept.
 *
 * If curr_edge cannot be found in one of the lists the graph is dumped and
 * the process exits with status -1.
 * NOTE(review): the dump is given nedges + nedges/4 as the vertex count, an
 * estimate rather than the real count - confirm it can never exceed the size
 * of first_edge. */
static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
{
    cmph_uint32 pos;          /* position of the vertex inside curr_edge */
    cmph_uint32 prev_pos = 0; /* position of that vertex inside the predecessor */

    for (pos = 0; pos < 3; pos++)
    {
        const cmph_uint32 vert = graph3->edges[curr_edge].vertices[pos];
        cmph_uint32 cursor = graph3->first_edge[vert];
        cmph_uint32 prev = NULL_EDGE;

        /* walk the list of vert until curr_edge (or the end) is reached */
        while (cursor != NULL_EDGE && cursor != curr_edge)
        {
            const bdz_ph_edge_t *hop = &graph3->edges[cursor];

            prev = cursor;
            if (hop->vertices[0] == vert)
            {
                prev_pos = 0;
            }
            else if (hop->vertices[1] == vert)
            {
                prev_pos = 1;
            }
            else
            {
                prev_pos = 2;
            }
            cursor = hop->next_edges[prev_pos];
        }

        if (cursor == NULL_EDGE)
        {
            printf("\nerror remove edge %d dump graph",curr_edge);
            bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
            exit(-1);
        }

        /* splice curr_edge out: patch either the list head or the predecessor */
        if (prev == NULL_EDGE)
        {
            graph3->first_edge[vert] = graph3->edges[cursor].next_edges[pos];
        }
        else
        {
            graph3->edges[prev].next_edges[prev_pos] = graph3->edges[cursor].next_edges[pos];
        }
        graph3->vert_degree[vert]--;
    }
}
|
||||||
|
|
||||||
|
/* Repeatedly remove edges that have a vertex of degree 1 and record the
 * removal order in queue.
 *
 * First every edge that already has a degree-1 vertex is queued.  Then edges
 * are taken from the queue one by one and removed from the graph; each vertex
 * of a removed edge that thereby drops to degree 1 gets its remaining edge
 * queued.  A bitmap makes sure no edge is queued twice.
 *
 * nvertices is not used.
 *
 * Returns 0 when all nedges edges were queued, and a negative number
 * otherwise (including when the bitmap cannot be allocated). */
static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3)
{
    cmph_uint32 i, k;
    cmph_uint32 queue_head = 0, queue_tail = 0;
    cmph_uint32 curr_edge;
    cmph_uint32 tmp_edge;
    cmph_uint8 *marked_edge = calloc((size_t)(nedges >> 3) + 1, sizeof(cmph_uint8));

    (void)nvertices;
    if (marked_edge == NULL)
    {
        return -1;
    }

    for (i = 0; i < nedges; i++)
    {
        const cmph_uint32 *v = graph3->edges[i].vertices;

        if (graph3->vert_degree[v[0]] == 1 ||
            graph3->vert_degree[v[1]] == 1 ||
            graph3->vert_degree[v[2]] == 1)
        {
            if (!GETBIT(marked_edge, i))
            {
                queue[queue_head++] = i;
                SETBIT(marked_edge, i);
            }
        }
    }

    while (queue_tail != queue_head)
    {
        curr_edge = queue[queue_tail++];
        bdz_ph_remove_edge(graph3, curr_edge);

        for (k = 0; k < 3; k++)
        {
            const cmph_uint32 vert = graph3->edges[curr_edge].vertices[k];

            if (graph3->vert_degree[vert] == 1)
            {
                /* exactly one edge is left on vert: it is the list head */
                tmp_edge = graph3->first_edge[vert];
                if (!GETBIT(marked_edge, tmp_edge))
                {
                    queue[queue_head++] = tmp_edge;
                    SETBIT(marked_edge, tmp_edge);
                }
            }
        }
    }

    free(marked_edge);
    return (int)queue_head - (int)nedges; /* returns 0 if successful, otherwise a negative number */
}
|
||||||
|
|
||||||
|
static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue);
|
||||||
|
static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue);
|
||||||
|
static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph);
|
||||||
|
|
||||||
|
bdz_ph_config_data_t *bdz_ph_config_new()
|
||||||
|
{
|
||||||
|
bdz_ph_config_data_t *bdz_ph;
|
||||||
|
bdz_ph = (bdz_ph_config_data_t *)malloc(sizeof(bdz_ph_config_data_t));
|
||||||
|
assert(bdz_ph);
|
||||||
|
memset(bdz_ph, 0, sizeof(bdz_ph_config_data_t));
|
||||||
|
bdz_ph->hashfunc = CMPH_HASH_JENKINS;
|
||||||
|
bdz_ph->g = NULL;
|
||||||
|
bdz_ph->hl = NULL;
|
||||||
|
return bdz_ph;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free the algorithm-specific configuration attached to mph (mph->data).
 * Only the configuration object itself is freed. */
void bdz_ph_config_destroy(cmph_config_t *mph)
{
    bdz_ph_config_data_t *config = (bdz_ph_config_data_t *)mph->data;

    DEBUGP("Destroying algorithm dependent data\n");
    free(config);
}
|
||||||
|
|
||||||
|
/* Select the hash function for bdz_ph from hashfuncs, a list terminated by
 * CMPH_HASH_COUNT.  bdz_ph only uses one linear hash function, so only the
 * first entry is taken; an empty list leaves the current choice untouched. */
void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
    bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;

    if (hashfuncs[0] != CMPH_HASH_COUNT)
    {
        bdz_ph->hashfunc = hashfuncs[0];
    }
}
|
||||||
|
|
||||||
|
/* Build a bdz_ph perfect hash function for the keys of mph->key_source.
 *
 * c is the ratio used to size the graph; 0 selects the default 1.23.  Each of
 * the three partitions gets r = ceil(c * nkeys / 3) vertices, bumped to the
 * next odd number, and the graph has n = 3 * r vertices.
 *
 * The mapping step is tried with fresh hash states up to 100 times.  On
 * success the g array is assigned and re-packed, and ownership of g and of
 * the hash state moves from the configuration to the returned object.
 *
 * Returns the new function, or NULL when no usable graph was found in 100
 * attempts or when memory could not be allocated. */
cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
{
    cmph_t *mphf = NULL;
    bdz_ph_data_t *bdz_phf = NULL;
    cmph_uint32 iterations;
    bdz_ph_queue_t edges;
    bdz_ph_graph3_t graph3;
    bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
#ifdef CMPH_TIMING
    double construction_time_begin = 0.0;
    double construction_time = 0.0;
    ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

    if (c == 0) c = 1.23; // validating restrictions over parameter c.
    DEBUGP("c: %f\n", c);
    bdz_ph->m = mph->key_source->nkeys;
    bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
    if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
    bdz_ph->n = 3*bdz_ph->r;

    bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n);
    bdz_ph_alloc_queue(&edges, bdz_ph->m);
    /* n is at least 3, so the per-vertex arrays must exist; the per-edge
     * arrays may legitimately be NULL only when there are no keys at all */
    if (graph3.first_edge == NULL || graph3.vert_degree == NULL ||
        (bdz_ph->m != 0 && (graph3.edges == NULL || edges == NULL)))
    {
        bdz_ph_free_queue(&edges);
        bdz_ph_free_graph3(&graph3);
        return NULL;
    }
    DEBUGP("Created hypergraph\n");

    DEBUGP("m (edges): %u n (vertices): %u  r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c);

    // Mapping step
    iterations = 100;
    if (mph->verbosity)
    {
        fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz_ph->m, bdz_ph->n);
    }
    while (1)
    {
        int ok;
        DEBUGP("linear hash function \n");
        bdz_ph->hl = hash_state_new(bdz_ph->hashfunc, 15);

        ok = bdz_ph_mapping(mph, &graph3, edges);
        if (ok)
        {
            break;
        }

        --iterations;
        hash_state_destroy(bdz_ph->hl);
        bdz_ph->hl = NULL;
        DEBUGP("%u iterations remaining\n", iterations);
        if (mph->verbosity)
        {
            fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
        }
        if (iterations == 0) break;
    }

    if (iterations == 0)
    {
        bdz_ph_free_queue(&edges);
        bdz_ph_free_graph3(&graph3);
        return NULL;
    }
    /* the per-vertex arrays are no longer needed; the edge array still is */
    bdz_ph_partial_free_graph3(&graph3);

    // Assigning step
    if (mph->verbosity)
    {
        fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz_ph->m, bdz_ph->n);
    }
    assigning(bdz_ph, &graph3, edges);

    bdz_ph_free_queue(&edges);
    bdz_ph_free_graph3(&graph3);

    if (mph->verbosity)
    {
        fprintf(stderr, "Starting optimization step\n");
    }

    bdz_ph_optimization(bdz_ph);

#ifdef CMPH_TIMING
    ELAPSED_TIME_IN_SECONDS(&construction_time);
#endif
    mphf = (cmph_t *)malloc(sizeof(cmph_t));
    bdz_phf = (bdz_ph_data_t *)malloc(sizeof(bdz_ph_data_t));
    if (mphf == NULL || bdz_phf == NULL)
    {
        /* nothing was handed over yet: drop the partial result */
        free(mphf);
        free(bdz_phf);
        free(bdz_ph->g);
        bdz_ph->g = NULL;
        hash_state_destroy(bdz_ph->hl);
        bdz_ph->hl = NULL;
        return NULL;
    }
    mphf->algo = mph->algo;
    bdz_phf->g = bdz_ph->g;
    bdz_ph->g = NULL; //transfer memory ownership
    bdz_phf->hl = bdz_ph->hl;
    bdz_ph->hl = NULL; //transfer memory ownership
    bdz_phf->n = bdz_ph->n;
    bdz_phf->m = bdz_ph->m;
    bdz_phf->r = bdz_ph->r;
    mphf->data = bdz_phf;
    mphf->size = bdz_ph->n;

    DEBUGP("Successfully generated minimal perfect hash\n");
    if (mph->verbosity)
    {
        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
    }

#ifdef CMPH_TIMING
    register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8;
    register cmph_uint32 keys_per_bucket = 1;
    construction_time = construction_time - construction_time_begin;
    fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m);
#endif

    return mphf;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Mapping step: rebuild graph3 from scratch with one edge per key and try to
 * peel it completely.
 *
 * Every key is hashed into three values; value k is reduced modulo r and
 * shifted into the k-th block of r vertices, giving the three vertices of the
 * key's edge.  The removal order is written to queue.
 *
 * Returns non-zero when bdz_ph_generate_queue() reports success (0), and 0
 * otherwise. */
static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
{
    bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
    cmph_uint32 hashes[3];
    cmph_uint32 key_index;

    bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n);
    mph->key_source->rewind(mph->key_source->data);

    for (key_index = 0; key_index < mph->key_source->nkeys; ++key_index)
    {
        char *key = NULL;
        cmph_uint32 keylen;
        cmph_uint32 v0, v1, v2;

        mph->key_source->read(mph->key_source->data, &key, &keylen);
        hash_vector(bdz_ph->hl, key, keylen, hashes);
        v0 = hashes[0] % bdz_ph->r;
        v1 = hashes[1] % bdz_ph->r + bdz_ph->r;
        v2 = hashes[2] % bdz_ph->r + (bdz_ph->r << 1);
        mph->key_source->dispose(mph->key_source->data, key, keylen);

        bdz_ph_add_edge(graph3, v0, v1, v2);
    }

    return bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3) == 0;
}
|
||||||
|
|
||||||
|
/* Assigning step: allocate bdz_ph->g (2 bits per vertex, initially zero) and
 * fill it by visiting the edges of queue from last to first.
 *
 * For each edge the first of its vertices (in the order v0, v1, v2) that is
 * not yet marked receives a value derived from the g values of the other two;
 * the vertices of the edge that come after it are marked as well.  When v0
 * and v1 are both already marked, v2 receives the value. */
static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
{
    const size_t marked_bytes = (size_t)(bdz_ph->n >> 3) + 1;
    cmph_uint8 *marked_vertices = malloc(marked_bytes);
    const cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0);
    cmph_uint32 pos;

    bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
    memset(marked_vertices, 0, marked_bytes);

    /* walk the queue backwards: pos runs from nedges-1 down to 0 */
    for (pos = graph3->nedges; pos-- > 0; )
    {
        const cmph_uint32 curr_edge = queue[pos];
        const cmph_uint32 v0 = graph3->edges[curr_edge].vertices[0];
        const cmph_uint32 v1 = graph3->edges[curr_edge].vertices[1];
        const cmph_uint32 v2 = graph3->edges[curr_edge].vertices[2];

        DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
        if (!GETBIT(marked_vertices, v0))
        {
            /* setting an already set bit changes nothing, so no test needed */
            SETBIT(marked_vertices, v1);
            SETBIT(marked_vertices, v2);
            SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3);
            SETBIT(marked_vertices, v0);
        }
        else if (!GETBIT(marked_vertices, v1))
        {
            SETBIT(marked_vertices, v2);
            SETVALUE0(bdz_ph->g, v1, (7 - (GETVALUE(bdz_ph->g, v0)+GETVALUE(bdz_ph->g, v2)))%3);
            SETBIT(marked_vertices, v1);
        }
        else
        {
            SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3);
            SETBIT(marked_vertices, v2);
        }
        DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
    }

    free(marked_vertices);
}
|
||||||
|
|
||||||
|
/* Re-pack bdz_ph->g from the 2-bits-per-vertex layout read by GETVALUE into
 * a layout with five values per byte, where vertex i contributes
 * value * 3^(i % 5) to byte i / 5.  The old array is freed and replaced. */
static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph)
{
    const cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
    cmph_uint8 *packed = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
    cmph_uint32 i;

    for (i = 0; i < bdz_ph->n; i++)
    {
        const cmph_uint8 digit = GETVALUE(bdz_ph->g, i);
        cmph_uint8 *slot = &packed[i/5];

        *slot = (cmph_uint8) (*slot + digit*pow3_table[i%5U]);
    }

    free(bdz_ph->g);
    bdz_ph->g = packed;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Write mphf to fd: the common header (via __cmph_dump), the length and
 * bytes of the dumped hash state, then n, m, r and the packed g array of
 * ceil(n / 5) bytes.
 *
 * Returns 1 when every write done here succeeded and 0 as soon as one of
 * them fails; after a failure nothing more is written. */
int bdz_ph_dump(cmph_t *mphf, FILE *fd)
{
    char *buf = NULL;
    cmph_uint32 buflen;
    cmph_uint32 sizeg = 0;
    int ok;
    bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;

    __cmph_dump(mphf, fd);

    hash_state_dump(data->hl, &buf, &buflen);
    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
    ok = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) == 1;
    /* written as buflen items of one byte so that an empty state is not
     * mistaken for a failed write */
    ok = ok && fwrite(buf, (size_t)1, (size_t)buflen, fd) == (size_t)buflen;
    free(buf);

    ok = ok && fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd) == 1;
    ok = ok && fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd) == 1;
    ok = ok && fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd) == 1;
    sizeg = (cmph_uint32)ceil(data->n/5.0);
    ok = ok && fwrite(data->g, sizeof(cmph_uint8), (size_t)sizeg, fd) == (size_t)sizeg;

#ifdef DEBUG
    {
        cmph_uint32 i;
        fprintf(stderr, "G: ");
        for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i));
        fprintf(stderr, "\n");
    }
#endif
    return ok ? 1 : 0;
}
|
||||||
|
|
||||||
|
/* Read a bdz_ph function from f into a newly allocated bdz_ph_data_t and
 * attach it to mphf->data.  The layout read is: length of the hash state, the
 * hash state bytes, n, m, r, and ceil(n / 5) bytes of g.
 *
 * The function returns nothing, and the results of malloc and fread are not
 * examined, so a short or failed read is not reported to the caller. */
void bdz_ph_load(FILE *f, cmph_t *mphf)
{
    bdz_ph_data_t *bdz_ph = (bdz_ph_data_t *)malloc(sizeof(bdz_ph_data_t));
    char *state_buf = NULL;
    cmph_uint32 buflen;
    cmph_uint32 sizeg = 0;
    size_t nbytes;

    DEBUGP("Loading bdz_ph mphf\n");
    mphf->data = bdz_ph;

    /* hash state: length prefix followed by the raw bytes */
    nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
    DEBUGP("Hash state has %u bytes\n", buflen);
    state_buf = (char *)malloc((size_t)buflen);
    nbytes = fread(state_buf, (size_t)buflen, (size_t)1, f);
    bdz_ph->hl = hash_state_load(state_buf, buflen);
    free(state_buf);

    DEBUGP("Reading m and n\n");
    nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
    nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
    nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);

    sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
    bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
    nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);

    (void)nbytes;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Evaluate the perfect hash function for key (keylen bytes).
 *
 * The key is hashed into three vertices, one per block of r vertices.  The
 * base-3 digit stored for each vertex is extracted from g through
 * lookup_table, and the sum of the three digits modulo 3 selects which of
 * the three vertices is returned. */
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
    bdz_ph_data_t *bdz_ph = mphf->data;
    cmph_uint32 hl[3];
    cmph_uint32 digit_sum = 0;
    cmph_uint32 k;

    hash_vector(bdz_ph->hl, key, keylen,hl);
    hl[0] = hl[0] % bdz_ph->r;
    hl[1] = hl[1] % bdz_ph->r + bdz_ph->r;
    hl[2] = hl[2] % bdz_ph->r + (bdz_ph->r << 1);

    for (k = 0; k < 3; k++)
    {
        /* five digits per byte: byte index is v / 5, digit index is v % 5 */
        const cmph_uint8 packed = bdz_ph->g[hl[k]/5];

        digit_sum += lookup_table[hl[k]%5U][packed];
    }

    return hl[digit_sum%3];
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Destroy mphf: release its hash state, its g array, the bdz_ph data object
 * and finally mphf itself. */
void bdz_ph_destroy(cmph_t *mphf)
{
    bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;

    hash_state_destroy(data->hl);
    free(data->g);
    free(data);
    free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing hl type
|
||||||
|
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||||
|
*((cmph_uint32 *) ptr) = hl_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing hl
|
||||||
|
hash_state_pack(data->hl, ptr);
|
||||||
|
ptr += hash_state_packed_size(hl_type);
|
||||||
|
|
||||||
|
// packing r
|
||||||
|
*((cmph_uint32 *) ptr) = data->r;
|
||||||
|
ptr += sizeof(data->r);
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint8)*sizeg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||||
|
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||||
|
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
|
||||||
|
return (cmph_uint32) (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*sizeg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
|
||||||
|
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
|
||||||
|
|
||||||
|
register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type);
|
||||||
|
|
||||||
|
register cmph_uint32 r = *((cmph_uint32*) ptr);
|
||||||
|
register cmph_uint8 * g = ptr + 4;
|
||||||
|
|
||||||
|
cmph_uint32 hl[3];
|
||||||
|
register cmph_uint8 byte0, byte1, byte2;
|
||||||
|
register cmph_uint32 vertex;
|
||||||
|
|
||||||
|
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
|
||||||
|
|
||||||
|
hl[0] = hl[0] % r;
|
||||||
|
hl[1] = hl[1] % r + r;
|
||||||
|
hl[2] = hl[2] % r + (r << 1);
|
||||||
|
|
||||||
|
byte0 = g[hl[0]/5];
|
||||||
|
byte1 = g[hl[1]/5];
|
||||||
|
byte2 = g[hl[2]/5];
|
||||||
|
|
||||||
|
byte0 = lookup_table[hl[0]%5][byte0];
|
||||||
|
byte1 = lookup_table[hl[1]%5][byte1];
|
||||||
|
byte2 = lookup_table[hl[2]%5][byte2];
|
||||||
|
vertex = hl[(byte0 + byte1 + byte2)%3];
|
||||||
|
|
||||||
|
return vertex;
|
||||||
|
}
|
42
cmph/bdz_ph.h
Executable file
42
cmph/bdz_ph.h
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef __CMPH_BDZ_PH_H__
|
||||||
|
#define __CMPH_BDZ_PH_H__
|
||||||
|
|
||||||
|
#include "cmph.h"
|
||||||
|
|
||||||
|
typedef struct __bdz_ph_data_t bdz_ph_data_t;
|
||||||
|
typedef struct __bdz_ph_config_data_t bdz_ph_config_data_t;
|
||||||
|
|
||||||
|
/* Create a new bdz_ph configuration object.  Takes no arguments. */
bdz_ph_config_data_t *bdz_ph_config_new(void);
|
||||||
|
void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||||
|
void bdz_ph_config_destroy(cmph_config_t *mph);
|
||||||
|
cmph_t *bdz_ph_new(cmph_config_t *mph, double c);
|
||||||
|
|
||||||
|
void bdz_ph_load(FILE *f, cmph_t *mphf);
|
||||||
|
int bdz_ph_dump(cmph_t *mphf, FILE *f);
|
||||||
|
void bdz_ph_destroy(cmph_t *mphf);
|
||||||
|
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
#endif
|
36
cmph/bdz_structs.h
Executable file
36
cmph/bdz_structs.h
Executable file
@ -0,0 +1,36 @@
|
|||||||
|
#ifndef __CMPH_BDZ_STRUCTS_H__
|
||||||
|
#define __CMPH_BDZ_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
|
||||||
|
struct __bdz_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
cmph_uint32 r; //partition vertex count
|
||||||
|
cmph_uint8 *g;
|
||||||
|
hash_state_t *hl; // linear hashing
|
||||||
|
|
||||||
|
cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
|
||||||
|
cmph_uint8 b; // number of bits of k
|
||||||
|
cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$
|
||||||
|
cmph_uint32 *ranktable; // rank table
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct __bdz_config_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
cmph_uint32 r; //partition vertex count
|
||||||
|
cmph_uint8 *g;
|
||||||
|
hash_state_t *hl; // linear hashing
|
||||||
|
|
||||||
|
cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
|
||||||
|
cmph_uint8 b; // number of bits of k
|
||||||
|
cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$
|
||||||
|
cmph_uint32 *ranktable; // rank table
|
||||||
|
CMPH_HASH hashfunc;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
26
cmph/bdz_structs_ph.h
Executable file
26
cmph/bdz_structs_ph.h
Executable file
@ -0,0 +1,26 @@
|
|||||||
|
#ifndef __CMPH_BDZ_STRUCTS_PH_H__
|
||||||
|
#define __CMPH_BDZ_STRUCTS_PH_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
|
||||||
|
struct __bdz_ph_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
cmph_uint32 r; //partition vertex count
|
||||||
|
cmph_uint8 *g;
|
||||||
|
hash_state_t *hl; // linear hashing
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct __bdz_ph_config_data_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfunc;
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
cmph_uint32 r; //partition vertex count
|
||||||
|
cmph_uint8 *g;
|
||||||
|
hash_state_t *hl; // linear hashing
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
179
cmph/bitbool.h
Normal file
179
cmph/bitbool.h
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
#ifndef _CMPH_BITBOOL_H__
|
||||||
|
#define _CMPH_BITBOOL_H__
|
||||||
|
#include "cmph_types.h"
|
||||||
|
|
||||||
|
static const cmph_uint8 bitmask[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
||||||
|
|
||||||
|
static const cmph_uint32 bitmask32[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7,
|
||||||
|
1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15,
|
||||||
|
1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 21, 1 << 22, 1 << 23,
|
||||||
|
1 << 24, 1 << 25, 1 << 26, 1 << 27, 1 << 28, 1 << 29, 1 << 30, 1U << 31
|
||||||
|
};
|
||||||
|
|
||||||
|
static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||||
|
|
||||||
|
|
||||||
|
/** \def GETBIT(array, i)
 *  \brief get the value of a 1-bit integer stored in an array.
 *  \param array to get 1-bit integer values from
 *  \param i is the index in array to get the 1-bit integer value from
 *
 * GETBIT(array, i) is a macro that gets the value (0 or 1) of a 1-bit integer
 * stored in array. Both arguments are parenthesized in the expansion so that
 * arbitrary expressions can be passed; note that i is evaluated more than
 * once, so it must not have side effects.
 */
#define GETBIT(array, i) (((array)[(i) >> 3] & bitmask[(i) & 0x00000007]) >> ((i) & 0x00000007))
|
||||||
|
|
||||||
|
/** \def SETBIT(array, i)
 *  \brief set 1 to a 1-bit integer stored in an array.
 *  \param array to store 1-bit integer values
 *  \param i is the index in array to set the bit to 1
 *
 * SETBIT(array, i) is a macro that sets 1 to a 1-bit integer stored in an array.
 * Both arguments are parenthesized in the expansion; i is evaluated twice, so
 * it must not have side effects.
 */
#define SETBIT(array, i) ((array)[(i) >> 3] |= bitmask[(i) & 0x00000007])
|
||||||
|
|
||||||
|
/** \def UNSETBIT(array, i)
 *  \brief set 0 to a 1-bit integer stored in an array.
 *  \param array to store 1-bit integer values
 *  \param i is the index in array to set the bit to 0
 *
 * UNSETBIT(array, i) is a macro that sets 0 to a 1-bit integer stored in an array.
 * The bit is cleared with AND-NOT, so the result is 0 whether or not the bit
 * was previously set (an XOR would turn an already-clear bit on).
 * i is evaluated twice, so it must not have side effects.
 */
#define UNSETBIT(array, i) ((array)[(i) >> 3] &= (cmph_uint8)~bitmask[(i) & 0x00000007])
|
||||||
|
|
||||||
|
//#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8])
|
||||||
|
//#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
||||||
|
//#define UNSETBIT(array, i) (array[(i) / 8] ^= ((bitmask[(i) % 8])))
|
||||||
|
|
||||||
|
|
||||||
|
/** \def SETVALUE1(array, i, v)
 *  \brief set a value for a 2-bit integer stored in an array initialized with 1s.
 *  \param array to store 2-bit integer values
 *  \param i is the index in array to set the value v
 *  \param v is the value to be set
 *
 * SETVALUE1(array, i, v) is a macro that sets a value for a 2-bit integer stored in an array.
 * The array should be initialized with all bits set to 1. For example:
 * memset(array, 0xff, arraySize);
 * All arguments are parenthesized in the expansion; i is evaluated more than
 * once, so it must not have side effects.
 */
#define SETVALUE1(array, i, v) ((array)[(i) >> 2] &= (cmph_uint8)(((v) << (((i) & 0x00000003) << 1)) | valuemask[(i) & 0x00000003]))
|
||||||
|
|
||||||
|
/** \def SETVALUE0(array, i, v)
 *  \brief set a value for a 2-bit integer stored in an array initialized with 0s.
 *  \param array to store 2-bit integer values
 *  \param i is the index in array to set the value v
 *  \param v is the value to be set
 *
 * SETVALUE0(array, i, v) is a macro that sets a value for a 2-bit integer stored in an array.
 * The array should be initialized with all bits set to 0. For example:
 * memset(array, 0, arraySize);
 * All arguments are parenthesized in the expansion; i is evaluated twice, so
 * it must not have side effects.
 */
#define SETVALUE0(array, i, v) ((array)[(i) >> 2] |= (cmph_uint8)((v) << (((i) & 0x00000003) << 1)))
|
||||||
|
|
||||||
|
|
||||||
|
/** \def GETVALUE(array, i)
 *  \brief get a value for a 2-bit integer stored in an array.
 *  \param array to get 2-bit integer values from
 *  \param i is the index in array to get the value from
 *
 * GETVALUE(array, i) is a macro that gets a value for a 2-bit integer stored in an array.
 * Both arguments are parenthesized in the expansion; i is evaluated twice, so
 * it must not have side effects.
 */
#define GETVALUE(array, i) ((cmph_uint8)(((array)[(i) >> 2] >> (((i) & 0x00000003U) << 1U)) & 0x00000003U))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** \def SETBIT32(array, i)
 *  \brief set 1 to a 1-bit integer stored in an array of 32-bit words.
 *  \param array to store 1-bit integer values. The entries are 32-bit words.
 *  \param i is the index in array to set the bit to 1
 *
 * SETBIT32(array, i) is a macro that sets 1 to a 1-bit integer stored in an array of 32-bit words.
 * Both arguments are parenthesized in the expansion; i is evaluated twice, so
 * it must not have side effects.
 */
#define SETBIT32(array, i) ((array)[(i) >> 5] |= bitmask32[(i) & 0x0000001f])
|
||||||
|
|
||||||
|
/** \def GETBIT32(array, i)
 *  \brief get the value of a 1-bit integer stored in an array of 32-bit words.
 *  \param array to get 1-bit integer values from. The entries are 32-bit words.
 *  \param i is the index in array to get the 1-bit integer value from
 *
 * GETBIT32(array, i) is a macro that tests a 1-bit integer stored in an array of 32-bit words.
 * The result is the masked word, i.e. zero when the bit is clear and non-zero
 * (not necessarily 1) when it is set.
 * Both arguments are parenthesized in the expansion; i is evaluated twice, so
 * it must not have side effects.
 */
#define GETBIT32(array, i) ((array)[(i) >> 5] & bitmask32[(i) & 0x0000001f])
|
||||||
|
|
||||||
|
/** \def UNSETBIT32(array, i)
 *  \brief set 0 to a 1-bit integer stored in an array of 32-bit words.
 *  \param array to store 1-bit integer values. The entries are 32-bit words
 *  \param i is the index in array to set the bit to 0
 *
 * UNSETBIT32(array, i) is a macro that sets 0 to a 1-bit integer stored in an array of 32-bit words.
 * The bit is cleared with AND-NOT, so the result is 0 whether or not the bit
 * was previously set (an XOR would turn an already-clear bit on).
 * i is evaluated twice, so it must not have side effects.
 */
#define UNSETBIT32(array, i) ((array)[(i) >> 5] &= ~bitmask32[(i) & 0x0000001f])
|
||||||
|
|
||||||
|
/* Number of 32-bit words needed to hold n entries of bits_length bits each
 * (rounded up). Arguments are parenthesized so expressions may be passed. */
#define BITS_TABLE_SIZE(n, bits_length) (((n) * (bits_length) + 31) >> 5)
|
||||||
|
|
||||||
|
/* Store a fixed-width bit string into slot `index` of a packed table.
 *
 * bits_table    array of 32-bit words holding consecutive string_length-bit slots
 * index         slot number; the slot starts at bit index * string_length
 * bits_string   value to store
 * string_length width of each slot in bits
 * string_mask   mask covering the low string_length bits, supplied by the caller
 *
 * The slot is cleared with string_mask and then OR-ed with bits_string; when
 * the slot straddles a word boundary the spill-over is written to the next word.
 */
static inline void set_bits_value(cmph_uint32 * bits_table, cmph_uint32 index, cmph_uint32 bits_string,
                                  cmph_uint32 string_length, cmph_uint32 string_mask)
{
	const cmph_uint32 bit_idx = index * string_length;
	const cmph_uint32 word_idx = bit_idx >> 5;
	const cmph_uint32 shift1 = bit_idx & 0x0000001f;
	const cmph_uint32 shift2 = 32 - shift1;

	bits_table[word_idx] &= ~(string_mask << shift1);
	bits_table[word_idx] |= bits_string << shift1;

	/* fewer than string_length bits were left in this word: finish in the next one */
	if (shift2 < string_length)
	{
		bits_table[word_idx+1] &= ~(string_mask >> shift2);
		bits_table[word_idx+1] |= bits_string >> shift2;
	}
}
|
||||||
|
|
||||||
|
/* Read the fixed-width bit string stored in slot `index` of a packed table.
 *
 * bits_table    array of 32-bit words holding consecutive string_length-bit slots
 * index         slot number; the slot starts at bit index * string_length
 * string_length width of each slot in bits
 * string_mask   mask covering the low string_length bits, supplied by the caller
 *
 * Returns the slot contents; when the slot straddles a word boundary the
 * high part is fetched from the next word.
 */
static inline cmph_uint32 get_bits_value(cmph_uint32 * bits_table,cmph_uint32 index, cmph_uint32 string_length, cmph_uint32 string_mask)
{
	const cmph_uint32 bit_idx = index * string_length;
	const cmph_uint32 word_idx = bit_idx >> 5;
	const cmph_uint32 shift1 = bit_idx & 0x0000001f;
	const cmph_uint32 shift2 = 32 - shift1;
	cmph_uint32 bits_string = (bits_table[word_idx] >> shift1) & string_mask;

	if (shift2 < string_length)
		bits_string |= (bits_table[word_idx+1] << shift2) & string_mask;

	return bits_string;
}
|
||||||
|
|
||||||
|
/* Store a bit string of string_length bits starting at absolute bit position pos.
 *
 * bits_table    array of 32-bit words
 * pos           bit offset of the first bit to write
 * bits_string   value to store
 * string_length number of bits to write (at most 32)
 *
 * The mask is built without shifting by the full word width, which would be
 * undefined for string_length == 32.
 */
static inline void set_bits_at_pos(cmph_uint32 * bits_table, cmph_uint32 pos, cmph_uint32 bits_string, cmph_uint32 string_length)
{
	const cmph_uint32 word_idx = pos >> 5;
	const cmph_uint32 shift1 = pos & 0x0000001f;
	const cmph_uint32 shift2 = 32 - shift1;
	const cmph_uint32 string_mask = (string_length >= 32) ? 0xffffffffU : (1U << string_length) - 1;

	bits_table[word_idx] &= ~(string_mask << shift1);
	bits_table[word_idx] |= bits_string << shift1;

	/* fewer than string_length bits were left in this word: finish in the next one */
	if (shift2 < string_length)
	{
		bits_table[word_idx+1] &= ~(string_mask >> shift2);
		bits_table[word_idx+1] |= bits_string >> shift2;
	}
}
|
||||||
|
|
||||||
|
/* Read a bit string of string_length bits starting at absolute bit position pos.
 *
 * bits_table    array of 32-bit words
 * pos           bit offset of the first bit to read
 * string_length number of bits to read (at most 32)
 *
 * Returns the extracted value. The mask is built without shifting by the full
 * word width, which would be undefined for string_length == 32.
 */
static inline cmph_uint32 get_bits_at_pos(cmph_uint32 * bits_table,cmph_uint32 pos,cmph_uint32 string_length)
{
	const cmph_uint32 word_idx = pos >> 5;
	const cmph_uint32 shift1 = pos & 0x0000001f;
	const cmph_uint32 shift2 = 32 - shift1;
	const cmph_uint32 string_mask = (string_length >= 32) ? 0xffffffffU : (1U << string_length) - 1;
	cmph_uint32 bits_string = (bits_table[word_idx] >> shift1) & string_mask;

	if (shift2 < string_length)
		bits_string |= (bits_table[word_idx+1] << shift2) & string_mask;

	return bits_string;
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
620
cmph/bmz.c
Normal file
620
cmph/bmz.c
Normal file
@ -0,0 +1,620 @@
|
|||||||
|
#include "graph.h"
|
||||||
|
#include "bmz.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "bmz_structs.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "vqueue.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
static int bmz_gen_edges(cmph_config_t *mph);
|
||||||
|
static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
|
||||||
|
bmz_config_data_t *bmz_config_new()
|
||||||
|
{
|
||||||
|
bmz_config_data_t *bmz = NULL;
|
||||||
|
bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));
|
||||||
|
assert(bmz);
|
||||||
|
memset(bmz, 0, sizeof(bmz_config_data_t));
|
||||||
|
bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
|
bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
|
bmz->g = NULL;
|
||||||
|
bmz->graph = NULL;
|
||||||
|
bmz->hashes = NULL;
|
||||||
|
return bmz;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Release the BMZ-specific configuration attached to mph. Only the
 * configuration struct itself is freed here. */
void bmz_config_destroy(cmph_config_t *mph)
{
	bmz_config_data_t *cfg = (bmz_config_data_t *)mph->data;

	DEBUGP("Destroying algorithm dependent data\n");
	free(cfg);
}
|
||||||
|
|
||||||
|
/* Copy up to two hash function selectors from hashfuncs into the BMZ
 * configuration. The input list is terminated by CMPH_HASH_COUNT; entries
 * beyond the second are ignored because bmz only uses two hash functions. */
void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	bmz_config_data_t *cfg = (bmz_config_data_t *)mph->data;
	cmph_uint32 slot;

	for (slot = 0; slot < 2 && hashfuncs[slot] != CMPH_HASH_COUNT; ++slot)
	{
		cfg->hashfuncs[slot] = hashfuncs[slot];
	}
}
|
||||||
|
|
||||||
|
/* Build a BMZ minimal perfect hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data must be a bmz_config_data_t
 * c    ratio of vertices to keys; 0 selects the default of 1.15
 *
 * Returns a newly allocated cmph_t that owns the g array and the hash states,
 * or NULL when no suitable graph could be generated within the retry limits
 * (100 hash-function draws per mapping step, 20 mapping restarts) or when an
 * allocation fails. On every NULL return the graph, the hash states and the
 * g array held by the configuration are released, so nothing is leaked.
 */
cmph_t *bmz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	bmz_data_t *bmzf = NULL;
	cmph_uint32 i;
	cmph_uint32 iterations;
	cmph_uint32 iterations_map = 20;
	cmph_uint8 *used_edges = NULL;
	cmph_uint8 restart_mapping = 0;
	cmph_uint8 *visited = NULL;
	bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;

	if (c == 0) c = 1.15; // validating restrictions over parameter c.
	DEBUGP("c: %f\n", c);
	bmz->m = mph->key_source->nkeys;
	bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
	DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
	bmz->graph = graph_new(bmz->n, bmz->m);
	if (bmz->graph == NULL) goto fail;
	DEBUGP("Created graph\n");

	bmz->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
	if (bmz->hashes == NULL) goto fail;
	for(i = 0; i < 3; ++i) bmz->hashes[i] = NULL;

	do
	{
		// Mapping step
		cmph_uint32 biggest_g_value = 0;
		cmph_uint32 biggest_edge_value = 1;
		iterations = 100;
		// The flag is only assigned when a critical vertex is traversed, so
		// clear it here or a previous failure would force another restart.
		restart_mapping = 0;
		// A restart after a failed searching step still holds the hash
		// states of that attempt; release them before drawing new ones.
		for (i = 0; i < 2; ++i)
		{
			if (bmz->hashes[i] != NULL)
			{
				hash_state_destroy(bmz->hashes[i]);
				bmz->hashes[i] = NULL;
			}
		}
		if (mph->verbosity)
		{
			fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz->m, bmz->n);
		}
		while(1)
		{
			int ok;
			DEBUGP("hash function 1\n");
			bmz->hashes[0] = hash_state_new(bmz->hashfuncs[0], bmz->n);
			DEBUGP("hash function 2\n");
			bmz->hashes[1] = hash_state_new(bmz->hashfuncs[1], bmz->n);
			DEBUGP("Generating edges\n");
			ok = bmz_gen_edges(mph);
			if (ok) break;

			--iterations;
			hash_state_destroy(bmz->hashes[0]);
			bmz->hashes[0] = NULL;
			hash_state_destroy(bmz->hashes[1]);
			bmz->hashes[1] = NULL;
			DEBUGP("%u iterations remaining\n", iterations);
			if (mph->verbosity)
			{
				fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
			}
			if (iterations == 0) break;
		}
		if (iterations == 0) goto fail;

		// Ordering step
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		graph_obtain_critical_nodes(bmz->graph);

		// Searching step
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting Searching step.\n");
			fprintf(stderr, "\tTraversing critical vertices.\n");
		}
		DEBUGP("Searching step\n");
		visited = (cmph_uint8 *)calloc((size_t)bmz->n/8 + 1, (size_t)1);
		used_edges = (cmph_uint8 *)calloc((size_t)bmz->m/8 + 1, (size_t)1);
		free(bmz->g);
		bmz->g = (cmph_uint32 *)calloc((size_t)bmz->n, sizeof(cmph_uint32));
		if (visited == NULL || used_edges == NULL || bmz->g == NULL) goto fail;

		for (i = 0; i < bmz->n; ++i) // critical nodes
		{
			if (graph_node_is_critical(bmz->graph, i) && (!GETBIT(visited,i)))
			{
				if(c > 1.14) restart_mapping = bmz_traverse_critical_nodes(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
				else restart_mapping = bmz_traverse_critical_nodes_heuristic(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
				if(restart_mapping) break;
			}
		}
		if(!restart_mapping)
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "\tTraversing non critical vertices.\n");
			}
			bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes
		}
		else
		{
			iterations_map--;
			if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
		}
		free(used_edges);
		used_edges = NULL;
		free(visited);
		visited = NULL;
	}while(restart_mapping && iterations_map > 0);

	graph_destroy(bmz->graph);
	bmz->graph = NULL;
	if (iterations_map == 0) goto fail;

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	bmzf = (bmz_data_t *)malloc(sizeof(bmz_data_t));
	if (mphf == NULL || bmzf == NULL) goto fail;

	mphf->algo = mph->algo;
	bmzf->g = bmz->g;
	bmz->g = NULL; //transfer memory ownership
	bmzf->hashes = bmz->hashes;
	bmz->hashes = NULL; //transfer memory ownership
	bmzf->n = bmz->n;
	bmzf->m = bmz->m;
	mphf->data = bmzf;
	mphf->size = bmz->m;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;

fail:
	// Release everything still owned by this call or by the configuration.
	free(used_edges);
	free(visited);
	free(bmzf);
	free(mphf);
	if (bmz->graph != NULL)
	{
		graph_destroy(bmz->graph);
		bmz->graph = NULL;
	}
	if (bmz->hashes != NULL)
	{
		for (i = 0; i < 2; ++i)
		{
			if (bmz->hashes[i] != NULL) hash_state_destroy(bmz->hashes[i]);
		}
		free(bmz->hashes);
		bmz->hashes = NULL;
	}
	free(bmz->g);
	bmz->g = NULL;
	return NULL;
}
|
||||||
|
|
||||||
|
/* Breadth-first labelling of the critical vertices reachable from v.
 *
 * The start vertex receives ceil(*biggest_edge_value / 2) - 1. Every other
 * critical, unvisited vertex reached gets the first candidate label
 * (*biggest_g_value + 1, advancing on each clash) for which none of the edges
 * to its already visited critical neighbours is marked in used_edges; those
 * edges are then marked and *biggest_edge_value is raised as needed.
 *
 * Returns 1 when a candidate edge value reaches bmz->m (the caller must
 * restart the mapping step), 0 when labelling completed.
 */
static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
	cmph_uint32 label = 0;
	cmph_uint32 nbr;   /* neighbour currently being labelled */
	cmph_uint32 peer;  /* lookahead vertex adjacent to nbr */
	cmph_uint32 edge;
	cmph_uint8 clash;
	vqueue_t *pending = vqueue_new((cmph_uint32)(graph_ncritical_nodes(bmz->graph)) + 1);
	graph_iterator_t nbr_it, peer_it;

	DEBUGP("Labelling critical vertices\n");
	bmz->g[v] = (cmph_uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
	SETBIT(visited, v);
	vqueue_insert(pending, v);

	while (!vqueue_is_empty(pending))
	{
		v = vqueue_remove(pending);
		nbr_it = graph_neighbors_it(bmz->graph, v);
		while ((nbr = graph_next_neighbor(bmz->graph, &nbr_it)) != GRAPH_NO_NEIGHBOR)
		{
			if (!graph_node_is_critical(bmz->graph, nbr) || GETBIT(visited, nbr)) continue;

			/* lookahead: try labels until no edge value collides */
			do
			{
				label = *biggest_g_value + 1;
				clash = 0;
				peer_it = graph_neighbors_it(bmz->graph, nbr);
				while ((peer = graph_next_neighbor(bmz->graph, &peer_it)) != GRAPH_NO_NEIGHBOR)
				{
					if (!graph_node_is_critical(bmz->graph, peer) || !GETBIT(visited, peer)) continue;

					edge = label + bmz->g[peer];
					if (edge >= bmz->m)
					{
						vqueue_destroy(pending);
						return 1; /* restart mapping step */
					}
					if (GETBIT(used_edges, edge))
					{
						clash = 1;
						break;
					}
				}
				if (label > *biggest_g_value) *biggest_g_value = label;
			} while (clash);

			/* mark the edge values consumed by the chosen label */
			peer_it = graph_neighbors_it(bmz->graph, nbr);
			while ((peer = graph_next_neighbor(bmz->graph, &peer_it)) != GRAPH_NO_NEIGHBOR)
			{
				if (graph_node_is_critical(bmz->graph, peer) && GETBIT(visited, peer))
				{
					edge = label + bmz->g[peer];
					SETBIT(used_edges, edge);
					if (edge > *biggest_edge_value) *biggest_edge_value = edge;
				}
			}

			bmz->g[nbr] = label;
			SETBIT(visited, nbr);
			vqueue_insert(pending, nbr);
		}
	}

	vqueue_destroy(pending);
	return 0;
}
|
||||||
|
|
||||||
|
/* Breadth-first labelling of the critical vertices reachable from v, reusing
 * label values that were skipped earlier.
 *
 * Works like bmz_traverse_critical_nodes, but candidate labels that collided
 * while exceeding *biggest_g_value are remembered in a local list and offered
 * first to later vertices before a fresh *biggest_g_value + 1 is tried.
 *
 * Returns 1 when the caller must restart the mapping step: either a candidate
 * edge value reached bmz->m, or the list of remembered labels could not be
 * grown (allocation failure). Returns 0 when labelling completed.
 */
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
	cmph_uint32 next_g = 0;
	cmph_uint32 u;   /* Auxiliary vertex */
	cmph_uint32 lav; /* lookahead vertex */
	cmph_uint32 edge;
	cmph_uint8 collision;
	cmph_uint32 * unused_g_values = NULL;
	cmph_uint32 unused_g_values_capacity = 0;
	cmph_uint32 nunused_g_values = 0;
	vqueue_t * q = vqueue_new((cmph_uint32)(0.5*graph_ncritical_nodes(bmz->graph))+1);
	graph_iterator_t it, it1;

	DEBUGP("Labelling critical vertices\n");
	bmz->g[v] = (cmph_uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
	SETBIT(visited, v);
	vqueue_insert(q, v);
	while(!vqueue_is_empty(q))
	{
		v = vqueue_remove(q);
		it = graph_neighbors_it(bmz->graph, v);
		while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
		{
			cmph_uint32 next_g_index = 0;

			if (!graph_node_is_critical(bmz->graph, u) || GETBIT(visited,u)) continue;

			do // lookahead to resolve collisions
			{
				if (next_g_index < nunused_g_values)
				{
					next_g = unused_g_values[next_g_index++];
				}
				else
				{
					next_g = *biggest_g_value + 1;
					next_g_index = UINT_MAX; // marks "not taken from the list"
				}
				it1 = graph_neighbors_it(bmz->graph, u);
				collision = 0;
				while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
				{
					if (!graph_node_is_critical(bmz->graph, lav) || !GETBIT(visited,lav)) continue;

					edge = next_g + bmz->g[lav];
					if (edge >= bmz->m)
					{
						vqueue_destroy(q);
						free(unused_g_values);
						return 1; // restart mapping step.
					}
					if (GETBIT(used_edges, edge))
					{
						collision = 1;
						break;
					}
				}
				if(collision && (next_g > *biggest_g_value)) // saving the current g value stored in next_g.
				{
					if(nunused_g_values == unused_g_values_capacity)
					{
						// grow through a temporary so the list is not lost on failure
						cmph_uint32 *grown = (cmph_uint32 *)realloc(unused_g_values, ((size_t)unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32));
						if (grown == NULL)
						{
							vqueue_destroy(q);
							free(unused_g_values);
							return 1; // out of memory: ask for a restart
						}
						unused_g_values = grown;
						unused_g_values_capacity += BUFSIZ;
					}
					unused_g_values[nunused_g_values++] = next_g;
				}
				if (next_g > *biggest_g_value) *biggest_g_value = next_g;
			} while (collision);

			// If the label came from the list, drop it by moving the last entry into its place.
			next_g_index--;
			if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];

			// Marking used edges...
			it1 = graph_neighbors_it(bmz->graph, u);
			while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
			{
				if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
				{
					edge = next_g + bmz->g[lav];
					SETBIT(used_edges, edge);
					if(edge > *biggest_edge_value) *biggest_edge_value = edge;
				}
			}
			bmz->g[u] = next_g; // Labelling vertex u.
			SETBIT(visited, u);
			vqueue_insert(q, u);
		}
	}
	vqueue_destroy(q);
	free(unused_g_values);
	return 0;
}
|
||||||
|
|
||||||
|
/* Return the first index at or after unused_edge_index whose bit in
 * used_edges is clear. Asserts that the scan stays below bmz->m. */
static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
{
	for (;;)
	{
		assert(unused_edge_index < bmz->m);
		if (!GETBIT(used_edges, unused_edge_index)) return unused_edge_index;
		++unused_edge_index;
	}
}
|
||||||
|
|
||||||
|
/* Depth-first labelling starting from the already labelled vertex v.
 *
 * Each unvisited neighbour is assigned the next unused edge value minus
 * g[v], marked visited, and then explored recursively. *unused_edge_index is
 * advanced past every edge value handed out.
 */
static void bmz_traverse(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 v, cmph_uint32 * unused_edge_index, cmph_uint8 * visited)
{
	graph_iterator_t it = graph_neighbors_it(bmz->graph, v);
	cmph_uint32 nbr;

	for (nbr = graph_next_neighbor(bmz->graph, &it);
	     nbr != GRAPH_NO_NEIGHBOR;
	     nbr = graph_next_neighbor(bmz->graph, &it))
	{
		if (!GETBIT(visited, nbr))
		{
			cmph_uint32 edge = next_unused_edge(bmz, used_edges, *unused_edge_index);

			*unused_edge_index = edge;
			bmz->g[nbr] = edge - bmz->g[v];
			SETBIT(visited, nbr);
			*unused_edge_index = edge + 1;
			bmz_traverse(bmz, used_edges, nbr, unused_edge_index, visited);
		}
	}
}
|
||||||
|
|
||||||
|
/* Label every vertex that the critical-vertex pass left unvisited.
 *
 * First pass: for each edge with exactly one visited endpoint, traverse from
 * the visited endpoint. Second pass: any vertex still unvisited gets g = 0,
 * is marked visited and is used as a new traversal root.
 */
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
	cmph_uint32 next_edge = 0;
	cmph_uint32 idx;

	DEBUGP("Labelling non critical vertices\n");
	for (idx = 0; idx < bmz->m; idx++)
	{
		cmph_uint32 a = graph_vertex_id(bmz->graph, idx, 0);
		cmph_uint32 b = graph_vertex_id(bmz->graph, idx, 1);
		int a_seen = GETBIT(visited, a) != 0;
		int b_seen = GETBIT(visited, b) != 0;

		/* both endpoints done, or neither reachable yet: nothing to do */
		if (a_seen == b_seen) continue;
		bmz_traverse(bmz, used_edges, a_seen ? a : b, &next_edge, visited);
	}

	for (idx = 0; idx < bmz->n; idx++)
	{
		if (GETBIT(visited, idx)) continue;
		bmz->g[idx] = 0;
		SETBIT(visited, idx);
		bmz_traverse(bmz, used_edges, idx, &next_edge, visited);
	}
}
|
||||||
|
|
||||||
|
/* Rebuild the graph edges from the key source using the two current hash
 * functions: each key contributes the edge (h1 mod n, h2 mod n).
 *
 * Returns 1 when every key produced a distinct, non-loop edge; returns 0 as
 * soon as a self loop or a repeated edge is found.
 */
static int bmz_gen_edges(cmph_config_t *mph)
{
	bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
	cmph_uint32 idx;

	DEBUGP("Generating edges for %u vertices\n", bmz->n);
	graph_clear_edges(bmz->graph);
	mph->key_source->rewind(mph->key_source->data);
	for (idx = 0; idx < mph->key_source->nkeys; ++idx)
	{
		char *key = NULL;
		cmph_uint32 keylen;
		cmph_uint32 h1, h2;
		cmph_uint8 duplicate;

		mph->key_source->read(mph->key_source->data, &key, &keylen);
		h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
		h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
		/* equal endpoints: move h2 to the next vertex, wrapping at n */
		if (h1 == h2 && ++h2 >= bmz->n) h2 = 0;
		if (h1 == h2)
		{
			if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", idx);
			mph->key_source->dispose(mph->key_source->data, key, keylen);
			return 0;
		}
		mph->key_source->dispose(mph->key_source->data, key, keylen);

		duplicate = graph_contains_edge(bmz->graph, h1, h2);
		if (duplicate)
		{
			if (mph->verbosity) fprintf(stderr, "A non simple graph was generated\n");
			return 0; /* multiple edges are not allowed */
		}
		graph_add_edge(bmz->graph, h1, h2);
	}
	return 1;
}
|
||||||
|
|
||||||
|
int bmz_dump(cmph_t *mphf, FILE *fd)
|
||||||
|
{
|
||||||
|
char *buf = NULL;
|
||||||
|
cmph_uint32 buflen;
|
||||||
|
cmph_uint32 two = 2; //number of hash functions
|
||||||
|
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||||
|
register size_t nbytes;
|
||||||
|
__cmph_dump(mphf, fd);
|
||||||
|
|
||||||
|
nbytes = fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
|
||||||
|
hash_state_dump(data->hashes[0], &buf, &buflen);
|
||||||
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
|
nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
|
||||||
|
free(buf);
|
||||||
|
|
||||||
|
hash_state_dump(data->hashes[1], &buf, &buflen);
|
||||||
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
|
nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
|
||||||
|
free(buf);
|
||||||
|
|
||||||
|
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
|
||||||
|
nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->n), (size_t)1, fd);
|
||||||
|
#ifdef DEBUG
|
||||||
|
cmph_uint32 i;
|
||||||
|
fprintf(stderr, "G: ");
|
||||||
|
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
#endif
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read a BMZ function from f into mphf->data.
 *
 * Expects the layout produced by bmz_dump after the common header: hash
 * count, each hash state as length + bytes, n, m, then n entries of g.
 *
 * NOTE(review): read and allocation results are not checked and the counts
 * taken from the file are used as-is; the void return offers no way to report
 * a short or corrupt file. Callers should only pass trusted input.
 */
void bmz_load(FILE *f, cmph_t *mphf)
{
	cmph_uint32 nhashes;
	cmph_uint32 i;
	size_t nread;
	bmz_data_t *bmz = (bmz_data_t *)malloc(sizeof(bmz_data_t));

	DEBUGP("Loading bmz mphf\n");
	mphf->data = bmz;
	nread = fread(&nhashes, sizeof(cmph_uint32), (size_t)1, f);
	/* one extra slot holds the NULL terminator */
	bmz->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
	bmz->hashes[nhashes] = NULL;
	DEBUGP("Reading %u hashes\n", nhashes);
	for (i = 0; i < nhashes; ++i)
	{
		cmph_uint32 statelen;
		char *raw;

		nread = fread(&statelen, sizeof(cmph_uint32), (size_t)1, f);
		DEBUGP("Hash state has %u bytes\n", statelen);
		raw = (char *)malloc((size_t)statelen);
		nread = fread(raw, (size_t)statelen, (size_t)1, f);
		bmz->hashes[i] = hash_state_load(raw, statelen);
		free(raw);
	}

	DEBUGP("Reading m and n\n");
	nread = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f);
	nread = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f);

	bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n);
	nread = fread(bmz->g, bmz->n*sizeof(cmph_uint32), (size_t)1, f);
	#ifdef DEBUG
	fprintf(stderr, "G: ");
	for (i = 0; i < bmz->n; ++i) fprintf(stderr, "%u ", bmz->g[i]);
	fprintf(stderr, "\n");
	#endif
	return;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Evaluate the BMZ function for key.
 *
 * mphf    function built by bmz_new or read by bmz_load
 * key     key bytes
 * keylen  key length in bytes
 *
 * Returns g[h1] + g[h2], where h1 and h2 are the two hash values reduced
 * modulo n. When both hashes land on the same vertex, h2 is moved to the next
 * vertex and wraps to 0 at n, exactly as bmz_gen_edges does when the graph is
 * built, so lookups address the same edge that construction labelled and
 * never index past the end of g.
 */
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	bmz_data_t *bmz = mphf->data;
	cmph_uint32 h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
	cmph_uint32 h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
	if (h1 == h2 && ++h2 >= bmz->n) h2 = 0;
	DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
	return bmz->g[h1] + bmz->g[h2];
}
|
||||||
|
/* Free a BMZ function: its g array, both hash states, the hash state array,
 * the BMZ data and finally the cmph_t wrapper itself. */
void bmz_destroy(cmph_t *mphf)
{
	bmz_data_t *bmz = (bmz_data_t *)mphf->data;
	hash_state_t **hashes = bmz->hashes;

	free(bmz->g);
	hash_state_destroy(hashes[0]);
	hash_state_destroy(hashes[1]);
	free(hashes);
	free(bmz);
	free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
|
||||||
|
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1 type
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||||
|
*((cmph_uint32 *) ptr) = h1_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->hashes[0], ptr);
|
||||||
|
ptr += hash_state_packed_size(h1_type);
|
||||||
|
|
||||||
|
// packing h2 type
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||||
|
*((cmph_uint32 *) ptr) = h2_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->hashes[1], ptr);
|
||||||
|
ptr += hash_state_packed_size(h2_type);
|
||||||
|
|
||||||
|
// packing n
|
||||||
|
*((cmph_uint32 *) ptr) = data->n;
|
||||||
|
ptr += sizeof(data->n);
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||||
|
|
||||||
|
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||||
|
3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint8 *h1_ptr = packed_mphf;
|
||||||
|
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
|
||||||
|
h1_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||||
|
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||||
|
h2_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||||
|
|
||||||
|
register cmph_uint32 n = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||||
|
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||||
|
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||||
|
return (g_ptr[h1] + g_ptr[h2]);
|
||||||
|
}
|
42
cmph/bmz.h
Normal file
42
cmph/bmz.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef __CMPH_BMZ_H__
|
||||||
|
#define __CMPH_BMZ_H__
|
||||||
|
|
||||||
|
#include "cmph.h"
|
||||||
|
|
||||||
|
typedef struct __bmz_data_t bmz_data_t;
|
||||||
|
typedef struct __bmz_config_data_t bmz_config_data_t;
|
||||||
|
|
||||||
|
bmz_config_data_t *bmz_config_new();
|
||||||
|
void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||||
|
void bmz_config_destroy(cmph_config_t *mph);
|
||||||
|
cmph_t *bmz_new(cmph_config_t *mph, double c);
|
||||||
|
|
||||||
|
void bmz_load(FILE *f, cmph_t *mphf);
|
||||||
|
int bmz_dump(cmph_t *mphf, FILE *f);
|
||||||
|
void bmz_destroy(cmph_t *mphf);
|
||||||
|
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
#endif
|
632
cmph/bmz8.c
Normal file
632
cmph/bmz8.c
Normal file
@ -0,0 +1,632 @@
|
|||||||
|
#include "graph.h"
|
||||||
|
#include "bmz8.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "bmz8_structs.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "vqueue.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
static int bmz8_gen_edges(cmph_config_t *mph);
|
||||||
|
static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
|
||||||
|
bmz8_config_data_t *bmz8_config_new()
|
||||||
|
{
|
||||||
|
bmz8_config_data_t *bmz8;
|
||||||
|
bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t));
|
||||||
|
assert(bmz8);
|
||||||
|
memset(bmz8, 0, sizeof(bmz8_config_data_t));
|
||||||
|
bmz8->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
|
bmz8->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
|
bmz8->g = NULL;
|
||||||
|
bmz8->graph = NULL;
|
||||||
|
bmz8->hashes = NULL;
|
||||||
|
return bmz8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Free the BMZ8 configuration stored in mph->data.
 *
 *  Only the configuration structure itself is released here; the pointers it
 *  holds are not followed.
 */
void bmz8_config_destroy(cmph_config_t *mph)
{
	bmz8_config_data_t *cfg = (bmz8_config_data_t *)mph->data;

	DEBUGP("Destroying algorithm dependent data\n");
	free(cfg);
}
|
||||||
|
|
||||||
|
/** \brief Select the hash functions used by BMZ8.
 *
 *  Copies entries from hashfuncs until a CMPH_HASH_COUNT terminator is met or
 *  two entries have been copied, whichever comes first. Slots that are not
 *  reached keep their previous value.
 *
 *  \param mph configuration whose data is a bmz8_config_data_t
 *  \param hashfuncs list of hash types, terminated by CMPH_HASH_COUNT
 */
void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
	cmph_uint8 i;

	// The slot limit is tested before the element is read, so a caller that
	// passes exactly two entries without a terminator is not read past its end.
	for (i = 0; i < 2 && hashfuncs[i] != CMPH_HASH_COUNT; ++i)
	{
		bmz8->hashfuncs[i] = hashfuncs[i]; //bmz8 only uses two hash functions
	}
}
|
||||||
|
|
||||||
|
cmph_t *bmz8_new(cmph_config_t *mph, double c)
|
||||||
|
{
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
bmz8_data_t *bmz8f = NULL;
|
||||||
|
cmph_uint8 i;
|
||||||
|
cmph_uint8 iterations;
|
||||||
|
cmph_uint8 iterations_map = 20;
|
||||||
|
cmph_uint8 *used_edges = NULL;
|
||||||
|
cmph_uint8 restart_mapping = 0;
|
||||||
|
cmph_uint8 * visited = NULL;
|
||||||
|
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
|
||||||
|
|
||||||
|
if (mph->key_source->nkeys >= 256)
|
||||||
|
{
|
||||||
|
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (c == 0) c = 1.15; // validating restrictions over parameter c.
|
||||||
|
DEBUGP("c: %f\n", c);
|
||||||
|
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
|
||||||
|
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
|
||||||
|
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
|
||||||
|
bmz8->graph = graph_new(bmz8->n, bmz8->m);
|
||||||
|
DEBUGP("Created graph\n");
|
||||||
|
|
||||||
|
bmz8->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
|
||||||
|
for(i = 0; i < 3; ++i) bmz8->hashes[i] = NULL;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// Mapping step
|
||||||
|
cmph_uint8 biggest_g_value = 0;
|
||||||
|
cmph_uint8 biggest_edge_value = 1;
|
||||||
|
iterations = 100;
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz8->m, bmz8->n);
|
||||||
|
}
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
int ok;
|
||||||
|
DEBUGP("hash function 1\n");
|
||||||
|
bmz8->hashes[0] = hash_state_new(bmz8->hashfuncs[0], bmz8->n);
|
||||||
|
DEBUGP("hash function 2\n");
|
||||||
|
bmz8->hashes[1] = hash_state_new(bmz8->hashfuncs[1], bmz8->n);
|
||||||
|
DEBUGP("Generating edges\n");
|
||||||
|
ok = bmz8_gen_edges(mph);
|
||||||
|
if (!ok)
|
||||||
|
{
|
||||||
|
--iterations;
|
||||||
|
hash_state_destroy(bmz8->hashes[0]);
|
||||||
|
bmz8->hashes[0] = NULL;
|
||||||
|
hash_state_destroy(bmz8->hashes[1]);
|
||||||
|
bmz8->hashes[1] = NULL;
|
||||||
|
DEBUGP("%u iterations remaining\n", iterations);
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
|
||||||
|
}
|
||||||
|
if (iterations == 0) break;
|
||||||
|
}
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
if (iterations == 0)
|
||||||
|
{
|
||||||
|
graph_destroy(bmz8->graph);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ordering step
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Starting ordering step\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
graph_obtain_critical_nodes(bmz8->graph);
|
||||||
|
|
||||||
|
// Searching step
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Starting Searching step.\n");
|
||||||
|
fprintf(stderr, "\tTraversing critical vertices.\n");
|
||||||
|
}
|
||||||
|
DEBUGP("Searching step\n");
|
||||||
|
visited = (cmph_uint8 *)malloc((size_t)bmz8->n/8 + 1);
|
||||||
|
memset(visited, 0, (size_t)bmz8->n/8 + 1);
|
||||||
|
used_edges = (cmph_uint8 *)malloc((size_t)bmz8->m/8 + 1);
|
||||||
|
memset(used_edges, 0, (size_t)bmz8->m/8 + 1);
|
||||||
|
free(bmz8->g);
|
||||||
|
bmz8->g = (cmph_uint8 *)calloc((size_t)bmz8->n, sizeof(cmph_uint8));
|
||||||
|
assert(bmz8->g);
|
||||||
|
for (i = 0; i < bmz8->n; ++i) // critical nodes
|
||||||
|
{
|
||||||
|
if (graph_node_is_critical(bmz8->graph, i) && (!GETBIT(visited,i)))
|
||||||
|
{
|
||||||
|
if(c > 1.14) restart_mapping = bmz8_traverse_critical_nodes(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
|
||||||
|
else restart_mapping = bmz8_traverse_critical_nodes_heuristic(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
|
||||||
|
if(restart_mapping) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!restart_mapping)
|
||||||
|
{
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "\tTraversing non critical vertices.\n");
|
||||||
|
}
|
||||||
|
bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
iterations_map--;
|
||||||
|
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(used_edges);
|
||||||
|
free(visited);
|
||||||
|
|
||||||
|
}while(restart_mapping && iterations_map > 0);
|
||||||
|
graph_destroy(bmz8->graph);
|
||||||
|
bmz8->graph = NULL;
|
||||||
|
if (iterations_map == 0)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||||
|
mphf->algo = mph->algo;
|
||||||
|
bmz8f = (bmz8_data_t *)malloc(sizeof(bmz8_data_t));
|
||||||
|
bmz8f->g = bmz8->g;
|
||||||
|
bmz8->g = NULL; //transfer memory ownership
|
||||||
|
bmz8f->hashes = bmz8->hashes;
|
||||||
|
bmz8->hashes = NULL; //transfer memory ownership
|
||||||
|
bmz8f->n = bmz8->n;
|
||||||
|
bmz8f->m = bmz8->m;
|
||||||
|
mphf->data = bmz8f;
|
||||||
|
mphf->size = bmz8->m;
|
||||||
|
DEBUGP("Successfully generated minimal perfect hash\n");
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||||
|
}
|
||||||
|
return mphf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Label the critical vertices reachable from v, breadth first.
 *
 *  Each newly reached critical vertex gets the value *biggest_g_value + 1,
 *  retried with the next value for as long as one of the sums with an already
 *  labelled critical neighbour is marked in used_edges. The sums produced by
 *  the accepted value are then marked as used.
 *
 *  \param bmz8 configuration holding the graph, g and m
 *  \param v starting vertex
 *  \param biggest_g_value in/out: largest vertex value tried so far
 *  \param biggest_edge_value in/out: largest edge sum assigned so far
 *  \param used_edges bitmap of edge sums already taken
 *  \param visited bitmap of vertices already labelled
 *  \return 1 when a candidate sum reaches m (the caller must restart the
 *          mapping step), 0 otherwise
 */
static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
	cmph_uint8 label;
	cmph_uint8 clash;
	cmph_uint32 cand; /* neighbour being labelled */
	cmph_uint32 peer; /* neighbour of cand examined during lookahead */
	graph_iterator_t outer, inner;
	vqueue_t *pending = vqueue_new((cmph_uint32)(graph_ncritical_nodes(bmz8->graph)));

	DEBUGP("Labelling critical vertices\n");
	bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
	SETBIT(visited, v);
	vqueue_insert(pending, v);

	while (!vqueue_is_empty(pending))
	{
		v = vqueue_remove(pending);
		outer = graph_neighbors_it(bmz8->graph, v);
		for (;;)
		{
			cand = graph_next_neighbor(bmz8->graph, &outer);
			if (cand == GRAPH_NO_NEIGHBOR) break;

			// only unlabelled critical vertices are handled here
			if (!graph_node_is_critical(bmz8->graph, cand)) continue;
			if (GETBIT(visited,cand)) continue;

			// lookahead: keep trying values until no sum is already taken
			do
			{
				label = (cmph_uint8)(*biggest_g_value + 1);
				clash = 0;
				inner = graph_neighbors_it(bmz8->graph, cand);
				while ((peer = graph_next_neighbor(bmz8->graph, &inner)) != GRAPH_NO_NEIGHBOR)
				{
					if (!(graph_node_is_critical(bmz8->graph, peer) && GETBIT(visited,peer))) continue;

					if (label + bmz8->g[peer] >= bmz8->m)
					{
						vqueue_destroy(pending);
						return 1; // restart mapping step.
					}
					if (GETBIT(used_edges, (label + bmz8->g[peer])))
					{
						clash = 1;
						break;
					}
				}
				if (label > *biggest_g_value) *biggest_g_value = label;
			} while (clash);

			// mark every sum the accepted value produces
			inner = graph_neighbors_it(bmz8->graph, cand);
			while ((peer = graph_next_neighbor(bmz8->graph, &inner)) != GRAPH_NO_NEIGHBOR)
			{
				if (!(graph_node_is_critical(bmz8->graph, peer) && GETBIT(visited, peer))) continue;

				SETBIT(used_edges,(label + bmz8->g[peer]));
				if (label + bmz8->g[peer] > *biggest_edge_value)
				{
					*biggest_edge_value = (cmph_uint8)(label + bmz8->g[peer]);
				}
			}

			bmz8->g[cand] = label;
			SETBIT(visited,cand);
			vqueue_insert(pending, cand);
		}
	}

	vqueue_destroy(pending);
	return 0;
}
|
||||||
|
|
||||||
|
/** \brief Variant of bmz8_traverse_critical_nodes() that recycles rejected values.
 *
 *  Works like the plain traversal, but a freshly generated value that collides
 *  is remembered in a small list, and later vertices try the remembered
 *  values before generating a new one. A remembered value that is finally
 *  accepted is removed from the list.
 *
 *  \param bmz8 configuration holding the graph, g and m
 *  \param v starting vertex
 *  \param biggest_g_value in/out: largest vertex value generated so far
 *  \param biggest_edge_value in/out: largest edge sum assigned so far
 *  \param used_edges bitmap of edge sums already taken
 *  \param visited bitmap of vertices already labelled
 *  \return 1 when a candidate sum reaches m (the caller must restart the
 *          mapping step), 0 otherwise
 */
static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
	cmph_uint8 next_g = 0;
	cmph_uint32 u;
	cmph_uint32 lav;
	cmph_uint8 collision;
	// The list is indexed by an 8-bit count, so 256 slots always suffice.
	// A fixed buffer replaces the former realloc() growth, whose 8-bit
	// capacity counter was increased by (cmph_uint8)BUFSIZ - zero whenever
	// BUFSIZ is a multiple of 256 - and whose result was never checked.
	cmph_uint8 unused_g_values[256];
	cmph_uint8 nunused_g_values = 0;
	vqueue_t * q = vqueue_new((cmph_uint32)(graph_ncritical_nodes(bmz8->graph)));
	graph_iterator_t it, it1;

	DEBUGP("Labelling critical vertices\n");
	bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
	SETBIT(visited, v);
	vqueue_insert(q, v);
	while(!vqueue_is_empty(q))
	{
		v = vqueue_remove(q);
		it = graph_neighbors_it(bmz8->graph, v);
		while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
		{
			if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
			{
				cmph_uint8 next_g_index = 0;
				collision = 1;
				while(collision) // lookahead to resolve collisions
				{
					if (next_g_index < nunused_g_values)
					{
						// try the remembered values first
						next_g = unused_g_values[next_g_index++];
					}
					else
					{
						next_g = (cmph_uint8)(*biggest_g_value + 1);
						next_g_index = 255; // marks "value did not come from the list"
					}
					it1 = graph_neighbors_it(bmz8->graph, u);
					collision = 0;
					while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
					{
						if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited,lav))
						{
							if(next_g + bmz8->g[lav] >= bmz8->m)
							{
								vqueue_destroy(q);
								return 1; // restart mapping step.
							}
							if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
							{
								collision = 1;
								break;
							}
						}
					}
					if(collision && (next_g > *biggest_g_value)) // saving the current g value stored in next_g.
					{
						unused_g_values[nunused_g_values++] = next_g;
					}
					if (next_g > *biggest_g_value) *biggest_g_value = next_g;
				}

				// If the accepted value came from the list, drop it by moving
				// the last remembered value into its slot.
				next_g_index--;
				if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];

				// Marking used edges...
				it1 = graph_neighbors_it(bmz8->graph, u);
				while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
				{
					if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav))
					{
						SETBIT(used_edges,(next_g + bmz8->g[lav]));
						if(next_g + bmz8->g[lav] > *biggest_edge_value)
							*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
					}
				}

				bmz8->g[u] = next_g; // Labelling vertex u.
				SETBIT(visited, u);
				vqueue_insert(q, u);
			}
		}
	}
	vqueue_destroy(q);
	return 0;
}
|
||||||
|
|
||||||
|
/** \brief Return the first index at or after unused_edge_index whose bit is
 *         clear in used_edges.
 *
 *  Every index examined is asserted to be below bmz8->m.
 */
static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
{
	for (;;)
	{
		assert(unused_edge_index < bmz8->m);
		if (!GETBIT(used_edges, unused_edge_index)) break;
		unused_edge_index ++;
	}
	return (cmph_uint8)unused_edge_index;
}
|
||||||
|
|
||||||
|
/** \brief Depth-first labelling of the unvisited vertices reachable from v.
 *
 *  For each unvisited neighbour the next free edge index is taken and the
 *  neighbour's value is set to that index minus g[v] (in 8-bit arithmetic),
 *  then the walk recurses into the neighbour.
 *
 *  \param bmz8 configuration holding the graph and g
 *  \param used_edges bitmap of edge indexes already taken
 *  \param v vertex whose neighbours are labelled; g[v] must already be set
 *  \param unused_edge_index in/out: position from which free indexes are searched
 *  \param visited bitmap of vertices already labelled
 */
static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 v, cmph_uint8 * unused_edge_index, cmph_uint8 * visited)
{
	graph_iterator_t walk = graph_neighbors_it(bmz8->graph, v);
	cmph_uint32 next;

	for (next = graph_next_neighbor(bmz8->graph, &walk);
	     next != GRAPH_NO_NEIGHBOR;
	     next = graph_next_neighbor(bmz8->graph, &walk))
	{
		if (!GETBIT(visited,next))
		{
			*unused_edge_index = next_unused_edge(bmz8, used_edges, *unused_edge_index);
			bmz8->g[next] = (cmph_uint8)(*unused_edge_index - bmz8->g[v]);
			SETBIT(visited, next);
			// the index just consumed is skipped for the following edges
			++*unused_edge_index;
			bmz8_traverse(bmz8, used_edges, next, unused_edge_index, visited);
		}
	}
}
|
||||||
|
|
||||||
|
/** \brief Label every vertex that the critical traversal left unvisited.
 *
 *  First pass: for each edge with exactly one visited endpoint, continue the
 *  labelling from the visited endpoint. Second pass: every vertex still
 *  unvisited gets value 0 and becomes the root of a new traversal.
 *
 *  \param bmz8 configuration holding the graph, g, m and n
 *  \param used_edges bitmap of edge indexes already taken
 *  \param visited bitmap of vertices already labelled
 */
static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
	cmph_uint8 idx;
	cmph_uint8 free_edge = 0;

	DEBUGP("Labelling non critical vertices\n");

	for (idx = 0; idx < bmz8->m; idx++)
	{
		cmph_uint8 a = (cmph_uint8)graph_vertex_id(bmz8->graph, idx, 0);
		cmph_uint8 b = (cmph_uint8)graph_vertex_id(bmz8->graph, idx, 1);
		int a_seen = GETBIT(visited,a) ? 1 : 0;
		int b_seen = GETBIT(visited,b) ? 1 : 0;

		// both labelled or both unlabelled: nothing to extend from this edge
		if (a_seen == b_seen) continue;

		if (a_seen) bmz8_traverse(bmz8, used_edges, a, &free_edge, visited);
		else bmz8_traverse(bmz8, used_edges, b, &free_edge, visited);
	}

	for (idx = 0; idx < bmz8->n; idx++)
	{
		if (GETBIT(visited,idx)) continue;

		bmz8->g[idx] = 0;
		SETBIT(visited, idx);
		bmz8_traverse(bmz8, used_edges, idx, &free_edge, visited);
	}
}
|
||||||
|
|
||||||
|
/** \brief Rebuild the graph: one edge (h1, h2) per key of the key source.
 *
 *  The edges are cleared and the key source rewound first. When both hashes
 *  of a key agree, h2 is moved to the next vertex (wrapping to 0 at n).
 *
 *  \param mph configuration; mph->data must be a bmz8_config_data_t with
 *             hashes[0] and hashes[1] already created
 *  \return 1 when every key produced a new edge, 0 as soon as a key yields a
 *          self loop or an edge that is already present
 */
static int bmz8_gen_edges(cmph_config_t *mph)
{
	bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
	cmph_uint8 e;

	DEBUGP("Generating edges for %u vertices\n", bmz8->n);
	graph_clear_edges(bmz8->graph);
	mph->key_source->rewind(mph->key_source->data);

	for (e = 0; e < mph->key_source->nkeys; ++e)
	{
		char *key = NULL;
		cmph_uint32 keylen;
		cmph_uint8 h1, h2;
		cmph_uint8 duplicate;

		mph->key_source->read(mph->key_source->data, &key, &keylen);

		h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
		h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);

		// separate equal endpoints by stepping h2 to the following vertex
		if (h1 == h2 && ++h2 >= bmz8->n) h2 = 0;

		if (h1 == h2)
		{
			if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
			mph->key_source->dispose(mph->key_source->data, key, keylen);
			return 0;
		}
		mph->key_source->dispose(mph->key_source->data, key, keylen);

		duplicate = graph_contains_edge(bmz8->graph, h1, h2);
		if (duplicate)
		{
			if (mph->verbosity) fprintf(stderr, "A non simple graph was generated\n");
			return 0; // checking multiple edge restriction.
		}
		graph_add_edge(bmz8->graph, h1, h2);
	}
	return 1;
}
|
||||||
|
|
||||||
|
/** \brief Write a BMZ8 function to a stream.
 *
 *  After the data emitted by __cmph_dump(), the following is written: one
 *  byte with the number of hash functions (2); for each hash function a
 *  32-bit length followed by that many bytes of dumped state; one byte n;
 *  one byte m; the n bytes of g.
 *
 *  \param mphf function to dump; mphf->data must be a bmz8_data_t
 *  \param fd destination stream
 *  \return 1 on success, 0 when the stream's error indicator is set after writing
 */
int bmz8_dump(cmph_t *mphf, FILE *fd)
{
	char *buf = NULL;
	cmph_uint32 buflen;
	cmph_uint8 two = 2; //number of hash functions
	cmph_uint8 k;
	bmz8_data_t *data = (bmz8_data_t *)mphf->data;

	__cmph_dump(mphf, fd);

	(void)fwrite(&two, sizeof(cmph_uint8), (size_t)1, fd);

	for (k = 0; k < two; ++k)
	{
		hash_state_dump(data->hashes[k], &buf, &buflen);
		DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
		(void)fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
		(void)fwrite(buf, (size_t)buflen, (size_t)1, fd);
		free(buf);
		buf = NULL;
	}

	(void)fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd);
	(void)fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd);

	(void)fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd);

	// A zero-sized item makes fwrite() return 0 without any failure, so the
	// stream's error indicator is checked instead of the individual counts.
	return ferror(fd) ? 0 : 1;
}
|
||||||
|
|
||||||
|
/** \brief Read a BMZ8 function from a stream into mphf->data.
 *
 *  Reads, in order: one byte with the number of hash functions; for each of
 *  them a 32-bit length and that many bytes, which are handed to
 *  hash_state_load(); one byte n; one byte m; n bytes of g. The hash pointer
 *  array gets one extra slot, set to NULL.
 *
 *  \param f source stream
 *  \param mphf function whose data pointer receives the new bmz8_data_t
 */
void bmz8_load(FILE *f, cmph_t *mphf)
{
	cmph_uint8 nhashes;
	cmph_uint8 k;
	cmph_uint32 statelen;
	char *statebuf = NULL;
	register size_t nbytes;
	bmz8_data_t *bmz8 = (bmz8_data_t *)malloc(sizeof(bmz8_data_t));

	DEBUGP("Loading bmz8 mphf\n");
	mphf->data = bmz8;

	nbytes = fread(&nhashes, sizeof(cmph_uint8), (size_t)1, f);
	bmz8->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(size_t)(nhashes + 1));
	bmz8->hashes[nhashes] = NULL;
	DEBUGP("Reading %u hashes\n", nhashes);

	for (k = 0; k < nhashes; ++k)
	{
		nbytes = fread(&statelen, sizeof(cmph_uint32), (size_t)1, f);
		DEBUGP("Hash state has %u bytes\n", statelen);
		statebuf = (char *)malloc((size_t)statelen);
		nbytes = fread(statebuf, (size_t)statelen, (size_t)1, f);
		bmz8->hashes[k] = hash_state_load(statebuf, statelen);
		free(statebuf);
	}

	DEBUGP("Reading m and n\n");
	nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
	nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);

	bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n);
	nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f);
#ifdef DEBUG
	fprintf(stderr, "G: ");
	for (k = 0; k < bmz8->n; ++k) fprintf(stderr, "%u ", bmz8->g[k]);
	fprintf(stderr, "\n");
#endif
	return;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
 *  \brief Evaluate the function for a key.
 *  \param mphf pointer to the function; mphf->data must be a bmz8_data_t
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return g[h1] + g[h2] reduced to 8 bits, where h1 and h2 are the two hash
 *          values modulo n
 */
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	bmz8_data_t *bmz8 = mphf->data;
	cmph_uint8 h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
	cmph_uint8 h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);
	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
	// Same adjustment as bmz8_gen_edges(): equal endpoints move h2 to the next
	// vertex and wrap to 0 on reaching n. With > instead of >= the wrap never
	// happened and h2 == n indexed one element past the end of g.
	if (h1 == h2 && ++h2 >= bmz8->n) h2 = 0;
	DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz8->g[h1], bmz8->g[h2], bmz8->m);
	return (cmph_uint8)(bmz8->g[h1] + bmz8->g[h2]);
}
|
||||||
|
/** \fn void bmz8_destroy(cmph_t *mphf);
 *  \brief Release the g array, both hash states, the hash pointer array, the
 *         algorithm data and finally the cmph_t wrapper itself.
 *  \param mphf function to destroy; must not be used afterwards
 */
void bmz8_destroy(cmph_t *mphf)
{
	bmz8_data_t *bmz8 = (bmz8_data_t *)mphf->data;
	int k;

	free(bmz8->g);
	for (k = 0; k < 2; ++k)
	{
		hash_state_destroy(bmz8->hashes[k]);
	}
	free(bmz8->hashes);
	free(bmz8);
	free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz8_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1 type
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||||
|
*((cmph_uint32 *) ptr) = h1_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->hashes[0], ptr);
|
||||||
|
ptr += hash_state_packed_size(h1_type);
|
||||||
|
|
||||||
|
// packing h2 type
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||||
|
*((cmph_uint32 *) ptr) = h2_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->hashes[1], ptr);
|
||||||
|
ptr += hash_state_packed_size(h2_type);
|
||||||
|
|
||||||
|
// packing n
|
||||||
|
*ptr++ = data->n;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz8_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||||
|
|
||||||
|
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||||
|
2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint8 *h1_ptr = packed_mphf;
|
||||||
|
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
|
||||||
|
h1_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||||
|
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||||
|
h2_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);
|
||||||
|
|
||||||
|
register cmph_uint8 n = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
|
||||||
|
register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
|
||||||
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
|
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||||
|
return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
|
||||||
|
}
|
42
cmph/bmz8.h
Normal file
42
cmph/bmz8.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef __CMPH_BMZ8_H__
|
||||||
|
#define __CMPH_BMZ8_H__
|
||||||
|
|
||||||
|
#include "cmph.h"
|
||||||
|
|
||||||
|
typedef struct __bmz8_data_t bmz8_data_t;
|
||||||
|
typedef struct __bmz8_config_data_t bmz8_config_data_t;
|
||||||
|
|
||||||
|
bmz8_config_data_t *bmz8_config_new();
|
||||||
|
void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||||
|
void bmz8_config_destroy(cmph_config_t *mph);
|
||||||
|
cmph_t *bmz8_new(cmph_config_t *mph, double c);
|
||||||
|
|
||||||
|
void bmz8_load(FILE *f, cmph_t *mphf);
|
||||||
|
int bmz8_dump(cmph_t *mphf, FILE *f);
|
||||||
|
void bmz8_destroy(cmph_t *mphf);
|
||||||
|
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
#endif
|
25
cmph/bmz8_structs.h
Normal file
25
cmph/bmz8_structs.h
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#ifndef __CMPH_BMZ8_STRUCTS_H__
|
||||||
|
#define __CMPH_BMZ8_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
|
||||||
|
/* Data of a generated BMZ8 function; every counter is a single byte. */
struct __bmz8_data_t
|
||||||
|
{
|
||||||
|
cmph_uint8 m; //edges (words) count
|
||||||
|
cmph_uint8 n; //vertex count
|
||||||
|
cmph_uint8 *g; // byte table; brz_copy_partial_bmz8_mphf() serializes its leading bytes
|
||||||
|
hash_state_t **hashes; // hashes[0] and hashes[1] are the two states dumped by brz_copy_partial_bmz8_mphf()
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Build-time configuration/state for BMZ8; mirrors __bmz8_data_t and adds
 * the hash function selection and the working graph. */
struct __bmz8_config_data_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfuncs[2]; // hash function kinds, one per hash state
|
||||||
|
cmph_uint8 m; //edges (words) count
|
||||||
|
cmph_uint8 n; //vertex count
|
||||||
|
graph_t *graph; // working graph (graph_t is declared elsewhere)
|
||||||
|
cmph_uint8 *g;
|
||||||
|
hash_state_t **hashes;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
25
cmph/bmz_structs.h
Normal file
25
cmph/bmz_structs.h
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#ifndef __CMPH_BMZ_STRUCTS_H__
|
||||||
|
#define __CMPH_BMZ_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
|
||||||
|
/* Data of a generated BMZ function; same layout as the BMZ8 variant but
 * with 32-bit counters and a 32-bit g table. */
struct __bmz_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
cmph_uint32 *g; // table of 32-bit values
|
||||||
|
hash_state_t **hashes; // array of hash state pointers
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Build-time configuration/state for BMZ; mirrors __bmz_data_t and adds
 * the hash function selection and the working graph. */
struct __bmz_config_data_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfuncs[2]; // hash function kinds, one per hash state
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
graph_t *graph; // working graph (graph_t is declared elsewhere)
|
||||||
|
cmph_uint32 *g;
|
||||||
|
hash_state_t **hashes;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
985
cmph/brz.c
Executable file
985
cmph/brz.c
Executable file
@ -0,0 +1,985 @@
|
|||||||
|
#include "graph.h"
|
||||||
|
#include "fch.h"
|
||||||
|
#include "fch_structs.h"
|
||||||
|
#include "bmz8.h"
|
||||||
|
#include "bmz8_structs.h"
|
||||||
|
#include "brz.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "brz_structs.h"
|
||||||
|
#include "buffer_manager.h"
|
||||||
|
#include "cmph.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#define MAX_BUCKET_SIZE 255
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
static int brz_gen_mphf(cmph_config_t *mph);
|
||||||
|
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
|
||||||
|
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint32 nkeys);
|
||||||
|
static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fchf, cmph_uint32 index, cmph_uint32 *buflen);
|
||||||
|
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
|
||||||
|
brz_config_data_t *brz_config_new()
|
||||||
|
{
|
||||||
|
brz_config_data_t *brz = NULL;
|
||||||
|
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
|
||||||
|
brz->algo = CMPH_FCH;
|
||||||
|
brz->b = 128;
|
||||||
|
brz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
|
brz->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
|
brz->hashfuncs[2] = CMPH_HASH_JENKINS;
|
||||||
|
brz->size = NULL;
|
||||||
|
brz->offset = NULL;
|
||||||
|
brz->g = NULL;
|
||||||
|
brz->h1 = NULL;
|
||||||
|
brz->h2 = NULL;
|
||||||
|
brz->h0 = NULL;
|
||||||
|
brz->memory_availability = 1024*1024;
|
||||||
|
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)10, sizeof(cmph_uint8));
|
||||||
|
brz->mphf_fd = NULL;
|
||||||
|
strcpy((char *)(brz->tmp_dir), "/var/tmp/");
|
||||||
|
assert(brz);
|
||||||
|
return brz;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_destroy(cmph_config_t *mph);
 *  \brief Release the BRZ-specific configuration stored in mph->data.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *
 *  Frees the temporary directory string and then the structure itself.
 *  mph->data is not reset by this function.
 */
void brz_config_destroy(cmph_config_t *mph)
{
	brz_config_data_t *cfg = (brz_config_data_t *)mph->data;

	free(cfg->tmp_dir);
	DEBUGP("Destroying algorithm dependent data\n");
	free(cfg);
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
 *  \brief Select the hash functions used by BRZ.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *  \param hashfuncs list of hash function kinds terminated by CMPH_HASH_COUNT
 *
 *  At most the first three entries are used; slots for which no entry is
 *  supplied keep their previous value.
 */
void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;
	cmph_uint32 i;

	// brz only uses three hash functions. The bound is tested before the
	// element is read so that a three-entry array is never read past its end.
	for (i = 0; i < 3 && hashfuncs[i] != CMPH_HASH_COUNT; ++i)
	{
		brz->hashfuncs[i] = hashfuncs[i];
	}
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
 *  \brief Set the amount of working memory used while partitioning the keys.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *  \param memory_availability size in MiB; 0 leaves the current setting untouched
 *
 *  The value is stored in bytes. Requests whose byte count would not fit in
 *  32 bits are clamped to 4095 MiB instead of silently wrapping around
 *  (4096 MiB used to wrap to a zero-byte buffer).
 */
void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
{
	// Largest MiB count whose product with 1024*1024 fits in 32 bits.
	const cmph_uint32 max_mib = 0xffffffffU / (1024U * 1024U);
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;

	if (memory_availability == 0) return;
	if (memory_availability > max_mib) memory_availability = max_mib;
	brz->memory_availability = memory_availability*1024*1024;
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
 *  \brief Set the directory in which the temporary key files are created.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *  \param tmp_dir NUL-terminated directory name; a trailing '/' is appended
 *         when missing
 *
 *  A NULL or empty tmp_dir is ignored, and so is the request when memory for
 *  the copy cannot be allocated; in those cases the previous directory stays
 *  in effect.
 */
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;
	cmph_uint8 *copy;
	size_t len;
	size_t needs_slash;

	if (tmp_dir == NULL) return;
	len = strlen((char *)tmp_dir);
	// An empty name has no last character to inspect.
	if (len == 0) return;

	needs_slash = (tmp_dir[len-1] != '/') ? 1 : 0;
	copy = (cmph_uint8 *)calloc(len + needs_slash + 1, sizeof(cmph_uint8));
	if (copy == NULL) return;

	memcpy(copy, tmp_dir, len);
	if (needs_slash) copy[len] = '/';
	// calloc already supplied the terminating NUL.

	// The old string is released only after the copy has been made, so the
	// call stays valid even when tmp_dir aliases brz->tmp_dir.
	free(brz->tmp_dir);
	brz->tmp_dir = copy;
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
 *  \brief Set the stream to which the function is written while it is built.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *  \param mphf_fd output stream; asserted to be non-NULL
 */
void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{
	brz_config_data_t *cfg = (brz_config_data_t *)mph->data;

	cfg->mphf_fd = mphf_fd;
	assert(cfg->mphf_fd);
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b);
 *  \brief Set the parameter b, the divisor used to derive the bucket count.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *  \param b requested value; anything outside 65..174 is replaced by 128
 */
void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
	brz_config_data_t *cfg = (brz_config_data_t *)mph->data;
	const int accepted = (b > 64 && b < 175);

	cfg->b = (cmph_uint8)(accepted ? b : 128);
}
|
||||||
|
|
||||||
|
/** \fn void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
 *  \brief Choose the algorithm used for the per-bucket functions.
 *  \param mph configuration whose data field holds a brz_config_data_t
 *  \param algo CMPH_BMZ8 or CMPH_FCH; any other value is ignored
 */
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{
	brz_config_data_t *cfg;

	// Only these two algorithms are supported inside BRZ.
	if (algo != CMPH_BMZ8 && algo != CMPH_FCH) return;

	cfg = (brz_config_data_t *)mph->data;
	cfg->algo = algo;
}
|
||||||
|
|
||||||
|
/** \fn cmph_t *brz_new(cmph_config_t *mph, double c);
 *  \brief Build a BRZ minimal perfect hash function for mph->key_source.
 *  \param mph configuration; mph->data holds the brz_config_data_t
 *  \param c space parameter; out-of-range values are replaced by 1 (BMZ8)
 *         or 2.6 (FCH)
 *  \return the new function, or NULL when all 20 attempts fail or memory
 *          runs out
 *
 *  The keys are split into k = ceil(m / b) buckets by the hash function h0.
 *  brz_gen_mphf() performs the partitioning and writes the per-bucket
 *  functions to brz->mphf_fd; when it fails a new h0 is drawn and the
 *  attempt is repeated. On success ownership of h0, size and offset moves
 *  from the configuration to the returned function.
 */
cmph_t *brz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	brz_data_t *brzf = NULL;
	cmph_uint32 i;
	cmph_uint32 iterations = 20;

	DEBUGP("c: %f\n", c);
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;
	switch(brz->algo) // validating restrictions over parameter c.
	{
		case CMPH_BMZ8:
			if (c == 0 || c >= 2.0) c = 1;
			break;
		case CMPH_FCH:
			if (c <= 2.0) c = 2.6;
			break;
		default:
			assert(0);
	}
	brz->c = c;
	brz->m = mph->key_source->nkeys;
	DEBUGP("m: %u\n", brz->m);
	brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
	DEBUGP("k: %u\n", brz->k);
	brz->size   = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));
	// calloc(0, ...) may legitimately return NULL, hence the k > 0 test.
	if (brz->size == NULL && brz->k > 0) return NULL;

	// Clustering the keys by graph id.
	if (mph->verbosity)
	{
		fprintf(stderr, "Partioning the set of keys.\n");
	}

	while(1)
	{
		int ok;
		DEBUGP("hash function 3\n");
		brz->h0 = hash_state_new(brz->hashfuncs[2], brz->k);
		DEBUGP("Generating graphs\n");
		ok = brz_gen_mphf(mph);
		if (ok) break;

		--iterations;
		hash_state_destroy(brz->h0);
		brz->h0 = NULL;
		DEBUGP("%u iterations remaining to create the graphs in a external file\n", iterations);
		if (mph->verbosity)
		{
			fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
		}
		if (iterations == 0) break;

		// brz_gen_mphf() counts the keys of every bucket in brz->size. The
		// counts of the failed attempt must not leak into the next one.
		// A failure implies at least one key was read, so size is non-NULL.
		// NOTE(review): output already written to brz->mphf_fd by a failed
		// attempt is not discarded here - confirm whether callers expect that.
		memset(brz->size, 0, (size_t)brz->k * sizeof(cmph_uint8));
	}
	if (iterations == 0)
	{
		DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
		free(brz->size);
		brz->size = NULL; // do not leave a dangling pointer in the config
		return NULL;
	}
	DEBUGP("Graphs generated\n");

	brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	brzf = (brz_data_t *)malloc(sizeof(brz_data_t));
	if ((brz->offset == NULL && brz->k > 0) || mphf == NULL || brzf == NULL)
	{
		free(brzf);
		free(mphf);
		free(brz->offset);
		brz->offset = NULL;
		free(brz->size);
		brz->size = NULL;
		hash_state_destroy(brz->h0);
		brz->h0 = NULL;
		return NULL;
	}

	// offset[i] is the number of keys stored in the buckets before bucket i.
	for (i = 1; i < brz->k; ++i)
	{
		brz->offset[i] = brz->size[i-1] + brz->offset[i-1];
	}

	// Generating a mphf
	mphf->algo = mph->algo;
	brzf->g = brz->g;
	brz->g = NULL; //transfer memory ownership
	brzf->h1 = brz->h1;
	brz->h1 = NULL; //transfer memory ownership
	brzf->h2 = brz->h2;
	brz->h2 = NULL; //transfer memory ownership
	brzf->h0 = brz->h0;
	brz->h0 = NULL; //transfer memory ownership
	brzf->size = brz->size;
	brz->size = NULL; //transfer memory ownership
	brzf->offset = brz->offset;
	brz->offset = NULL; //transfer memory ownership
	brzf->k = brz->k;
	brzf->c = brz->c;
	brzf->m = brz->m;
	brzf->algo = brz->algo;
	mphf->data = brzf;
	mphf->size = brz->m;
	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;
}
|
||||||
|
|
||||||
|
static int brz_gen_mphf(cmph_config_t *mph)
|
||||||
|
{
|
||||||
|
cmph_uint32 i, e, error;
|
||||||
|
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
||||||
|
cmph_uint32 memory_usage = 0;
|
||||||
|
cmph_uint32 nkeys_in_buffer = 0;
|
||||||
|
cmph_uint8 *buffer = (cmph_uint8 *)malloc((size_t)brz->memory_availability);
|
||||||
|
cmph_uint32 *buckets_size = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
|
||||||
|
cmph_uint32 *keys_index = NULL;
|
||||||
|
cmph_uint8 **buffer_merge = NULL;
|
||||||
|
cmph_uint32 *buffer_h0 = NULL;
|
||||||
|
cmph_uint32 nflushes = 0;
|
||||||
|
cmph_uint32 h0;
|
||||||
|
register size_t nbytes;
|
||||||
|
FILE * tmp_fd = NULL;
|
||||||
|
buffer_manager_t * buff_manager = NULL;
|
||||||
|
char *filename = NULL;
|
||||||
|
char *key = NULL;
|
||||||
|
cmph_uint32 keylen;
|
||||||
|
cmph_uint32 cur_bucket = 0;
|
||||||
|
cmph_uint8 nkeys_vd = 0;
|
||||||
|
cmph_uint8 ** keys_vd = NULL;
|
||||||
|
|
||||||
|
mph->key_source->rewind(mph->key_source->data);
|
||||||
|
DEBUGP("Generating graphs from %u keys\n", brz->m);
|
||||||
|
// Partitioning
|
||||||
|
for (e = 0; e < brz->m; ++e)
|
||||||
|
{
|
||||||
|
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||||
|
|
||||||
|
/* Buffers management */
|
||||||
|
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
|
||||||
|
{
|
||||||
|
if(mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
|
||||||
|
}
|
||||||
|
cmph_uint32 value = buckets_size[0];
|
||||||
|
cmph_uint32 sum = 0;
|
||||||
|
cmph_uint32 keylen1 = 0;
|
||||||
|
buckets_size[0] = 0;
|
||||||
|
for(i = 1; i < brz->k; i++)
|
||||||
|
{
|
||||||
|
if(buckets_size[i] == 0) continue;
|
||||||
|
sum += value;
|
||||||
|
value = buckets_size[i];
|
||||||
|
buckets_size[i] = sum;
|
||||||
|
|
||||||
|
}
|
||||||
|
memory_usage = 0;
|
||||||
|
keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
|
||||||
|
for(i = 0; i < nkeys_in_buffer; i++)
|
||||||
|
{
|
||||||
|
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
|
||||||
|
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
|
||||||
|
keys_index[buckets_size[h0]] = memory_usage;
|
||||||
|
buckets_size[h0]++;
|
||||||
|
memory_usage += keylen1 + (cmph_uint32)sizeof(keylen1);
|
||||||
|
}
|
||||||
|
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
|
||||||
|
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
|
||||||
|
tmp_fd = fopen(filename, "wb");
|
||||||
|
free(filename);
|
||||||
|
filename = NULL;
|
||||||
|
for(i = 0; i < nkeys_in_buffer; i++)
|
||||||
|
{
|
||||||
|
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
|
||||||
|
nbytes = fwrite(buffer + keys_index[i], (size_t)1, keylen1 + sizeof(keylen1), tmp_fd);
|
||||||
|
}
|
||||||
|
nkeys_in_buffer = 0;
|
||||||
|
memory_usage = 0;
|
||||||
|
memset((void *)buckets_size, 0, brz->k*sizeof(cmph_uint32));
|
||||||
|
nflushes++;
|
||||||
|
free(keys_index);
|
||||||
|
fclose(tmp_fd);
|
||||||
|
}
|
||||||
|
memcpy(buffer + memory_usage, &keylen, sizeof(keylen));
|
||||||
|
memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen);
|
||||||
|
memory_usage += keylen + (cmph_uint32)sizeof(keylen);
|
||||||
|
h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||||
|
|
||||||
|
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
|
||||||
|
{
|
||||||
|
free(buffer);
|
||||||
|
free(buckets_size);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
brz->size[h0] = (cmph_uint8)(brz->size[h0] + 1U);
|
||||||
|
buckets_size[h0] ++;
|
||||||
|
nkeys_in_buffer++;
|
||||||
|
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||||
|
}
|
||||||
|
if (memory_usage != 0) // flush buffers
|
||||||
|
{
|
||||||
|
if(mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
|
||||||
|
}
|
||||||
|
cmph_uint32 value = buckets_size[0];
|
||||||
|
cmph_uint32 sum = 0;
|
||||||
|
cmph_uint32 keylen1 = 0;
|
||||||
|
buckets_size[0] = 0;
|
||||||
|
for(i = 1; i < brz->k; i++)
|
||||||
|
{
|
||||||
|
if(buckets_size[i] == 0) continue;
|
||||||
|
sum += value;
|
||||||
|
value = buckets_size[i];
|
||||||
|
buckets_size[i] = sum;
|
||||||
|
}
|
||||||
|
memory_usage = 0;
|
||||||
|
keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
|
||||||
|
for(i = 0; i < nkeys_in_buffer; i++)
|
||||||
|
{
|
||||||
|
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
|
||||||
|
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
|
||||||
|
keys_index[buckets_size[h0]] = memory_usage;
|
||||||
|
buckets_size[h0]++;
|
||||||
|
memory_usage += keylen1 + (cmph_uint32)sizeof(keylen1);
|
||||||
|
}
|
||||||
|
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
|
||||||
|
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
|
||||||
|
tmp_fd = fopen(filename, "wb");
|
||||||
|
free(filename);
|
||||||
|
filename = NULL;
|
||||||
|
for(i = 0; i < nkeys_in_buffer; i++)
|
||||||
|
{
|
||||||
|
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
|
||||||
|
nbytes = fwrite(buffer + keys_index[i], (size_t)1, keylen1 + sizeof(keylen1), tmp_fd);
|
||||||
|
}
|
||||||
|
nkeys_in_buffer = 0;
|
||||||
|
memory_usage = 0;
|
||||||
|
memset((void *)buckets_size, 0, brz->k*sizeof(cmph_uint32));
|
||||||
|
nflushes++;
|
||||||
|
free(keys_index);
|
||||||
|
fclose(tmp_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(buffer);
|
||||||
|
free(buckets_size);
|
||||||
|
if(nflushes > 1024) return 0; // Too many files generated.
|
||||||
|
// mphf generation
|
||||||
|
if(mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "\nMPHF generation \n");
|
||||||
|
}
|
||||||
|
/* Starting to dump to disk the resultant MPHF: __cmph_dump function */
|
||||||
|
nbytes = fwrite(cmph_names[CMPH_BRZ], (size_t)(strlen(cmph_names[CMPH_BRZ]) + 1), (size_t)1, brz->mphf_fd);
|
||||||
|
nbytes = fwrite(&(brz->m), sizeof(brz->m), (size_t)1, brz->mphf_fd);
|
||||||
|
nbytes = fwrite(&(brz->c), sizeof(double), (size_t)1, brz->mphf_fd);
|
||||||
|
nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd);
|
||||||
|
nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs
|
||||||
|
nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd);
|
||||||
|
|
||||||
|
//tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
|
||||||
|
buff_manager = buffer_manager_new(brz->memory_availability, nflushes);
|
||||||
|
buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *));
|
||||||
|
buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32));
|
||||||
|
|
||||||
|
memory_usage = 0;
|
||||||
|
for(i = 0; i < nflushes; i++)
|
||||||
|
{
|
||||||
|
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
|
||||||
|
sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
|
||||||
|
buffer_manager_open(buff_manager, i, filename);
|
||||||
|
free(filename);
|
||||||
|
filename = NULL;
|
||||||
|
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
|
||||||
|
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
|
||||||
|
buffer_h0[i] = h0;
|
||||||
|
buffer_merge[i] = (cmph_uint8 *)key;
|
||||||
|
key = NULL; //transfer memory ownership
|
||||||
|
}
|
||||||
|
e = 0;
|
||||||
|
keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
|
||||||
|
nkeys_vd = 0;
|
||||||
|
error = 0;
|
||||||
|
while(e < brz->m)
|
||||||
|
{
|
||||||
|
i = brz_min_index(buffer_h0, nflushes);
|
||||||
|
cur_bucket = buffer_h0[i];
|
||||||
|
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
|
||||||
|
if(key)
|
||||||
|
{
|
||||||
|
while(key)
|
||||||
|
{
|
||||||
|
//keylen = strlen(key);
|
||||||
|
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
|
||||||
|
if (h0 != buffer_h0[i]) break;
|
||||||
|
keys_vd[nkeys_vd++] = (cmph_uint8 *)key;
|
||||||
|
key = NULL; //transfer memory ownership
|
||||||
|
e++;
|
||||||
|
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
|
||||||
|
}
|
||||||
|
if (key)
|
||||||
|
{
|
||||||
|
assert(nkeys_vd < brz->size[cur_bucket]);
|
||||||
|
keys_vd[nkeys_vd++] = buffer_merge[i];
|
||||||
|
buffer_merge[i] = NULL; //transfer memory ownership
|
||||||
|
e++;
|
||||||
|
buffer_h0[i] = h0;
|
||||||
|
buffer_merge[i] = (cmph_uint8 *)key;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!key)
|
||||||
|
{
|
||||||
|
assert(nkeys_vd < brz->size[cur_bucket]);
|
||||||
|
keys_vd[nkeys_vd++] = buffer_merge[i];
|
||||||
|
buffer_merge[i] = NULL; //transfer memory ownership
|
||||||
|
e++;
|
||||||
|
buffer_h0[i] = UINT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
|
||||||
|
{
|
||||||
|
cmph_io_adapter_t *source = NULL;
|
||||||
|
cmph_config_t *config = NULL;
|
||||||
|
cmph_t *mphf_tmp = NULL;
|
||||||
|
char *bufmphf = NULL;
|
||||||
|
cmph_uint32 buflenmphf = 0;
|
||||||
|
// Source of keys
|
||||||
|
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
||||||
|
config = cmph_config_new(source);
|
||||||
|
cmph_config_set_algo(config, brz->algo);
|
||||||
|
//cmph_config_set_algo(config, CMPH_BMZ8);
|
||||||
|
cmph_config_set_graphsize(config, brz->c);
|
||||||
|
mphf_tmp = cmph_new(config);
|
||||||
|
if (mphf_tmp == NULL)
|
||||||
|
{
|
||||||
|
if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k);
|
||||||
|
error = 1;
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
brz_destroy_keys_vd(keys_vd, nkeys_vd);
|
||||||
|
cmph_io_byte_vector_adapter_destroy(source);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(mph->verbosity)
|
||||||
|
{
|
||||||
|
if (cur_bucket % 1000 == 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch(brz->algo)
|
||||||
|
{
|
||||||
|
case CMPH_FCH:
|
||||||
|
{
|
||||||
|
fch_data_t * fchf = NULL;
|
||||||
|
fchf = (fch_data_t *)mphf_tmp->data;
|
||||||
|
bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
{
|
||||||
|
bmz8_data_t * bmzf = NULL;
|
||||||
|
bmzf = (bmz8_data_t *)mphf_tmp->data;
|
||||||
|
bufmphf = brz_copy_partial_bmz8_mphf(brz, bmzf, cur_bucket, &buflenmphf);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
nbytes = fwrite(bufmphf, (size_t)buflenmphf, (size_t)1, brz->mphf_fd);
|
||||||
|
free(bufmphf);
|
||||||
|
bufmphf = NULL;
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
brz_destroy_keys_vd(keys_vd, nkeys_vd);
|
||||||
|
cmph_destroy(mphf_tmp);
|
||||||
|
cmph_io_byte_vector_adapter_destroy(source);
|
||||||
|
nkeys_vd = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buffer_manager_destroy(buff_manager);
|
||||||
|
free(keys_vd);
|
||||||
|
free(buffer_merge);
|
||||||
|
free(buffer_h0);
|
||||||
|
if (error) return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the index of the smallest element among the first n entries of
 * vector. On ties the lowest index wins; 0 is returned when n is 0 or 1. */
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
{
	cmph_uint32 best = 0;
	cmph_uint32 pos = 1;

	while (pos < n)
	{
		if (vector[pos] < vector[best])
		{
			best = pos;
		}
		++pos;
	}
	return best;
}
|
||||||
|
|
||||||
|
/* Free the first nkeys entries of keys_vd and reset them to NULL.
 * The vector itself is not freed. */
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint32 nkeys)
{
	// The counter has the type of the bound: an 8-bit counter would wrap
	// around and never terminate for nkeys >= 256.
	cmph_uint32 i;
	for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;}
}
|
||||||
|
|
||||||
|
/* Serialize the FCH function of one bucket into a newly allocated buffer.
 *
 * Layout: length of the h1 dump (cmph_uint32), h1 dump, length of the h2
 * dump (cmph_uint32), h2 dump, then fchf->b bytes, one taken from the start
 * of each element of fchf->g.
 *
 * brz and index are not used. *buflen receives the total size; the caller
 * frees the returned buffer.
 */
static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fchf, cmph_uint32 index, cmph_uint32 *buflen)
{
	char *state1 = NULL;
	char *state2 = NULL;
	cmph_uint32 state1_len = 0;
	cmph_uint32 state2_len = 0;
	cmph_uint32 nvalues = fchf->b;//brz->size[index];
	cmph_uint32 pos;
	char *out;
	char *cursor;

	hash_state_dump(fchf->h1, &state1, &state1_len);
	hash_state_dump(fchf->h2, &state2, &state2_len);
	*buflen = state1_len + state2_len + nvalues + 2U * (cmph_uint32)sizeof(cmph_uint32);
	out = (char *)malloc((size_t)(*buflen));

	cursor = out;
	memcpy(cursor, &state1_len, sizeof(cmph_uint32));
	cursor += sizeof(cmph_uint32);
	memcpy(cursor, state1, (size_t)state1_len);
	cursor += state1_len;
	memcpy(cursor, &state2_len, sizeof(cmph_uint32));
	cursor += sizeof(cmph_uint32);
	memcpy(cursor, state2, (size_t)state2_len);
	cursor += state2_len;
	// Exactly one byte is copied per element of g.
	for (pos = 0; pos < nvalues; pos++)
	{
		memcpy(cursor + pos, (fchf->g + pos), (size_t)1);
	}

	free(state1);
	free(state2);
	return out;
}
|
||||||
|
/* Serialize the BMZ8 function of bucket `index` into a newly allocated
 * buffer.
 *
 * Layout: length of the first hash dump (cmph_uint32), first hash dump,
 * length of the second hash dump (cmph_uint32), second hash dump, then the
 * first ceil(c * size[index]) bytes of bmzf->g.
 *
 * *buflen receives the total size; the caller frees the returned buffer.
 */
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
{
	char *state1 = NULL;
	char *state2 = NULL;
	cmph_uint32 state1_len = 0;
	cmph_uint32 state2_len = 0;
	cmph_uint32 nvertices = (cmph_uint32)ceil(brz->c * brz->size[index]);
	char *out;
	char *cursor;

	hash_state_dump(bmzf->hashes[0], &state1, &state1_len);
	hash_state_dump(bmzf->hashes[1], &state2, &state2_len);
	*buflen = state1_len + state2_len + nvertices + 2U * (cmph_uint32)sizeof(cmph_uint32);
	out = (char *)malloc((size_t)(*buflen));

	cursor = out;
	memcpy(cursor, &state1_len, sizeof(cmph_uint32));
	cursor += sizeof(cmph_uint32);
	memcpy(cursor, state1, (size_t)state1_len);
	cursor += state1_len;
	memcpy(cursor, &state2_len, sizeof(cmph_uint32));
	cursor += sizeof(cmph_uint32);
	memcpy(cursor, state2, (size_t)state2_len);
	cursor += state2_len;
	memcpy(cursor, bmzf->g, (size_t)nvertices);

	free(state1);
	free(state2);
	return out;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Write `size` bytes from ptr to fd. An empty chunk is a successful no-op
 * (fwrite reports 0 items for a zero size, which is not an error).
 * Returns 1 on success and 0 on a short write. */
static int brz_write_chunk(const void *ptr, size_t size, FILE *fd)
{
	return size == 0 || fwrite(ptr, size, (size_t)1, fd) == 1;
}

/** \fn int brz_dump(cmph_t *mphf, FILE *fd);
 *  \brief Write the trailing part of a BRZ function to fd.
 *  \param mphf function whose data field holds a brz_data_t
 *  \param fd output stream
 *  \return 1 on success, 0 when a write failed
 *
 *  Written here: the dump of h0 preceded by its length, the number of keys m
 *  and the offset vector (k entries).
 */
int brz_dump(cmph_t *mphf, FILE *fd)
{
	brz_data_t *data = (brz_data_t *)mphf->data;
	char *buf = NULL;
	cmph_uint32 buflen;
	int ok;

	DEBUGP("Dumping brzf\n");
	// The initial part of the MPHF has already been dumped to disk during construction
	// Dumping h0
	hash_state_dump(data->h0, &buf, &buflen);
	DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
	ok = brz_write_chunk(&buflen, sizeof(cmph_uint32), fd)
	  && brz_write_chunk(buf, (size_t)buflen, fd);
	free(buf);
	// Dumping m and the vector offset.
	ok = ok
	  && brz_write_chunk(&(data->m), sizeof(cmph_uint32), fd)
	  && brz_write_chunk(data->offset, sizeof(cmph_uint32)*(data->k), fd);
	return ok;
}
|
||||||
|
|
||||||
|
/** \fn void brz_load(FILE *f, cmph_t *mphf);
 *  \brief Read a BRZ function from f and attach it to mphf->data.
 *  \param f input stream positioned at the value of c
 *  \param mphf function object that receives the loaded brz_data_t
 *
 *  Read order: c, algo, k, the k bucket sizes; for each bucket the h1 dump,
 *  the h2 dump and its g table; then the h0 dump, m and the k offsets.
 *  NOTE(review): the algorithm name and the first copy of m written by
 *  brz_gen_mphf() are not read here - presumably the generic loader consumes
 *  them; confirm against the caller.
 *  NOTE(review): read and allocation failures are not reported (the function
 *  returns void); the input is trusted.
 */
void brz_load(FILE *f, cmph_t *mphf)
{
	char *buf = NULL;
	cmph_uint32 buflen;
	register size_t nbytes;
	cmph_uint32 i, n;
	brz_data_t *brz = (brz_data_t *)malloc(sizeof(brz_data_t));

	DEBUGP("Loading brz mphf\n");
	mphf->data = brz;
	nbytes = fread(&(brz->c), sizeof(double), (size_t)1, f);
	nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo.
	nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f);
	brz->size   = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
	nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
	brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
	brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
	brz->g  = (cmph_uint8 **)  calloc((size_t)brz->k, sizeof(cmph_uint8 *));
	DEBUGP("Reading c = %f   k = %u   algo = %u \n", brz->c, brz->k, brz->algo);
	//loading h_i1, h_i2 and g_i.
	for(i = 0; i < brz->k; i++)
	{
		// h1
		nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
		DEBUGP("Hash state 1 has %u bytes\n", buflen);
		buf = (char *)malloc((size_t)buflen);
		nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
		brz->h1[i] = hash_state_load(buf, buflen);
		free(buf);
		//h2
		nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
		DEBUGP("Hash state 2 has %u bytes\n", buflen);
		buf = (char *)malloc((size_t)buflen);
		nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
		brz->h2[i] = hash_state_load(buf, buflen);
		free(buf);
		// Size of g_i. It stays 0 for an unrecognized algorithm so that n is
		// never read uninitialized when assertions are compiled out.
		n = 0;
		switch(brz->algo)
		{
			case CMPH_FCH:
				n = fch_calc_b(brz->c, brz->size[i]);
				break;
			case CMPH_BMZ8:
				n = (cmph_uint32)ceil(brz->c * brz->size[i]);
				break;
			default: assert(0);
		}
		DEBUGP("g_i has %u bytes\n", n);
		brz->g[i] = (cmph_uint8 *)calloc((size_t)n, sizeof(cmph_uint8));
		nbytes = fread(brz->g[i], sizeof(cmph_uint8)*n, (size_t)1, f);
	}
	//loading h0
	nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
	DEBUGP("Hash state has %u bytes\n", buflen);
	buf = (char *)malloc((size_t)buflen);
	nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
	brz->h0 = hash_state_load(buf, buflen);
	free(buf);

	//loading m and the vector offset.
	nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f);
	brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k);
	nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f);
	(void)nbytes;
	return;
}
|
||||||
|
|
||||||
|
/* Look up key in a BRZ function whose buckets were built with BMZ8.
 *
 * fingerprint receives the output of hash_vector() for h0; its third word
 * selects the bucket. Inside the bucket the two g entries chosen by h1 and
 * h2 are added (8-bit arithmetic) and the bucket offset is added on top.
 *
 * A key that selects an empty bucket cannot belong to the key set; the
 * bucket offset is returned for it instead of dividing by zero. 0 is
 * returned when the function has no buckets at all.
 */
static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	register cmph_uint32 h0;

	hash_vector(brz->h0, key, keylen, fingerprint);
	if (brz->k == 0) return 0;
	h0 = fingerprint[2] % brz->k;

	register cmph_uint32 m = brz->size[h0];
	register cmph_uint32 n = (cmph_uint32)ceil(brz->c * m);
	if (n == 0) return brz->offset[h0];

	register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
	register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
	register cmph_uint8 mphf_bucket;

	// The two vertices must differ; move h2 to the next one, wrapping around.
	if (h1 == h2 && ++h2 >= n) h2 = 0;
	mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]);
	DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
	DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m);
	DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
	return (mphf_bucket + brz->offset[h0]);
}
|
||||||
|
|
||||||
|
/* Look up key in a BRZ function whose buckets were built with FCH.
 *
 * fingerprint receives the output of hash_vector() for h0; its third word
 * selects the bucket. Inside the bucket h1 is mapped through
 * mixh10h11h12() to pick a g entry, which is added to h2 modulo the bucket
 * size; the bucket offset is added on top.
 *
 * A key that selects an empty bucket cannot belong to the key set; the
 * bucket offset is returned for it instead of dividing by zero. 0 is
 * returned when the function has no buckets at all.
 */
static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	register cmph_uint32 h0;

	hash_vector(brz->h0, key, keylen, fingerprint);
	if (brz->k == 0) return 0;
	h0 = fingerprint[2] % brz->k;

	register cmph_uint32 m = brz->size[h0];
	if (m == 0) return brz->offset[h0];

	register cmph_uint32 b = fch_calc_b(brz->c, m);
	register double p1 = fch_calc_p1(m);
	register double p2 = fch_calc_p2(b);
	register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m;
	register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m;
	register cmph_uint8 mphf_bucket = 0;
	h1 = mixh10h11h12(b, p1, p2, h1);
	mphf_bucket = (cmph_uint8)((h2 + brz->g[h0][h1]) % m);
	return (mphf_bucket + brz->offset[h0]);
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
 *  \brief Evaluate a BRZ function for key.
 *  \param mphf function whose data field holds a brz_data_t
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return the value computed by the search routine of the per-bucket
 *          algorithm; 0 (after a failed assertion) for any other algorithm
 */
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	cmph_uint32 fingerprint[3];
	brz_data_t *brz = mphf->data;

	if (brz->algo == CMPH_FCH)
	{
		return brz_fch_search(brz, key, keylen, fingerprint);
	}
	if (brz->algo == CMPH_BMZ8)
	{
		return brz_bmz8_search(brz, key, keylen, fingerprint);
	}
	assert(0);
	return 0;
}
|
||||||
|
/** \fn void brz_destroy(cmph_t *mphf);
 *  \brief Release a BRZ function and everything it owns.
 *  \param mphf function whose data field holds a brz_data_t
 *
 *  The per-bucket tables and hash states are released only when the g
 *  vector is present; h0, size, offset, the data structure and mphf itself
 *  are always released.
 */
void brz_destroy(cmph_t *mphf)
{
	brz_data_t *data = (brz_data_t *)mphf->data;

	if (data->g != NULL)
	{
		cmph_uint32 bucket = 0;
		while (bucket < data->k)
		{
			free(data->g[bucket]);
			hash_state_destroy(data->h1[bucket]);
			hash_state_destroy(data->h2[bucket]);
			++bucket;
		}
		free(data->g);
		free(data->h1);
		free(data->h2);
	}

	hash_state_destroy(data->h0);
	free(data->size);
	free(data->offset);
	free(data);
	free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void brz_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
brz_data_t *data = (brz_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
cmph_uint32 i,n;
|
||||||
|
|
||||||
|
// packing internal algo type
|
||||||
|
memcpy(ptr, &(data->algo), sizeof(data->algo));
|
||||||
|
ptr += sizeof(data->algo);
|
||||||
|
|
||||||
|
// packing h0 type
|
||||||
|
CMPH_HASH h0_type = hash_get_type(data->h0);
|
||||||
|
memcpy(ptr, &h0_type, sizeof(h0_type));
|
||||||
|
ptr += sizeof(h0_type);
|
||||||
|
|
||||||
|
// packing h0
|
||||||
|
hash_state_pack(data->h0, ptr);
|
||||||
|
ptr += hash_state_packed_size(h0_type);
|
||||||
|
|
||||||
|
// packing k
|
||||||
|
memcpy(ptr, &(data->k), sizeof(data->k));
|
||||||
|
ptr += sizeof(data->k);
|
||||||
|
|
||||||
|
// packing c
|
||||||
|
*((cmph_uint64 *)ptr) = (cmph_uint64)data->c;
|
||||||
|
ptr += sizeof(data->c);
|
||||||
|
|
||||||
|
// packing h1 type
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
||||||
|
memcpy(ptr, &h1_type, sizeof(h1_type));
|
||||||
|
ptr += sizeof(h1_type);
|
||||||
|
|
||||||
|
// packing h2 type
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
|
||||||
|
memcpy(ptr, &h2_type, sizeof(h2_type));
|
||||||
|
ptr += sizeof(h2_type);
|
||||||
|
|
||||||
|
// packing size
|
||||||
|
memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
|
||||||
|
ptr += data->k;
|
||||||
|
|
||||||
|
// packing offset
|
||||||
|
memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
|
||||||
|
ptr += sizeof(cmph_uint32)*data->k;
|
||||||
|
|
||||||
|
#if defined (__ia64) || defined (__x86_64__)
|
||||||
|
cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr;
|
||||||
|
#else
|
||||||
|
cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k);
|
||||||
|
|
||||||
|
for(i = 0; i < data->k; i++)
|
||||||
|
{
|
||||||
|
#if defined (__ia64) || defined (__x86_64__)
|
||||||
|
*g_is_ptr++ = (cmph_uint64)g_i;
|
||||||
|
#else
|
||||||
|
*g_is_ptr++ = (cmph_uint32)g_i;
|
||||||
|
#endif
|
||||||
|
// packing h1[i]
|
||||||
|
hash_state_pack(data->h1[i], g_i);
|
||||||
|
g_i += hash_state_packed_size(h1_type);
|
||||||
|
|
||||||
|
// packing h2[i]
|
||||||
|
hash_state_pack(data->h2[i], g_i);
|
||||||
|
g_i += hash_state_packed_size(h2_type);
|
||||||
|
|
||||||
|
// packing g_i
|
||||||
|
switch(data->algo)
|
||||||
|
{
|
||||||
|
case CMPH_FCH:
|
||||||
|
n = fch_calc_b(data->c, data->size[i]);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
n = (cmph_uint32)ceil(data->c * data->size[i]);
|
||||||
|
break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
|
||||||
|
g_i += n;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
cmph_uint32 i;
|
||||||
|
cmph_uint32 size = 0;
|
||||||
|
brz_data_t *data = (brz_data_t *)mphf->data;
|
||||||
|
CMPH_HASH h0_type = hash_get_type(data->h0);
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
|
||||||
|
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
|
||||||
|
sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
|
||||||
|
// pointers to g_is
|
||||||
|
#if defined (__ia64) || defined (__x86_64__)
|
||||||
|
size += (cmph_uint32) sizeof(cmph_uint64)*data->k;
|
||||||
|
#else
|
||||||
|
size += (cmph_uint32) sizeof(cmph_uint32)*data->k;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
size += hash_state_packed_size(h1_type) * data->k;
|
||||||
|
size += hash_state_packed_size(h2_type) * data->k;
|
||||||
|
|
||||||
|
cmph_uint32 n = 0;
|
||||||
|
for(i = 0; i < data->k; i++)
|
||||||
|
{
|
||||||
|
switch(data->algo)
|
||||||
|
{
|
||||||
|
case CMPH_FCH:
|
||||||
|
n = fch_calc_b(data->c, data->size[i]);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
n = (cmph_uint32)ceil(data->c * data->size[i]);
|
||||||
|
break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
size += n;
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Search a packed BRZ function whose buckets were built with BMZ8.
 * packed_mphf points just past the algo tag. The header is walked in the
 * order h0 type, packed h0 state, k, c (two 32-bit words), h1 type,
 * h2 type, size[k], offset[k], then the per-bucket address slots.
 * fingerprint receives the output of hash_vector_packed for h0; its third
 * word selects the bucket.
 */
static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	cmph_uint32 *cursor = packed_mphf;

	/* h0: type tag, then its packed state */
	CMPH_HASH h0_type = cursor[0];
	cmph_uint32 *h0_ptr = cursor + 1;
	cursor = (cmph_uint32 *)(((cmph_uint8 *)h0_ptr) + hash_state_packed_size(h0_type));

	/* number of buckets */
	cmph_uint32 k = cursor[0];
	cursor += 1;

	/* c is read as a 64-bit integer and converted to double; any fractional
	 * part was already dropped when it was packed */
	double c = (double)(*((cmph_uint64*)cursor));
	cursor += 2;

	CMPH_HASH h1_type = cursor[0];
	CMPH_HASH h2_type = cursor[1];
	cursor += 2;

	/* size table (one byte per bucket) followed by the offset table */
	cmph_uint8 *size = (cmph_uint8 *)cursor;
	cmph_uint32 *offset = (cmph_uint32 *)(size + k);
	cursor = offset + k;

	/* choose the bucket */
	hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
	cmph_uint32 h0 = fingerprint[2] % k;

	cmph_uint32 m = size[h0];
	cmph_uint32 n = (cmph_uint32)ceil(c * m);

	/* address slots: each holds the address of a bucket record */
	#if defined (__ia64) || defined (__x86_64__)
		cmph_uint64 *g_is_ptr = (cmph_uint64 *)cursor;
	#else
		cmph_uint32 *g_is_ptr = cursor;
	#endif

	/* bucket record: packed h1, packed h2, g bytes */
	cmph_uint8 *h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
	cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	cmph_uint8 *g = h2_ptr + hash_state_packed_size(h2_type);

	cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;

	/* the two indices must differ; bump h2 with wrap-around on a clash */
	if (h1 == h2)
	{
		h2++;
		if (h2 >= n)
		{
			h2 = 0;
		}
	}

	/* the sum is deliberately reduced to 8 bits before adding the bucket offset */
	cmph_uint8 mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
	DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
	DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
	return (mphf_bucket + offset[h0]);
}
|
||||||
|
|
||||||
|
/* Search a packed BRZ function whose buckets were built with FCH.
 * packed_mphf points just past the algo tag. The header is walked in the
 * order h0 type, packed h0 state, k, c (two 32-bit words), h1 type,
 * h2 type, size[k], offset[k], then the per-bucket address slots.
 * fingerprint receives the output of hash_vector_packed for h0; its third
 * word selects the bucket.
 */
static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	cmph_uint32 *cursor = packed_mphf;

	/* h0: type tag, then its packed state */
	CMPH_HASH h0_type = cursor[0];
	cmph_uint32 *h0_ptr = cursor + 1;
	cursor = (cmph_uint32 *)(((cmph_uint8 *)h0_ptr) + hash_state_packed_size(h0_type));

	/* number of buckets */
	cmph_uint32 k = cursor[0];
	cursor += 1;

	/* c is read as a 64-bit integer and converted to double; any fractional
	 * part was already dropped when it was packed */
	double c = (double)(*((cmph_uint64*)cursor));
	cursor += 2;

	CMPH_HASH h1_type = cursor[0];
	CMPH_HASH h2_type = cursor[1];
	cursor += 2;

	/* size table (one byte per bucket) followed by the offset table */
	cmph_uint8 *size = (cmph_uint8 *)cursor;
	cmph_uint32 *offset = (cmph_uint32 *)(size + k);
	cursor = offset + k;

	/* choose the bucket */
	hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
	cmph_uint32 h0 = fingerprint[2] % k;

	/* FCH parameters of that bucket */
	cmph_uint32 m = size[h0];
	cmph_uint32 b = fch_calc_b(c, m);
	double p1 = fch_calc_p1(m);
	double p2 = fch_calc_p2(b);

	/* address slots: each holds the address of a bucket record */
	#if defined (__ia64) || defined (__x86_64__)
		cmph_uint64 *g_is_ptr = (cmph_uint64 *)cursor;
	#else
		cmph_uint32 *g_is_ptr = cursor;
	#endif

	/* bucket record: packed h1, packed h2, g bytes */
	cmph_uint8 *h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
	cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	cmph_uint8 *g = h2_ptr + hash_state_packed_size(h2_type);

	cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
	cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;

	/* h1 is remapped through mixh10h11h12 before it indexes g */
	h1 = mixh10h11h12(b, p1, p2, h1);
	cmph_uint8 mphf_bucket = (cmph_uint8)((h2 + g[h1]) % m);
	return (mphf_bucket + offset[h0]);
}
|
||||||
|
|
||||||
|
/** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
register CMPH_ALGO algo = *ptr++;
|
||||||
|
cmph_uint32 fingerprint[3];
|
||||||
|
switch(algo)
|
||||||
|
{
|
||||||
|
case CMPH_FCH:
|
||||||
|
return brz_fch_search_packed(ptr, key, keylen, fingerprint);
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
return brz_bmz8_search_packed(ptr, key, keylen, fingerprint);
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
47
cmph/brz.h
Normal file
47
cmph/brz.h
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#ifndef __CMPH_BRZ_H__
#define __CMPH_BRZ_H__

#include "cmph.h"

/* Opaque handles; the struct layouts live in brz_structs.h. */
typedef struct __brz_data_t brz_data_t;
typedef struct __brz_config_data_t brz_config_data_t;

/* Configuration object: creation, setters and destruction. */
brz_config_data_t *brz_config_new();
void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b);
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
void brz_config_destroy(cmph_config_t *mph);

/* Construction from a configuration. */
cmph_t *brz_new(cmph_config_t *mph, double c);

/* Load from / dump to a stream, destroy, and in-memory lookup. */
void brz_load(FILE *f, cmph_t *mphf);
int brz_dump(cmph_t *mphf, FILE *f);
void brz_destroy(cmph_t *mphf);
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);

/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
 *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
 *  \param mphf pointer to the resulting mphf
 *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
 */
void brz_pack(cmph_t *mphf, void *packed_mphf);

/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
 *  \brief Return the amount of space needed to pack mphf.
 *  \param mphf pointer to a mphf
 *  \return the size of the packed function or zero for failures
 */
cmph_uint32 brz_packed_size(cmph_t *mphf);

/** \fn cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
 *  \brief Use the packed mphf to do a search.
 *  \param  packed_mphf pointer to the packed mphf
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return The mphf value
 */
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);

#endif
|
39
cmph/brz_structs.h
Executable file
39
cmph/brz_structs.h
Executable file
@ -0,0 +1,39 @@
|
|||||||
|
#ifndef __CMPH_BRZ_STRUCTS_H__
#define __CMPH_BRZ_STRUCTS_H__

#include "hash_state.h"

/* A built BRZ function: keys are split into k buckets by h0, and every
 * bucket i carries its own h1[i], h2[i] and g[i]. */
struct __brz_data_t
{
	CMPH_ALGO algo;      /* algorithm used for the per-bucket MPHFs (only CMPH_FCH and CMPH_BMZ8) */
	cmph_uint32 m;       /* number of edges (words) */
	double c;            /* constant c */
	cmph_uint8  *size;   /* size[i]: number of edges represented by g[i][...] */
	cmph_uint32 *offset; /* offset[i]: size[0] + size[1] + ... + size[i-1] */
	cmph_uint8 **g;      /* g function, one array per bucket */
	cmph_uint32 k;       /* number of components (buckets) */
	hash_state_t **h1;
	hash_state_t **h2;
	hash_state_t * h0;
};

/* Construction-time state for BRZ: the same tables as above plus the
 * build parameters. */
struct __brz_config_data_t
{
	CMPH_HASH hashfuncs[3];
	CMPH_ALGO algo;      /* algorithm used for the per-bucket MPHFs (only CMPH_FCH and CMPH_BMZ8) */
	double c;            /* constant c */
	cmph_uint32 m;       /* number of edges (words) */
	cmph_uint8  *size;   /* size[i]: number of edges represented by g[i][...] */
	cmph_uint32 *offset; /* offset[i]: size[0] + size[1] + ... + size[i-1] */
	cmph_uint8 **g;      /* g function, one array per bucket */
	cmph_uint8  b;       /* parameter b */
	cmph_uint32 k;       /* number of components (buckets) */
	hash_state_t **h1;
	hash_state_t **h2;
	hash_state_t * h0;
	cmph_uint32 memory_availability;
	cmph_uint8 * tmp_dir; /* temporary directory */
	FILE * mphf_fd;       /* mphf file */
};

#endif
|
103
cmph/buffer_entry.c
Normal file
103
cmph/buffer_entry.c
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
#include "buffer_entry.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
struct __buffer_entry_t
|
||||||
|
{
|
||||||
|
FILE *fd;
|
||||||
|
cmph_uint8 * buff;
|
||||||
|
cmph_uint32 capacity, // buffer entry capacity
|
||||||
|
nbytes, // buffer entry used bytes
|
||||||
|
pos; // current read position in buffer entry
|
||||||
|
cmph_uint8 eof; // flag to indicate end of file
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Allocate a buffer entry with the given capacity and no file or data
 * attached. nbytes and pos both start at capacity, so the first
 * buffer_entry_read_key call sees no unread bytes and loads from the file.
 * Allocation failure is only caught by assert().
 */
buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
{
	buffer_entry_t *entry = malloc(sizeof(*entry));
	assert(entry);

	entry->fd = NULL;
	entry->buff = NULL;
	entry->eof = 0;

	entry->capacity = capacity;
	entry->nbytes = capacity;
	entry->pos = capacity;

	return entry;
}
|
||||||
|
|
||||||
|
/* Open `filename` for binary reading and attach the stream to the entry.
 * The result of fopen is stored unchecked, so fd is NULL on failure.
 */
void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename)
{
	FILE *stream = fopen(filename, "rb");
	buffer_entry->fd = stream;
}
|
||||||
|
|
||||||
|
/* Change the capacity used by subsequent loads; the current buffer is left
 * untouched.
 */
void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity)
{
	(*buffer_entry).capacity = capacity;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Return the entry's current capacity. */
cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry)
{
	cmph_uint32 capacity = buffer_entry->capacity;
	return capacity;
}
|
||||||
|
|
||||||
|
/* Replace the entry's buffer with the next `capacity` bytes of its file.
 * The old buffer is freed, a zeroed one is allocated, and up to capacity
 * bytes are read into it. A short read sets the eof flag. The read
 * position is reset to the start of the new buffer.
 */
void buffer_entry_load(buffer_entry_t * buffer_entry)
{
	size_t wanted = (size_t)buffer_entry->capacity;
	size_t got;

	free(buffer_entry->buff);
	buffer_entry->buff = (cmph_uint8 *)calloc(wanted, sizeof(cmph_uint8));

	got = fread(buffer_entry->buff, (size_t)1, wanted, buffer_entry->fd);
	buffer_entry->nbytes = (cmph_uint32)got;

	if (buffer_entry->nbytes != buffer_entry->capacity)
	{
		buffer_entry->eof = 1;
	}
	buffer_entry->pos = 0;
}
|
||||||
|
|
||||||
|
/* Read the next length-prefixed key from the entry.
 *
 * On disk each record is a cmph_uint32 length followed by that many key
 * bytes. The length is stored in *keylen, and the function returns a newly
 * malloc'ed buffer holding the length prefix followed by the key bytes.
 * Nothing in this function keeps a reference to that buffer, so the caller
 * has to free it. Returns NULL once the file is exhausted (eof seen and
 * every buffered byte consumed).
 *
 * Either the prefix or the key may straddle the end of the in-memory
 * buffer; the part already buffered is copied first, the buffer is
 * reloaded, and the remainder is copied from the start of the new buffer.
 *
 * NOTE(review): if a reload returns fewer bytes than the remainder needs
 * (truncated or empty file) the copy still proceeds; not handled here.
 */
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen)
{
	cmph_uint8 * buf = NULL;
	// byte view of *keylen: offsets into the prefix are counted in bytes,
	// so the arithmetic must not be done on the cmph_uint32 pointer
	cmph_uint8 * keylen_bytes = (cmph_uint8 *)keylen;
	cmph_uint32 lacked_bytes = sizeof(*keylen);
	cmph_uint32 copied_bytes = 0;

	if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end
	{
		return NULL;
	}

	// length prefix, possibly split across a reload
	if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
	{
		copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
		lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
		if (copied_bytes != 0) memcpy(keylen_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
		buffer_entry_load(buffer_entry);
	}
	memcpy(keylen_bytes + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
	buffer_entry->pos += lacked_bytes;

	// key bytes, possibly split across a reload
	lacked_bytes = *keylen;
	copied_bytes = 0;
	buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
	memcpy(buf, keylen, sizeof(*keylen));
	if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) {
		copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
		lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
		if (copied_bytes != 0) {
			memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
		}
		buffer_entry_load(buffer_entry);
	}
	memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
	buffer_entry->pos += lacked_bytes;
	return buf;
}
|
||||||
|
|
||||||
|
/* Close the entry's file (if one was opened), free its buffer and free the
 * entry itself.
 *
 * fd is NULL for an entry that was never opened or whose fopen failed, and
 * fclose must not be called with a null stream, hence the guard.
 */
void buffer_entry_destroy(buffer_entry_t * buffer_entry)
{
	if (buffer_entry->fd != NULL)
	{
		fclose(buffer_entry->fd);
	}
	buffer_entry->fd = NULL;
	free(buffer_entry->buff);
	buffer_entry->buff = NULL;
	buffer_entry->capacity = 0;
	buffer_entry->nbytes   = 0;
	buffer_entry->pos      = 0;
	buffer_entry->eof = 0;
	free(buffer_entry);
}
|
14
cmph/buffer_entry.h
Normal file
14
cmph/buffer_entry.h
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#ifndef __CMPH_BUFFER_ENTRY_H__
#define __CMPH_BUFFER_ENTRY_H__

#include "cmph_types.h"
#include <stdio.h>

/* Opaque file-backed read buffer; the layout is private to buffer_entry.c. */
typedef struct __buffer_entry_t buffer_entry_t;

/* Create an entry with the given capacity; no file is attached yet. */
buffer_entry_t * buffer_entry_new(cmph_uint32 capacity);

/* Set / get the capacity used when the buffer is (re)loaded. */
void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity);
cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry);

/* Attach `filename`, opened for binary reading. */
void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename);

/* Read the next length-prefixed key; the length goes to *keylen and a
 * malloc'ed copy of prefix + key is returned, or NULL at end of file. */
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen);

/* Close the file and free the buffer and the entry. */
void buffer_entry_destroy(buffer_entry_t * buffer_entry);

#endif
|
66
cmph/buffer_manage.c
Normal file
66
cmph/buffer_manage.c
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#include "buffer_manage.h"
|
||||||
|
#include "buffer_entry.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
struct __buffer_manage_t
|
||||||
|
{
|
||||||
|
cmph_uint32 memory_avail; // memory available
|
||||||
|
buffer_entry_t ** buffer_entries; // buffer entries to be managed
|
||||||
|
cmph_uint32 nentries; // number of entries to be managed
|
||||||
|
cmph_uint32 *memory_avail_list; // memory available list
|
||||||
|
int pos_avail_list; // current position in memory available list
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Create a manager for `nentries` buffer entries sharing `memory_avail`
 * bytes: every entry starts with memory_avail / nentries + 1 bytes of
 * capacity and the list of reclaimed capacities starts empty.
 * Failure to allocate the manager is only caught by assert().
 */
buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentries)
{
	buffer_manage_t *manager = malloc(sizeof(*manager));
	cmph_uint32 per_entry;
	cmph_uint32 slot;

	assert(manager);

	manager->memory_avail = memory_avail;
	manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
	manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
	manager->pos_avail_list = -1;
	manager->nentries = nentries;

	per_entry = memory_avail / nentries + 1;
	slot = 0;
	while (slot < nentries)
	{
		manager->buffer_entries[slot] = buffer_entry_new(per_entry);
		slot++;
	}
	return manager;
}
|
||||||
|
|
||||||
|
/* Attach `filename` to the entry at `index`. */
void buffer_manage_open(buffer_manage_t * buffer_manage, cmph_uint32 index, char * filename)
{
	buffer_entry_t *entry = buffer_manage->buffer_entries[index];
	buffer_entry_open(entry, filename);
}
|
||||||
|
|
||||||
|
/* Read the next key from the entry at `index`.
 *
 * If an exhausted entry has left capacity on the reclaim list, the most
 * recently stored amount is first added to this entry's capacity. When the
 * read returns NULL (entry exhausted) this entry's capacity is pushed onto
 * the reclaim list for another entry to pick up.
 *
 * buffer_entry_read_key requires a place to store the key length; this
 * function has no such parameter, so a local is passed. The returned buffer
 * starts with the same length prefix, so the value is still available to
 * the caller.
 */
cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32 index)
{
	cmph_uint8 * key = NULL;
	cmph_uint32 keylen = 0; // required by buffer_entry_read_key; not exposed by this interface
	if (buffer_manage->pos_avail_list >= 0 ) // recovering memory
	{
		cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manage->buffer_entries[index]) + buffer_manage->memory_avail_list[(buffer_manage->pos_avail_list)--];
		buffer_entry_set_capacity(buffer_manage->buffer_entries[index], new_capacity);
	}
	key = buffer_entry_read_key(buffer_manage->buffer_entries[index], &keylen);
	if (key == NULL) // storing memory to be recovered
	{
		buffer_manage->memory_avail_list[++(buffer_manage->pos_avail_list)] = buffer_entry_get_capacity(buffer_manage->buffer_entries[index]);
	}
	return key;
}
|
||||||
|
|
||||||
|
/* Destroy every managed entry, then the two arrays and the manager. */
void buffer_manage_destroy(buffer_manage_t * buffer_manage)
{
	cmph_uint32 slot = 0;

	while (slot < buffer_manage->nentries)
	{
		buffer_entry_destroy(buffer_manage->buffer_entries[slot]);
		slot++;
	}

	free(buffer_manage->memory_avail_list);
	free(buffer_manage->buffer_entries);
	free(buffer_manage);
}
|
12
cmph/buffer_manage.h
Normal file
12
cmph/buffer_manage.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef __CMPH_BUFFER_MANAGE_H__
#define __CMPH_BUFFER_MANAGE_H__

#include "cmph_types.h"
#include <stdio.h>

/* Opaque manager of several buffer entries sharing one memory budget. */
typedef struct __buffer_manage_t buffer_manage_t;

/* Create a manager for `nentries` entries sharing `memory_avail` bytes. */
buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentries);

/* Attach `filename` to the entry at `index`. */
void buffer_manage_open(buffer_manage_t * buffer_manage, cmph_uint32 index, char * filename);

/* Read the next key from the entry at `index`; NULL when it is exhausted. */
cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32 index);

/* Destroy the manager and every entry it owns. */
void buffer_manage_destroy(buffer_manage_t * buffer_manage);

#endif
|
64
cmph/buffer_manager.c
Normal file
64
cmph/buffer_manager.c
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
#include "buffer_manager.h"
|
||||||
|
#include "buffer_entry.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
struct __buffer_manager_t
|
||||||
|
{
|
||||||
|
cmph_uint32 memory_avail; // memory available
|
||||||
|
buffer_entry_t ** buffer_entries; // buffer entries to be managed
|
||||||
|
cmph_uint32 nentries; // number of entries to be managed
|
||||||
|
cmph_uint32 *memory_avail_list; // memory available list
|
||||||
|
int pos_avail_list; // current position in memory available list
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Create a manager for `nentries` buffer entries sharing `memory_avail`
 * bytes: every entry starts with memory_avail / nentries + 1 bytes of
 * capacity and the list of reclaimed capacities starts empty.
 * Failure to allocate the manager is only caught by assert().
 */
buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries)
{
	buffer_manager_t *manager = malloc(sizeof(*manager));
	cmph_uint32 per_entry;
	cmph_uint32 slot;

	assert(manager);

	manager->memory_avail = memory_avail;
	manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
	manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
	manager->pos_avail_list = -1;
	manager->nentries = nentries;

	per_entry = memory_avail / nentries + 1;
	slot = 0;
	while (slot < nentries)
	{
		manager->buffer_entries[slot] = buffer_entry_new(per_entry);
		slot++;
	}
	return manager;
}
|
||||||
|
|
||||||
|
/* Attach `filename` to the entry at `index`. */
void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename)
{
	buffer_entry_t *entry = buffer_manager->buffer_entries[index];
	buffer_entry_open(entry, filename);
}
|
||||||
|
|
||||||
|
/* Read the next key from the entry at `index`, storing its length in
 * *keylen.
 *
 * If an exhausted entry has left capacity on the reclaim list, the most
 * recently stored amount is first added to this entry's capacity. When the
 * read returns NULL (entry exhausted) this entry's capacity is pushed onto
 * the reclaim list for another entry to pick up.
 */
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen)
{
	buffer_entry_t *entry = buffer_manager->buffer_entries[index];
	cmph_uint8 *key;

	if (buffer_manager->pos_avail_list >= 0)
	{
		/* pop the most recently reclaimed capacity and give it to this entry */
		cmph_uint32 reclaimed = buffer_manager->memory_avail_list[buffer_manager->pos_avail_list];
		buffer_manager->pos_avail_list -= 1;
		buffer_entry_set_capacity(entry, buffer_entry_get_capacity(entry) + reclaimed);
	}

	key = buffer_entry_read_key(entry, keylen);

	if (key == NULL)
	{
		/* entry exhausted: push its capacity for others to reuse */
		buffer_manager->pos_avail_list += 1;
		buffer_manager->memory_avail_list[buffer_manager->pos_avail_list] = buffer_entry_get_capacity(entry);
	}
	return key;
}
|
||||||
|
|
||||||
|
/* Destroy every managed entry, then the two arrays and the manager. */
void buffer_manager_destroy(buffer_manager_t * buffer_manager)
{
	cmph_uint32 slot = 0;

	while (slot < buffer_manager->nentries)
	{
		buffer_entry_destroy(buffer_manager->buffer_entries[slot]);
		slot++;
	}

	free(buffer_manager->memory_avail_list);
	free(buffer_manager->buffer_entries);
	free(buffer_manager);
}
|
12
cmph/buffer_manager.h
Normal file
12
cmph/buffer_manager.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
/* The guard is specific to this header. It previously reused the
 * buffer_manage.h guard name, which made whichever of the two headers was
 * included second expand to nothing. */
#ifndef __CMPH_BUFFER_MANAGER_H__
#define __CMPH_BUFFER_MANAGER_H__

#include "cmph_types.h"
#include <stdio.h>

/* Opaque manager of several buffer entries sharing one memory budget. */
typedef struct __buffer_manager_t buffer_manager_t;

/* Create a manager for `nentries` entries sharing `memory_avail` bytes. */
buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries);

/* Attach `filename` to the entry at `index`. */
void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename);

/* Read the next key from the entry at `index`; its length is stored in
 * *keylen. Returns NULL when that entry is exhausted. */
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen);

/* Destroy the manager and every entry it owns. */
void buffer_manager_destroy(buffer_manager_t * buffer_manager);

#endif
|
271
cmph/chd.c
Normal file
271
cmph/chd.c
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
#include<stdio.h>
|
||||||
|
#include<stdlib.h>
|
||||||
|
#include<string.h>
|
||||||
|
#include<math.h>
|
||||||
|
#include<time.h>
|
||||||
|
#include<assert.h>
|
||||||
|
#include<limits.h>
|
||||||
|
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "chd_structs.h"
|
||||||
|
#include "chd.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
/* Create the CHD configuration data: a zero-filled structure that owns a
 * nested configuration built on the same key source and set to the
 * CMPH_CHD_PH algorithm. Allocation failure is only caught by assert().
 */
chd_config_data_t *chd_config_new(cmph_config_t *mph)
{
	chd_config_data_t *config = (chd_config_data_t *)calloc((size_t)1, sizeof(chd_config_data_t));
	assert(config);

	config->chd_ph = cmph_config_new(mph->key_source);
	cmph_config_set_algo(config->chd_ph, CMPH_CHD_PH);

	return config;
}
|
||||||
|
|
||||||
|
/* Destroy the CHD configuration data attached to `mph`, including the
 * nested CHD_PH configuration when there is one.
 */
void chd_config_destroy(cmph_config_t *mph)
{
	chd_config_data_t *config = (chd_config_data_t *) mph->data;

	DEBUGP("Destroying algorithm dependent data\n");
	if (config->chd_ph != NULL)
	{
		cmph_config_destroy(config->chd_ph);
		config->chd_ph = NULL;
	}
	free(config);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Forward the hash function selection to the nested CHD_PH configuration. */
void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	chd_config_data_t *config = (chd_config_data_t *) mph->data;
	cmph_config_t *inner = config->chd_ph;

	cmph_config_set_hashfuncs(inner, hashfuncs);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Forward the keys-per-bucket setting to the nested CHD_PH configuration. */
void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
{
	chd_config_data_t *config = (chd_config_data_t *) mph->data;
	cmph_config_t *inner = config->chd_ph;

	cmph_config_set_b(inner, keys_per_bucket);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Forward the keys-per-bin setting to the nested CHD_PH configuration. */
void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{
	chd_config_data_t *config = (chd_config_data_t *) mph->data;
	cmph_config_t *inner = config->chd_ph;

	cmph_config_set_keys_per_bin(inner, keys_per_bin);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Build a CHD function from the configuration in `mph`.
 *
 * Steps:
 *   1. build a CHD_PH function through the nested configuration, pack it
 *      into a private buffer and destroy the unpacked function;
 *   2. collect the index of every bin whose bit is clear in the CHD_PH
 *      occupancy table and turn that list into a compressed rank
 *      structure, which is packed as well;
 *   3. wrap both packed buffers in a new cmph_t.
 *
 * Returns NULL when the CHD_PH construction fails. The allocations made
 * here are not checked.
 */
cmph_t *chd_new(cmph_config_t *mph, double c)
{
	chd_config_data_t *chd = (chd_config_data_t *)mph->data;
	/* taken before cmph_new() runs; its n, m and occup_table are read afterwards */
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
	compressed_rank_t cr;
	cmph_t *chd_phf = NULL;
	cmph_t *mphf = NULL;
	chd_data_t *chdf = NULL;
	cmph_uint8 *packed_chd_phf = NULL;
	cmph_uint8 *packed_cr = NULL;
	cmph_uint32 packed_chd_phf_size = 0;
	cmph_uint32 packed_cr_size = 0;
	cmph_uint32 bin, next_free, nkeys, nvals, nbins;
	cmph_uint32 *vals_table = NULL;
	cmph_uint32 *occup_table = NULL;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif

	cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
	cmph_config_set_graphsize(chd->chd_ph, c);

	if (mph->verbosity)
	{
		fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
	}

	/* step 1: CHD_PH function, kept only in packed form */
	chd_phf = cmph_new(chd->chd_ph);
	if (chd_phf == NULL)
	{
		return NULL;
	}

	packed_chd_phf_size = cmph_packed_size(chd_phf);
	DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);

	packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
	cmph_pack(chd_phf, packed_chd_phf);
	cmph_destroy(chd_phf);

	if (mph->verbosity)
	{
		fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
	}

	/* step 2: compressed rank over the unoccupied bins */
	compressed_rank_init(&cr);
	nbins = chd_ph->n;
	nkeys = chd_ph->m;
	nvals = nbins - nkeys;

	vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
	occup_table = (cmph_uint32 *)chd_ph->occup_table;

	next_free = 0;
	bin = 0;
	while (bin < nbins)
	{
		if (!GETBIT32(occup_table, bin))
		{
			vals_table[next_free] = bin;
			next_free++;
		}
		bin++;
	}

	compressed_rank_generate(&cr, vals_table, nvals);
	free(vals_table);

	packed_cr_size = compressed_rank_packed_size(&cr);
	packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
	compressed_rank_pack(&cr, packed_cr);
	compressed_rank_destroy(&cr);

	/* step 3: assemble the result; both packed buffers now belong to chdf */
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	chdf = (chd_data_t *)malloc(sizeof(chd_data_t));

	chdf->packed_cr = packed_cr;
	chdf->packed_cr_size = packed_cr_size;
	packed_cr = NULL;

	chdf->packed_chd_phf = packed_chd_phf;
	chdf->packed_chd_phf_size = packed_chd_phf_size;
	packed_chd_phf = NULL;

	mphf->data = chdf;
	mphf->size = nkeys;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	register cmph_uint32 space_usage = chd_packed_size(mphf)*8;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
	#endif

	return mphf;
}
|
||||||
|
|
||||||
|
void chd_load(FILE *fd, cmph_t *mphf)
|
||||||
|
{
|
||||||
|
register size_t nbytes;
|
||||||
|
chd_data_t *chd = (chd_data_t *)malloc(sizeof(chd_data_t));
|
||||||
|
|
||||||
|
DEBUGP("Loading chd mphf\n");
|
||||||
|
mphf->data = chd;
|
||||||
|
|
||||||
|
nbytes = fread(&chd->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
DEBUGP("Loading CHD_PH perfect hash function with %u bytes to disk\n", chd->packed_chd_phf_size);
|
||||||
|
chd->packed_chd_phf = (cmph_uint8 *) calloc((size_t)chd->packed_chd_phf_size,(size_t)1);
|
||||||
|
nbytes = fread(chd->packed_chd_phf, chd->packed_chd_phf_size, (size_t)1, fd);
|
||||||
|
|
||||||
|
nbytes = fread(&chd->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
DEBUGP("Loading Compressed rank structure, which has %u bytes\n", chd->packed_cr_size);
|
||||||
|
chd->packed_cr = (cmph_uint8 *) calloc((size_t)chd->packed_cr_size, (size_t)1);
|
||||||
|
nbytes = fread(chd->packed_cr, chd->packed_cr_size, (size_t)1, fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Write a CHD function to fd.
 *
 * After the generic header emitted by __cmph_dump(), two length-prefixed
 * sections are written: the packed CHD_PH function and the packed
 * compressed-rank structure, each preceded by its size as a cmph_uint32.
 *
 * Returns 1 on success and 0 if any write fails.
 */
int chd_dump(cmph_t *mphf, FILE *fd)
{
    chd_data_t *data = (chd_data_t *)mphf->data;

    __cmph_dump(mphf, fd);

    /* Dumping CHD_PH perfect hash function */
    DEBUGP("Dumping CHD_PH perfect hash function with %u bytes to disk\n", data->packed_chd_phf_size);
    if (fwrite(&data->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        goto write_error;
    }
    /* A zero-sized item makes fwrite return 0, which is not an error. */
    if (data->packed_chd_phf_size > 0 &&
        fwrite(data->packed_chd_phf, data->packed_chd_phf_size, (size_t)1, fd) != 1)
    {
        goto write_error;
    }

    /* Dumping compressed rank structure */
    DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", data->packed_cr_size);
    if (fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        goto write_error;
    }
    if (data->packed_cr_size > 0 &&
        fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd) != 1)
    {
        goto write_error;
    }

    return 1;

write_error:
    fprintf(stderr, "ERROR: failed to write CHD function to disk\n");
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Release a CHD function: both packed payloads, the chd_data_t that owns
 * them, and finally the cmph_t wrapper itself.
 */
void chd_destroy(cmph_t *mphf)
{
    chd_data_t *chd = (chd_data_t *)mphf->data;

    /* Payloads first, then the struct holding the pointers to them. */
    free(chd->packed_cr);
    free(chd->packed_chd_phf);
    free(chd);

    free(mphf);
}
|
||||||
|
|
||||||
|
/*
 * Shared lookup used by both the in-memory and the packed search entry points.
 *
 * The key is first mapped to a bin by the packed CHD_PH function; the value
 * returned by the compressed-rank query for that bin is then subtracted from
 * the bin index.
 * NOTE(review): the rank presumably counts the unoccupied bins preceding the
 * bin, which would make the result dense in [0, nkeys) - confirm against
 * compressed_rank.c.
 */
static inline cmph_uint32 _chd_search(void * packed_chd_phf, void * packed_cr, const char *key, cmph_uint32 keylen)
{
    const cmph_uint32 bin = cmph_search_packed(packed_chd_phf, key, keylen);
    const cmph_uint32 skipped = compressed_rank_query_packed(packed_cr, bin);

    return bin - skipped;
}
|
||||||
|
|
||||||
|
/*
 * Evaluate the CHD function held in mphf for the given key.
 *
 * \param mphf   function whose data member is a chd_data_t
 * \param key    key bytes
 * \param keylen key length in bytes
 * \return the value computed by _chd_search() on the two packed structures
 */
cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
    chd_data_t *fn = mphf->data;
    void *phf = fn->packed_chd_phf;
    void *cr = fn->packed_cr;

    return _chd_search(phf, cr, key, keylen);
}
|
||||||
|
|
||||||
|
/*
 * Serialise a CHD function into the caller-provided buffer packed_mphf.
 *
 * Layout written, in order:
 *   cmph_uint32 packed_cr_size
 *   packed_cr_size bytes of packed_cr
 *   cmph_uint32 packed_chd_phf_size
 *   packed_chd_phf_size bytes of packed_chd_phf
 *
 * The two length words are stored with memcpy rather than through a
 * cmph_uint32 pointer: the second one sits packed_cr_size bytes after the
 * first, so a direct store would be misaligned whenever packed_cr_size is
 * not a multiple of sizeof(cmph_uint32).  The bytes produced are identical.
 */
void chd_pack(cmph_t *mphf, void *packed_mphf)
{
    chd_data_t *data = (chd_data_t *)mphf->data;
    cmph_uint8 *out = (cmph_uint8 *)packed_mphf;

    /* packing packed_cr_size and packed_cr */
    memcpy(out, &data->packed_cr_size, sizeof(cmph_uint32));
    out += sizeof(cmph_uint32);
    memcpy(out, data->packed_cr, data->packed_cr_size);
    out += data->packed_cr_size;

    /* packing packed_chd_phf_size and packed_chd_phf */
    memcpy(out, &data->packed_chd_phf_size, sizeof(cmph_uint32));
    out += sizeof(cmph_uint32);
    memcpy(out, data->packed_chd_phf, data->packed_chd_phf_size);
}
|
||||||
|
|
||||||
|
/*
 * Number of bytes reported as needed to pack mphf: one CMPH_ALGO value, the
 * two cmph_uint32 length words, and the two packed payloads.
 */
cmph_uint32 chd_packed_size(cmph_t *mphf)
{
    chd_data_t *data = (chd_data_t *)mphf->data;
    size_t total = sizeof(CMPH_ALGO);

    total += 2*sizeof(cmph_uint32);
    total += data->packed_cr_size;
    total += data->packed_chd_phf_size;

    return (cmph_uint32)total;
}
|
||||||
|
|
||||||
|
/*
 * Evaluate a CHD function directly from the buffer produced by chd_pack().
 *
 * The buffer starts with packed_cr_size, followed by the compressed-rank
 * payload; the CHD_PH payload begins after that payload and the second
 * length word.
 */
cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
    cmph_uint32 *words = packed_mphf;
    const cmph_uint32 packed_cr_size = words[0];
    cmph_uint32 *packed_cr = words + 1;
    cmph_uint8 *packed_chd_phf = ((cmph_uint8 *) packed_cr) + packed_cr_size + sizeof(cmph_uint32);

    return _chd_search(packed_chd_phf, packed_cr, key, keylen);
}
|
||||||
|
|
||||||
|
|
59
cmph/chd.h
Normal file
59
cmph/chd.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
#ifndef _CMPH_CHD_H__
|
||||||
|
#define _CMPH_CHD_H__
|
||||||
|
|
||||||
|
#include "cmph.h"
|
||||||
|
|
||||||
|
typedef struct __chd_data_t chd_data_t;
|
||||||
|
typedef struct __chd_config_data_t chd_config_data_t;
|
||||||
|
|
||||||
|
/* Config API */
|
||||||
|
chd_config_data_t *chd_config_new(cmph_config_t * mph);
|
||||||
|
void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||||
|
|
||||||
|
/** \fn void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
|
||||||
|
* \brief Allows to set the number of keys per bin.
|
||||||
|
* \param mph pointer to the configuration structure
|
||||||
|
* \param keys_per_bin value for the number of keys per bin
|
||||||
|
*/
|
||||||
|
void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
|
||||||
|
|
||||||
|
/** \fn void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
|
||||||
|
* \brief Allows to set the number of keys per bucket.
|
||||||
|
* \param mph pointer to the configuration structure
|
||||||
|
* \param keys_per_bucket value for the number of keys per bucket
|
||||||
|
*/
|
||||||
|
void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
|
||||||
|
void chd_config_destroy(cmph_config_t *mph);
|
||||||
|
|
||||||
|
|
||||||
|
/* Chd algorithm API */
|
||||||
|
cmph_t *chd_new(cmph_config_t *mph, double c);
|
||||||
|
void chd_load(FILE *fd, cmph_t *mphf);
|
||||||
|
int chd_dump(cmph_t *mphf, FILE *fd);
|
||||||
|
void chd_destroy(cmph_t *mphf);
|
||||||
|
cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void chd_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void chd_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 chd_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 chd_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
#endif
|
988
cmph/chd_ph.c
Normal file
988
cmph/chd_ph.c
Normal file
@ -0,0 +1,988 @@
|
|||||||
|
#include<stdio.h>
|
||||||
|
#include<stdlib.h>
|
||||||
|
#include<string.h>
|
||||||
|
#include<math.h>
|
||||||
|
#include<time.h>
|
||||||
|
#include<assert.h>
|
||||||
|
#include<limits.h>
|
||||||
|
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "chd_structs_ph.h"
|
||||||
|
#include "chd_ph.h"
|
||||||
|
#include"miller_rabin.h"
|
||||||
|
#include"bitbool.h"
|
||||||
|
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
// NO_ELEMENT is equivalent to null pointer
|
||||||
|
#ifndef NO_ELEMENT
|
||||||
|
#define NO_ELEMENT UINT_MAX
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// struct used to represent items at mapping, ordering and searching phases
|
||||||
|
struct _chd_ph_item_t
|
||||||
|
{
|
||||||
|
cmph_uint32 f;
|
||||||
|
cmph_uint32 h;
|
||||||
|
};
|
||||||
|
typedef struct _chd_ph_item_t chd_ph_item_t;
|
||||||
|
|
||||||
|
// struct to represent the items at mapping phase only.
|
||||||
|
struct _chd_ph_map_item_t
|
||||||
|
{
|
||||||
|
cmph_uint32 f;
|
||||||
|
cmph_uint32 h;
|
||||||
|
cmph_uint32 bucket_num;
|
||||||
|
};
|
||||||
|
typedef struct _chd_ph_map_item_t chd_ph_map_item_t;
|
||||||
|
|
||||||
|
// struct to represent a bucket
|
||||||
|
struct _chd_ph_bucket_t
|
||||||
|
{
|
||||||
|
cmph_uint32 items_list; // offset
|
||||||
|
union
|
||||||
|
{
|
||||||
|
cmph_uint32 size;
|
||||||
|
cmph_uint32 bucket_id;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _chd_ph_bucket_t chd_ph_bucket_t;
|
||||||
|
|
||||||
|
struct _chd_ph_sorted_list_t
|
||||||
|
{
|
||||||
|
cmph_uint32 buckets_list;
|
||||||
|
cmph_uint32 size;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _chd_ph_sorted_list_t chd_ph_sorted_list_t;
|
||||||
|
|
||||||
|
|
||||||
|
static inline chd_ph_bucket_t * chd_ph_bucket_new(cmph_uint32 nbuckets);
|
||||||
|
static inline void chd_ph_bucket_clean(chd_ph_bucket_t * buckets, cmph_uint32 nbuckets);
|
||||||
|
static inline void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets);
|
||||||
|
|
||||||
|
/* Allocate nbuckets zero-initialised buckets; returns whatever calloc returns. */
chd_ph_bucket_t * chd_ph_bucket_new(cmph_uint32 nbuckets)
{
    return (chd_ph_bucket_t *) calloc(nbuckets, sizeof(chd_ph_bucket_t));
}
|
||||||
|
|
||||||
|
/* Reset the size of every one of the nbuckets buckets to zero. */
void chd_ph_bucket_clean(chd_ph_bucket_t * buckets, cmph_uint32 nbuckets)
{
    chd_ph_bucket_t *cur;
    chd_ph_bucket_t *end;

    assert(buckets);
    end = buckets + nbuckets;
    for(cur = buckets; cur != end; cur++)
    {
        cur->size = 0;
    }
}
|
||||||
|
/*
 * Append map_items[item_idx] to the bucket it was mapped to.
 *
 * The bucket's items live in items[] starting at its items_list offset.  If
 * an item with the same (f, h) pair is already stored there, nothing is added
 * and 0 is returned; otherwise the pair is written after the existing items,
 * the bucket size is incremented and 1 is returned.
 *
 * nbuckets is not used.
 */
cmph_uint8 chd_ph_bucket_insert(chd_ph_bucket_t * buckets,chd_ph_map_item_t * map_items, chd_ph_item_t * items,
                cmph_uint32 nbuckets,cmph_uint32 item_idx)
{
    chd_ph_map_item_t * incoming = &map_items[item_idx];
    chd_ph_bucket_t * target = &buckets[incoming->bucket_num];
    chd_ph_item_t * slots = &items[target->items_list];
    cmph_uint32 k;

    (void)nbuckets;

    for(k = 0; k < target->size; k++)
    {
        if(slots[k].f == incoming->f && slots[k].h == incoming->h)
        {
            DEBUGP("Item not added\n");
            return 0;
        }
    }

    /* k now equals the bucket size, i.e. the first free slot. */
    slots[k].f = incoming->f;
    slots[k].h = incoming->h;
    target->size++;
    return 1;
}
|
||||||
|
/* Free a bucket array obtained from chd_ph_bucket_new(). */
void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets)
{
    chd_ph_bucket_t * doomed = buckets;

    free(doomed);
}
|
||||||
|
|
||||||
|
static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
|
||||||
|
cmph_uint32 *max_bucket_size);
|
||||||
|
|
||||||
|
static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items,
|
||||||
|
cmph_uint32 nbuckets,cmph_uint32 nitems, cmph_uint32 max_bucket_size);
|
||||||
|
|
||||||
|
static cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
|
||||||
|
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table);
|
||||||
|
|
||||||
|
/*
 * Evaluate (1 + (r/n - 1 + 1/(2n)) * ln(1 - n/r)) / ln(2) for n = _n and
 * r = _r.  The caller in this file passes the number of keys as _n and the
 * number of bins as _r and prints the result as "bits per key".
 */
static inline double chd_ph_space_lower_bound(cmph_uint32 _n, cmph_uint32 _r)
{
    const double n = _n;
    const double r = _r;
    const double weight = r/n - 1.0 + 1.0/(2.0*n);

    return (1 + weight*log(1 - n/r))/log(2);
}
|
||||||
|
|
||||||
|
/* computes the entropy of non empty buckets.*/
|
||||||
|
static inline double chd_ph_get_entropy(cmph_uint32 * disp_table, cmph_uint32 n, cmph_uint32 max_probes)
|
||||||
|
{
|
||||||
|
register cmph_uint32 * probe_counts = (cmph_uint32 *) calloc(max_probes, sizeof(cmph_uint32));
|
||||||
|
register cmph_uint32 i;
|
||||||
|
register double entropy = 0;
|
||||||
|
|
||||||
|
for(i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
probe_counts[disp_table[i]]++;
|
||||||
|
};
|
||||||
|
|
||||||
|
for(i = 0; i < max_probes; i++)
|
||||||
|
{
|
||||||
|
if(probe_counts[i] > 0)
|
||||||
|
entropy -= probe_counts[i]*log((double)probe_counts[i]/(double)n)/log(2);
|
||||||
|
};
|
||||||
|
free(probe_counts);
|
||||||
|
return entropy;
|
||||||
|
};
|
||||||
|
|
||||||
|
chd_ph_config_data_t *chd_ph_config_new()
|
||||||
|
{
|
||||||
|
chd_ph_config_data_t *chd_ph;
|
||||||
|
chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t));
|
||||||
|
assert(chd_ph);
|
||||||
|
memset(chd_ph, 0, sizeof(chd_ph_config_data_t));
|
||||||
|
|
||||||
|
chd_ph->hashfunc = CMPH_HASH_JENKINS;
|
||||||
|
chd_ph->cs = NULL;
|
||||||
|
chd_ph->nbuckets = 0;
|
||||||
|
chd_ph->n = 0;
|
||||||
|
chd_ph->hl = NULL;
|
||||||
|
|
||||||
|
chd_ph->m = 0;
|
||||||
|
chd_ph->use_h = 1;
|
||||||
|
chd_ph->keys_per_bin = 1;
|
||||||
|
chd_ph->keys_per_bucket = 4;
|
||||||
|
chd_ph->occup_table = 0;
|
||||||
|
|
||||||
|
return chd_ph;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Free the CHD_PH configuration attached to mph->data together with its
 * occupancy table, if one was allocated.
 */
void chd_ph_config_destroy(cmph_config_t *mph)
{
    chd_ph_config_data_t *cfg = (chd_ph_config_data_t *) mph->data;

    DEBUGP("Destroying algorithm dependent data\n");

    /* free(NULL) is a no-op, so no guard is needed for a missing table. */
    free(cfg->occup_table);
    free(cfg);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Select the hash function used by CHD_PH.
 *
 * hashfuncs is a list terminated by CMPH_HASH_COUNT.  chd_ph only uses one
 * hash function, so just the first entry is taken; an empty list leaves the
 * current setting untouched.
 */
void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
    chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;

    if(hashfuncs[0] != CMPH_HASH_COUNT)
    {
        chd_ph->hashfunc = hashfuncs[0];
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Set the number of keys per bucket.  Values outside [1, 14] are replaced
 * by the default of 4.
 */
void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
{
    const int in_range = (keys_per_bucket >= 1 && keys_per_bucket < 15);
    chd_ph_config_data_t *chd_ph;

    assert(mph);
    chd_ph = (chd_ph_config_data_t *)mph->data;
    chd_ph->keys_per_bucket = in_range ? keys_per_bucket : 4;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Set the number of keys per bin.  Only values in [2, 127] are taken as
 * given; anything else results in 1 key per bin.
 */
void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{
    const int in_range = (keys_per_bin > 1 && keys_per_bin < 128);
    chd_ph_config_data_t *chd_ph;

    assert(mph);
    chd_ph = (chd_ph_config_data_t *)mph->data;
    chd_ph->keys_per_bin = in_range ? keys_per_bin : 1;
}
|
||||||
|
|
||||||
|
/*
 * Mapping phase: hash every key and distribute the keys into buckets.
 *
 * Each attempt draws a fresh hash state, reads all chd_ph->m keys from the
 * key source and derives, per key, a bucket number (hl[0] % nbuckets) and the
 * pair f = hl[1] % n, h = hl[2] % (n - 1) + 1.  Bucket sizes are counted
 * first, turned into offsets into items[], and then the items are inserted
 * bucket by bucket.  An attempt fails when chd_ph_bucket_insert() rejects an
 * item because the same (f, h) pair is already in its bucket; up to 1000
 * attempts are made.
 *
 * \param mph             configuration; mph->data is the chd_ph config
 * \param buckets         array of chd_ph->nbuckets buckets, filled in place
 * \param items           array of chd_ph->m items, filled in place
 * \param max_bucket_size receives the largest bucket size of the attempt
 *                        that succeeded
 * \return 1 on success.  0 on failure: after the last failed attempt the
 *         hash state is destroyed and chd_ph->hl set to NULL; if the
 *         temporary item array cannot be allocated, 0 is returned before
 *         any attempt and chd_ph->hl is left untouched.
 */
cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, cmph_uint32 *max_bucket_size)
{
    cmph_uint32 i = 0, g = 0;
    cmph_uint32 hl[3];
    chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
    char * key = NULL;
    cmph_uint32 keylen = 0;
    chd_ph_map_item_t * map_item;
    chd_ph_map_item_t * map_items;
    cmph_uint32 mapping_iterations;

    *max_bucket_size = 0;

    map_items = malloc(chd_ph->m*sizeof(chd_ph_map_item_t));
    if(map_items == NULL && chd_ph->m != 0)
    {
        /* malloc(0) may legitimately return NULL, hence the m != 0 test. */
        return 0; // FAILURE
    }

    for(mapping_iterations = 1000; mapping_iterations > 0; mapping_iterations--)
    {
        if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
        chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);

        chd_ph_bucket_clean(buckets, chd_ph->nbuckets);
        /* Start every attempt from scratch so that a failed attempt cannot
         * leave a stale, too large maximum behind. */
        *max_bucket_size = 0;

        mph->key_source->rewind(mph->key_source->data);

        /* Pass 1: hash the keys and count the size of each bucket. */
        for(i = 0; i < chd_ph->m; i++)
        {
            mph->key_source->read(mph->key_source->data, &key, &keylen);
            hash_vector(chd_ph->hl, key, keylen, hl);

            map_item = (map_items + i);

            g = hl[0] % chd_ph->nbuckets;
            map_item->f = hl[1] % chd_ph->n;
            map_item->h = hl[2] % (chd_ph->n - 1) + 1;
            map_item->bucket_num = g;
            mph->key_source->dispose(mph->key_source->data, key, keylen);

            buckets[g].size++;
            if(buckets[g].size > *max_bucket_size)
            {
                *max_bucket_size = buckets[g].size;
            }
        }

        /* Turn the counts into offsets into items[] and reset the sizes;
         * chd_ph_bucket_insert() counts them up again while filling. */
        buckets[0].items_list = 0;
        for(i = 1; i < chd_ph->nbuckets; i++)
        {
            buckets[i].items_list = buckets[i-1].items_list + buckets[i - 1].size;
            buckets[i - 1].size = 0;
        }
        buckets[i - 1].size = 0;

        /* Pass 2: place every item into its bucket; stop at the first
         * rejected item. */
        for(i = 0; i < chd_ph->m; i++)
        {
            if(!chd_ph_bucket_insert(buckets, map_items, items, chd_ph->nbuckets, i))
                break;
        }
        if(i == chd_ph->m)
        {
            free(map_items);
            return 1; // SUCCESS
        }
    }

    /* Every attempt was rejected. */
    free(map_items);
    hash_state_destroy(chd_ph->hl);
    chd_ph->hl = NULL;
    return 0; // FAILURE
}
|
||||||
|
|
||||||
|
/*
 * Ordering phase: regroup the buckets, and their items, by bucket size.
 *
 * On return *_buckets points to a new array in which the non-empty buckets
 * are stored grouped by ascending size (empty buckets are dropped and the
 * unused tail stays zeroed), and *_items points to a new item array laid out
 * in that same order.  The input arrays are freed.  In the new bucket array
 * the union member holds bucket_id (the bucket's index in the input array)
 * instead of size; the size of a bucket is implied by the group it is in.
 *
 * \return an array indexed by bucket size (1..max_bucket_size) giving, for
 *         each size, the offset of its group in the new bucket array and the
 *         number of buckets in it.  The caller owns the array.
 */
chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_t ** _items,
    cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size)
{
    /* Index 1 is written unconditionally below, so at least two entries are
     * needed even when there are no keys (max_bucket_size == 0). */
    cmph_uint32 nlists = (max_bucket_size < 1) ? 2 : max_bucket_size + 1;
    chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(nlists, sizeof(chd_ph_sorted_list_t));

    chd_ph_bucket_t * input_buckets = (*_buckets);
    chd_ph_bucket_t * output_buckets;
    chd_ph_item_t * input_items = (*_items);
    chd_ph_item_t * output_items;
    cmph_uint32 i, j, bucket_size, position, position2;

    DEBUGP("MAX BUCKET SIZE = %u\n", max_bucket_size);

    // Determine size of each list of buckets
    for(i = 0; i < nbuckets; i++)
    {
        bucket_size = input_buckets[i].size;
        if(bucket_size == 0)
            continue;
        sorted_lists[bucket_size].size++;
    }

    // Determine final position of list of buckets into the contiguous array that will store all the buckets.
    // The sizes are reset here and counted up again while the buckets are copied below.
    sorted_lists[1].buckets_list = 0;
    for(i = 2; i <= max_bucket_size; i++)
    {
        sorted_lists[i].buckets_list = sorted_lists[i-1].buckets_list + sorted_lists[i-1].size;
        sorted_lists[i-1].size = 0;
    }
    sorted_lists[i-1].size = 0;

    // Store the buckets in a new array which is sorted by bucket sizes
    output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero

    for(i = 0; i < nbuckets; i++)
    {
        bucket_size = input_buckets[i].size;
        if(bucket_size == 0)
        {
            continue;
        }
        position = sorted_lists[bucket_size].buckets_list + sorted_lists[bucket_size].size;
        output_buckets[position].bucket_id = i;
        output_buckets[position].items_list = input_buckets[i].items_list;
        sorted_lists[bucket_size].size++;
    }

    // Return the buckets sorted in new order and free the old buckets sorted in old order
    free(input_buckets);
    (*_buckets) = output_buckets;

    // Store the items according to the new order of buckets.
    output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t));
    position = 0;
    for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
    {
        for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size + sorted_lists[bucket_size].buckets_list; i++)
        {
            // items_list still holds the offset into the old item array;
            // replace it with the offset into the new one.
            position2 = output_buckets[i].items_list;
            output_buckets[i].items_list = position;
            for(j = 0; j < bucket_size; j++)
            {
                output_items[position].f = input_items[position2].f;
                output_items[position].h = input_items[position2].h;
                position++;
                position2++;
            }
        }
    }

    //Return the items sorted in new order and free the old items sorted in old order
    free(input_items);
    (*_items) = output_items;
    return sorted_lists;
}
|
||||||
|
|
||||||
|
static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets,
|
||||||
|
chd_ph_item_t *items, cmph_uint32 probe0_num, cmph_uint32 probe1_num,
|
||||||
|
cmph_uint32 bucket_num, cmph_uint32 size)
|
||||||
|
{
|
||||||
|
register cmph_uint32 i;
|
||||||
|
register chd_ph_item_t * item;
|
||||||
|
register cmph_uint32 position;
|
||||||
|
|
||||||
|
item = items + buckets[bucket_num].items_list;
|
||||||
|
// try place bucket with probe_num
|
||||||
|
if(chd_ph->keys_per_bin > 1)
|
||||||
|
{
|
||||||
|
for(i = 0; i < size; i++) // placement
|
||||||
|
{
|
||||||
|
position = (cmph_uint32)((item->f + ((cmph_uint64)item->h)*probe0_num + probe1_num) % chd_ph->n);
|
||||||
|
if(chd_ph->occup_table[position] >= chd_ph->keys_per_bin)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
(chd_ph->occup_table[position])++;
|
||||||
|
item++;
|
||||||
|
};
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
for(i = 0; i < size; i++) // placement
|
||||||
|
{
|
||||||
|
position = (cmph_uint32)((item->f + ((cmph_uint64)item->h)*probe0_num + probe1_num) % chd_ph->n);
|
||||||
|
if(GETBIT32(((cmph_uint32 *)chd_ph->occup_table), position))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
SETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
|
||||||
|
item++;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
if(i != size) // Undo the placement
|
||||||
|
{
|
||||||
|
item = items + buckets[bucket_num].items_list;
|
||||||
|
if(chd_ph->keys_per_bin > 1)
|
||||||
|
{
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
if(i == 0)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
|
||||||
|
(chd_ph->occup_table[position])--;
|
||||||
|
item++;
|
||||||
|
i--;
|
||||||
|
};
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
if(i == 0)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
|
||||||
|
UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
|
||||||
|
|
||||||
|
// ([position/32]^=(1<<(position%32));
|
||||||
|
item++;
|
||||||
|
i--;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Find a probe pair that places the whole bucket `bucket_num`.
 *
 * Probe pairs are tried in the order (0,0), (1,0), ..., (n-1,0), (0,1), ...
 * On success the displacement probe0 + probe1 * n is stored in disp_table
 * under the bucket's bucket_id and 1 is returned.  0 is returned once
 * max_probes pairs have been tried or probe1 reaches n.  The first pair is
 * always tried, whatever the value of max_probes.
 */
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
                    cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size)
{
    cmph_uint32 probe0 = 0;
    cmph_uint32 probe1 = 0;
    cmph_uint32 tried = 0;

    do
    {
        if(place_bucket_probe(chd_ph, buckets, items, probe0, probe1, bucket_num, size))
        {
            disp_table[buckets[bucket_num].bucket_id] = probe0 + probe1 * chd_ph->n;
            return 1;
        }

        /* Advance to the next probe pair. */
        probe0++;
        if(probe0 >= chd_ph->n)
        {
            probe0 -= chd_ph->n;
            probe1++;
        }
        tried++;
    } while(tried < max_probes && probe1 < chd_ph->n);

    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Place every bucket, largest size first, one bucket at a time.
 *
 * For each size from max_bucket_size down to 1, the buckets of that size are
 * handed to place_bucket() in their order within the sorted array.  Returns 0
 * as soon as one bucket cannot be placed, 1 when all were placed.
 */
static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items,
                    cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
                    cmph_uint32 * disp_table)
{
    cmph_uint32 bucket_size;

    for(bucket_size = max_bucket_size; bucket_size > 0; bucket_size--)
    {
        const cmph_uint32 begin = sorted_lists[bucket_size].buckets_list;
        const cmph_uint32 end = sorted_lists[bucket_size].size + begin;
        cmph_uint32 bucket;

        for(bucket = begin; bucket < end; bucket++)
        {
            if(!place_bucket(chd_ph, buckets, items, max_probes, disp_table, bucket, bucket_size))
            {
                return 0;
            }
        }
    }
    return 1;
}
|
||||||
|
|
||||||
|
/*
 * Place every bucket, largest size first, trying one probe pair on all
 * still-unplaced buckets of a size before moving on to the next pair.
 *
 * For each size, probe pairs are tried in the order (0,0), (1,0), ...,
 * (n-1,0), (0,1), ...  In each pass the buckets that could not be placed are
 * compacted to the front of their group in buckets[] so the next pass only
 * revisits those; sorted_lists[size].size temporarily holds the number of
 * pending buckets and is restored before leaving the size.  Without DEBUG,
 * compaction overwrites the front entries (placed buckets are no longer
 * needed there); with DEBUG the entries are swapped instead so that no
 * bucket is lost from the array.
 *
 * Returns 1 when all buckets were placed.  Returns 0 when buckets of some
 * size are still pending after max_probes probe pairs, or after probe1
 * reached n.  The budget is only consulted while buckets are pending, so a
 * size whose last bucket is placed by the final permitted probe counts as a
 * success, and the first probe pair is always tried - the same limits as
 * place_bucket().
 */
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
                    cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
                    cmph_uint32 * disp_table)
{
    cmph_uint32 i, j, non_placed_bucket;
    cmph_uint32 curr_bucket;
    cmph_uint32 probe_num, probe0_num, probe1_num;
    cmph_uint32 sorted_list_size;
#ifdef DEBUG
    cmph_uint32 items_list;
    cmph_uint32 bucket_id;
#endif

    DEBUGP("USING HEURISTIC TO PLACE BUCKETS\n");
    for(i = max_bucket_size; i > 0; i--)
    {
        probe_num = 0;
        probe0_num = 0;
        probe1_num = 0;
        sorted_list_size = sorted_lists[i].size;
        while(sorted_lists[i].size != 0)
        {
            curr_bucket = sorted_lists[i].buckets_list;
            for(j = 0, non_placed_bucket = 0; j < sorted_lists[i].size; j++)
            {
                // if bucket is successfully placed remove it from list
                if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i))
                {
                    disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n;
                }
                else
                {
                    // keep the bucket for the next pass: move it to the
                    // front part of the group
#ifdef DEBUG
                    items_list = buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list;
                    bucket_id = buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id;
#endif
                    buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list;
                    buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id;
#ifdef DEBUG
                    buckets[curr_bucket].items_list = items_list;
                    buckets[curr_bucket].bucket_id = bucket_id;
#endif
                    non_placed_bucket++;
                }
                curr_bucket++;
            }
            sorted_lists[i].size = non_placed_bucket;
            if(non_placed_bucket == 0)
            {
                // every bucket of this size is placed; the probe budget
                // no longer matters
                break;
            }

            probe0_num++;
            if(probe0_num >= chd_ph->n)
            {
                probe0_num -= chd_ph->n;
                probe1_num++;
            }
            probe_num++;
            if(probe_num >= max_probes || probe1_num >= chd_ph->n)
            {
                sorted_lists[i].size = sorted_list_size;
                return 0;
            }
        }
        sorted_lists[i].size = sorted_list_size;
    }
    return 1;
}
|
||||||
|
|
||||||
|
/*
 * Searching phase: assign a displacement to every bucket.
 *
 * Dispatches on chd_ph->use_h: place_buckets2() when it is set,
 * place_buckets1() otherwise, and returns that function's result.
 */
cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
               cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
               cmph_uint32 * disp_table)
{
    if(!chd_ph->use_h)
    {
        return place_buckets1(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
    }
    return place_buckets2(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
}
|
||||||
|
|
||||||
|
/*
 * Verify the displacements in disp_table.
 *
 * The occupancy table is cleared and every item of every bucket is placed
 * again at (f + h * probe0 + probe1) % n, where probe0 and probe1 are the
 * remainder and quotient of the bucket's displacement divided by n.
 * Returns 0 as soon as an item lands on a bin that is already full
 * (keys_per_bin keys in counter mode, bit already set in bit-vector mode)
 * and 1 if all items fit.  The occupancy table is left holding the marks
 * made during the check.
 */
static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items,
                          cmph_uint32 * disp_table, chd_ph_sorted_list_t * sorted_lists,cmph_uint32 max_bucket_size)
{
    const int counting = (chd_ph->keys_per_bin > 1);
    cmph_uint32 * bits = (cmph_uint32*)chd_ph->occup_table;
    cmph_uint32 bucket_size, bucket, k;
    cmph_uint32 m = 0; /* number of items visited; only reported by DEBUGP */

    if(counting)
    {
        memset(chd_ph->occup_table, 0, chd_ph->n);
    }
    else
    {
        memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
    }

    for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
    {
        const cmph_uint32 begin = sorted_lists[bucket_size].buckets_list;
        const cmph_uint32 end = sorted_lists[bucket_size].size + begin;

        for(bucket = begin; bucket < end; bucket++)
        {
            const chd_ph_item_t * item = items + buckets[bucket].items_list;
            const cmph_uint32 disp = disp_table[buckets[bucket].bucket_id];
            const cmph_uint32 probe0_num = disp % chd_ph->n;
            const cmph_uint32 probe1_num = disp / chd_ph->n;

            for(k = 0; k < bucket_size; k++, item++)
            {
                cmph_uint32 position;

                m++;
                position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
                if(counting)
                {
                    if(chd_ph->occup_table[position] >= chd_ph->keys_per_bin)
                    {
                        return 0;
                    }
                    (chd_ph->occup_table[position])++;
                }
                else
                {
                    if(GETBIT32(bits, position))
                    {
                        return 0;
                    }
                    SETBIT32(bits, position);
                }
            }
        }
    }
    (void)m;
    DEBUGP("We were able to place m = %u keys\n", m);
    return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build a CHD "ph" function for the keys exposed by mph->key_source.
 *
 * The load factor c is clamped to [0.5, 0.99]. The number of bins n is
 * derived from the key count, keys_per_bin and the load factor, then bumped
 * to the next odd value accepted by check_primality(). The mapping, ordering
 * and searching steps are retried until searching succeeds or the iteration
 * budget (100) is used up; the resulting displacement table is then stored
 * in a compressed sequence.
 *
 * \param mph configuration; mph->data must be a chd_ph_config_data_t
 * \param c   requested load factor
 * \return a newly allocated cmph_t that owns the hash state and the
 *         compressed sequence, or NULL on failure (mapping failure, trial
 *         budget exhausted, or memory exhaustion). On failure chd_ph->hl is
 *         destroyed and reset to NULL.
 */
cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
    cmph_t *mphf = NULL;
    chd_ph_data_t *chd_phf = NULL;
    chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;

    double load_factor = c;
    cmph_uint8 searching_success = 0;
    cmph_uint32 max_probes = 1 << 20; // default value for max_probes
    cmph_uint32 iterations = 100;
    chd_ph_bucket_t *buckets = NULL;
    chd_ph_item_t *items = NULL;
    cmph_uint8 failure = 0;
    cmph_uint32 max_bucket_size = 0;
    chd_ph_sorted_list_t *sorted_lists = NULL;
    cmph_uint32 *disp_table = NULL;
    double space_lower_bound = 0;
#ifdef CMPH_TIMING
    double construction_time_begin = 0.0;
    double construction_time = 0.0;
    ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

    chd_ph->m = mph->key_source->nkeys;
    DEBUGP("m = %u\n", chd_ph->m);

    chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
    DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);

    if (load_factor < 0.5)
    {
        load_factor = 0.5;
    }

    if (load_factor >= 0.99)
    {
        load_factor = 0.99;
    }

    DEBUGP("load_factor = %.3f\n", load_factor);

    chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;

    // Round the number of bins to the prime immediately above
    if (chd_ph->n % 2 == 0) chd_ph->n++;
    while (check_primality(chd_ph->n) != 1)
    {
        chd_ph->n += 2; // just odd numbers can be primes for n > 2
    }

    DEBUGP("n = %u \n", chd_ph->n);
    if (chd_ph->keys_per_bin == 1)
    {
        space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
    }

    if (mph->verbosity)
    {
        fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
    }

    // We allocate the working tables
    buckets = chd_ph_bucket_new(chd_ph->nbuckets);
    items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));

    // NOTE(review): log(0) is -inf, so m == 0 makes this conversion undefined;
    // confirm whether empty key sets are rejected before reaching this point.
    max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);

    // One bit per bin when a bin holds a single key, one byte counter per bin otherwise.
    // NOTE(review): a previous occup_table value is overwritten here without being
    // freed; confirm the config constructor/destructor own that buffer.
    if (chd_ph->keys_per_bin == 1)
        chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
    else
        chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));

    disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));

    // calloc(0, ...) may legitimately return NULL, hence the m > 0 guard on items.
    if (buckets == NULL || (items == NULL && chd_ph->m > 0) ||
        chd_ph->occup_table == NULL || disp_table == NULL)
    {
        if (mph->verbosity)
        {
            fprintf(stderr, "Failure allocating the working tables\n");
        }
        failure = 1;
        goto cleanup;
    }

    //
    // init_genrand(time(0));

    while (1)
    {
        iterations--;
        if (mph->verbosity)
        {
            fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
        }

        if (!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
        {
            if (mph->verbosity)
            {
                fprintf(stderr, "Failure in mapping step\n");
            }
            failure = 1;
            goto cleanup;
        }

        if (mph->verbosity)
        {
            fprintf(stderr, "Starting ordering step\n");
        }
        // Drop the list produced by the previous (failed) trial, if any.
        free(sorted_lists);

        sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);

        if (mph->verbosity)
        {
            fprintf(stderr, "Starting searching step\n");
        }

        searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
        if (searching_success) break;

        // reset occup_table
        if (chd_ph->keys_per_bin > 1)
            memset(chd_ph->occup_table, 0, chd_ph->n);
        else
            memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));

        if (iterations == 0)
        {
            if (mph->verbosity)
            {
                fprintf(stderr, "Failure because the max trials was exceeded\n");
            }
            failure = 1;
            goto cleanup;
        }
    }

#ifdef DEBUG
    {
        if (!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
        {
            DEBUGP("Error for bin packing generation");
            failure = 1;
            goto cleanup;
        }
    }
#endif

    if (mph->verbosity)
    {
        fprintf(stderr, "Starting compressing step\n");
    }

    free(chd_ph->cs);
    chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
    if (chd_ph->cs == NULL)
    {
        failure = 1;
        goto cleanup;
    }
    compressed_seq_init(chd_ph->cs);
    compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);

#ifdef CMPH_TIMING
    ELAPSED_TIME_IN_SECONDS(&construction_time);
    register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
    DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
#endif

cleanup:
    if (buckets != NULL)
    {
        chd_ph_bucket_destroy(buckets);
    }
    free(items);
    free(sorted_lists);
    free(disp_table);
    if (failure)
    {
        if (chd_ph->hl)
        {
            hash_state_destroy(chd_ph->hl);
        }
        chd_ph->hl = NULL;
        return NULL;
    }

    mphf = (cmph_t *)malloc(sizeof(cmph_t));
    chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
    if (mphf == NULL || chd_phf == NULL)
    {
        // Nothing was handed over yet: release what this call built.
        free(mphf);
        free(chd_phf);
        compressed_seq_destroy(chd_ph->cs);
        free(chd_ph->cs);
        chd_ph->cs = NULL;
        if (chd_ph->hl)
        {
            hash_state_destroy(chd_ph->hl);
        }
        chd_ph->hl = NULL;
        return NULL;
    }

    mphf->algo = mph->algo;

    chd_phf->cs = chd_ph->cs;
    chd_ph->cs = NULL; //transfer memory ownership
    chd_phf->hl = chd_ph->hl;
    chd_ph->hl = NULL; //transfer memory ownership
    chd_phf->n = chd_ph->n;
    chd_phf->nbuckets = chd_ph->nbuckets;

    mphf->data = chd_phf;
    mphf->size = chd_ph->n;

    DEBUGP("Successfully generated minimal perfect hash\n");
    if (mph->verbosity)
    {
        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
    }

#ifdef CMPH_TIMING
    register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
    construction_time = construction_time - construction_time_begin;
    fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
#endif

    return mphf;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Read a chd_ph function from fd into mphf->data.
 *
 * Layout read, in order: hash-state length and bytes, compressed-sequence
 * length and bytes, n, nbuckets (all lengths and counts are cmph_uint32).
 *
 * The function returns void, so failure is reported through mphf->data:
 * if any read or allocation fails (for example on a truncated file), every
 * partially built piece is released and mphf->data is left NULL.
 *
 * \param fd   stream positioned at the start of the chd_ph payload
 * \param mphf object whose data pointer receives the loaded function
 */
void chd_ph_load(FILE *fd, cmph_t *mphf)
{
    char *buf = NULL;
    cmph_uint32 buflen = 0;
    chd_ph_data_t *chd_ph = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));

    DEBUGP("Loading chd_ph mphf\n");
    mphf->data = chd_ph;
    if (chd_ph == NULL)
    {
        return;
    }
    chd_ph->hl = NULL;
    chd_ph->cs = NULL;

    // hash state
    if (fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd) != 1) goto fail;
    DEBUGP("Hash state has %u bytes\n", buflen);
    buf = (char *)malloc((size_t)buflen);
    if (buf == NULL || fread(buf, (size_t)buflen, (size_t)1, fd) != 1) goto fail;
    chd_ph->hl = hash_state_load(buf, buflen);
    free(buf);
    buf = NULL;
    if (chd_ph->hl == NULL) goto fail;

    // compressed sequence of displacement values
    if (fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd) != 1) goto fail;
    DEBUGP("Compressed sequence structure has %u bytes\n", buflen);
    buf = (char *)malloc((size_t)buflen);
    if (buf == NULL || fread(buf, (size_t)buflen, (size_t)1, fd) != 1) goto fail;
    chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
    if (chd_ph->cs == NULL) goto fail;
    compressed_seq_load(chd_ph->cs, buf, buflen);
    free(buf);
    buf = NULL;

    // loading n and nbuckets
    DEBUGP("Reading n and nbuckets\n");
    if (fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd) != 1) goto fail;
    if (fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd) != 1) goto fail;
    return;

fail:
    DEBUGP("Failed to load chd_ph mphf\n");
    free(buf);
    if (chd_ph->cs != NULL)
    {
        // cs is only non-NULL here after compressed_seq_load() has filled it.
        compressed_seq_destroy(chd_ph->cs);
        free(chd_ph->cs);
    }
    if (chd_ph->hl != NULL)
    {
        hash_state_destroy(chd_ph->hl);
    }
    free(chd_ph);
    mphf->data = NULL;
}
|
||||||
|
|
||||||
|
/**
 * Write a chd_ph function to fd.
 *
 * After the generic header emitted by __cmph_dump(), the layout is: hash-state
 * length and bytes, compressed-sequence length and bytes, n, nbuckets.
 *
 * \param mphf function to serialise; mphf->data must be a chd_ph_data_t
 * \param fd   destination stream
 * \return 1 when every write succeeded, 0 as soon as one fails
 */
int chd_ph_dump(cmph_t *mphf, FILE *fd)
{
    char *buf = NULL;
    cmph_uint32 buflen = 0;
    int ok;
    chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;

    __cmph_dump(mphf, fd);

    hash_state_dump(data->hl, &buf, &buflen);
    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
    // fwrite() with a zero-sized item returns 0, so an empty payload is not an error.
    ok = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) == 1
         && (buflen == 0 || fwrite(buf, (size_t)buflen, (size_t)1, fd) == 1);
    free(buf);
    buf = NULL;
    if (!ok)
    {
        return 0;
    }

    compressed_seq_dump(data->cs, &buf, &buflen);
    DEBUGP("Dumping compressed sequence structure with %u bytes to disk\n", buflen);
    ok = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) == 1
         && (buflen == 0 || fwrite(buf, (size_t)buflen, (size_t)1, fd) == 1);
    free(buf);
    if (!ok)
    {
        return 0;
    }

    // dumping n and nbuckets
    if (fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        return 0;
    }
    if (fwrite(&(data->nbuckets), sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        return 0;
    }
    return 1;
}
|
||||||
|
|
||||||
|
/**
 * Release a chd_ph function and everything it owns (compressed sequence,
 * hash state, algorithm data, and the cmph_t itself).
 *
 * Tolerates a NULL mphf, a NULL mphf->data and NULL members, so it can be
 * called on an object that was never fully populated.
 *
 * \param mphf function to destroy; must not be used afterwards
 */
void chd_ph_destroy(cmph_t *mphf)
{
    chd_ph_data_t *data;

    if (mphf == NULL)
    {
        return;
    }

    data = (chd_ph_data_t *)mphf->data;
    if (data != NULL)
    {
        if (data->cs != NULL)
        {
            compressed_seq_destroy(data->cs);
            free(data->cs);
        }
        if (data->hl != NULL)
        {
            hash_state_destroy(data->hl);
        }
        free(data);
    }
    free(mphf);
}
|
||||||
|
|
||||||
|
/**
 * Evaluate the chd_ph function for one key.
 *
 * Three hash values select the bucket, the base position and the probe
 * step; the bucket's stored displacement is split into two probe numbers
 * (disp % n and disp / n) that are combined with them modulo n.
 *
 * \param mphf   function built by chd_ph_new() or read by chd_ph_load()
 * \param key    key bytes
 * \param keylen key length in bytes
 * \return the bin assigned to the key, in [0, n)
 */
cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
    chd_ph_data_t *phf = mphf->data;
    cmph_uint32 hashes[3];
    cmph_uint32 bucket, base, step;
    cmph_uint32 displacement, probe0, probe1;
    cmph_uint64 slot;

    hash_vector(phf->hl, key, keylen, hashes);
    bucket = hashes[0] % phf->nbuckets;
    base = hashes[1] % phf->n;
    step = hashes[2] % (phf->n - 1) + 1;

    displacement = compressed_seq_query(phf->cs, bucket);
    probe0 = displacement % phf->n;
    probe1 = displacement / phf->n;

    // 64-bit arithmetic: step * probe0 can exceed 32 bits.
    slot = base + ((cmph_uint64)step) * probe0 + probe1;
    return (cmph_uint32)(slot % phf->n);
}
|
||||||
|
|
||||||
|
/**
 * Serialise a chd_ph function into a caller-provided contiguous buffer.
 *
 * Layout written: hash type (cmph_uint32), packed hash state, n, nbuckets,
 * packed compressed sequence.
 *
 * \param mphf        function to pack
 * \param packed_mphf destination buffer
 */
void chd_ph_pack(cmph_t *mphf, void *packed_mphf)
{
    chd_ph_data_t *phf = (chd_ph_data_t *)mphf->data;
    cmph_uint8 *cursor = packed_mphf;
    CMPH_HASH type = hash_get_type(phf->hl);

    // hash type followed by the hash state itself
    *((cmph_uint32 *) cursor) = type;
    cursor += sizeof(cmph_uint32);
    hash_state_pack(phf->hl, cursor);
    cursor += hash_state_packed_size(type);

    // number of bins, then number of buckets
    *((cmph_uint32 *) cursor) = phf->n;
    cursor += sizeof(phf->n);
    *((cmph_uint32 *) cursor) = phf->nbuckets;
    cursor += sizeof(phf->nbuckets);

    // the compressed displacement sequence is the last field
    compressed_seq_pack(phf->cs, cursor);
}
|
||||||
|
|
||||||
|
/**
 * Compute the number of bytes reserved for the packed form of mphf.
 *
 * The total is sizeof(CMPH_ALGO), the packed hash state, the packed
 * compressed sequence and three cmph_uint32 words.
 *
 * \param mphf function to measure
 * \return size in bytes, truncated to cmph_uint32
 */
cmph_uint32 chd_ph_packed_size(cmph_t *mphf)
{
    chd_ph_data_t *phf = (chd_ph_data_t *)mphf->data;
    size_t total = sizeof(CMPH_ALGO) + 3*sizeof(cmph_uint32);

    total += hash_state_packed_size(hash_get_type(phf->hl));
    total += compressed_seq_packed_size(phf->cs);
    return (cmph_uint32)total;
}
|
||||||
|
|
||||||
|
/**
 * Evaluate a chd_ph function directly from its packed representation.
 *
 * Reads the hash type, skips the packed hash state, reads n and nbuckets,
 * and treats the rest of the buffer as the packed compressed sequence.
 *
 * \param packed_mphf buffer in the layout this function reads
 * \param key         key bytes
 * \param keylen      key length in bytes
 * \return the bin assigned to the key, in [0, n)
 */
cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
    CMPH_HASH type = *(cmph_uint32 *)packed_mphf;
    cmph_uint8 *hash_state = (cmph_uint8 *)(packed_mphf) + 4;
    cmph_uint32 *words = (cmph_uint32 *)(hash_state + hash_state_packed_size(type));
    cmph_uint32 bins = words[0];
    cmph_uint32 bucket_count = words[1];
    cmph_uint32 *packed_cs = words + 2;
    cmph_uint32 hashes[3];
    cmph_uint32 bucket, base, step;
    cmph_uint32 displacement, probe0, probe1;
    cmph_uint64 slot;

    hash_vector_packed(hash_state, type, key, keylen, hashes);

    bucket = hashes[0] % bucket_count;
    base = hashes[1] % bins;
    step = hashes[2] % (bins - 1) + 1;

    displacement = compressed_seq_query_packed(packed_cs, bucket);
    probe0 = displacement % bins;
    probe1 = displacement / bins;

    // 64-bit arithmetic: step * probe0 can exceed 32 bits.
    slot = base + ((cmph_uint64)step) * probe0 + probe1;
    return (cmph_uint32)(slot % bins);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
59
cmph/chd_ph.h
Normal file
59
cmph/chd_ph.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
#ifndef _CMPH_CHD_PH_H__
#define _CMPH_CHD_PH_H__

#include "cmph.h"

typedef struct __chd_ph_data_t chd_ph_data_t;
typedef struct __chd_ph_config_data_t chd_ph_config_data_t;

/* Config API */
chd_ph_config_data_t *chd_ph_config_new(void);
void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);

/** \fn void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
 *  \brief Allows to set the number of keys per bin.
 *  \param mph pointer to the configuration structure
 *  \param keys_per_bin value for the number of keys per bin
 */
void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);

/** \fn void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
 *  \brief Allows to set the number of keys per bucket.
 *  \param mph pointer to the configuration structure
 *  \param keys_per_bucket value for the number of keys per bucket
 */
void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
void chd_ph_config_destroy(cmph_config_t *mph);


/* Chd algorithm API */

/** \fn cmph_t *chd_ph_new(cmph_config_t *mph, double c);
 *  \brief Build the function for the keys of the configuration.
 *  \param mph pointer to the configuration structure
 *  \param c load factor
 *  \return the new function, or NULL on failure
 */
cmph_t *chd_ph_new(cmph_config_t *mph, double c);

/** \fn void chd_ph_load(FILE *fd, cmph_t *mphf);
 *  \brief Read a function previously written by chd_ph_dump() into mphf.
 */
void chd_ph_load(FILE *fd, cmph_t *mphf);

/** \fn int chd_ph_dump(cmph_t *mphf, FILE *fd);
 *  \brief Write mphf to the stream fd.
 */
int chd_ph_dump(cmph_t *mphf, FILE *fd);

/** \fn void chd_ph_destroy(cmph_t *mphf);
 *  \brief Free mphf and the data it owns.
 */
void chd_ph_destroy(cmph_t *mphf);

/** \fn cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
 *  \brief Compute the value of the function for a key.
 *  \param mphf pointer to the function
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return The mphf value
 */
cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);

/** \fn void chd_ph_pack(cmph_t *mphf, void *packed_mphf);
 *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
 *  \param mphf pointer to the resulting mphf
 *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
 */
void chd_ph_pack(cmph_t *mphf, void *packed_mphf);

/** \fn cmph_uint32 chd_ph_packed_size(cmph_t *mphf);
 *  \brief Return the amount of space needed to pack mphf.
 *  \param mphf pointer to a mphf
 *  \return the size of the packed function or zero for failures
 */
cmph_uint32 chd_ph_packed_size(cmph_t *mphf);

/** \fn cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
 *  \brief Use the packed mphf to do a search.
 *  \param packed_mphf pointer to the packed mphf
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return The mphf value
 */
cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);

#endif
|
21
cmph/chd_structs.h
Normal file
21
cmph/chd_structs.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#ifndef __CMPH_CHD_STRUCTS_H__
|
||||||
|
#define __CMPH_CHD_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "chd_structs_ph.h"
|
||||||
|
#include "chd_ph.h"
|
||||||
|
#include "compressed_rank.h"
|
||||||
|
|
||||||
|
struct __chd_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 packed_cr_size;
|
||||||
|
cmph_uint8 * packed_cr; // packed compressed rank structure to control the number of zeros in a bit vector
|
||||||
|
|
||||||
|
cmph_uint32 packed_chd_phf_size;
|
||||||
|
cmph_uint8 * packed_chd_phf;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __chd_config_data_t
|
||||||
|
{
|
||||||
|
cmph_config_t *chd_ph; // chd_ph algorithm must be used here
|
||||||
|
};
|
||||||
|
#endif
|
29
cmph/chd_structs_ph.h
Normal file
29
cmph/chd_structs_ph.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#ifndef __CMPH_CHD_PH_STRUCTS_H__
|
||||||
|
#define __CMPH_CHD_PH_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
#include "compressed_seq.h"
|
||||||
|
|
||||||
|
struct __chd_ph_data_t
|
||||||
|
{
|
||||||
|
compressed_seq_t * cs; // compressed displacement values
|
||||||
|
cmph_uint32 nbuckets; // number of buckets
|
||||||
|
cmph_uint32 n; // number of bins
|
||||||
|
hash_state_t *hl; // linear hash function
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __chd_ph_config_data_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfunc; // linear hash function to be used
|
||||||
|
compressed_seq_t * cs; // compressed displacement values
|
||||||
|
cmph_uint32 nbuckets; // number of buckets
|
||||||
|
cmph_uint32 n; // number of bins
|
||||||
|
hash_state_t *hl; // linear hash function
|
||||||
|
|
||||||
|
cmph_uint32 m; // number of keys
|
||||||
|
cmph_uint8 use_h; // flag to indicate the of use of a heuristic (use_h = 1)
|
||||||
|
cmph_uint32 keys_per_bin;//maximum number of keys per bin
|
||||||
|
cmph_uint32 keys_per_bucket; // average number of keys per bucket
|
||||||
|
cmph_uint8 *occup_table; // table that indicates occupied positions
|
||||||
|
};
|
||||||
|
#endif
|
381
cmph/chm.c
Normal file
381
cmph/chm.c
Normal file
@ -0,0 +1,381 @@
|
|||||||
|
#include "graph.h"
|
||||||
|
#include "chm.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "chm_structs.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
static int chm_gen_edges(cmph_config_t *mph);
|
||||||
|
static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v);
|
||||||
|
|
||||||
|
chm_config_data_t *chm_config_new()
|
||||||
|
{
|
||||||
|
chm_config_data_t *chm = NULL;
|
||||||
|
chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
|
||||||
|
assert(chm);
|
||||||
|
memset(chm, 0, sizeof(chm_config_data_t));
|
||||||
|
chm->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
|
chm->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
|
chm->g = NULL;
|
||||||
|
chm->graph = NULL;
|
||||||
|
chm->hashes = NULL;
|
||||||
|
return chm;
|
||||||
|
}
|
||||||
|
/* Free the chm-specific part of a configuration (mph->data); mph itself is untouched. */
void chm_config_destroy(cmph_config_t *mph)
{
    chm_config_data_t *cfg = (chm_config_data_t *)mph->data;

    DEBUGP("Destroying algorithm dependent data\n");
    free(cfg);
}
|
||||||
|
|
||||||
|
/*
 * Copy up to two hash function ids from hashfuncs into the chm configuration.
 *
 * Copying stops at the first CMPH_HASH_COUNT entry or after two entries,
 * whichever comes first. The index is tested before the array is read, so a
 * two-element array without a CMPH_HASH_COUNT terminator is not over-read.
 */
void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
    chm_config_data_t *chm = (chm_config_data_t *)mph->data;
    cmph_uint32 i;

    /* chm only uses two hash functions */
    for (i = 0; i < 2 && hashfuncs[i] != CMPH_HASH_COUNT; ++i)
    {
        chm->hashfuncs[i] = hashfuncs[i];
    }
}
|
||||||
|
|
||||||
|
/*
 * Build a chm minimal perfect hash function for the keys in mph->key_source.
 *
 * Mapping step: up to 20 attempts are made to pick two hash functions for
 * which chm_gen_edges() succeeds. Assignment step: every vertex not yet
 * visited gets g = 0 and chm_traverse() fills g for the vertices reachable
 * from it.
 *
 * c is the ratio of vertices to keys; 0 selects the default 2.09.
 *
 * Returns a newly allocated cmph_t that takes ownership of g and of the hash
 * states, or NULL when the attempts are exhausted or memory runs out. On
 * every failure path the graph, the hash states, the hashes array and g are
 * released and the corresponding configuration pointers reset to NULL.
 */
cmph_t *chm_new(cmph_config_t *mph, double c)
{
    cmph_t *mphf = NULL;
    chm_data_t *chmf = NULL;

    cmph_uint32 i;
    cmph_uint32 iterations = 20;
    cmph_uint8 *visited = NULL;
    chm_config_data_t *chm = (chm_config_data_t *)mph->data;
    chm->m = mph->key_source->nkeys;
    if (c == 0) c = 2.09;
    chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
    DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
    chm->graph = graph_new(chm->n, chm->m);
    DEBUGP("Created graph\n");

    chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
    if (chm->hashes == NULL) goto fail;
    for (i = 0; i < 3; ++i) chm->hashes[i] = NULL;

    //Mapping step
    if (mph->verbosity)
    {
        fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", chm->m, chm->n);
    }
    while (1)
    {
        int ok;
        chm->hashes[0] = hash_state_new(chm->hashfuncs[0], chm->n);
        chm->hashes[1] = hash_state_new(chm->hashfuncs[1], chm->n);
        ok = chm_gen_edges(mph);
        if (ok) break;

        --iterations;
        hash_state_destroy(chm->hashes[0]);
        chm->hashes[0] = NULL;
        hash_state_destroy(chm->hashes[1]);
        chm->hashes[1] = NULL;
        DEBUGP("%u iterations remaining\n", iterations);
        if (mph->verbosity)
        {
            fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
        }
        if (iterations == 0) break;
    }
    if (iterations == 0)
    {
        goto fail;
    }

    //Assignment step
    if (mph->verbosity)
    {
        fprintf(stderr, "Starting assignment step\n");
    }
    DEBUGP("Assignment step\n");
    visited = (cmph_uint8 *)malloc((size_t)(chm->n/8 + 1));
    if (visited == NULL) goto fail;
    memset(visited, 0, (size_t)(chm->n/8 + 1));

    free(chm->g);
    chm->g = (cmph_uint32 *)malloc(chm->n * sizeof(cmph_uint32));
    if (chm->g == NULL) goto fail;

    // Allocate the result objects before the traversal so that a late
    // out-of-memory condition cannot discard a completed assignment.
    mphf = (cmph_t *)malloc(sizeof(cmph_t));
    chmf = (chm_data_t *)malloc(sizeof(chm_data_t));
    if (mphf == NULL || chmf == NULL) goto fail;

    for (i = 0; i < chm->n; ++i)
    {
        if (!GETBIT(visited,i))
        {
            chm->g[i] = 0;
            chm_traverse(chm, visited, i);
        }
    }
    graph_destroy(chm->graph);
    free(visited);
    chm->graph = NULL;

    mphf->algo = mph->algo;
    chmf->g = chm->g;
    chm->g = NULL; //transfer memory ownership
    chmf->hashes = chm->hashes;
    chm->hashes = NULL; //transfer memory ownership
    chmf->n = chm->n;
    chmf->m = chm->m;
    mphf->data = chmf;
    mphf->size = chm->m;
    DEBUGP("Successfully generated minimal perfect hash\n");
    if (mph->verbosity)
    {
        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
    }
    return mphf;

fail:
    free(visited);
    free(mphf);
    free(chmf);
    free(chm->g);
    chm->g = NULL;
    if (chm->hashes != NULL)
    {
        // Entries are NULL when the mapping loop already destroyed them.
        if (chm->hashes[0] != NULL) hash_state_destroy(chm->hashes[0]);
        if (chm->hashes[1] != NULL) hash_state_destroy(chm->hashes[1]);
        free(chm->hashes);
        chm->hashes = NULL;
    }
    if (chm->graph != NULL)
    {
        graph_destroy(chm->graph);
        chm->graph = NULL;
    }
    return NULL;
}
|
||||||
|
|
||||||
|
/*
 * Depth-first walk from vertex v: marks v as visited and, for each
 * neighbor not visited yet, sets g[neighbor] = edge_id(v, neighbor) - g[v]
 * before recursing into it. g[v] must already be set by the caller.
 */
static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v)
{
    graph_iterator_t cursor = graph_neighbors_it(chm->graph, v);
    cmph_uint32 next = 0;

    SETBIT(visited,v);
    DEBUGP("Visiting vertex %u\n", v);

    for (;;)
    {
        next = graph_next_neighbor(chm->graph, &cursor);
        if (next == GRAPH_NO_NEIGHBOR)
        {
            break;
        }

        DEBUGP("Visiting neighbor %u\n", next);
        if (!GETBIT(visited,next))
        {
            DEBUGP("Visiting neighbor %u\n", next);
            DEBUGP("Visiting edge %u->%u with id %u\n", v, next, graph_edge_id(chm->graph, v, next));
            chm->g[next] = graph_edge_id(chm->graph, v, next) - chm->g[v];
            DEBUGP("g is %u (%u - %u mod %u)\n", chm->g[next], graph_edge_id(chm->graph, v, next), chm->g[v], chm->m);
            chm_traverse(chm, visited, next);
        }
    }
}
|
||||||
|
|
||||||
|
/*
 * Rebuild the graph from the key source: each key becomes an edge between
 * the vertices chosen by the two hash functions (modulo n). When both hashes
 * agree the second endpoint is moved to the next vertex, wrapping to 0.
 *
 * Returns 0 if a self loop remains or if the finished graph is cyclic,
 * and a non-zero value when the graph is acyclic.
 */
static int chm_gen_edges(cmph_config_t *mph)
{
    chm_config_data_t *chm = (chm_config_data_t *)mph->data;
    cmph_uint32 index;
    int cyclic = 0;

    DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
    graph_clear_edges(chm->graph);
    mph->key_source->rewind(mph->key_source->data);

    for (index = 0; index < mph->key_source->nkeys; ++index)
    {
        cmph_uint32 from, to;
        cmph_uint32 keylen;
        char *key;

        mph->key_source->read(mph->key_source->data, &key, &keylen);
        from = hash(chm->hashes[0], key, keylen) % chm->n;
        to = hash(chm->hashes[1], key, keylen) % chm->n;

        /* nudge the second endpoint when both hashes collide */
        if (from == to)
        {
            ++to;
            if (to >= chm->n)
            {
                to = 0;
            }
        }

        /* still equal: the nudge wrapped back onto the same vertex */
        if (from == to)
        {
            if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", index);
            mph->key_source->dispose(mph->key_source->data, key, keylen);
            return 0;
        }

        DEBUGP("Adding edge: %u -> %u for key %s\n", from, to, key);
        mph->key_source->dispose(mph->key_source->data, key, keylen);
        graph_add_edge(chm->graph, from, to);
    }

    cyclic = graph_is_cyclic(chm->graph);
    if (mph->verbosity && cyclic) fprintf(stderr, "Cyclic graph generated\n");
    DEBUGP("Looking for cycles: %u\n", cyclic);

    return ! cyclic;
}
|
||||||
|
|
||||||
|
/*
 * Write a chm function to fd.
 *
 * After the generic header emitted by __cmph_dump(), the layout is: the
 * number of hash functions (2), then for each one its state length and
 * bytes, then n, m and the n entries of g.
 *
 * Returns 1 when every write succeeded and 0 as soon as one fails.
 */
int chm_dump(cmph_t *mphf, FILE *fd)
{
    char *buf = NULL;
    cmph_uint32 buflen = 0;
    cmph_uint32 two = 2; //number of hash functions
    cmph_uint32 i;
    int ok;
    chm_data_t *data = (chm_data_t *)mphf->data;

    __cmph_dump(mphf, fd);

    if (fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        return 0;
    }

    for (i = 0; i < two; ++i)
    {
        hash_state_dump(data->hashes[i], &buf, &buflen);
        DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
        // fwrite() with a zero-sized item returns 0, so an empty payload is not an error.
        ok = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) == 1
             && (buflen == 0 || fwrite(buf, (size_t)buflen, (size_t)1, fd) == 1);
        free(buf);
        buf = NULL;
        if (!ok)
        {
            return 0;
        }
    }

    if (fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        return 0;
    }
    if (fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd) != 1)
    {
        return 0;
    }

    // Same bytes as one block of n * sizeof(cmph_uint32), but counted per entry.
    if (data->n > 0 && fwrite(data->g, sizeof(cmph_uint32), (size_t)data->n, fd) != (size_t)data->n)
    {
        return 0;
    }
    return 1;
}
|
||||||
|
|
||||||
|
/*
 * Read a chm function from f into mphf->data.
 *
 * Layout read: number of hash functions, then for each one its state length
 * and bytes, then n, m and the n entries of g. chm_dump() always writes two
 * hash functions and chm_search()/chm_destroy() use exactly hashes[0] and
 * hashes[1], so any other count is treated as a corrupt file.
 *
 * The function returns void, so failure is reported through mphf->data:
 * if any read or allocation fails (for example on a truncated file), every
 * partially built piece is released and mphf->data is left NULL.
 */
void chm_load(FILE *f, cmph_t *mphf)
{
    cmph_uint32 nhashes = 0;
    char *buf = NULL;
    cmph_uint32 buflen = 0;
    cmph_uint32 i;
    chm_data_t *chm = (chm_data_t *)malloc(sizeof(chm_data_t));

    DEBUGP("Loading chm mphf\n");
    mphf->data = chm;
    if (chm == NULL)
    {
        return;
    }
    chm->hashes = NULL;
    chm->g = NULL;

    if (fread(&nhashes, sizeof(cmph_uint32), (size_t)1, f) != 1) goto fail;
    if (nhashes != 2) goto fail;

    chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
    if (chm->hashes == NULL) goto fail;
    for (i = 0; i <= nhashes; ++i) chm->hashes[i] = NULL;

    DEBUGP("Reading %u hashes\n", nhashes);
    for (i = 0; i < nhashes; ++i)
    {
        if (fread(&buflen, sizeof(cmph_uint32), (size_t)1, f) != 1) goto fail;
        DEBUGP("Hash state has %u bytes\n", buflen);
        buf = (char *)malloc((size_t)buflen);
        if (buf == NULL || fread(buf, (size_t)buflen, (size_t)1, f) != 1) goto fail;
        chm->hashes[i] = hash_state_load(buf, buflen);
        free(buf);
        buf = NULL;
        if (chm->hashes[i] == NULL) goto fail;
    }

    DEBUGP("Reading m and n\n");
    if (fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f) != 1) goto fail;
    if (fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f) != 1) goto fail;

    if (chm->n > 0)
    {
        // calloc() rejects an n * sizeof(cmph_uint32) product that overflows size_t.
        chm->g = (cmph_uint32 *)calloc((size_t)chm->n, sizeof(cmph_uint32));
        if (chm->g == NULL) goto fail;
        if (fread(chm->g, sizeof(cmph_uint32), (size_t)chm->n, f) != (size_t)chm->n) goto fail;
    }
#ifdef DEBUG
    fprintf(stderr, "G: ");
    for (i = 0; i < chm->n; ++i) fprintf(stderr, "%u ", chm->g[i]);
    fprintf(stderr, "\n");
#endif
    return;

fail:
    DEBUGP("Failed to load chm mphf\n");
    free(buf);
    if (chm->hashes != NULL)
    {
        for (i = 0; i < nhashes; ++i)
        {
            if (chm->hashes[i] != NULL) hash_state_destroy(chm->hashes[i]);
        }
        free(chm->hashes);
    }
    free(chm->g);
    free(chm);
    mphf->data = NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Evaluate the chm function for one key: hash it to two vertices (moving the
 * second to the next vertex, wrapping to 0, when both coincide) and return
 * (g[first] + g[second]) mod m.
 */
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
    chm_data_t *chm = mphf->data;
    cmph_uint32 first = hash(chm->hashes[0], key, keylen) % chm->n;
    cmph_uint32 second = hash(chm->hashes[1], key, keylen) % chm->n;

    DEBUGP("key: %s h1: %u h2: %u\n", key, first, second);

    if (first == second)
    {
        ++second;
        if (second >= chm->n)
        {
            second = 0;
        }
    }

    DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, chm->g[first], chm->g[second], chm->m);
    return (chm->g[first] + chm->g[second]) % chm->m;
}
|
||||||
|
/*
 * Release a chm function and everything it owns (g, the two hash states,
 * the hashes array, the algorithm data, and the cmph_t itself).
 *
 * Tolerates a NULL mphf, a NULL mphf->data and NULL members, so it can be
 * called on an object that was never fully populated.
 */
void chm_destroy(cmph_t *mphf)
{
    chm_data_t *data;

    if (mphf == NULL)
    {
        return;
    }

    data = (chm_data_t *)mphf->data;
    if (data != NULL)
    {
        free(data->g);
        if (data->hashes != NULL)
        {
            if (data->hashes[0] != NULL) hash_state_destroy(data->hashes[0]);
            if (data->hashes[1] != NULL) hash_state_destroy(data->hashes[1]);
            free(data->hashes);
        }
        free(data);
    }
    free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void chm_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1 type
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||||
|
*((cmph_uint32 *) ptr) = h1_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->hashes[0], ptr);
|
||||||
|
ptr += hash_state_packed_size(h1_type);
|
||||||
|
|
||||||
|
// packing h2 type
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||||
|
*((cmph_uint32 *) ptr) = h2_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->hashes[1], ptr);
|
||||||
|
ptr += hash_state_packed_size(h2_type);
|
||||||
|
|
||||||
|
// packing n
|
||||||
|
*((cmph_uint32 *) ptr) = data->n;
|
||||||
|
ptr += sizeof(data->n);
|
||||||
|
|
||||||
|
// packing m
|
||||||
|
*((cmph_uint32 *) ptr) = data->m;
|
||||||
|
ptr += sizeof(data->m);
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||||
|
|
||||||
|
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||||
|
4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint8 *h1_ptr = packed_mphf;
|
||||||
|
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
|
||||||
|
h1_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||||
|
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||||
|
h2_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||||
|
|
||||||
|
register cmph_uint32 n = *g_ptr++;
|
||||||
|
register cmph_uint32 m = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||||
|
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||||
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
|
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||||
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
|
||||||
|
return (g_ptr[h1] + g_ptr[h2]) % m;
|
||||||
|
}
|
42
cmph/chm.h
Normal file
42
cmph/chm.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef __CMPH_CHM_H__
#define __CMPH_CHM_H__

#include "cmph.h"

typedef struct __chm_data_t chm_data_t;
typedef struct __chm_config_data_t chm_config_data_t;

/* Declared with (void) so the compiler checks that no arguments are passed. */
chm_config_data_t *chm_config_new(void);
void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void chm_config_destroy(cmph_config_t *mph);
cmph_t *chm_new(cmph_config_t *mph, double c);

void chm_load(FILE *f, cmph_t *mphf);
int chm_dump(cmph_t *mphf, FILE *f);
void chm_destroy(cmph_t *mphf);
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);

/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
 *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
 *  \param mphf pointer to the resulting mphf
 *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
 */
void chm_pack(cmph_t *mphf, void *packed_mphf);

/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
 *  \brief Return the amount of space needed to pack mphf.
 *  \param mphf pointer to a mphf
 *  \return the size of the packed function or zero for failures
 */
cmph_uint32 chm_packed_size(cmph_t *mphf);

/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
 *  \brief Use the packed mphf to do a search.
 *  \param packed_mphf pointer to the packed mphf
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return The mphf value
 */
cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);

#endif
|
24
cmph/chm_structs.h
Normal file
24
cmph/chm_structs.h
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#ifndef __CMPH_CHM_STRUCTS_H__
|
||||||
|
#define __CMPH_CHM_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
|
||||||
|
/* Run-time state of a built CHM function: chm_search() returns
 * (g[h1] + g[h2]) % m, and chm_destroy() frees g, both hash states and
 * the hashes array. */
struct __chm_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 m; //edges (words) count; also the modulus applied to the search result
|
||||||
|
cmph_uint32 n; //vertex count; chm_pack() copies this many entries of g
|
||||||
|
cmph_uint32 *g; //lookup table indexed by the two hash values
|
||||||
|
hash_state_t **hashes; //hash states; entries [0] and [1] are used
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Configuration-time state for the CHM algorithm. chm_config_new() returns
 * a pointer to this type, which cmph_config_new() stores in the config's
 * data field. */
struct __chm_config_data_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfuncs[2]; //hash function types; presumably set by chm_config_set_hashfuncs() -- confirm in chm.c
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint32 n; //vertex count
|
||||||
|
graph_t *graph; //NOTE(review): graph_t is not declared by this header's own include; relies on include order
|
||||||
|
cmph_uint32 *g;
|
||||||
|
hash_state_t **hashes;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
845
cmph/cmph.c
Normal file
845
cmph/cmph.c
Normal file
@ -0,0 +1,845 @@
|
|||||||
|
#include "cmph.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "chm.h"
|
||||||
|
#include "bmz.h"
|
||||||
|
#include "bmz8.h"
|
||||||
|
#include "brz.h"
|
||||||
|
#include "fch.h"
|
||||||
|
#include "bdz.h"
|
||||||
|
#include "bdz_ph.h"
|
||||||
|
#include "chd_ph.h"
|
||||||
|
#include "chd.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
/* NULL-terminated list of algorithm names; this file indexes it with a
 * CMPH_ALGO value (cmph_names[mph->algo]) in debug messages.
 * NOTE(review): the order presumably mirrors the CMPH_ALGO enum -- confirm. */
const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", "chd", NULL };
|
||||||
|
|
||||||
|
/* Cursor over a caller-owned array of keys, used by the vector and
 * byte-vector adapters. The array itself is never freed here. */
typedef struct
|
||||||
|
{
|
||||||
|
void *vector; // caller's key array; read as char ** or cmph_uint8 ** by the read callbacks
|
||||||
|
cmph_uint32 position; // access position when data is a vector; advanced by each read, reset by rewind
|
||||||
|
} cmph_vector_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Support a vector of struct as the source of keys.
|
||||||
|
*
|
||||||
|
* E.g. The keys could be the fieldB's in a vector of struct rec where
|
||||||
|
* struct rec is defined as:
|
||||||
|
* struct rec {
|
||||||
|
* fieldA;
|
||||||
|
* fieldB;
|
||||||
|
* fieldC;
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
/* Cursor over a caller-owned array of fixed-size structs, each of which
 * holds one key of key_len bytes at byte offset key_offset. */
typedef struct
|
||||||
|
{
|
||||||
|
void *vector; /* Pointer to the vector of struct */
|
||||||
|
cmph_uint32 position; /* current position: index of the next struct to read */
|
||||||
|
cmph_uint32 struct_size; /* The size of the struct */
|
||||||
|
cmph_uint32 key_offset; /* The byte offset of the key in the struct */
|
||||||
|
cmph_uint32 key_len; /* The length of the key */
|
||||||
|
} cmph_struct_vector_t;
|
||||||
|
|
||||||
|
|
||||||
|
/* Forward declarations of the file-local constructors and destructors
 * shared by the public vector adapters defined further down. */
static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys);
|
||||||
|
static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys);
|
||||||
|
static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
/**
 * Read the next newline-terminated key from a text stream.
 *
 * Lines longer than BUFSIZ are assembled from several fgets() chunks.
 * The trailing '\n' is replaced by a NUL terminator and is not counted
 * in the key length.
 *
 * \param data   the FILE * to read from
 * \param key    receives a heap-allocated, NUL-terminated key that the
 *               caller releases through the adapter's dispose callback;
 *               set to NULL on failure
 * \param keylen receives the key length in bytes; set to 0 on failure
 * \return the key length, or -1 when no complete line could be read
 *         (read failure, end of file reached before a '\n', or out of
 *         memory)
 */
static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
{
	FILE *fd = (FILE *)data;
	char buf[BUFSIZ];

	*key = NULL;
	*keylen = 0;
	for (;;)
	{
		size_t chunk;
		char *grown;

		/* A failed read, or a last line without '\n', ends the key stream. */
		if (fgets(buf, BUFSIZ, fd) == NULL || feof(fd)) goto fail;

		chunk = strlen(buf);
		/* The chunk starts with an embedded NUL byte: there is nothing to
		 * append, and buf[chunk - 1] must not be evaluated. */
		if (chunk == 0) continue;

		/* Keep the old pointer until realloc() succeeds so it can still be freed. */
		grown = (char *)realloc(*key, *keylen + chunk + 1);
		if (grown == NULL) goto fail;
		*key = grown;

		memcpy(*key + *keylen, buf, chunk);
		*keylen += (cmph_uint32)chunk;
		if (buf[chunk - 1] == '\n') break;
	}

	/* The loop only exits after a chunk ending in '\n': overwrite it with the terminator. */
	--(*keylen);
	(*key)[*keylen] = 0;
	return (int)(*keylen);

fail:
	/* Do not leak a partially assembled key. */
	free(*key);
	*key = NULL;
	*keylen = 0;
	return -1;
}
|
||||||
|
|
||||||
|
/**
 * Read the next key from a vector of length-prefixed byte records.
 *
 * Each record starts with a cmph_uint32 length (copied with memcpy)
 * followed by that many key bytes.
 *
 * \param data   the cmph_vector_t cursor
 * \param key    receives a heap-allocated copy of the key bytes (not
 *               NUL-terminated); NULL on allocation failure
 * \param keylen receives the key length; 0 on allocation failure
 * \return the key length, or -1 if the copy could not be allocated, in
 *         which case the cursor is not advanced
 */
static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
	cmph_uint8 **keys_vd = (cmph_uint8 **)cmph_vector->vector;
	const cmph_uint8 *record = keys_vd[cmph_vector->position];
	size_t size;

	memcpy(keylen, record, sizeof(*keylen));
	size = *keylen;
	*key = (char *)malloc(size);
	/* malloc(0) may legitimately return NULL, so only a non-empty key counts as a failure. */
	if (*key == NULL && size != 0)
	{
		*keylen = 0;
		return -1;
	}
	if (size != 0)
	{
		memcpy(*key, record + sizeof(*keylen), size);
	}
	cmph_vector->position = cmph_vector->position + 1;
	return (int)(*keylen);
}
|
||||||
|
|
||||||
|
/**
 * Read the next key from a vector of fixed-size structs.
 *
 * The key is the key_len bytes found at byte offset key_offset inside
 * the struct at the current position.
 *
 * \param data   the cmph_struct_vector_t cursor
 * \param key    receives a heap-allocated copy of the key bytes (not
 *               NUL-terminated); NULL on allocation failure
 * \param keylen receives the key length; 0 on allocation failure
 * \return the key length, or -1 if the copy could not be allocated, in
 *         which case the cursor is not advanced
 */
static int key_struct_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
	const char *keys_vd = (const char *)cmph_struct_vector->vector;
	size_t size = cmph_struct_vector->key_len;
	/* Widen before multiplying so a large vector cannot wrap the 32-bit product. */
	size_t offset = (size_t)cmph_struct_vector->position * cmph_struct_vector->struct_size;

	offset += cmph_struct_vector->key_offset;
	*keylen = cmph_struct_vector->key_len;
	*key = (char *)malloc(size);
	/* malloc(0) may legitimately return NULL, so only a non-empty key counts as a failure. */
	if (*key == NULL && size != 0)
	{
		*keylen = 0;
		return -1;
	}
	if (size != 0)
	{
		memcpy(*key, keys_vd + offset, size);
	}
	cmph_struct_vector->position = cmph_struct_vector->position + 1;
	return (int)(*keylen);
}
|
||||||
|
|
||||||
|
/**
 * Read the next key from a vector of NUL-terminated strings.
 *
 * \param data   the cmph_vector_t cursor
 * \param key    receives a heap-allocated, NUL-terminated copy of the
 *               string; NULL on allocation failure
 * \param keylen receives strlen() of the string; 0 on allocation failure
 * \return the key length, or -1 if the copy could not be allocated, in
 *         which case the cursor is not advanced
 */
static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
	char **keys_vd = (char **)cmph_vector->vector;
	const char *source = keys_vd[cmph_vector->position];
	/* Size the copy from the full length, not from the 32-bit keylen. */
	size_t size = strlen(source);

	*keylen = (cmph_uint32)size;
	*key = (char *)malloc(size + 1);
	if (*key == NULL)
	{
		*keylen = 0;
		return -1;
	}
	/* The length is already known, so copy string and terminator in one go. */
	memcpy(*key, source, size + 1);
	cmph_vector->position = cmph_vector->position + 1;
	return (int)(*keylen);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Dispose callback of the newline-file adapters: frees a key returned by
 * key_nlfile_read(). The adapter data and key length are not needed. */
static void key_nlfile_dispose(void *data, char *key, cmph_uint32 keylen)
{
	(void)data;
	(void)keylen;
	free(key);
}
|
||||||
|
|
||||||
|
/* Dispose callback of the vector adapters: frees the key copy made by
 * one of the vector read callbacks. The other arguments are not needed. */
static void key_vector_dispose(void *data, char *key, cmph_uint32 keylen)
{
	(void)data;
	(void)keylen;
	free(key);
}
|
||||||
|
|
||||||
|
/* Rewind callback of the newline-file adapters: repositions the
 * underlying FILE * at its beginning. */
static void key_nlfile_rewind(void *data)
{
	rewind((FILE *)data);
}
|
||||||
|
|
||||||
|
static void key_struct_vector_rewind(void *data)
|
||||||
|
{
|
||||||
|
cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
|
||||||
|
cmph_struct_vector->position = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void key_vector_rewind(void *data)
|
||||||
|
{
|
||||||
|
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
|
||||||
|
cmph_vector->position = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Count the newline-terminated lines in a stream.
 *
 * The stream is rewound before and after counting. A final line that is
 * not terminated by '\n' is not counted, matching key_nlfile_read(),
 * which does not return such a line either.
 *
 * \param fd stream to scan
 * \return number of lines ending in '\n'
 */
static cmph_uint32 count_nlfile_keys(FILE *fd)
{
	cmph_uint32 count = 0;
	char buf[BUFSIZ];

	rewind(fd);
	for (;;)
	{
		size_t len;

		/* Stop on a read failure as well as at end of file: after a failed
		 * fgets() the buffer contents must not be inspected. */
		if (fgets(buf, BUFSIZ, fd) == NULL || feof(fd)) break;

		len = strlen(buf);
		/* Only the chunk that carries the '\n' completes a key; len can be
		 * 0 when the chunk starts with an embedded NUL byte. */
		if (len != 0 && buf[len - 1] == '\n') ++count;
	}
	rewind(fd);
	return count;
}
|
||||||
|
|
||||||
|
cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd)
|
||||||
|
{
|
||||||
|
cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
|
||||||
|
assert(key_source);
|
||||||
|
key_source->data = (void *)keys_fd;
|
||||||
|
key_source->nkeys = count_nlfile_keys(keys_fd);
|
||||||
|
key_source->read = key_nlfile_read;
|
||||||
|
key_source->dispose = key_nlfile_dispose;
|
||||||
|
key_source->rewind = key_nlfile_rewind;
|
||||||
|
return key_source;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free an adapter created by cmph_io_nlfile_adapter(). Only the adapter
 * record is released; the FILE * is not closed here. */
void cmph_io_nlfile_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_adapter_t *adapter = key_source;

	free(adapter);
}
|
||||||
|
|
||||||
|
/* Build a key source over a newline-delimited text stream whose key count
 * is supplied by the caller, so the stream is not scanned here.
 * Release the result with cmph_io_nlnkfile_adapter_destroy(). */
cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));

	assert(adapter);
	adapter->data = (void *)keys_fd;
	adapter->nkeys = nkeys;

	/* callbacks for line-oriented reading */
	adapter->read = key_nlfile_read;
	adapter->dispose = key_nlfile_dispose;
	adapter->rewind = key_nlfile_rewind;
	return adapter;
}
|
||||||
|
|
||||||
|
/* Free an adapter created by cmph_io_nlnkfile_adapter(). Only the adapter
 * record is released; the FILE * is not closed here. */
void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_adapter_t *adapter = key_source;

	free(adapter);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Allocate an adapter plus its struct-vector cursor and fill in the data
 * and nkeys fields. The read/dispose/rewind callbacks are left for the
 * caller to set. */
static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
	cmph_struct_vector_t *cursor = (cmph_struct_vector_t *)malloc(sizeof(cmph_struct_vector_t));

	assert(adapter);
	assert(cursor);

	/* describe how keys are laid out inside the caller's array */
	cursor->struct_size = struct_size;
	cursor->key_offset = key_offset;
	cursor->key_len = key_len;
	cursor->vector = vector;
	cursor->position = 0;

	adapter->nkeys = nkeys;
	adapter->data = (void *)cursor;
	return adapter;
}
|
||||||
|
|
||||||
|
/* Free the cursor and the adapter made by cmph_io_struct_vector_new().
 * The caller's struct array is only detached, never freed. */
static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source)
{
	cmph_struct_vector_t *cursor = (cmph_struct_vector_t *)key_source->data;

	cursor->vector = NULL;
	free(cursor);
	free(key_source);
}
|
||||||
|
|
||||||
|
/* Allocate an adapter plus its vector cursor and fill in the data and
 * nkeys fields. The read/dispose/rewind callbacks are left for the
 * caller to set. */
static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
	cmph_vector_t *cursor = (cmph_vector_t *)malloc(sizeof(cmph_vector_t));

	assert(adapter);
	assert(cursor);

	cursor->position = 0;
	cursor->vector = vector;

	adapter->nkeys = nkeys;
	adapter->data = (void *)cursor;
	return adapter;
}
|
||||||
|
|
||||||
|
/* Free the cursor and the adapter made by cmph_io_vector_new().
 * The caller's key array is only detached, never freed. */
static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source)
{
	cmph_vector_t *cursor = (cmph_vector_t *)key_source->data;

	cursor->vector = NULL;
	free(cursor);
	free(key_source);
}
|
||||||
|
|
||||||
|
/* Build a key source over an array of length-prefixed byte records (the
 * format key_byte_vector_read() decodes).
 * Release the result with cmph_io_byte_vector_adapter_destroy(). */
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = cmph_io_vector_new(vector, nkeys);

	adapter->rewind = key_vector_rewind;
	adapter->dispose = key_vector_dispose;
	adapter->read = key_byte_vector_read;
	return adapter;
}
|
||||||
|
/* Free an adapter created by cmph_io_byte_vector_adapter(); the caller's
 * key array is left untouched. */
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_adapter_t *adapter = key_source;

	cmph_io_vector_destroy(adapter);
}
|
||||||
|
|
||||||
|
/* Build a key source over an array of fixed-size structs, where each key
 * is key_len bytes at byte offset key_offset inside its struct.
 * Release the result with cmph_io_struct_vector_adapter_destroy(). */
cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = cmph_io_struct_vector_new(vector, struct_size, key_offset, key_len, nkeys);

	adapter->rewind = key_struct_vector_rewind;
	adapter->dispose = key_vector_dispose;
	adapter->read = key_struct_vector_read;
	return adapter;
}
|
||||||
|
|
||||||
|
/* Free an adapter created by cmph_io_struct_vector_adapter(); the
 * caller's struct array is left untouched. */
void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_adapter_t *adapter = key_source;

	cmph_io_struct_vector_destroy(adapter);
}
|
||||||
|
|
||||||
|
/* Build a key source over an array of NUL-terminated strings.
 * Release the result with cmph_io_vector_adapter_destroy(). */
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = cmph_io_vector_new(vector, nkeys);

	adapter->rewind = key_vector_rewind;
	adapter->dispose = key_vector_dispose;
	adapter->read = key_vector_read;
	return adapter;
}
|
||||||
|
|
||||||
|
/* Free an adapter created by cmph_io_vector_adapter(); the caller's
 * string array is left untouched. */
void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_adapter_t *adapter = key_source;

	cmph_io_vector_destroy(adapter);
}
|
||||||
|
|
||||||
|
/* Create a configuration bound to key_source. The algorithm defaults to
 * CHM, with freshly created CHM configuration data attached. */
cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
{
	cmph_config_t *config = __config_new(key_source);

	assert(config);
	config->algo = CMPH_CHM; // default value
	config->data = chm_config_new();
	return config;
}
|
||||||
|
|
||||||
|
/* Destroy the algorithm-specific data attached to mph, dispatching on the
 * algorithm currently selected in mph->algo. */
static void cmph_config_release_algo_data(cmph_config_t *mph)
{
	switch (mph->algo)
	{
		case CMPH_CHM:    chm_config_destroy(mph);    break;
		case CMPH_BMZ:    bmz_config_destroy(mph);    break;
		case CMPH_BMZ8:   bmz8_config_destroy(mph);   break;
		case CMPH_BRZ:    brz_config_destroy(mph);    break;
		case CMPH_FCH:    fch_config_destroy(mph);    break;
		case CMPH_BDZ:    bdz_config_destroy(mph);    break;
		case CMPH_BDZ_PH: bdz_ph_config_destroy(mph); break;
		case CMPH_CHD_PH: chd_ph_config_destroy(mph); break;
		case CMPH_CHD:    chd_config_destroy(mph);    break;
		default:
			assert(0);
	}
}

/* Attach freshly created configuration data for algo to mph->data. */
static void cmph_config_create_algo_data(cmph_config_t *mph, CMPH_ALGO algo)
{
	switch (algo)
	{
		case CMPH_CHM:    mph->data = chm_config_new();    break;
		case CMPH_BMZ:    mph->data = bmz_config_new();    break;
		case CMPH_BMZ8:   mph->data = bmz8_config_new();   break;
		case CMPH_BRZ:    mph->data = brz_config_new();    break;
		case CMPH_FCH:    mph->data = fch_config_new();    break;
		case CMPH_BDZ:    mph->data = bdz_config_new();    break;
		case CMPH_BDZ_PH: mph->data = bdz_ph_config_new(); break;
		case CMPH_CHD_PH: mph->data = chd_ph_config_new(); break;
		case CMPH_CHD:    mph->data = chd_config_new(mph); break; // the only constructor that takes the config
		default:
			assert(0);
	}
}

/* Select the algorithm used by mph. When it differs from the current one,
 * the old algorithm's configuration data is destroyed and replaced by
 * fresh data for the new algorithm. */
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{
	if (algo != mph->algo)
	{
		cmph_config_release_algo_data(mph);
		cmph_config_create_algo_data(mph, algo);
	}
	mph->algo = algo;
}
|
||||||
|
|
||||||
|
/* Forward the temporary directory to the BRZ configuration; ignored for
 * every other algorithm. */
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
	if (mph->algo != CMPH_BRZ) return;

	brz_config_set_tmp_dir(mph, tmp_dir);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Forward the mphf stream to the BRZ configuration; ignored for every
 * other algorithm. */
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{
	if (mph->algo != CMPH_BRZ) return;

	brz_config_set_mphf_fd(mph, mphf_fd);
}
|
||||||
|
|
||||||
|
/* Forward the b parameter to the algorithms that accept it (BRZ, BDZ,
 * CHD_PH, CHD); ignored for the others. */
void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
	switch (mph->algo)
	{
		case CMPH_BRZ:
			brz_config_set_b(mph, b);
			break;
		case CMPH_BDZ:
			bdz_config_set_b(mph, b);
			break;
		case CMPH_CHD_PH:
			chd_ph_config_set_b(mph, b);
			break;
		case CMPH_CHD:
			chd_config_set_b(mph, b);
			break;
		default:
			break;
	}
}
|
||||||
|
|
||||||
|
/* Forward keys_per_bin to the CHD_PH or CHD configuration; ignored for
 * the other algorithms. */
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{
	switch (mph->algo)
	{
		case CMPH_CHD_PH:
			chd_ph_config_set_keys_per_bin(mph, keys_per_bin);
			break;
		case CMPH_CHD:
			chd_config_set_keys_per_bin(mph, keys_per_bin);
			break;
		default:
			break;
	}
}
|
||||||
|
|
||||||
|
/* Forward the memory availability setting to the BRZ configuration;
 * ignored for every other algorithm. */
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
{
	if (mph->algo != CMPH_BRZ) return;

	brz_config_set_memory_availability(mph, memory_availability);
}
|
||||||
|
|
||||||
|
/* Destroy a configuration: first the data of the selected algorithm,
 * then the generic part. A NULL mph is accepted and ignored. */
void cmph_config_destroy(cmph_config_t *mph)
{
	if (!mph) return;

	DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
	switch (mph->algo)
	{
		case CMPH_CHM:    chm_config_destroy(mph);    break;
		case CMPH_BMZ:    bmz_config_destroy(mph);    break;
		case CMPH_BMZ8:   bmz8_config_destroy(mph);   break;
		case CMPH_BRZ:    brz_config_destroy(mph);    break;
		case CMPH_FCH:    fch_config_destroy(mph);    break;
		case CMPH_BDZ:    bdz_config_destroy(mph);    break;
		case CMPH_BDZ_PH: bdz_ph_config_destroy(mph); break;
		case CMPH_CHD_PH: chd_ph_config_destroy(mph); break;
		case CMPH_CHD:    chd_config_destroy(mph);    break;
		default:
			assert(0);
	}
	__config_destroy(mph);
}
|
||||||
|
|
||||||
|
/* Store the requested verbosity level in the configuration. */
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity)
{
	cmph_config_t *config = mph;

	config->verbosity = verbosity;
}
|
||||||
|
|
||||||
|
/* Hand the hash function list to the setter of the selected algorithm.
 * An unrecognised algorithm value is silently ignored. */
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	switch (mph->algo)
	{
		case CMPH_CHM:    chm_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BMZ:    bmz_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BMZ8:   bmz8_config_set_hashfuncs(mph, hashfuncs);   break;
		case CMPH_BRZ:    brz_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_FCH:    fch_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BDZ:    bdz_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BDZ_PH: bdz_ph_config_set_hashfuncs(mph, hashfuncs); break;
		case CMPH_CHD_PH: chd_ph_config_set_hashfuncs(mph, hashfuncs); break;
		case CMPH_CHD:    chd_config_set_hashfuncs(mph, hashfuncs);    break;
		default:
			break;
	}
}
|
||||||
|
/* Store the constant c in the configuration; cmph_new() passes it to the
 * selected algorithm's constructor. */
void cmph_config_set_graphsize(cmph_config_t *mph, double c)
{
	cmph_config_t *config = mph;

	config->c = c;
}
|
||||||
|
|
||||||
|
/* Build a function with the algorithm selected in mph, passing the
 * configured constant c to its constructor. For BRZ the inner algorithm
 * is chosen first: FCH when c >= 2.0, BMZ8 otherwise.
 * Returns whatever the constructor returns. */
cmph_t *cmph_new(cmph_config_t *mph)
{
	cmph_t *built = NULL;
	double c = mph->c;   // captured once, before any per-algorithm setup runs

	DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);
	switch (mph->algo)
	{
		case CMPH_CHM:
			DEBUGP("Creating chm hash\n");
			built = chm_new(mph, c);
			break;
		case CMPH_BMZ:
			DEBUGP("Creating bmz hash\n");
			built = bmz_new(mph, c);
			break;
		case CMPH_BMZ8:
			DEBUGP("Creating bmz8 hash\n");
			built = bmz8_new(mph, c);
			break;
		case CMPH_BRZ:
			DEBUGP("Creating brz hash\n");
			brz_config_set_algo(mph, (c >= 2.0) ? CMPH_FCH : CMPH_BMZ8);
			built = brz_new(mph, c);
			break;
		case CMPH_FCH:
			DEBUGP("Creating fch hash\n");
			built = fch_new(mph, c);
			break;
		case CMPH_BDZ:
			DEBUGP("Creating bdz hash\n");
			built = bdz_new(mph, c);
			break;
		case CMPH_BDZ_PH:
			DEBUGP("Creating bdz_ph hash\n");
			built = bdz_ph_new(mph, c);
			break;
		case CMPH_CHD_PH:
			DEBUGP("Creating chd_ph hash\n");
			built = chd_ph_new(mph, c);
			break;
		case CMPH_CHD:
			DEBUGP("Creating chd hash\n");
			built = chd_new(mph, c);
			break;
		default:
			assert(0);
	}
	return built;
}
|
||||||
|
|
||||||
|
/* Write mphf to f using the dump routine of its algorithm and return
 * that routine's result. An unknown algorithm trips an assertion and,
 * with assertions disabled, yields 0. */
int cmph_dump(cmph_t *mphf, FILE *f)
{
	int result = 0;

	switch (mphf->algo)
	{
		case CMPH_CHM:    result = chm_dump(mphf, f);    break;
		case CMPH_BMZ:    result = bmz_dump(mphf, f);    break;
		case CMPH_BMZ8:   result = bmz8_dump(mphf, f);   break;
		case CMPH_BRZ:    result = brz_dump(mphf, f);    break;
		case CMPH_FCH:    result = fch_dump(mphf, f);    break;
		case CMPH_BDZ:    result = bdz_dump(mphf, f);    break;
		case CMPH_BDZ_PH: result = bdz_ph_dump(mphf, f); break;
		case CMPH_CHD_PH: result = chd_ph_dump(mphf, f); break;
		case CMPH_CHD:    result = chd_dump(mphf, f);    break;
		default:
			assert(0);
	}
	return result;
}
|
||||||
|
cmph_t *cmph_load(FILE *f)
|
||||||
|
{
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
DEBUGP("Loading mphf generic parts\n");
|
||||||
|
mphf = __cmph_load(f);
|
||||||
|
if (mphf == NULL) return NULL;
|
||||||
|
DEBUGP("Loading mphf algorithm dependent parts\n");
|
||||||
|
|
||||||
|
switch (mphf->algo)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
chm_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading bmz algorithm dependent parts\n");
|
||||||
|
bmz_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading bmz8 algorithm dependent parts\n");
|
||||||
|
bmz8_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading brz algorithm dependent parts\n");
|
||||||
|
brz_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading fch algorithm dependent parts\n");
|
||||||
|
fch_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading bdz algorithm dependent parts\n");
|
||||||
|
bdz_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading bdz_ph algorithm dependent parts\n");
|
||||||
|
bdz_ph_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading chd_ph algorithm dependent parts\n");
|
||||||
|
chd_ph_load(f, mphf);
|
||||||
|
break;
|
||||||
|
case CMPH_CHD: /* included -- Fabiano */
|
||||||
|
DEBUGP("Loading chd algorithm dependent parts\n");
|
||||||
|
chd_load(f, mphf);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
DEBUGP("Loaded mphf\n");
|
||||||
|
return mphf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Evaluate mphf for key (keylen bytes) using the search routine of its
 * algorithm. An unknown algorithm trips an assertion and, with assertions
 * disabled, yields 0. */
cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	cmph_uint32 value = 0;

	DEBUGP("mphf algorithm: %u \n", mphf->algo);
	switch (mphf->algo)
	{
		case CMPH_CHM:
			value = chm_search(mphf, key, keylen);
			break;
		case CMPH_BMZ:
			DEBUGP("bmz algorithm search\n");
			value = bmz_search(mphf, key, keylen);
			break;
		case CMPH_BMZ8:
			DEBUGP("bmz8 algorithm search\n");
			value = bmz8_search(mphf, key, keylen);
			break;
		case CMPH_BRZ:
			DEBUGP("brz algorithm search\n");
			value = brz_search(mphf, key, keylen);
			break;
		case CMPH_FCH:
			DEBUGP("fch algorithm search\n");
			value = fch_search(mphf, key, keylen);
			break;
		case CMPH_BDZ:
			DEBUGP("bdz algorithm search\n");
			value = bdz_search(mphf, key, keylen);
			break;
		case CMPH_BDZ_PH:
			DEBUGP("bdz_ph algorithm search\n");
			value = bdz_ph_search(mphf, key, keylen);
			break;
		case CMPH_CHD_PH:
			DEBUGP("chd_ph algorithm search\n");
			value = chd_ph_search(mphf, key, keylen);
			break;
		case CMPH_CHD:
			DEBUGP("chd algorithm search\n");
			value = chd_search(mphf, key, keylen);
			break;
		default:
			assert(0);
	}
	return value;
}
|
||||||
|
|
||||||
|
/* Return the size field recorded in the function handle. */
cmph_uint32 cmph_size(cmph_t *mphf)
{
	cmph_uint32 size = mphf->size;

	return size;
}
|
||||||
|
|
||||||
|
/* Destroy mphf through the destructor of its algorithm. An unknown
 * algorithm trips an assertion and is otherwise ignored. */
void cmph_destroy(cmph_t *mphf)
{
	switch (mphf->algo)
	{
		case CMPH_CHM:    chm_destroy(mphf);    break;
		case CMPH_BMZ:    bmz_destroy(mphf);    break;
		case CMPH_BMZ8:   bmz8_destroy(mphf);   break;
		case CMPH_BRZ:    brz_destroy(mphf);    break;
		case CMPH_FCH:    fch_destroy(mphf);    break;
		case CMPH_BDZ:    bdz_destroy(mphf);    break;
		case CMPH_BDZ_PH: bdz_ph_destroy(mphf); break;
		case CMPH_CHD_PH: chd_ph_destroy(mphf); break;
		case CMPH_CHD:    chd_destroy(mphf);    break;
		default:
			assert(0);
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void cmph_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
// packing algorithm type to be used in cmph.c
|
||||||
|
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
|
||||||
|
*ptr++ = mphf->algo;
|
||||||
|
DEBUGP("mphf->algo = %u\n", mphf->algo);
|
||||||
|
switch(mphf->algo)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
chm_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
bmz_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
bmz8_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
brz_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
fch_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
bdz_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
bdz_ph_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||||
|
chd_ph_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_CHD: /* included -- Fabiano */
|
||||||
|
chd_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
switch(mphf->algo)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
return chm_packed_size(mphf);
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
return bmz_packed_size(mphf);
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
return bmz8_packed_size(mphf);
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
return brz_packed_size(mphf);
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
return fch_packed_size(mphf);
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
return bdz_packed_size(mphf);
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
return bdz_ph_packed_size(mphf);
|
||||||
|
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||||
|
return chd_ph_packed_size(mphf);
|
||||||
|
case CMPH_CHD: /* included -- Fabiano */
|
||||||
|
return chd_packed_size(mphf);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return 0; // FAILURE
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 cmph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
// fprintf(stderr, "algo:%u\n", *ptr);
|
||||||
|
switch(*ptr)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
return chm_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
return bmz_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
return bmz8_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
return brz_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
return fch_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
return bdz_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
return bdz_ph_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||||
|
return chd_ph_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_CHD: /* included -- Fabiano */
|
||||||
|
return chd_search_packed(++ptr, key, keylen);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return 0; // FAILURE
|
||||||
|
}
|
112
cmph/cmph.h
Normal file
112
cmph/cmph.h
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
#ifndef __CMPH_H__
|
||||||
|
#define __CMPH_H__
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "cmph_types.h"
|
||||||
|
|
||||||
|
typedef struct __config_t cmph_config_t;
|
||||||
|
typedef struct __cmph_t cmph_t;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
void *data;
|
||||||
|
cmph_uint32 nkeys;
|
||||||
|
int (*read)(void *, char **, cmph_uint32 *);
|
||||||
|
void (*dispose)(void *, char *, cmph_uint32);
|
||||||
|
void (*rewind)(void *);
|
||||||
|
} cmph_io_adapter_t;
|
||||||
|
|
||||||
|
/** Adapter pattern API **/
|
||||||
|
/* please call free() in the created adapters */
|
||||||
|
cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd);
|
||||||
|
void cmph_io_nlfile_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys);
|
||||||
|
void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys);
|
||||||
|
void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys);
|
||||||
|
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector,
|
||||||
|
cmph_uint32 struct_size,
|
||||||
|
cmph_uint32 key_offset,
|
||||||
|
cmph_uint32 key_len,
|
||||||
|
cmph_uint32 nkeys);
|
||||||
|
|
||||||
|
void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||||
|
|
||||||
|
/** Hash configuration API **/
|
||||||
|
cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);
|
||||||
|
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||||
|
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity);
|
||||||
|
void cmph_config_set_graphsize(cmph_config_t *mph, double c);
|
||||||
|
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
|
||||||
|
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
|
||||||
|
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
|
||||||
|
void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b);
|
||||||
|
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
|
||||||
|
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
|
||||||
|
void cmph_config_destroy(cmph_config_t *mph);
|
||||||
|
|
||||||
|
/** Hash API **/
|
||||||
|
cmph_t *cmph_new(cmph_config_t *mph);
|
||||||
|
|
||||||
|
/** cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Computes the mphf value.
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
cmph_uint32 cmph_size(cmph_t *mphf);
|
||||||
|
void cmph_destroy(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** Hash serialization/deserialization */
|
||||||
|
int cmph_dump(cmph_t *mphf, FILE *f);
|
||||||
|
cmph_t *cmph_load(FILE *f);
|
||||||
|
|
||||||
|
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the
|
||||||
|
*        resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
// TIMING functions. To use the macro CMPH_TIMING must be defined
|
||||||
|
#include "cmph_time.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
69
cmph/cmph_structs.c
Normal file
69
cmph/cmph_structs.c
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
#include "cmph_structs.h"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
/** Allocates a zero-filled configuration bound to key_source.
 *  \param key_source key reader stored in the new configuration (not copied)
 *  \return the new configuration, or NULL when the allocation fails
 *
 *  The result is released with __config_destroy().
 */
cmph_config_t *__config_new(cmph_io_adapter_t *key_source)
{
	cmph_config_t *mph = (cmph_config_t *)malloc(sizeof(cmph_config_t));

	/* The NULL check has to come before the memset: clearing first would
	 * write through a NULL pointer when malloc fails. */
	if (mph == NULL) return NULL;
	memset(mph, 0, sizeof(cmph_config_t));

	mph->key_source = key_source;
	mph->verbosity = 0;
	mph->data = NULL;
	mph->c = 0;
	return mph;
}
|
||||||
|
|
||||||
|
/** Frees the configuration structure itself.
 *  \param mph configuration to free
 *
 *  Only the structure is freed here; the key_source and data pointers it
 *  holds are not touched.
 */
void __config_destroy(cmph_config_t *mph)
{
	free(mph);
}
|
||||||
|
|
||||||
|
/** Writes the common header of a dumped mphf to fd.
 *  \param mphf function whose algorithm name and size are written
 *  \param fd output stream
 *
 *  The header is the NUL-terminated algorithm name (cmph_names[mphf->algo])
 *  followed by the raw bytes of mphf->size. The fwrite results are stored
 *  but not inspected, so write errors are not reported to the caller.
 */
void __cmph_dump(cmph_t *mphf, FILE *fd)
{
	const char *algo_name = cmph_names[mphf->algo];
	register size_t nbytes;

	/* + 1 keeps the terminating NUL, which __cmph_load() uses as the end marker */
	nbytes = fwrite(algo_name, (size_t)(strlen(algo_name) + 1), (size_t)1, fd);
	nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd);
}
|
||||||
|
cmph_t *__cmph_load(FILE *f)
|
||||||
|
{
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
cmph_uint32 i;
|
||||||
|
char algo_name[BUFSIZ];
|
||||||
|
char *ptr = algo_name;
|
||||||
|
CMPH_ALGO algo = CMPH_COUNT;
|
||||||
|
register size_t nbytes;
|
||||||
|
|
||||||
|
DEBUGP("Loading mphf\n");
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
size_t c = fread(ptr, (size_t)1, (size_t)1, f);
|
||||||
|
if (c != 1) return NULL;
|
||||||
|
if (*ptr == 0) break;
|
||||||
|
++ptr;
|
||||||
|
}
|
||||||
|
for(i = 0; i < CMPH_COUNT; ++i)
|
||||||
|
{
|
||||||
|
if (strcmp(algo_name, cmph_names[i]) == 0)
|
||||||
|
{
|
||||||
|
algo = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (algo == CMPH_COUNT)
|
||||||
|
{
|
||||||
|
DEBUGP("Algorithm %s not found\n", algo_name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||||
|
mphf->algo = algo;
|
||||||
|
nbytes = fread(&(mphf->size), sizeof(mphf->size), (size_t)1, f);
|
||||||
|
mphf->data = NULL;
|
||||||
|
DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size);
|
||||||
|
|
||||||
|
return mphf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
33
cmph/cmph_structs.h
Normal file
33
cmph/cmph_structs.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#ifndef __CMPH_STRUCTS_H__
|
||||||
|
#define __CMPH_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "cmph.h"
|
||||||
|
|
||||||
|
/** Hash generation algorithm data
|
||||||
|
*/
|
||||||
|
struct __config_t
|
||||||
|
{
|
||||||
|
CMPH_ALGO algo;
|
||||||
|
cmph_io_adapter_t *key_source;
|
||||||
|
cmph_uint32 verbosity;
|
||||||
|
double c;
|
||||||
|
void *data; // algorithm dependent data
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Hash querying algorithm data
|
||||||
|
*/
|
||||||
|
struct __cmph_t
|
||||||
|
{
|
||||||
|
CMPH_ALGO algo;
|
||||||
|
cmph_uint32 size;
|
||||||
|
cmph_io_adapter_t *key_source;
|
||||||
|
void *data; // algorithm dependent data
|
||||||
|
};
|
||||||
|
|
||||||
|
cmph_config_t *__config_new(cmph_io_adapter_t *key_source);
|
||||||
|
void __config_destroy(cmph_config_t*);
|
||||||
|
void __cmph_dump(cmph_t *mphf, FILE *);
|
||||||
|
cmph_t *__cmph_load(FILE *f);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
62
cmph/cmph_time.h
Normal file
62
cmph/cmph_time.h
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#ifdef ELAPSED_TIME_IN_SECONDS
|
||||||
|
#undef ELAPSED_TIME_IN_SECONDS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ELAPSED_TIME_IN_uSECONDS
|
||||||
|
#undef ELAPSED_TIME_IN_uSECONDS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
// include headers to use gettimeofday
|
||||||
|
#else
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#ifndef __CMPH_TIME_H__
|
||||||
|
#define __CMPH_TIME_H__
|
||||||
|
/** Stores the current gettimeofday() reading, in seconds, into *elapsed_time.
 *  \param elapsed_time receives seconds plus the microsecond part as a fraction
 *
 *  When gettimeofday() fails, *elapsed_time is left unchanged.
 */
static inline void elapsed_time_in_seconds(double * elapsed_time)
{
	struct timeval now;

	if (gettimeofday(&now, NULL) >= 0)
	{
		const double whole_seconds = (double)now.tv_sec;
		const double fraction = (double)now.tv_usec/1000000.0;

		*elapsed_time = whole_seconds + fraction;
	}
}
|
||||||
|
/* No-op stand-in that the ELAPSED_TIME_IN_SECONDS macro is mapped to in this
 * header's non-__GNUC__ branches. The empty parameter list (not `void`)
 * leaves the arguments unspecified, so a call written for
 * elapsed_time_in_seconds(&t) is still accepted.
 * NOTE(review): this definition sits inside the `#ifdef __GNUC__` section,
 * while the macros that name it are selected in the `#else` branches --
 * confirm that non-GNU builds have a definition to link against. */
static inline void dummy_elapsed_time_in_seconds()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
/** Stores the current gettimeofday() reading, in microseconds, into *elapsed_time.
 *  \param elapsed_time receives tv_sec * 1000000 + tv_usec
 *
 *  When gettimeofday() fails, *elapsed_time is left unchanged.
 */
static inline void elapsed_time_in_useconds(cmph_uint64 * elapsed_time)
{
	struct timeval e_time;
	if (gettimeofday(&e_time, NULL) < 0) {
		return;
	}
	/* Widen to 64 bits before multiplying: done in the width of tv_sec, the
	 * product overflows on platforms where that type has only 32 bits. */
	*elapsed_time = (cmph_uint64)e_time.tv_sec * 1000000U + (cmph_uint64)e_time.tv_usec;
}
|
||||||
|
/* No-op stand-in that the ELAPSED_TIME_IN_uSECONDS macro is mapped to in this
 * header's non-__GNUC__ branches. The empty parameter list (not `void`)
 * leaves the arguments unspecified, so a call written for
 * elapsed_time_in_useconds(&t) is still accepted.
 * NOTE(review): defined only inside the `#ifdef __GNUC__` section -- confirm
 * that non-GNU builds have a definition to link against. */
static inline void dummy_elapsed_time_in_useconds()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CMPH_TIMING
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define ELAPSED_TIME_IN_SECONDS elapsed_time_in_seconds
|
||||||
|
#define ELAPSED_TIME_IN_uSECONDS elapsed_time_in_useconds
|
||||||
|
#else
|
||||||
|
#define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
|
||||||
|
#define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define ELAPSED_TIME_IN_SECONDS
|
||||||
|
#define ELAPSED_TIME_IN_uSECONDS
|
||||||
|
#else
|
||||||
|
#define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
|
||||||
|
#define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
|
||||||
|
#endif
|
||||||
|
#endif
|
42
cmph/cmph_types.h
Normal file
42
cmph/cmph_types.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef __CMPH_TYPES_H__
|
||||||
|
#define __CMPH_TYPES_H__
|
||||||
|
|
||||||
|
typedef char cmph_int8;
|
||||||
|
typedef unsigned char cmph_uint8;
|
||||||
|
|
||||||
|
typedef short cmph_int16;
|
||||||
|
typedef unsigned short cmph_uint16;
|
||||||
|
|
||||||
|
typedef int cmph_int32;
|
||||||
|
typedef unsigned int cmph_uint32;
|
||||||
|
|
||||||
|
#if defined(__ia64) || defined(__x86_64__)
|
||||||
|
/** \typedef long cmph_int64;
|
||||||
|
* \brief 64-bit integer for a 64-bit architecture.
|
||||||
|
*/
|
||||||
|
typedef long cmph_int64;
|
||||||
|
|
||||||
|
/** \typedef unsigned long cmph_uint64;
|
||||||
|
* \brief Unsigned 64-bit integer for a 64-bit architecture.
|
||||||
|
*/
|
||||||
|
typedef unsigned long cmph_uint64;
|
||||||
|
#else
|
||||||
|
/** \typedef long long cmph_int64;
|
||||||
|
* \brief 64-bit integer for a 32-bit architecture.
|
||||||
|
*/
|
||||||
|
typedef long long cmph_int64;
|
||||||
|
|
||||||
|
/** \typedef unsigned long long cmph_uint64;
|
||||||
|
* \brief Unsigned 64-bit integer for a 32-bit architecture.
|
||||||
|
*/
|
||||||
|
typedef unsigned long long cmph_uint64;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH;
|
||||||
|
extern const char *cmph_hash_names[];
|
||||||
|
typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH,
|
||||||
|
CMPH_BDZ, CMPH_BDZ_PH,
|
||||||
|
CMPH_CHD_PH, CMPH_CHD, CMPH_COUNT } CMPH_ALGO;
|
||||||
|
extern const char *cmph_names[];
|
||||||
|
|
||||||
|
#endif
|
321
cmph/compressed_rank.c
Normal file
321
cmph/compressed_rank.c
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
#include<stdlib.h>
|
||||||
|
#include<stdio.h>
|
||||||
|
#include<limits.h>
|
||||||
|
#include<string.h>
|
||||||
|
#include"compressed_rank.h"
|
||||||
|
#include"bitbool.h"
|
||||||
|
// #define DEBUG
|
||||||
|
#include"debug.h"
|
||||||
|
/* Integer base-2 logarithm: counts how many times x can be shifted right by
 * one bit before it drops to 1 or below. Returns 0 for x == 0 and x == 1. */
static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
{
	register cmph_uint32 shifts;

	for (shifts = 0; x > 1; x >>= 1)
	{
		shifts++;
	}
	return shifts;
}
|
||||||
|
|
||||||
|
/* Puts cr into the empty state: all counters are zero, the embedded select
 * structure is initialised, and no remainder table is allocated. */
void compressed_rank_init(compressed_rank_t * cr)
{
	/* scalar fields */
	cr->max_val = 0;
	cr->n = 0;
	cr->rem_r = 0;

	/* owned sub-structures */
	select_init(&cr->sel);
	cr->vals_rems = NULL;
}
|
||||||
|
|
||||||
|
/* Frees the remainder table owned by cr and destroys the embedded select
 * structure. The cr object itself is not freed. */
void compressed_rank_destroy(compressed_rank_t * cr)
{
	free(cr->vals_rems);
	cr->vals_rems = NULL; /* cleared so a later free of the same field is harmless */

	select_destroy(&cr->sel);
}
|
||||||
|
|
||||||
|
/* Builds the compressed rank structure for the n values in vals_table.
 *
 * Each value is split into its rem_r low bits, stored in cr->vals_rems, and
 * the remaining high bits (the quotient), which are indexed through cr->sel.
 *
 * n must be non-zero: vals_table[n - 1] is read as the maximum value and
 * max_val / n is computed.
 * NOTE(review): the quotient walk below only makes sense if vals_table is in
 * non-decreasing order -- confirm with the callers.
 * NOTE(review): any table already held in cr->vals_rems is overwritten
 * without being freed, and neither calloc result is checked.
 */
void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
{
	register cmph_uint32 k, cursor;
	register cmph_uint32 low_mask;
	register cmph_uint32 quot_count;
	register cmph_uint32 * quot_index = 0;

	cr->n = n;
	cr->max_val = vals_table[cr->n - 1];

	/* remainder width: log2 of the average gap, but never less than one bit */
	cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
	if(cr->rem_r == 0)
	{
		cr->rem_r = 1;
	}
	low_mask = (1U << cr->rem_r) - 1U;
	quot_count = cr->max_val >> cr->rem_r;

	quot_index = (cmph_uint32 *) calloc(quot_count, sizeof(cmph_uint32));
	cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));

	/* store the low rem_r bits of every value */
	for(k = 0; k < cr->n; k++)
	{
		set_bits_value(cr->vals_rems, k, vals_table[k] & low_mask, cr->rem_r, low_mask);
	}

	/* quot_index[k] = position of the first value whose quotient exceeds k */
	cursor = 0;
	for(k = 0; k < quot_count; k++)
	{
		while(k >= (vals_table[cursor] >> cr->rem_r))
		{
			cursor++;
		}
		quot_index[k] = cursor;
	}

	// FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
	// I changed the select structure to work up to m, instead of up to m - 1.
	select_generate(&cr->sel, quot_index, quot_count, cr->n);

	free(quot_index);
}
|
||||||
|
|
||||||
|
/* Computes the rank associated with idx.
 *
 * Returns cr->n when idx is greater than cr->max_val. Otherwise idx is split
 * into quotient (high bits) and remainder (low rem_r bits); the select
 * structure gives the starting position for that quotient, and the scan moves
 * forward until either a set bit is found in the select bit vector or a
 * stored remainder is not smaller than the remainder of idx.
 * NOTE(review): presumably the result is the number of stored values smaller
 * than idx -- confirm against the callers.
 */
cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
{
	register cmph_uint32 low_mask;
	register cmph_uint32 quot, rem;
	register cmph_uint32 bit_pos, rank;

	if(idx > cr->max_val)
	{
		return cr->n;
	}

	low_mask = (1U << cr->rem_r) - 1U;
	quot = idx >> cr->rem_r;
	rem = idx & low_mask;

	if(quot != 0)
	{
		bit_pos = select_query(&cr->sel, quot - 1) + 1;
		rank = bit_pos - quot;
	}
	else
	{
		bit_pos = 0;
		rank = 0;
	}

	/* the bit test comes first, so the remainder table is not read once a set bit is reached */
	while(!(GETBIT32(cr->sel.bits_vec, bit_pos))
	      && get_bits_value(cr->vals_rems, rank, cr->rem_r, low_mask) < rem)
	{
		bit_pos++;
		rank++;
	}

	return rank;
}
|
||||||
|
|
||||||
|
/* Returns the space used by cr, counted in bits: the figure reported by the
 * select structure, plus the remainder table, plus three 32-bit words.
 * NOTE(review): the three words presumably stand for max_val, n and rem_r. */
cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
{
	const cmph_uint32 bits_per_word = (cmph_uint32)sizeof(cmph_uint32)*8;
	const cmph_uint32 sel_bits = select_get_space_usage(&cr->sel);
	const cmph_uint32 rems_bits = BITS_TABLE_SIZE(cr->n, cr->rem_r)*bits_per_word;
	const cmph_uint32 fields_bits = 3*bits_per_word;

	return sel_bits + rems_bits + fields_bits;
}
|
||||||
|
|
||||||
|
/* Serialises cr into a newly allocated buffer.
 *
 * On success *buf points to the buffer (the caller frees it) and *buflen is
 * its size in bytes. When the allocation fails, *buf is NULL and *buflen is
 * set to UINT_MAX.
 *
 * Layout: max_val, n, rem_r, length of the select dump (four 32-bit words),
 * then the select dump itself, then the remainder table.
 * NOTE(review): the buffer returned by select_dump() is used without a
 * NULL check -- confirm its failure contract.
 */
void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
{
	const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
	register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
	register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * word;
	register cmph_uint32 pos = 0;
	char * buf_sel = 0;
	cmph_uint32 buflen_sel = 0;

	*buflen = 4*word + sel_size + vals_rems_size;

	DEBUGP("sel_size = %u\n", sel_size);
	DEBUGP("vals_rems_size = %u\n", vals_rems_size);

	*buf = (char *)calloc(*buflen, sizeof(char));
	if (*buf == NULL)
	{
		*buflen = UINT_MAX;
		return;
	}

	// header fields: max_val, n and rem_r
	memcpy(*buf + pos, &(cr->max_val), sizeof(cmph_uint32));
	pos += word;
	DEBUGP("max_val = %u\n", cr->max_val);

	memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
	pos += word;
	DEBUGP("n = %u\n", cr->n);

	memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
	pos += word;
	DEBUGP("rem_r = %u\n", cr->rem_r);

	// select structure: its length first, then its bytes
	select_dump(&cr->sel, &buf_sel, &buflen_sel);
	memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
	pos += word;
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	memcpy(*buf + pos, buf_sel, buflen_sel);
	#ifdef DEBUG
	cmph_uint32 i = 0;
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
	}
	#endif
	pos += buflen_sel;
	free(buf_sel);

	// remainder table
	memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
	#ifdef DEBUG
	for(i = 0; i < vals_rems_size; i++)
	{
		DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
	}
	#endif
	pos += vals_rems_size;

	DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
}
|
||||||
|
|
||||||
|
/* Rebuilds cr from a buffer produced by compressed_rank_dump().
 *
 * Reads max_val, n, rem_r and the length of the select dump (four 32-bit
 * words), loads the select structure, then copies the remainder table into
 * a newly allocated cr->vals_rems. A table already held by cr is freed first.
 *
 * If the table allocation fails, cr->vals_rems is left NULL and nothing is
 * copied. buflen is only used for debug output; the reads are not
 * bounds-checked against it.
 */
void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
{
	register cmph_uint32 pos = 0;
	cmph_uint32 buflen_sel = 0;
	register cmph_uint32 vals_rems_size = 0;

	// loading max_val, n, and rem_r
	memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("max_val = %u\n", cr->max_val);

	memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cr->n);

	memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cr->rem_r);

	// loading sel
	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	select_load(&cr->sel, buf + pos, buflen_sel);
	#ifdef DEBUG
	cmph_uint32 i = 0;
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
	}
	#endif
	pos += buflen_sel;

	// loading vals_rems
	free(cr->vals_rems);
	vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
	cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
	/* words -> bytes, using the same element size as dump and packed_size */
	vals_rems_size *= (cmph_uint32)sizeof(cmph_uint32);
	if(cr->vals_rems != NULL)
	{
		/* skipped when the allocation failed, to avoid writing through NULL */
		memcpy(cr->vals_rems, buf + pos, vals_rems_size);
	}

	#ifdef DEBUG
	for(i = 0; i < vals_rems_size; i++)
	{
		DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
	}
	#endif
	pos += vals_rems_size;

	DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
 *  \brief Packs a compressed_rank structure into the preallocated memory area cr_packed.
 *  \param cr points to the compressed_rank structure
 *  \param cr_packed destination area of at least compressed_rank_packed_size() bytes
 *
 *  Nothing is written when either pointer is NULL, or when the temporary
 *  dump buffer cannot be allocated.
 */
void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
{
	if (cr && cr_packed)
	{
		char *buf = NULL;
		cmph_uint32 buflen = 0;

		compressed_rank_dump(cr, &buf, &buflen);
		/* On allocation failure the dump leaves buf NULL and sets buflen to
		 * UINT_MAX; copying in that state would read through a NULL pointer. */
		if (buf != NULL)
		{
			memcpy(cr_packed, buf, buflen);
			free(buf);
		}
	}
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
 *  \brief Returns the number of bytes needed to pack cr.
 *  \return four 32-bit header words, plus the packed size of the select
 *          structure, plus the size of the remainder table
 */
cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
{
	const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
	const cmph_uint32 select_bytes = select_packed_size(&cr->sel);
	const cmph_uint32 rems_bytes = BITS_TABLE_SIZE(cr->n, cr->rem_r) * word;

	return 4 * word + select_bytes + rems_bytes;
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
 *  \param cr_packed pointer to a packed compressed_rank structure
 *  \param idx index to compute the rank for
 *  \return the rank value; n (the second header word) when idx is greater than max_val
 *
 *  Performs the same steps as compressed_rank_query(), reading everything
 *  straight from the packed layout: max_val, n, rem_r, length of the select
 *  dump, the select dump itself, then the remainder table.
 */
cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
{
	// locating the pieces inside cr_packed
	register cmph_uint32 *words = (cmph_uint32 *)cr_packed;
	register cmph_uint32 max_val = words[0];
	register cmph_uint32 n = words[1];
	register cmph_uint32 rem_r = words[2];
	register cmph_uint32 buflen_sel = words[3];
	register cmph_uint32 * sel_packed = words + 4;
	register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
	/* buflen_sel is in bytes; >> 2 turns it into a count of 32-bit words */
	register cmph_uint32 * vals_rems = sel_packed + (buflen_sel >> 2);

	// query computation
	register cmph_uint32 low_mask;
	register cmph_uint32 quot, rem;
	register cmph_uint32 bit_pos, rank;

	if(idx > max_val)
	{
		return n;
	}

	low_mask = (1U << rem_r) - 1U;
	quot = idx >> rem_r;
	rem = idx & low_mask;

	if(quot != 0)
	{
		bit_pos = select_query_packed(sel_packed, quot - 1) + 1;
		rank = bit_pos - quot;
	}
	else
	{
		bit_pos = 0;
		rank = 0;
	}

	/* the bit test comes first, so the remainder table is not read once a set bit is reached */
	while(!(GETBIT32(bits_vec, bit_pos))
	      && get_bits_value(vals_rems, rank, rem_r, low_mask) < rem)
	{
		bit_pos++;
		rank++;
	}

	return rank;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
55
cmph/compressed_rank.h
Normal file
55
cmph/compressed_rank.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#ifndef __CMPH_COMPRESSED_RANK_H__
|
||||||
|
#define __CMPH_COMPRESSED_RANK_H__
|
||||||
|
|
||||||
|
#include "select.h"
|
||||||
|
|
||||||
|
struct _compressed_rank_t
|
||||||
|
{
|
||||||
|
cmph_uint32 max_val;
|
||||||
|
cmph_uint32 n; // number of values stored in vals_rems
|
||||||
|
// The length in bits of each value is decomposed into two components: the lg(n) MSBs are stored in rank_select data structure
|
||||||
|
// the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
|
||||||
|
cmph_uint32 rem_r;
|
||||||
|
select_t sel;
|
||||||
|
cmph_uint32 * vals_rems;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _compressed_rank_t compressed_rank_t;
|
||||||
|
|
||||||
|
void compressed_rank_init(compressed_rank_t * cr);
|
||||||
|
|
||||||
|
void compressed_rank_destroy(compressed_rank_t * cr);
|
||||||
|
|
||||||
|
void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||||
|
|
||||||
|
cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx);
|
||||||
|
|
||||||
|
cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr);
|
||||||
|
|
||||||
|
void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen);
|
||||||
|
|
||||||
|
void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
|
||||||
|
* \brief Support the ability to pack a compressed_rank structure into a preallocated contiguous memory space pointed by cr_packed.
|
||||||
|
* \param cr points to the compressed_rank structure
|
||||||
|
* \param cr_packed pointer to the contiguous memory area used to store the compressed_rank structure. The size of cr_packed must be at least @see compressed_rank_packed_size
|
||||||
|
*/
|
||||||
|
void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
|
||||||
|
* \brief Return the amount of space needed to pack a compressed_rank structure.
|
||||||
|
* \return the size of the packed compressed_rank structure or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
|
||||||
|
* \param cr_packed is a pointer to a contiguous memory area
|
||||||
|
* \param idx is an index to compute the rank
|
||||||
|
* \return an integer that represents the compressed_rank value.
|
||||||
|
*/
|
||||||
|
cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
|
||||||
|
|
||||||
|
#endif
|
378
cmph/compressed_seq.c
Normal file
378
cmph/compressed_seq.c
Normal file
@ -0,0 +1,378 @@
|
|||||||
|
#include "compressed_seq.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
// #define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
/* Integer base-2 logarithm: counts how many times x can be shifted right by
 * one bit before it drops to 1 or below. Returns 0 for x == 0 and x == 1. */
static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
{
	register cmph_uint32 shifts;

	for (shifts = 0; x > 1; x >>= 1)
	{
		shifts++;
	}
	return shifts;
}
|
||||||
|
|
||||||
|
/* Puts cs into the empty state: the embedded select structure is
 * initialised, all counters are zero, and no table is allocated. */
void compressed_seq_init(compressed_seq_t * cs)
{
	select_init(&cs->sel);

	/* scalar fields */
	cs->n = 0;
	cs->rem_r = 0;
	cs->total_length = 0;

	/* owned tables */
	cs->length_rems = NULL;
	cs->store_table = NULL;
}
|
||||||
|
|
||||||
|
/* Frees the two tables owned by cs and destroys the embedded select
 * structure. The cs object itself is not freed. */
void compressed_seq_destroy(compressed_seq_t * cs)
{
	/* pointers are cleared after freeing so a later free of the same field is harmless */
	free(cs->store_table);
	cs->store_table = NULL;

	free(cs->length_rems);
	cs->length_rems = NULL;

	select_destroy(&cs->sel);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Build the compressed representation of vals_table[0..n-1] inside cs.
//
// Every non-zero value v is encoded with compressed_seq_i_log2(v + 1) bits
// holding v - (2^len - 1); zero values occupy no bits. The encodings are
// concatenated in store_table. For each entry the running end offset is split:
// its rem_r low bits go to length_rems and the remaining high part is handed
// to the select structure.
//
// Any tables previously owned by cs are freed and replaced.
// NOTE(review): allocation results are not checked here, as in the original.
void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
{
	cmph_uint32 i;
	// lengths: represents lengths of encoded values
	cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
	cmph_uint32 rems_mask;
	cmph_uint32 stored_value;
	cmph_uint32 mean_length;

	cs->n = n;
	cs->total_length = 0;

	// Pass 1: compute the encoded length of each value and the total bit count.
	for(i = 0; i < cs->n; i++)
	{
		if(vals_table[i] == 0)
		{
			lengths[i] = 0;
		}
		else
		{
			lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
			cs->total_length += lengths[i];
		}
	}

	free(cs->store_table);
	cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
	cs->total_length = 0;

	// Pass 2: write the encodings back to back into store_table.
	for(i = 0; i < cs->n; i++)
	{
		if(vals_table[i] == 0)
		{
			continue;
		}
		stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
		set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
		cs->total_length += lengths[i];
	}

	// rem_r is derived from the mean encoded length. An empty sequence has no
	// mean, so treat it as 0 instead of dividing by zero.
	mean_length = (cs->n != 0) ? (cs->total_length / cs->n) : 0;
	cs->rem_r = compressed_seq_i_log2(mean_length);
	if(cs->rem_r == 0)
	{
		cs->rem_r = 1;
	}

	free(cs->length_rems);
	cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));

	rems_mask = (1U << cs->rem_r) - 1U;
	cs->total_length = 0;

	// Pass 3: split each cumulative end offset into low bits (length_rems)
	// and high bits (reusing lengths[] as input for the select structure).
	for(i = 0; i < cs->n; i++)
	{
		cs->total_length += lengths[i];
		set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
		lengths[i] = cs->total_length >> cs->rem_r;
	}

	select_init(&cs->sel);

	// FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
	// I changed the select structure to work up to m, instead of up to m - 1.
	select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));

	free(lengths);
}
|
||||||
|
|
||||||
|
// Space used by cs, in bits: the select structure, store_table, length_rems
// and four cmph_uint32 header words.
cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
{
	cmph_uint32 bits = select_get_space_usage(&cs->sel);

	// store_table: one 32-bit word per started group of 32 encoded bits.
	bits += ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32) * 8;

	// length_rems: n cells of rem_r bits each, rounded up to whole words.
	bits += BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32) * 8;

	return 4 * (cmph_uint32)sizeof(cmph_uint32) * 8 + bits;
}
|
||||||
|
|
||||||
|
// Decode and return the value stored at position idx (idx must be < cs->n).
//
// The start offset of the encoding is the end offset of entry idx - 1 (0 for
// the first entry); each offset is rebuilt from the select structure (high
// part) and length_rems (low rem_r bits). A zero-length encoding means 0.
cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
{
	cmph_uint32 start_bit = 0;
	cmph_uint32 nbits;
	cmph_uint32 low_mask;
	cmph_uint32 payload;
	cmph_uint32 sel_pos;

	assert(idx < cs->n); // FABIANO ADDED

	low_mask = (1U << cs->rem_r) - 1U;

	if(idx != 0)
	{
		// End offset of the previous entry is where this one begins.
		sel_pos = select_query(&cs->sel, idx - 1);

		start_bit = (sel_pos - (idx - 1)) << cs->rem_r;
		start_bit += get_bits_value(cs->length_rems, idx-1, cs->rem_r, low_mask);

		sel_pos = select_next_query(&cs->sel, sel_pos);
	}
	else
	{
		sel_pos = select_query(&cs->sel, idx);
	}

	// End offset of this entry minus its start offset gives the bit length.
	nbits = (sel_res_to_high(sel_pos, idx)) << cs->rem_r;
	nbits += get_bits_value(cs->length_rems, idx, cs->rem_r, low_mask);
	nbits -= start_bit;

	if(nbits == 0)
	{
		return 0;
	}

	payload = get_bits_at_pos(cs->store_table, start_bit, nbits);
	return payload + ((1U << nbits) - 1U);
}
|
||||||
|
|
||||||
|
// Serialize cs into a freshly calloc'ed buffer returned through *buf; its size
// in bytes is stored in *buflen. Layout, in order: n, rem_r, total_length, the
// byte size of the select dump, the select dump, length_rems, store_table.
// If the buffer cannot be allocated, *buf is NULL and *buflen is UINT_MAX.
// NOTE(review): the outcome of select_dump is not checked before it is copied.
void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
{
	cmph_uint32 sel_size = select_packed_size(&(cs->sel));
	cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
	cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
	cmph_uint32 pos = 0;
	cmph_uint32 buflen_sel = 0;
	char * buf_sel = NULL;
	char * out;
#ifdef DEBUG
	cmph_uint32 i = 0;
#endif

	*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;

	DEBUGP("sel_size = %u\n", sel_size);
	DEBUGP("length_rems_size = %u\n", length_rems_size);
	DEBUGP("store_table_size = %u\n", store_table_size);

	out = (char *)calloc(*buflen, sizeof(char));
	*buf = out;
	if (out == NULL)
	{
		*buflen = UINT_MAX;
		return;
	}

	// dumping n, rem_r and total_length
	memcpy(out + pos, &(cs->n), sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cs->n);

	memcpy(out + pos, &(cs->rem_r), sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cs->rem_r);

	memcpy(out + pos, &(cs->total_length), sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("total_length = %u\n", cs->total_length);

	// dumping sel: its byte size first, then its contents
	select_dump(&cs->sel, &buf_sel, &buflen_sel);
	memcpy(out + pos, &buflen_sel, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	memcpy(out + pos, buf_sel, buflen_sel);
#ifdef DEBUG
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(out + pos + i));
	}
#endif
	pos += buflen_sel;

	free(buf_sel);

	// dumping length_rems
	memcpy(out + pos, cs->length_rems, length_rems_size);
#ifdef DEBUG
	for(i = 0; i < length_rems_size; i++)
	{
		DEBUGP("pos = %u -- length_rems_size = %u  -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(out + pos + i));
	}
#endif
	pos += length_rems_size;

	// dumping store_table
	memcpy(out + pos, cs->store_table, store_table_size);

#ifdef DEBUG
	for(i = 0; i < store_table_size; i++)
	{
		DEBUGP("pos = %u -- store_table_size = %u  -- store_table[%u] = %u\n", pos, store_table_size, i, *(out + pos + i));
	}
#endif
	DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
}
|
||||||
|
|
||||||
|
// Rebuild cs from a buffer laid out as: n, rem_r, total_length, the byte size
// of the select dump, the select dump, length_rems, store_table.
// Tables already owned by cs are freed and replaced by freshly allocated
// copies. buflen is only used for the debug trace.
// NOTE(review): neither buflen nor the allocation results are validated.
void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
{
	cmph_uint32 pos = 0;
	cmph_uint32 buflen_sel = 0;
	cmph_uint32 length_rems_words = 0;
	cmph_uint32 length_rems_size = 0;
	cmph_uint32 store_table_words = 0;
	cmph_uint32 store_table_size = 0;
#ifdef DEBUG
	cmph_uint32 i = 0;
#endif

	// loading n, rem_r and total_length
	memcpy(&(cs->n), buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cs->n);

	memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cs->rem_r);

	memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("total_length = %u\n", cs->total_length);

	// loading sel: its byte size first, then its contents
	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
	pos += (cmph_uint32)sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	select_load(&cs->sel, buf + pos, buflen_sel);
#ifdef DEBUG
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
	}
#endif
	pos += buflen_sel;

	// loading length_rems
	free(cs->length_rems);
	length_rems_words = BITS_TABLE_SIZE(cs->n, cs->rem_r);
	cs->length_rems = (cmph_uint32 *) calloc(length_rems_words, sizeof(cmph_uint32));
	length_rems_size = length_rems_words * 4;
	memcpy(cs->length_rems, buf + pos, length_rems_size);

#ifdef DEBUG
	for(i = 0; i < length_rems_size; i++)
	{
		DEBUGP("pos = %u -- length_rems_size = %u  -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
	}
#endif
	pos += length_rems_size;

	// loading store_table
	store_table_words = ((cs->total_length + 31) >> 5);
	free(cs->store_table);
	cs->store_table = (cmph_uint32 *) calloc(store_table_words, sizeof(cmph_uint32));
	store_table_size = store_table_words * 4;
	memcpy(cs->store_table, buf + pos, store_table_size);

#ifdef DEBUG
	for(i = 0; i < store_table_size; i++)
	{
		DEBUGP("pos = %u -- store_table_size = %u  -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
	}
#endif

	DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
}
|
||||||
|
|
||||||
|
// Copy the serialized form of cs into the caller-provided area cs_packed.
// Nothing is written when either pointer is NULL or when the temporary dump
// buffer could not be allocated.
void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
{
	char *buf = NULL;
	cmph_uint32 buflen = 0;

	if (!cs || !cs_packed)
	{
		return;
	}

	compressed_seq_dump(cs, &buf, &buflen);
	if (!buf)
	{
		// compressed_seq_dump signals allocation failure with a NULL buffer
		// and buflen == UINT_MAX; copying in that state would read from NULL.
		return;
	}

	memcpy(cs_packed, buf, buflen);
	free(buf);
}
|
||||||
|
|
||||||
|
// Number of bytes needed to pack cs: four cmph_uint32 header words plus the
// packed select structure, store_table and length_rems.
cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
{
	cmph_uint32 sel_bytes = select_packed_size(&cs->sel);
	cmph_uint32 store_bytes = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
	cmph_uint32 rems_bytes = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
	cmph_uint32 header_bytes = 4 * (cmph_uint32)sizeof(cmph_uint32);

	return header_bytes + sel_bytes + store_bytes + rems_bytes;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Decode the value at position idx directly from a packed compressed sequence.
// The packed area is read as 32-bit words: n, rem_r, total_length, the byte
// size of the packed select structure, then the select structure, length_rems
// and store_table. The decoding mirrors compressed_seq_query.
cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
{
	// unpacking cs_packed
	cmph_uint32 * words = (cmph_uint32 *)cs_packed;
	cmph_uint32 n = words[0];
	cmph_uint32 rem_r = words[1];
	// words[2] is total_length, which is not needed for a query
	cmph_uint32 buflen_sel = words[3];
	cmph_uint32 * sel_packed = words + 4;
	cmph_uint32 * length_rems = sel_packed + (buflen_sel >> 2);
	cmph_uint32 length_rems_size = BITS_TABLE_SIZE(n, rem_r);
	cmph_uint32 * store_table = length_rems + length_rems_size;

	// compressed sequence query computation
	cmph_uint32 start_bit = 0;
	cmph_uint32 nbits;
	cmph_uint32 payload;
	cmph_uint32 sel_pos;
	cmph_uint32 low_mask = (1U << rem_r) - 1U;

	if(idx != 0)
	{
		// End offset of the previous entry is where this one begins.
		sel_pos = select_query_packed(sel_packed, idx - 1);

		start_bit = (sel_pos - (idx - 1)) << rem_r;
		start_bit += get_bits_value(length_rems, idx-1, rem_r, low_mask);

		sel_pos = select_next_query_packed(sel_packed, sel_pos);
	}
	else
	{
		sel_pos = select_query_packed(sel_packed, idx);
	}

	nbits = (sel_pos - idx) << rem_r;
	nbits += get_bits_value(length_rems, idx, rem_r, low_mask);
	nbits -= start_bit;

	if(nbits == 0)
	{
		return 0;
	}

	payload = get_bits_at_pos(store_table, start_bit, nbits);
	return payload + ((1U << nbits) - 1U);
}
|
84
cmph/compressed_seq.h
Normal file
84
cmph/compressed_seq.h
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
#ifndef __CMPH_COMPRESSED_SEQ_H__
|
||||||
|
#define __CMPH_COMPRESSED_SEQ_H__
|
||||||
|
|
||||||
|
#include"select.h"
|
||||||
|
|
||||||
|
struct _compressed_seq_t
|
||||||
|
{
|
||||||
|
cmph_uint32 n; // number of values stored in store_table
|
||||||
|
// The length in bits of each value is decomposed into two compnents: the lg(n) MSBs are stored in rank_select data structure
|
||||||
|
// the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
|
||||||
|
cmph_uint32 rem_r;
|
||||||
|
cmph_uint32 total_length; // total length in bits of stored_table
|
||||||
|
select_t sel;
|
||||||
|
cmph_uint32 * length_rems;
|
||||||
|
cmph_uint32 * store_table;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _compressed_seq_t compressed_seq_t;
|
||||||
|
|
||||||
|
/** \fn void compressed_seq_init(compressed_seq_t * cs);
|
||||||
|
* \brief Initialize a compressed sequence structure.
|
||||||
|
* \param cs points to the compressed sequence structure to be initialized
|
||||||
|
*/
|
||||||
|
void compressed_seq_init(compressed_seq_t * cs);
|
||||||
|
|
||||||
|
/** \fn void compressed_seq_destroy(compressed_seq_t * cs);
|
||||||
|
* \brief Destroy a compressed sequence given as input.
|
||||||
|
* \param cs points to the compressed sequence structure to be destroyed
|
||||||
|
*/
|
||||||
|
void compressed_seq_destroy(compressed_seq_t * cs);
|
||||||
|
|
||||||
|
/** \fn void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||||
|
* \brief Generate a compressed sequence from an input array with n values.
|
||||||
|
* \param cs points to the compressed sequence structure
|
||||||
|
* \param vals_table pointer to the array given as input
|
||||||
|
* \param n number of values in @see vals_table
|
||||||
|
*/
|
||||||
|
void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||||
|
* \brief Returns the value stored at index @see idx of the compressed sequence structure.
|
||||||
|
* \param cs points to the compressed sequence structure
|
||||||
|
* \param idx index to retrieve the value from
|
||||||
|
* \return the value stored at index @see idx of the compressed sequence structure
|
||||||
|
*/
|
||||||
|
cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
|
||||||
|
* \brief Returns amount of space (in bits) to store the compressed sequence.
|
||||||
|
* \param cs points to the compressed sequence structure
|
||||||
|
* \return the amount of space (in bits) to store @see cs
|
||||||
|
*/
|
||||||
|
cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
|
||||||
|
|
||||||
|
void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen);
|
||||||
|
|
||||||
|
void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
|
||||||
|
* \brief Support the ability to pack a compressed sequence structure into a preallocated contiguous memory space pointed by cs_packed.
|
||||||
|
* \param cs points to the compressed sequence structure
|
||||||
|
* \param cs_packed pointer to the contiguous memory area used to store the compressed sequence structure. The size of cs_packed must be at least @see compressed_seq_packed_size
|
||||||
|
*/
|
||||||
|
void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
|
||||||
|
* \brief Return the amount of space needed to pack a compressed sequence structure.
|
||||||
|
* \return the size of the packed compressed sequence structure or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||||
|
* \brief Returns the value stored at index @see idx of the packed compressed sequence structure.
|
||||||
|
* \param cs_packed is a pointer to a contiguous memory area
|
||||||
|
* \param idx is the index to retrieve the value from
|
||||||
|
* \return the value stored at index @see idx of the packed compressed sequence structure
|
||||||
|
*/
|
||||||
|
cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||||
|
|
||||||
|
#endif
|
53
cmph/debug.h
Normal file
53
cmph/debug.h
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
#ifdef DEBUGP
|
||||||
|
#undef DEBUGP
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#include <cstdio>
|
||||||
|
#ifdef WIN32
|
||||||
|
#include <cstring>
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#include <stdio.h>
|
||||||
|
#ifdef WIN32
|
||||||
|
#include <string.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef __GNUC__
|
||||||
|
#ifndef __DEBUG_H__
|
||||||
|
#define __DEBUG_H__
|
||||||
|
#include <stdarg.h>
|
||||||
|
/* printf-style debug output to stderr, used as DEBUGP on compilers that lack
 * GNU variadic macros.
 *
 * The message is printed exactly as given. A "%s:%d " file/line prefix cannot
 * be added here: a function has no access to the caller's __FILE__/__LINE__,
 * and prepending those conversions to the format without matching arguments
 * makes vfprintf consume the caller's arguments for them. */
static void debugprintf(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}
|
||||||
|
/* No-op stand-in for DEBUGP when DEBUG is not defined: accepts printf-style
 * arguments and discards them. */
static void dummyprintf(const char *format, ...)
{
	(void)format;
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
#ifndef __GNUC__
|
||||||
|
#define DEBUGP debugprintf
|
||||||
|
#else
|
||||||
|
#define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#ifndef __GNUC__
|
||||||
|
#define DEBUGP dummyprintf
|
||||||
|
#else
|
||||||
|
#define DEBUGP(args...)
|
||||||
|
#endif
|
||||||
|
#endif
|
49
cmph/djb2_hash.c
Normal file
49
cmph/djb2_hash.c
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#include "djb2_hash.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
djb2_state_t *djb2_state_new()
|
||||||
|
{
|
||||||
|
djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
|
||||||
|
state->hashfunc = CMPH_HASH_DJB2;
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release a state obtained from djb2_state_new, djb2_state_copy or
// djb2_state_load.
void djb2_state_destroy(djb2_state_t *state)
{
	void *owned = state;

	free(owned);
}
|
||||||
|
|
||||||
|
// Hash the keylen bytes at k: starting from 5381, each byte updates the hash
// as (hash * 33) XOR byte. The state argument is not consulted.
cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen)
{
	const unsigned char *bytes = (unsigned char *)k;
	cmph_uint32 h = 5381;
	cmph_uint32 pos;

	(void)state;

	for (pos = 0; pos < keylen; pos++)
	{
		h = (h * 33) ^ bytes[pos];
	}
	return h;
}
|
||||||
|
|
||||||
|
|
||||||
|
// A djb2 state has nothing to serialize: report an empty dump
// (*buf == NULL, *buflen == 0).
void djb2_state_dump(djb2_state_t *state, char **buf, cmph_uint32 *buflen)
{
	(void)state;

	*buflen = 0;
	*buf = NULL;
}
|
||||||
|
|
||||||
|
// Return a newly allocated copy of src_state, or NULL when the allocation
// fails. The caller owns the result.
djb2_state_t *djb2_state_copy(djb2_state_t *src_state)
{
	djb2_state_t *dest_state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
	if (dest_state == NULL)
	{
		return NULL;
	}
	dest_state->hashfunc = src_state->hashfunc;
	return dest_state;
}
|
||||||
|
|
||||||
|
// Create a djb2 state from a dump. The dump carries no data (see
// djb2_state_dump), so buf and buflen are ignored and a fresh state tagged
// with CMPH_HASH_DJB2 is returned, or NULL when the allocation fails.
djb2_state_t *djb2_state_load(const char *buf, cmph_uint32 buflen)
{
	djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));

	(void)buf;
	(void)buflen;

	if (state == NULL)
	{
		return NULL;
	}
	state->hashfunc = CMPH_HASH_DJB2;
	return state;
}
|
18
cmph/djb2_hash.h
Normal file
18
cmph/djb2_hash.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef __DJB2_HASH_H__
|
||||||
|
#define __DJB2_HASH_H__
|
||||||
|
|
||||||
|
#include "hash.h"
|
||||||
|
|
||||||
|
typedef struct __djb2_state_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfunc;
|
||||||
|
} djb2_state_t;
|
||||||
|
|
||||||
|
djb2_state_t *djb2_state_new();
|
||||||
|
cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
|
void djb2_state_dump(djb2_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
|
djb2_state_t *djb2_state_copy(djb2_state_t *src_state);
|
||||||
|
djb2_state_t *djb2_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
|
void djb2_state_destroy(djb2_state_t *state);
|
||||||
|
|
||||||
|
#endif
|
517
cmph/fch.c
Normal file
517
cmph/fch.c
Normal file
@ -0,0 +1,517 @@
|
|||||||
|
#include "fch.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "fch_structs.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
#include "fch_buckets.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#define INDEX 0 /* alignment index within a bucket */
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
static fch_buckets_t * mapping(cmph_config_t *mph);
|
||||||
|
static cmph_uint32 * ordering(fch_buckets_t * buckets);
|
||||||
|
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes);
|
||||||
|
static void permut(cmph_uint32 * vector, cmph_uint32 n);
|
||||||
|
static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph_uint32 *sorted_indexes);
|
||||||
|
|
||||||
|
fch_config_data_t *fch_config_new()
|
||||||
|
{
|
||||||
|
fch_config_data_t *fch;
|
||||||
|
fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t));
|
||||||
|
assert(fch);
|
||||||
|
memset(fch, 0, sizeof(fch_config_data_t));
|
||||||
|
fch->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
|
fch->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
|
fch->m = fch->b = 0;
|
||||||
|
fch->c = fch->p1 = fch->p2 = 0.0;
|
||||||
|
fch->g = NULL;
|
||||||
|
fch->h1 = NULL;
|
||||||
|
fch->h2 = NULL;
|
||||||
|
return fch;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free the algorithm-dependent configuration attached to mph. Only the
// configuration record itself is released here.
void fch_config_destroy(cmph_config_t *mph)
{
	fch_config_data_t *fch = (fch_config_data_t *)mph->data;

	free(fch);
}
|
||||||
|
|
||||||
|
// Copy up to two hash function ids from hashfuncs into the FCH configuration.
// The list ends at CMPH_HASH_COUNT or after two entries, whichever comes
// first (fch only uses two hash functions).
void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	fch_config_data_t *fch = (fch_config_data_t *)mph->data;
	cmph_uint32 i;

	// The slot limit is tested before hashfuncs[i] is read, so a caller that
	// passes exactly two entries without a terminator is not read past its
	// second element.
	for (i = 0; i < 2 && hashfuncs[i] != CMPH_HASH_COUNT; i++)
	{
		fch->hashfuncs[i] = hashfuncs[i];
	}
}
|
||||||
|
|
||||||
|
// Map an initial hash value to a bucket index.
// Values below p1 are reduced modulo (cmph_uint32)p2 (h11 o h10). All other
// values are reduced modulo b and, if the result is below p2, shifted up by
// (cmph_uint32)p2 (h12 o h10).
cmph_uint32 mixh10h11h12(cmph_uint32 b, double p1, double p2, cmph_uint32 initial_index)
{
	const cmph_uint32 int_p2 = (cmph_uint32)p2;

	if (initial_index < p1)
	{
		/* h11 o h10 */
		return initial_index % int_p2;
	}

	/* h12 o h10 */
	initial_index %= b;
	if (initial_index < p2)
	{
		initial_index += int_p2;
	}
	return initial_index;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Number of buckets for m keys: ceil(c * m / (log2(m) + 1)).
cmph_uint32 fch_calc_b(double c, cmph_uint32 m)
{
	const double nkeys = (double)m;

	return (cmph_uint32)ceil((c * nkeys) / (log(nkeys) / log(2.0) + 1));
}
|
||||||
|
|
||||||
|
// Parameter p1 for m keys: ceil(0.55 * m).
double fch_calc_p1(cmph_uint32 m)
{
	const double scaled = 0.55 * m;

	return ceil(scaled);
}
|
||||||
|
|
||||||
|
// Parameter p2 for b buckets: ceil(0.3 * b).
double fch_calc_p2(cmph_uint32 b)
{
	const double scaled = 0.3 * b;

	return ceil(scaled);
}
|
||||||
|
|
||||||
|
// Mapping step: create a fresh h1, recompute b, p1 and p2 from c and m, then
// read all m keys from the key source and insert each one into the bucket
// selected by mixh10h11h12(h1(key) % m). Returns the filled bucket set.
static fch_buckets_t * mapping(cmph_config_t *mph)
{
	fch_config_data_t *fch = (fch_config_data_t *)mph->data;
	fch_buckets_t *buckets = NULL;
	cmph_uint32 nread;

	// Replace any h1 left over from a previous attempt.
	if (fch->h1)
	{
		hash_state_destroy(fch->h1);
	}
	fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);

	fch->b = fch_calc_b(fch->c, fch->m);
	fch->p1 = fch_calc_p1(fch->m);
	fch->p2 = fch_calc_p2(fch->b);
	//DEBUGP("b:%u   p1:%f   p2:%f\n", fch->b, fch->p1, fch->p2);

	buckets = fch_buckets_new(fch->b);

	mph->key_source->rewind(mph->key_source->data);
	for (nread = 0; nread < fch->m; nread++)
	{
		char *key = NULL;
		cmph_uint32 keylen;
		cmph_uint32 bucket;

		mph->key_source->read(mph->key_source->data, &key, &keylen);
		bucket = hash(fch->h1, key, keylen) % fch->m;
		bucket = mixh10h11h12 (fch->b, fch->p1, fch->p2, bucket);
		fch_buckets_insert(buckets, bucket, key, keylen);
		key = NULL; // transfer memory ownership
	}
	return buckets;
}
|
||||||
|
|
||||||
|
|
||||||
|
// returns the buckets indexes sorted by their sizes.
|
||||||
|
static cmph_uint32 * ordering(fch_buckets_t * buckets)
|
||||||
|
{
|
||||||
|
return fch_buckets_get_indexes_sorted_by_size(buckets);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check whether function h2 causes collisions among the keys of each bucket */
|
||||||
|
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes)
|
||||||
|
{
|
||||||
|
//cmph_uint32 max_size = fch_buckets_get_max_size(buckets);
|
||||||
|
cmph_uint8 * hashtable = (cmph_uint8 *)calloc((size_t)fch->m, sizeof(cmph_uint8));
|
||||||
|
cmph_uint32 nbuckets = fch_buckets_get_nbuckets(buckets);
|
||||||
|
cmph_uint32 i = 0, index = 0, j =0;
|
||||||
|
for (i = 0; i < nbuckets; i++)
|
||||||
|
{
|
||||||
|
cmph_uint32 nkeys = fch_buckets_get_size(buckets, sorted_indexes[i]);
|
||||||
|
memset(hashtable, 0, (size_t)fch->m);
|
||||||
|
//DEBUGP("bucket %u -- nkeys: %u\n", i, nkeys);
|
||||||
|
for (j = 0; j < nkeys; j++)
|
||||||
|
{
|
||||||
|
char * key = fch_buckets_get_key(buckets, sorted_indexes[i], j);
|
||||||
|
cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], j);
|
||||||
|
index = hash(fch->h2, key, keylen) % fch->m;
|
||||||
|
if(hashtable[index]) { // collision detected
|
||||||
|
free(hashtable);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
hashtable[index] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(hashtable);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shuffle vector[0..n-1] in place: every position is swapped once with a
// position chosen by rand() % n.
static void permut(cmph_uint32 * vector, cmph_uint32 n)
{
	cmph_uint32 pos;

	for (pos = 0; pos < n; pos++)
	{
		const cmph_uint32 other = (cmph_uint32) rand() % n;
		const cmph_uint32 saved = vector[pos];

		vector[pos] = vector[other];
		vector[other] = saved;
	}
}
|
||||||
|
|
||||||
|
// Searching step: for every non-empty bucket, in the order given by
// sorted_indexes, look for a displacement g[bucket] such that all keys of the
// bucket land on table positions that are still free.
//
// random_table holds a permutation of 0..m-1 whose first filled_count entries
// are the positions already taken; map_table is its inverse. A fresh h2 is
// generated on every pass of the outer loop. The outer loop repeats while the
// attempt failed and fewer than 10 placement attempts and fewer than 1000
// consecutive colliding h2 functions have been used.
//
// Returns 0 on success and 1 when the mapping step has to be restarted.
// fch->g and fch->h2 are replaced as a side effect.
static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph_uint32 *sorted_indexes)
{
	cmph_uint32 * random_table = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32));
	cmph_uint32 * map_table    = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32));
	cmph_uint32 iteration_to_generate_h2 = 0;
	cmph_uint32 searching_iterations     = 0;
	cmph_uint8 restart = 0;
	cmph_uint32 nbuckets = fch_buckets_get_nbuckets(buckets);
	cmph_uint32 i, j, z, counter = 0, filled_count = 0;

	if (fch->g)
	{
		free (fch->g);
	}
	fch->g = (cmph_uint32 *) calloc((size_t)fch->b, sizeof(cmph_uint32));

	//DEBUGP("max bucket size: %u\n", fch_buckets_get_max_size(buckets));

	// Random permutation of the table positions and its inverse.
	for(i = 0; i < fch->m; i++)
	{
		random_table[i] = i;
	}
	permut(random_table, fch->m);
	for(i = 0; i < fch->m; i++)
	{
		map_table[random_table[i]] = i;
	}

	do {
		// Try a new h2 on every pass.
		if (fch->h2)
		{
			hash_state_destroy(fch->h2);
		}
		fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
		restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
		filled_count = 0;
		if (!restart)
		{
			searching_iterations++;
			iteration_to_generate_h2 = 0;
			//DEBUGP("searching_iterations: %u\n", searching_iterations);
		}
		else
		{
			iteration_to_generate_h2++;
			//DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
		}

		for(i = 0; (i < nbuckets) && !restart; i++)
		{
			const cmph_uint32 bucket = sorted_indexes[i];
			cmph_uint32 bucketsize = fch_buckets_get_size(buckets, bucket);

			if (bucketsize == 0)
			{
				// Reaching an empty bucket ends the search successfully.
				restart = 0; // false
				break;
			}
			else
			{
				restart = 1; // true
			}

			// Try the free positions one after another as the target of the
			// bucket's first key until the whole bucket fits.
			for(z = 0; (z < (fch->m - filled_count)) && restart; z++)
			{
				char * key = fch_buckets_get_key(buckets, bucket, INDEX);
				cmph_uint32 keylen = fch_buckets_get_keylength(buckets, bucket, INDEX);
				cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;

				counter = 0;
				restart = 0; // false
				fch->g[bucket] = (fch->m + random_table[filled_count + z] - h2) % fch->m;
				//DEBUGP("g[%u]: %u\n", bucket, fch->g[bucket]);
				j = INDEX;
				do {
					cmph_uint32 slot = 0;

					key = fch_buckets_get_key(buckets, bucket, j);
					keylen = fch_buckets_get_keylength(buckets, bucket, j);
					h2 = hash(fch->h2, key, keylen) % fch->m;
					slot = (h2 + fch->g[bucket]) % fch->m;
					//DEBUGP("key:%s  keylen:%u  index: %u  h2:%u  bucketsize:%u\n", key, keylen, slot, h2, bucketsize);
					if (map_table[slot] >= filled_count)
					{
						// Position is free: swap it into the taken prefix of
						// random_table and keep map_table consistent.
						cmph_uint32 y  = map_table[slot];
						cmph_uint32 ry = random_table[y];

						random_table[y] = random_table[filled_count];
						random_table[filled_count] = ry;
						map_table[random_table[y]] = y;
						map_table[random_table[filled_count]] = filled_count;
						filled_count++;
						counter ++;
					}
					else
					{
						// Position already taken: undo this bucket's
						// placements and try the next candidate.
						restart = 1; // true
						filled_count = filled_count - counter;
						counter = 0;
						break;
					}
					j = (j + 1) % bucketsize;
				} while(j % bucketsize != INDEX);
			}
			//getchar();
		}
	} while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000));

	free(map_table);
	free(random_table);
	return restart;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Build an FCH minimal perfect hash function for the keys of mph->key_source.
//
// c is the bucket-count parameter; values <= 2 are replaced by 2.6. The
// mapping, ordering and searching steps are repeated up to 100 times until the
// searching step succeeds.
//
// Returns a newly allocated cmph_t that takes ownership of g, h1 and h2 from
// the configuration, or NULL when no function was found within the allowed
// attempts or when the result could not be allocated.
cmph_t *fch_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	fch_data_t *fchf = NULL;
	cmph_uint32 iterations = 100;
	cmph_uint8 restart_mapping = 0;
	fch_buckets_t * buckets = NULL;
	cmph_uint32 * sorted_indexes = NULL;
	fch_config_data_t *fch = (fch_config_data_t *)mph->data;

	fch->m = mph->key_source->nkeys;
	//DEBUGP("m: %f\n", fch->m);
	if (c <= 2)
	{
		c = 2.6; // validating restrictions over parameter c.
	}
	fch->c = c;
	//DEBUGP("c: %f\n", fch->c);
	fch->h1 = NULL;
	fch->h2 = NULL;
	fch->g = NULL;

	do
	{
		if (mph->verbosity)
		{
			fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m);
		}
		if (buckets)
		{
			fch_buckets_destroy(buckets);
		}
		buckets = mapping(mph);

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		if (sorted_indexes)
		{
			free (sorted_indexes);
		}
		sorted_indexes = ordering(buckets);

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting searching step.\n");
		}
		restart_mapping = searching(fch, buckets, sorted_indexes);
		iterations--;
	} while(restart_mapping && iterations > 0);

	if (buckets)
	{
		fch_buckets_destroy(buckets);
	}
	if (sorted_indexes)
	{
		free (sorted_indexes);
	}

	// Decide on the outcome of the last search rather than on the attempt
	// counter: a search that succeeds on the final allowed attempt also leaves
	// iterations at 0 and must not be reported as a failure.
	if (restart_mapping)
	{
		return NULL;
	}

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	if (mphf == NULL)
	{
		return NULL;
	}
	fchf = (fch_data_t *)malloc(sizeof(fch_data_t));
	if (fchf == NULL)
	{
		free(mphf);
		return NULL;
	}

	mphf->algo = mph->algo;
	fchf->g = fch->g;
	fch->g = NULL; //transfer memory ownership
	fchf->h1 = fch->h1;
	fch->h1 = NULL; //transfer memory ownership
	fchf->h2 = fch->h2;
	fch->h2 = NULL; //transfer memory ownership
	fchf->p2 = fch->p2;
	fchf->p1 = fch->p1;
	fchf->b = fch->b;
	fchf->c = fch->c;
	fchf->m = fch->m;
	mphf->data = fchf;
	mphf->size = fch->m;
	//DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;
}
|
||||||
|
|
||||||
|
int fch_dump(cmph_t *mphf, FILE *fd)
|
||||||
|
{
|
||||||
|
char *buf = NULL;
|
||||||
|
cmph_uint32 buflen;
|
||||||
|
register size_t nbytes;
|
||||||
|
|
||||||
|
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||||
|
__cmph_dump(mphf, fd);
|
||||||
|
|
||||||
|
hash_state_dump(data->h1, &buf, &buflen);
|
||||||
|
//DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
|
nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
|
||||||
|
free(buf);
|
||||||
|
|
||||||
|
hash_state_dump(data->h2, &buf, &buflen);
|
||||||
|
//DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
|
nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
|
||||||
|
free(buf);
|
||||||
|
|
||||||
|
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(&(data->c), sizeof(double), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(&(data->b), sizeof(cmph_uint32), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(&(data->p1), sizeof(double), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(&(data->p2), sizeof(double), (size_t)1, fd);
|
||||||
|
nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->b), (size_t)1, fd);
|
||||||
|
#ifdef DEBUG
|
||||||
|
cmph_uint32 i;
|
||||||
|
fprintf(stderr, "G: ");
|
||||||
|
for (i = 0; i < data->b; ++i) fprintf(stderr, "%u ", data->g[i]);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
#endif
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void fch_load(FILE *f, cmph_t *mphf);
 *  \brief Read an FCH function from a stream into mphf->data.
 *
 *  Reads, in order: the h1 state (32-bit length + bytes), the h2 state (same
 *  encoding), m, c, b, p1, p2 and b entries of g.
 *  NOTE(review): neither the allocations nor the fread() results are checked,
 *  so a truncated or corrupt stream goes undetected.
 *
 *  \param f source stream
 *  \param mphf function whose data pointer is set to the loaded fch_data_t
 */
void fch_load(FILE *f, cmph_t *mphf)
{
	fch_data_t *fch = (fch_data_t *)malloc(sizeof(fch_data_t));
	char *state_buf = NULL;
	cmph_uint32 state_len;
	size_t nread; /* fread() results are stored but never inspected */

	//DEBUGP("Loading fch mphf\n");
	mphf->data = fch;

	//DEBUGP("Reading h1\n");
	fch->h1 = NULL;
	nread = fread(&state_len, sizeof(cmph_uint32), (size_t)1, f);
	//DEBUGP("Hash state of h1 has %u bytes\n", state_len);
	state_buf = (char *)malloc((size_t)state_len);
	nread = fread(state_buf, (size_t)state_len, (size_t)1, f);
	fch->h1 = hash_state_load(state_buf, state_len);
	free(state_buf);

	//DEBUGP("Reading h2\n");
	fch->h2 = NULL;
	nread = fread(&state_len, sizeof(cmph_uint32), (size_t)1, f);
	//DEBUGP("Hash state of h2 has %u bytes\n", state_len);
	state_buf = (char *)malloc((size_t)state_len);
	nread = fread(state_buf, (size_t)state_len, (size_t)1, f);
	fch->h2 = hash_state_load(state_buf, state_len);
	free(state_buf);

	//DEBUGP("Reading m and n\n");
	nread = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f);
	nread = fread(&(fch->c), sizeof(double), (size_t)1, f);
	nread = fread(&(fch->b), sizeof(cmph_uint32), (size_t)1, f);
	nread = fread(&(fch->p1), sizeof(double), (size_t)1, f);
	nread = fread(&(fch->p2), sizeof(double), (size_t)1, f);

	fch->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*fch->b);
	nread = fread(fch->g, fch->b*sizeof(cmph_uint32), (size_t)1, f);
	(void)nread;
	#ifdef DEBUG
	{
		cmph_uint32 i;
		fprintf(stderr, "G: ");
		for (i = 0; i < fch->b; ++i) fprintf(stderr, "%u ", fch->g[i]);
		fprintf(stderr, "\n");
	}
	#endif
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
 *  \brief Evaluate the FCH function for a key.
 *  \param mphf function to evaluate; mphf->data must point to a fch_data_t
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return (h2(key) + g[bucket]) % m, where bucket is h1(key) % m passed through mixh10h11h12()
 */
cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	fch_data_t *fch = mphf->data;
	cmph_uint32 bucket = hash(fch->h1, key, keylen) % fch->m;
	cmph_uint32 offset = hash(fch->h2, key, keylen) % fch->m;

	bucket = mixh10h11h12 (fch->b, fch->p1, fch->p2, bucket);
	//DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, bucket, offset, fch->g[bucket]);
	return (offset + fch->g[bucket]) % fch->m;
}
|
||||||
|
/** \fn void fch_destroy(cmph_t *mphf);
 *  \brief Release an FCH function: its g array, both hash states, the
 *         fch_data_t and finally the cmph_t itself.
 *  \param mphf function to destroy; must not be used afterwards
 */
void fch_destroy(cmph_t *mphf)
{
	fch_data_t *fch = (fch_data_t *)mphf->data;

	free(fch->g);
	hash_state_destroy(fch->h1);
	hash_state_destroy(fch->h2);
	free(fch);
	free(mphf);
}
|
||||||
|
|
||||||
|
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void fch_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1 type
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->h1);
|
||||||
|
*((cmph_uint32 *) ptr) = h1_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->h1, ptr);
|
||||||
|
ptr += hash_state_packed_size(h1_type);
|
||||||
|
|
||||||
|
// packing h2 type
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->h2);
|
||||||
|
*((cmph_uint32 *) ptr) = h2_type;
|
||||||
|
ptr += sizeof(cmph_uint32);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->h2, ptr);
|
||||||
|
ptr += hash_state_packed_size(h2_type);
|
||||||
|
|
||||||
|
// packing m
|
||||||
|
*((cmph_uint32 *) ptr) = data->m;
|
||||||
|
ptr += sizeof(data->m);
|
||||||
|
|
||||||
|
// packing b
|
||||||
|
*((cmph_uint32 *) ptr) = data->b;
|
||||||
|
ptr += sizeof(data->b);
|
||||||
|
|
||||||
|
// packing p1
|
||||||
|
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p1;
|
||||||
|
ptr += sizeof(data->p1);
|
||||||
|
|
||||||
|
// packing p2
|
||||||
|
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p2;
|
||||||
|
ptr += sizeof(data->p2);
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||||
|
CMPH_HASH h1_type = hash_get_type(data->h1);
|
||||||
|
CMPH_HASH h2_type = hash_get_type(data->h2);
|
||||||
|
|
||||||
|
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||||
|
4*sizeof(cmph_uint32) + 2*sizeof(double) + sizeof(cmph_uint32)*(data->b));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** cmph_uint32 fch_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint8 *h1_ptr = packed_mphf;
|
||||||
|
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
|
||||||
|
h1_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||||
|
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||||
|
h2_ptr += 4;
|
||||||
|
|
||||||
|
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||||
|
|
||||||
|
register cmph_uint32 m = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint32 b = *g_ptr++;
|
||||||
|
|
||||||
|
register double p1 = (double)(*((cmph_uint64 *)g_ptr));
|
||||||
|
g_ptr += 2;
|
||||||
|
|
||||||
|
register double p2 = (double)(*((cmph_uint64 *)g_ptr));
|
||||||
|
g_ptr += 2;
|
||||||
|
|
||||||
|
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
|
||||||
|
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
|
||||||
|
|
||||||
|
h1 = mixh10h11h12 (b, p1, p2, h1);
|
||||||
|
return (h2 + g_ptr[h1]) % m;
|
||||||
|
}
|
||||||
|
|
48
cmph/fch.h
Normal file
48
cmph/fch.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#ifndef __CMPH_FCH_H__
#define __CMPH_FCH_H__

#include "cmph.h"

typedef struct __fch_data_t fch_data_t;
typedef struct __fch_config_data_t fch_config_data_t;

/* Parameters calculation */
cmph_uint32 fch_calc_b(double c, cmph_uint32 m);
double fch_calc_p1(cmph_uint32 m);
double fch_calc_p2(cmph_uint32 b);
cmph_uint32 mixh10h11h12(cmph_uint32 b, double p1, double p2, cmph_uint32 initial_index);

/* Configuration handling. (void) makes this a real prototype; an empty
 * parameter list would leave the arguments unchecked. */
fch_config_data_t *fch_config_new(void);
void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void fch_config_destroy(cmph_config_t *mph);
cmph_t *fch_new(cmph_config_t *mph, double c);

/* Serialization, destruction and lookup */
void fch_load(FILE *f, cmph_t *mphf);
int fch_dump(cmph_t *mphf, FILE *f);
void fch_destroy(cmph_t *mphf);
cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);

/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
 *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
 *  \param mphf pointer to the resulting mphf
 *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
 */
void fch_pack(cmph_t *mphf, void *packed_mphf);

/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
 *  \brief Return the amount of space needed to pack mphf.
 *  \param mphf pointer to a mphf
 *  \return the size of the packed function or zero for failures
 */
cmph_uint32 fch_packed_size(cmph_t *mphf);

/** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
 *  \brief Use the packed mphf to do a search.
 *  \param packed_mphf pointer to the packed mphf
 *  \param key key to be hashed
 *  \param keylen key length in bytes
 *  \return The mphf value
 */
cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);

#endif
|
214
cmph/fch_buckets.c
Normal file
214
cmph/fch_buckets.c
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
#include "vqueue.h"
|
||||||
|
#include "fch_buckets.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
typedef struct __fch_bucket_entry_t
|
||||||
|
{
|
||||||
|
char * value;
|
||||||
|
cmph_uint32 length;
|
||||||
|
} fch_bucket_entry_t;
|
||||||
|
|
||||||
|
typedef struct __fch_bucket_t
|
||||||
|
{
|
||||||
|
fch_bucket_entry_t * entries;
|
||||||
|
cmph_uint32 capacity, size;
|
||||||
|
} fch_bucket_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Put a bucket into the empty state: no storage, zero capacity, zero size. */
static void fch_bucket_new(fch_bucket_t *bucket)
{
	assert(bucket);
	bucket->entries = NULL;
	bucket->capacity = 0;
	bucket->size = 0;
}
|
||||||
|
|
||||||
|
/* Free every key stored in the bucket and then the entry array itself.
 * The bucket structure is not freed. */
static void fch_bucket_destroy(fch_bucket_t *bucket)
{
	cmph_uint32 k = 0;

	assert(bucket);
	while (k < bucket->size)
	{
		free(bucket->entries[k].value);
		k++;
	}
	free(bucket->entries);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Make sure the bucket can hold at least `size` entries.
 *
 * Capacity grows by doubling, starting from capacity + 1. When doubling would
 * wrap around, the requested size is used as-is so the loop always ends.
 * Running out of memory aborts with a message: this function cannot report
 * failure and fch_bucket_insert() writes to the new slot unconditionally. */
static void fch_bucket_reserve(fch_bucket_t *bucket, cmph_uint32 size)
{
	cmph_uint32 new_capacity;
	fch_bucket_entry_t *grown;

	assert(bucket);
	if (bucket->capacity >= size) return;

	new_capacity = bucket->capacity + 1;
	DEBUGP("Increasing current capacity %u to %u\n", bucket->capacity, size);
	while (new_capacity < size)
	{
		if (new_capacity > ((cmph_uint32)-1) / 2)
		{
			new_capacity = size;
			break;
		}
		new_capacity *= 2;
	}
	// Keep the old pointer until realloc is known to have succeeded.
	grown = (fch_bucket_entry_t *)realloc(bucket->entries, sizeof(fch_bucket_entry_t)*new_capacity);
	if (!grown)
	{
		fprintf(stderr, "fch_buckets: out of memory while growing a bucket to %u entries\n", new_capacity);
		abort();
	}
	bucket->entries = grown;
	bucket->capacity = new_capacity;
	DEBUGP("Increased\n");
}
|
||||||
|
|
||||||
|
/* Append a key to the bucket. The pointer is stored as given (no copy). */
static void fch_bucket_insert(fch_bucket_t *bucket, char *val, cmph_uint32 val_length)
{
	fch_bucket_entry_t *slot;

	assert(bucket);
	fch_bucket_reserve(bucket, bucket->size + 1);
	slot = bucket->entries + bucket->size;
	slot->value = val;
	slot->length = val_length;
	bucket->size++;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Return 1 when the bucket holds no keys, 0 otherwise. */
static cmph_uint8 fch_bucket_is_empty(fch_bucket_t *bucket)
{
	assert(bucket);
	return (cmph_uint8)(bucket->size ? 0 : 1);
}
|
||||||
|
|
||||||
|
/* Return the number of keys currently stored in the bucket. */
static cmph_uint32 fch_bucket_size(fch_bucket_t *bucket)
{
	cmph_uint32 count;

	assert(bucket);
	count = bucket->size;
	return count;
}
|
||||||
|
|
||||||
|
/* Return the key stored at position index_key (must be < bucket->size). */
static char * fch_bucket_get_key(fch_bucket_t *bucket, cmph_uint32 index_key)
{
	assert(bucket);
	assert(index_key < bucket->size);
	return bucket->entries[index_key].value;
}
|
||||||
|
|
||||||
|
/* Return the length of the key stored at position index_key (must be < bucket->size). */
static cmph_uint32 fch_bucket_get_length(fch_bucket_t *bucket, cmph_uint32 index_key)
{
	assert(bucket);
	assert(index_key < bucket->size);
	return bucket->entries[index_key].length;
}
|
||||||
|
|
||||||
|
/* Print the bucket number and each of its keys to stderr.
 * Keys are printed with %s. */
static void fch_bucket_print(fch_bucket_t * bucket, cmph_uint32 index)
{
	cmph_uint32 k = 0;

	assert(bucket);
	fprintf(stderr, "Printing bucket %u ...\n", index);
	while (k < bucket->size)
	{
		fprintf(stderr, " key: %s\n", bucket->entries[k].value);
		k++;
	}
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct __fch_buckets_t
|
||||||
|
{
|
||||||
|
fch_bucket_t * values;
|
||||||
|
cmph_uint32 nbuckets, max_size;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Allocate a collection of `nbuckets` empty buckets.
 *
 * Both allocations are checked before they are used. Builds with assertions
 * enabled stop on failure; with NDEBUG the function returns NULL instead.
 * Release the result with fch_buckets_destroy(). */
fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets)
{
	cmph_uint32 i;
	fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t));

	assert(buckets);
	if (!buckets) return NULL;

	buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t));
	assert(buckets->values);
	// calloc(0, ...) may legitimately return NULL, so NULL is only treated
	// as out-of-memory when buckets were actually requested.
	if (!buckets->values && nbuckets > 0)
	{
		free(buckets);
		return NULL;
	}
	for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
	buckets->nbuckets = nbuckets;
	buckets->max_size = 0;
	return buckets;
}
|
||||||
|
|
||||||
|
/* Return 1 when bucket `index` holds no keys, 0 otherwise. */
cmph_uint8 fch_buckets_is_empty(fch_buckets_t * buckets, cmph_uint32 index)
{
	fch_bucket_t *bucket;

	assert(index < buckets->nbuckets);
	bucket = buckets->values + index;
	return fch_bucket_is_empty(bucket);
}
|
||||||
|
|
||||||
|
/* Append `key` to bucket `index` and keep max_size equal to the size of the
 * largest bucket. The key pointer is stored, not copied. */
void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, cmph_uint32 length)
{
	fch_bucket_t *bucket;
	cmph_uint32 filled;

	assert(index < buckets->nbuckets);
	bucket = buckets->values + index;
	fch_bucket_insert(bucket, key, length);
	filled = fch_bucket_size(bucket);
	if (filled > buckets->max_size) buckets->max_size = filled;
}
|
||||||
|
|
||||||
|
/* Return the number of keys stored in bucket `index`. */
cmph_uint32 fch_buckets_get_size(fch_buckets_t * buckets, cmph_uint32 index)
{
	fch_bucket_t *bucket;

	assert(index < buckets->nbuckets);
	bucket = buckets->values + index;
	return fch_bucket_size(bucket);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Return key number `index_key` of bucket `index`. */
char * fch_buckets_get_key(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key)
{
	fch_bucket_t *bucket;

	assert(index < buckets->nbuckets);
	bucket = buckets->values + index;
	return fch_bucket_get_key(bucket, index_key);
}
|
||||||
|
|
||||||
|
/* Return the length of key number `index_key` of bucket `index`. */
cmph_uint32 fch_buckets_get_keylength(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key)
{
	fch_bucket_t *bucket;

	assert(index < buckets->nbuckets);
	bucket = buckets->values + index;
	return fch_bucket_get_length(bucket, index_key);
}
|
||||||
|
|
||||||
|
/* Return the size of the biggest bucket. */
cmph_uint32 fch_buckets_get_max_size(fch_buckets_t * buckets)
{
	cmph_uint32 biggest = buckets->max_size;
	return biggest;
}
|
||||||
|
|
||||||
|
/* Return the number of buckets. */
cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets)
{
	cmph_uint32 total = buckets->nbuckets;
	return total;
}
|
||||||
|
|
||||||
|
/* Return a newly allocated array with the indexes of all buckets, ordered by
 * decreasing bucket size (counting sort; buckets of equal size keep their
 * index order). The caller frees the array with free().
 *
 * Returns NULL when memory cannot be allocated. */
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
{
	cmph_uint32 i;
	size_t size;
	cmph_uint32 offset = 0, count;
	cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32));
	cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32));

	if (!nbuckets_size || !sorted_indexes)
	{
		free(nbuckets_size);
		free(sorted_indexes);
		return NULL;
	}

	// collect how many buckets for each size.
	for (i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++;

	// calculating offset considering a decreasing order of buckets size:
	// each count becomes the number of buckets that are strictly bigger.
	for (size = (size_t)buckets->max_size + 1; size-- > 0; )
	{
		count = nbuckets_size[size];
		nbuckets_size[size] = offset;
		offset += count;
	}

	// place every bucket index at the next free slot of its size class.
	for (i = 0; i < buckets->nbuckets; i++)
	{
		cmph_uint32 bucket_size = fch_bucket_size(buckets->values + i);
		sorted_indexes[nbuckets_size[bucket_size]] = i;
		nbuckets_size[bucket_size] ++;
	}
	free(nbuckets_size);
	return sorted_indexes;
}
|
||||||
|
|
||||||
|
/* Print every bucket, in index order, to stderr. */
void fch_buckets_print(fch_buckets_t * buckets)
{
	cmph_uint32 b = 0;

	while (b < buckets->nbuckets)
	{
		fch_bucket_print(buckets->values + b, b);
		b++;
	}
}
|
||||||
|
|
||||||
|
/* Destroy every bucket (including the keys it holds), then the bucket array
 * and the collection itself. */
void fch_buckets_destroy(fch_buckets_t * buckets)
{
	cmph_uint32 b = 0;

	while (b < buckets->nbuckets)
	{
		fch_bucket_destroy(buckets->values + b);
		b++;
	}
	free(buckets->values);
	free(buckets);
}
|
30
cmph/fch_buckets.h
Normal file
30
cmph/fch_buckets.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#ifndef __CMPH_FCH_BUCKETS_H__
#define __CMPH_FCH_BUCKETS_H__

#include "cmph_types.h"

/* Opaque collection of key buckets. */
typedef struct __fch_buckets_t fch_buckets_t;

// creates a collection of nbuckets buckets.
fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets);

// tells whether the bucket at position index is empty.
cmph_uint8 fch_buckets_is_empty(fch_buckets_t * buckets, cmph_uint32 index);

// adds key (with the given length) to the bucket at position index.
void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, cmph_uint32 length);

// returns the number of keys in the bucket at position index.
cmph_uint32 fch_buckets_get_size(fch_buckets_t * buckets, cmph_uint32 index);

// returns key number index_key of the bucket at position index.
char * fch_buckets_get_key(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key);

// returns the length of key number index_key of the bucket at position index.
cmph_uint32 fch_buckets_get_keylength(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key);

// returns the size of biggest bucket.
cmph_uint32 fch_buckets_get_max_size(fch_buckets_t * buckets);

// returns the number of buckets.
cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets);

// returns a newly allocated array of bucket indexes.
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets);

// prints every bucket.
void fch_buckets_print(fch_buckets_t * buckets);

// releases the collection.
void fch_buckets_destroy(fch_buckets_t * buckets);

#endif
|
30
cmph/fch_structs.h
Executable file
30
cmph/fch_structs.h
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
#ifndef __CMPH_FCH_STRUCTS_H__
#define __CMPH_FCH_STRUCTS_H__

#include "hash_state.h"

/* A finished FCH function. */
struct __fch_data_t
{
	cmph_uint32 m;    // words count
	double c;         // constant c
	cmph_uint32 b;    // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Don't need to be stored
	double p1;        // constant p1 = ceil(0.6*m). Don't need to be stored
	double p2;        // constant p2 = ceil(0.3*b). Don't need to be stored
	cmph_uint32 *g;   // g function.
	hash_state_t *h1; // h10 function.
	hash_state_t *h2; // h20 function.
};

/* Working state used while an FCH function is being built. It holds the
 * same fields as __fch_data_t, preceded by the selected hash functions. */
struct __fch_config_data_t
{
	CMPH_HASH hashfuncs[2];
	cmph_uint32 m;    // words count
	double c;         // constant c
	cmph_uint32 b;    // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Don't need to be stored
	double p1;        // constant p1 = ceil(0.6*m). Don't need to be stored
	double p2;        // constant p2 = ceil(0.3*b). Don't need to be stored
	cmph_uint32 *g;   // g function.
	hash_state_t *h1; // h10 function.
	hash_state_t *h2; // h20 function.
};

#endif
|
53
cmph/fnv_hash.c
Normal file
53
cmph/fnv_hash.c
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
#include "fnv_hash.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
fnv_state_t *fnv_state_new()
|
||||||
|
{
|
||||||
|
fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
|
||||||
|
state->hashfunc = CMPH_HASH_FNV;
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Release a state allocated by this module; the state only owns its own memory. */
void fnv_state_destroy(fnv_state_t *state)
{
	free(state);
	return;
}
|
||||||
|
|
||||||
|
/* Hash `keylen` bytes of `k`: starting from 0, the accumulator is multiplied
 * by 0x01000193 and XORed with each byte in turn.
 *
 * The accumulator is a plain local. Declaring it `static` kept its value
 * between calls, so a key's hash depended on everything hashed before it.
 *
 * `state` is accepted for interface uniformity and is not used. */
cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen)
{
	const unsigned char *bp = (const unsigned char *)k;
	const unsigned char *be = bp + keylen;
	unsigned int hval = 0;

	(void)state;
	while (bp < be)
	{
		// same value as hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24)
		hval *= 0x01000193u;
		hval ^= *bp++;
	}
	return hval;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Serialize the state. There is nothing to store, so the output is an empty
 * buffer: *buf is NULL and *buflen is 0. */
void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen)
{
	*buflen = 0;
	*buf = NULL;
}
|
||||||
|
|
||||||
|
/* Return a newly allocated copy of src_state.
 * Returns NULL when memory cannot be allocated. */
fnv_state_t * fnv_state_copy(fnv_state_t *src_state)
{
	fnv_state_t *dest_state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
	if (!dest_state) return NULL;
	dest_state->hashfunc = src_state->hashfunc;
	return dest_state;
}
|
||||||
|
|
||||||
|
/* Rebuild a state from a dumped buffer. The buffer carries no information
 * (see fnv_state_dump), so buf and buflen are ignored.
 * Returns NULL when memory cannot be allocated. */
fnv_state_t *fnv_state_load(const char *buf, cmph_uint32 buflen)
{
	fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));

	(void)buf;
	(void)buflen;
	if (!state) return NULL;
	state->hashfunc = CMPH_HASH_FNV;
	return state;
}
|
18
cmph/fnv_hash.h
Normal file
18
cmph/fnv_hash.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef __FNV_HASH_H__
#define __FNV_HASH_H__

#include "hash.h"

/* State of the FNV hash function; it only records the hash type. */
typedef struct __fnv_state_t
{
	CMPH_HASH hashfunc;
} fnv_state_t;

/* (void) makes this a real prototype; an empty parameter list would leave
 * the arguments unchecked. */
fnv_state_t *fnv_state_new(void);
cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen);
void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen);
fnv_state_t *fnv_state_copy(fnv_state_t *src_state);
fnv_state_t *fnv_state_load(const char *buf, cmph_uint32 buflen);
void fnv_state_destroy(fnv_state_t *state);

#endif
|
338
cmph/graph.c
Normal file
338
cmph/graph.c
Normal file
@ -0,0 +1,338 @@
|
|||||||
|
#include "graph.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "vstack.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
/* static const cmph_uint8 bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */
|
||||||
|
/* #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) */
|
||||||
|
/* #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) */
|
||||||
|
/* #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) */
|
||||||
|
|
||||||
|
/* Slot of endpoint i (0 or 1) of edge e in g->edges. Relies on a graph_t *g
 * being in scope at the point of use. Arguments are parenthesized so that
 * expressions can be passed safely. */
#define abs_edge(e, i) ((e) % g->nedges + (i) * g->nedges)
|
||||||
|
|
||||||
|
struct __graph_t
|
||||||
|
{
|
||||||
|
cmph_uint32 nnodes;
|
||||||
|
cmph_uint32 nedges;
|
||||||
|
cmph_uint32 *edges;
|
||||||
|
cmph_uint32 *first;
|
||||||
|
cmph_uint32 *next;
|
||||||
|
cmph_uint8 *critical_nodes; /* included -- Fabiano*/
|
||||||
|
cmph_uint32 ncritical_nodes; /* included -- Fabiano*/
|
||||||
|
cmph_uint32 cedges;
|
||||||
|
int shrinking;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Sentinel marking "no edge" in first[], next[] and edges[]; never modified. */
static const cmph_uint32 EMPTY = UINT_MAX;
|
||||||
|
|
||||||
|
/* Allocate a graph with room for `nnodes` nodes and `nedges` edges and clear
 * all of its edges.
 *
 * Returns NULL when any allocation fails (nothing is leaked) or when the
 * edge arrays would overflow size_t. Release with graph_destroy(). */
graph_t *graph_new(cmph_uint32 nnodes, cmph_uint32 nedges)
{
	graph_t *graph;

	if (nedges > ((size_t)-1) / (2 * sizeof(cmph_uint32))) return NULL;
	if (nnodes > ((size_t)-1) / sizeof(cmph_uint32)) return NULL;

	graph = (graph_t *)malloc(sizeof(graph_t));
	if (!graph) return NULL;

	graph->edges = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * 2 * nedges);
	graph->next = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * 2 * nedges);
	graph->first = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * nnodes);
	// malloc(0) may return NULL, so NULL only means failure for arrays
	// that were requested with a non-zero length.
	if ((nedges > 0 && (!graph->edges || !graph->next)) ||
	    (nnodes > 0 && !graph->first))
	{
		free(graph->edges);
		free(graph->next);
		free(graph->first);
		free(graph);
		return NULL;
	}
	graph->critical_nodes = NULL; /* included -- Fabiano*/
	graph->ncritical_nodes = 0; /* included -- Fabiano*/
	graph->nnodes = nnodes;
	graph->nedges = nedges;

	graph_clear_edges(graph);
	return graph;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Free the graph together with its edge, adjacency and critical-node arrays. */
void graph_destroy(graph_t *graph)
{
	DEBUGP("Destroying graph\n");
	free(graph->edges);
	free(graph->first);
	free(graph->next);
	free(graph->critical_nodes); /* included -- Fabiano*/
	free(graph);
}
|
||||||
|
|
||||||
|
/* Print to stdout, node by node, every edge reachable from the node's
 * adjacency list as "a -> b". */
void graph_print(graph_t *g)
{
	cmph_uint32 node, e;

	for (node = 0; node < g->nnodes; ++node)
	{
		DEBUGP("Printing edges connected to %u\n", node);
		for (e = g->first[node]; e != EMPTY; e = g->next[e])
		{
			printf("%u -> %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
		}
	}
}
|
||||||
|
|
||||||
|
/* Add the undirected edge (v1, v2). The edge takes slot cedges on v1's
 * adjacency list and slot cedges + nedges on v2's. Edges must not be added
 * after a deletion has happened (asserted). */
void graph_add_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
{
	cmph_uint32 fwd = g->cedges;
	cmph_uint32 back;

	assert(v1 < g->nnodes);
	assert(v2 < g->nnodes);
	assert(fwd < g->nedges);
	assert(!g->shrinking);

	back = fwd + g->nedges;

	/* push onto the front of v1's list */
	g->next[fwd] = g->first[v1];
	g->first[v1] = fwd;
	g->edges[fwd] = v2;

	/* push onto the front of v2's list */
	g->next[back] = g->first[v2];
	g->first[v2] = back;
	g->edges[back] = v1;

	g->cedges++;
}
|
||||||
|
|
||||||
|
/* Return 1 when edge e joins v1 and v2 (in either order), 0 otherwise. */
static int check_edge(graph_t *g, cmph_uint32 e, cmph_uint32 v1, cmph_uint32 v2)
{
	cmph_uint32 a, b;

	DEBUGP("Checking edge %u %u looking for %u %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)], v1, v2);
	a = g->edges[abs_edge(e, 0)];
	b = g->edges[abs_edge(e, 1)];
	return ((a == v1 && b == v2) || (a == v2 && b == v1)) ? 1 : 0;
}
|
||||||
|
|
||||||
|
/* Return the id (a value below nedges) of the edge joining v1 and v2, found
 * by walking v1's adjacency list. The edge must exist (asserted). */
cmph_uint32 graph_edge_id(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
{
	cmph_uint32 e = g->first[v1];

	assert(e != EMPTY);
	while (!check_edge(g, e, v1, v2))
	{
		e = g->next[e];
		assert(e != EMPTY);
	}
	return abs_edge(e, 0);
}
|
||||||
|
/* Unlink the edge joining v1 and v2 from v1's adjacency list only.
 * The edge must be present on that list (asserted while walking it). */
static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
{
	cmph_uint32 e, prev;

	DEBUGP("Deleting edge point %u %u\n", v1, v2);
	e = g->first[v1];
	if (check_edge(g, e, v1, v2))
	{
		/* the edge is the head of the list */
		g->first[v1] = g->next[e];
		//g->edges[e] = EMPTY;
		DEBUGP("Deleted\n");
		return;
	}
	DEBUGP("Checking linked list\n");
	for (;;)
	{
		prev = e;
		e = g->next[e];
		assert(e != EMPTY);
		if (check_edge(g, e, v1, v2)) break;
	}

	/* bypass the edge */
	g->next[prev] = g->next[e];
	//g->edges[e] = EMPTY;
	DEBUGP("Deleted\n");
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Remove the edge (v1, v2) from both endpoints' adjacency lists and mark the
 * graph as shrinking, after which graph_add_edge() may no longer be used. */
void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
{
	g->shrinking = 1;
	del_edge_point(g, v1, v2); /* unlink from v1's list */
	del_edge_point(g, v2, v1); /* unlink from v2's list */
}
|
||||||
|
|
||||||
|
/* Reset the graph to the edgeless state: every list head and every edge slot
 * becomes EMPTY, the edge count returns to 0 and the shrinking flag is cleared. */
void graph_clear_edges(graph_t *g)
{
	cmph_uint32 k;
	cmph_uint32 nslots = g->nedges*2;

	for (k = 0; k < g->nnodes; ++k)
	{
		g->first[k] = EMPTY;
	}
	for (k = 0; k < nslots; ++k)
	{
		g->edges[k] = EMPTY;
		g->next[k] = EMPTY;
	}
	g->cedges = 0;
	g->shrinking = 0;
}
|
||||||
|
|
||||||
|
/* Checks whether vertex v has exactly one incident edge that is not yet
 * marked in the `deleted` bitmap.
 *
 * Returns 1 and stores that edge in *e when exactly one such edge exists.
 * Returns 0 when there are none or more than one; in the latter case *e has
 * already been overwritten with the first live edge found.
 */
static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted, cmph_uint32 *e)
{
	cmph_uint32 head = g->first[v];
	cmph_uint32 cur;
	cmph_uint8 live = 0;

	DEBUGP("Checking degree of vertex %u\n", v);
	for (cur = head; cur != EMPTY; cur = g->next[cur])
	{
		if (GETBIT(deleted, abs_edge(cur, 0))) continue;
		/* A second live edge means the degree is at least two. */
		if (live) return 0;
		if (cur != head)
		{
			DEBUGP("Found first edge\n");
		}
		*e = cur;
		live = 1;
	}
	return live;
}
|
||||||
|
|
||||||
|
/* Starting at v, repeatedly marks the single live edge of a degree-1 vertex
 * as deleted and moves to that edge's other endpoint, stopping as soon as the
 * current vertex does not have exactly one live edge.
 */
static void cyclic_del_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted)
{
	cmph_uint32 e = 0;
	cmph_uint32 cur = v;

	if (!find_degree1_edge(g, cur, deleted, &e)) return;

	for (;;)
	{
		cmph_uint32 other;

		DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
		SETBIT(deleted, abs_edge(e, 0));

		/* Pick the endpoint of e that is not the current vertex. */
		other = g->edges[abs_edge(e, 0)];
		if (other == cur) other = g->edges[abs_edge(e, 1)];

		DEBUGP("Checking if second endpoint %u has degree 1\n", other);
		if (!find_degree1_edge(g, other, deleted, &e)) break;

		DEBUGP("Inspecting vertex %u\n", other);
		cur = other;
	}
}
|
||||||
|
|
||||||
|
/* Reports whether the graph contains a cycle.
 *
 * Runs cyclic_del_edge() from every vertex, which marks edges hanging off
 * degree-1 vertices as deleted.  Any edge left unmarked afterwards means the
 * graph is cyclic.
 *
 * Returns 1 if an undeleted edge remains, 0 otherwise.  If the scratch bitmap
 * cannot be allocated the function also returns 1: acyclicity cannot be
 * established, and callers already treat 1 as "retry or give up".
 */
int graph_is_cyclic(graph_t *g)
{
	cmph_uint32 i;
	cmph_uint32 v;
	/* One bit per edge, rounded up to whole bytes. */
	size_t deleted_len = g->nedges/8 + 1;
	cmph_uint8 *deleted = (cmph_uint8 *)calloc(deleted_len, sizeof(cmph_uint8));

	if (deleted == NULL) return 1;

	DEBUGP("Looking for cycles in graph with %u vertices and %u edges\n", g->nnodes, g->nedges);
	for (v = 0; v < g->nnodes; ++v)
	{
		cyclic_del_edge(g, v, deleted);
	}
	for (i = 0; i < g->nedges; ++i)
	{
		if (!(GETBIT(deleted, i)))
		{
			DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]);
			free(deleted);
			return 1;
		}
	}
	free(deleted);
	return 0;
}
|
||||||
|
|
||||||
|
/* Returns the bit stored for vertex v in g->critical_nodes, the bitmap filled
 * in by graph_obtain_critical_nodes().
 */
cmph_uint8 graph_node_is_critical(graph_t * g, cmph_uint32 v) /* included -- Fabiano */
{
	cmph_uint8 flag = (cmph_uint8)GETBIT(g->critical_nodes,v);

	return flag;
}
|
||||||
|
|
||||||
|
/* Rebuilds g->critical_nodes and g->ncritical_nodes.
 *
 * Edges are first marked deleted via cyclic_del_edge() from every vertex.
 * Both endpoints of every edge left unmarked are then flagged as critical,
 * each vertex being counted once.  Any previous bitmap is freed and replaced.
 */
void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
{
	cmph_uint32 edge_idx;
	cmph_uint32 v;
	/* One bit per edge, zero-initialised. */
	cmph_uint8 *deleted = (cmph_uint8 *)calloc((size_t)g->nedges/8 + 1, sizeof(cmph_uint8));

	free(g->critical_nodes);
	/* One bit per vertex, zero-initialised. */
	g->critical_nodes = (cmph_uint8 *)calloc((size_t)g->nnodes/8 + 1, sizeof(cmph_uint8));
	g->ncritical_nodes = 0;

	DEBUGP("Looking for the 2-core in graph with %u vertices and %u edges\n", g->nnodes, g->nedges);
	for (v = 0; v < g->nnodes; ++v)
	{
		cyclic_del_edge(g, v, deleted);
	}

	for (edge_idx = 0; edge_idx < g->nedges; ++edge_idx)
	{
		cmph_uint32 side;

		if (GETBIT(deleted, edge_idx)) continue;
		DEBUGP("Edge %u %u->%u belongs to the 2-core\n", edge_idx, g->edges[edge_idx], g->edges[edge_idx + g->nedges]);

		/* side 0 is edges[edge_idx], side 1 is edges[edge_idx + nedges]. */
		for (side = 0; side < 2; ++side)
		{
			cmph_uint32 endpoint = g->edges[edge_idx + side * g->nedges];

			if (GETBIT(g->critical_nodes, endpoint)) continue;
			g->ncritical_nodes ++;
			SETBIT(g->critical_nodes, endpoint);
		}
	}
	free(deleted);
}
|
||||||
|
|
||||||
|
/* Returns 1 if v1's adjacency list holds an entry that check_edge() accepts
 * for (v1, v2), and 0 if the list ends without a match.
 */
cmph_uint8 graph_contains_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2) /* included -- Fabiano*/
{
	cmph_uint32 cur;

	for (cur = g->first[v1]; cur != EMPTY; cur = g->next[cur])
	{
		if (check_edge(g, cur, v1, v2)) return 1;
	}
	return 0;
}
|
||||||
|
|
||||||
|
/* Returns an endpoint of edge e: id 0 reads g->edges[e], id 1 reads
 * g->edges[e + g->nedges].
 */
cmph_uint32 graph_vertex_id(graph_t *g, cmph_uint32 e, cmph_uint32 id) /* included -- Fabiano*/
{
	cmph_uint32 slot = e + id*g->nedges;

	return g->edges[slot];
}
|
||||||
|
|
||||||
|
/* Returns the critical-vertex count last computed by
 * graph_obtain_critical_nodes().
 */
cmph_uint32 graph_ncritical_nodes(graph_t *g) /* included -- Fabiano*/
{
	cmph_uint32 count = g->ncritical_nodes;

	return count;
}
|
||||||
|
|
||||||
|
/* Creates an iterator positioned at the head of v's adjacency list.
 * Advance it with graph_next_neighbor().
 */
graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v)
{
	graph_iterator_t cursor;

	cursor.edge = g->first[v];
	cursor.vertex = v;
	return cursor;
}
|
||||||
|
/* Returns the next neighbour of it->vertex and advances the iterator, or
 * GRAPH_NO_NEIGHBOR once the adjacency list is exhausted.
 *
 * The neighbour is whichever endpoint of the current edge is not
 * it->vertex.
 */
cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
{
	cmph_uint32 cur = it->edge;
	cmph_uint32 first_end;

	if (cur == EMPTY) return GRAPH_NO_NEIGHBOR;

	first_end = g->edges[cur];
	it->edge = g->next[cur];
	return (first_end == it->vertex) ? g->edges[cur + g->nedges] : first_end;
}
|
||||||
|
|
||||||
|
|
40
cmph/graph.h
Normal file
40
cmph/graph.h
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
#ifndef _CMPH_GRAPH_H__
#define _CMPH_GRAPH_H__

#include <limits.h>
#include "cmph_types.h"

/* Returned by graph_next_neighbor() once an iterator is exhausted. */
#define GRAPH_NO_NEIGHBOR UINT_MAX

typedef struct __graph_t graph_t;
typedef struct __graph_iterator_t graph_iterator_t;

/* Cursor over the edges incident to a single vertex. */
struct __graph_iterator_t
{
	cmph_uint32 vertex; /* vertex whose neighbours are enumerated */
	cmph_uint32 edge;   /* next adjacency-list entry to visit */
};

/* Construction and teardown. */
graph_t *graph_new(cmph_uint32 nnodes, cmph_uint32 nedges);
void graph_destroy(graph_t *graph);

/* Edge manipulation and lookup. */
void graph_add_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
void graph_clear_edges(graph_t *g);
/* The edge must exist; a missing edge trips an assert. */
cmph_uint32 graph_edge_id(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
/* Returns 1 if the edge is present, 0 otherwise. */
cmph_uint8 graph_contains_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);

/* Neighbour iteration: create a cursor, then advance it until
 * graph_next_neighbor() returns GRAPH_NO_NEIGHBOR. */
graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v);
cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it);

/* Critical vertices: endpoints of edges that survive degree-1 edge removal. */
void graph_obtain_critical_nodes(graph_t *g);  /* included -- Fabiano*/
cmph_uint8 graph_node_is_critical(graph_t * g, cmph_uint32 v); /* included -- Fabiano */
cmph_uint32 graph_ncritical_nodes(graph_t *g); /* included -- Fabiano*/
/* Endpoint `id` (0 or 1) of edge e. */
cmph_uint32 graph_vertex_id(graph_t *g, cmph_uint32 e, cmph_uint32 id); /* included -- Fabiano*/

/* Returns 1 if the graph contains a cycle, 0 otherwise. */
int graph_is_cyclic(graph_t *g);

void graph_print(graph_t *);

#endif
|
216
cmph/hash.c
Normal file
216
cmph/hash.c
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
#include "hash_state.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
const char *cmph_hash_names[] = { "jenkins", NULL };
|
||||||
|
|
||||||
|
/* Allocates a hash state of the requested type.
 *
 * \param hashfunc hash function type; only CMPH_HASH_JENKINS is handled
 * \param hashsize forwarded to the type-specific constructor
 * \return the new state with its hashfunc tag set, or NULL if the constructor
 *         returned NULL or (in builds without asserts) the type is unknown
 */
hash_state_t *hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize)
{
	hash_state_t *state = NULL;
	switch (hashfunc)
	{
		case CMPH_HASH_JENKINS:
			DEBUGP("Jenkins function - %u\n", hashsize);
			state = (hash_state_t *)jenkins_state_new(hashsize);
			DEBUGP("Jenkins function created\n");
			break;
		default:
			assert(0);
	}
	/* Never write the tag through a NULL state. */
	if (state == NULL) return NULL;
	state->hashfunc = hashfunc;
	return state;
}
|
||||||
|
/* Hashes `key` (keylen bytes) with the function recorded in `state`.
 * An unknown function type trips an assert; builds without asserts return 0.
 */
cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen)
{
	cmph_uint32 value = 0;

	switch (state->hashfunc)
	{
		case CMPH_HASH_JENKINS:
			value = jenkins_hash((jenkins_state_t *)state, key, keylen);
			break;
		default:
			assert(0);
	}
	return value;
}
|
||||||
|
|
||||||
|
/* Computes the hash vector of `key` into `hashes`, using the function
 * recorded in `state`.  An unknown function type trips an assert.
 */
void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes)
{
	if (state->hashfunc != CMPH_HASH_JENKINS)
	{
		assert(0);
		return;
	}
	jenkins_hash_vector_((jenkins_state_t *)state, key, keylen, hashes);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Serialises `state` into a newly allocated buffer.
 *
 * Layout: the hash function name including its terminating NUL, followed by
 * the bytes produced by the type-specific dump routine.
 *
 * \param state  hash state to serialise
 * \param buf    receives the malloc'ed buffer; the caller frees it
 * \param buflen receives the buffer length, or UINT_MAX on failure, in which
 *               case *buf is not written
 */
void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen)
{
	char *algobuf = NULL;
	const char *name;
	size_t name_len; /* name length including the NUL */
	size_t len;

	switch (state->hashfunc)
	{
		case CMPH_HASH_JENKINS:
			jenkins_state_dump((jenkins_state_t *)state, &algobuf, buflen);
			if (*buflen == UINT_MAX) return;
			break;
		default:
			assert(0);
			/* Without asserts, fail through the same UINT_MAX signal
			 * instead of using an unset algobuf. */
			*buflen = UINT_MAX;
			return;
	}

	name = cmph_hash_names[state->hashfunc];
	name_len = strlen(name) + 1;
	len = *buflen;

	*buf = (char *)malloc(name_len + len);
	if (*buf == NULL)
	{
		free(algobuf);
		*buflen = UINT_MAX;
		return;
	}
	memcpy(*buf, name, name_len);
	DEBUGP("Algobuf is %u\n", *(cmph_uint32 *)algobuf);
	memcpy(*buf + name_len, algobuf, len);
	*buflen = (cmph_uint32)name_len + *buflen;
	free(algobuf);
	return;
}
|
||||||
|
|
||||||
|
/* Duplicates a hash state.
 *
 * \param src_state state to copy
 * \return a new state carrying the same hashfunc tag, or NULL if the
 *         type-specific copy returned NULL or (in builds without asserts)
 *         the type is unknown
 */
hash_state_t * hash_state_copy(hash_state_t *src_state)
{
	hash_state_t *dest_state = NULL;
	switch (src_state->hashfunc)
	{
		case CMPH_HASH_JENKINS:
			dest_state = (hash_state_t *)jenkins_state_copy((jenkins_state_t *)src_state);
			break;
		default:
			assert(0);
	}
	/* Never write the tag through a NULL copy. */
	if (dest_state == NULL) return NULL;
	dest_state->hashfunc = src_state->hashfunc;
	return dest_state;
}
|
||||||
|
|
||||||
|
/* Rebuilds a hash state from a buffer written by hash_state_dump().
 *
 * The buffer must start with a known hash function name including its
 * terminating NUL; the rest is handed to the type-specific loader.
 *
 * \param buf    serialised state
 * \param buflen number of valid bytes in buf
 * \return the loaded state, or NULL if buf is NULL, is too short to hold any
 *         known name, or does not start with one
 */
hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen)
{
	cmph_uint32 i;
	cmph_uint32 offset = 0;
	CMPH_HASH hashfunc = CMPH_HASH_COUNT;

	if (buf == NULL) return NULL;
	for (i = 0; i < CMPH_HASH_COUNT; ++i)
	{
		size_t tag_len = strlen(cmph_hash_names[i]) + 1; /* name plus NUL */

		/* Compare only inside the bytes the caller said are valid. */
		if (tag_len > buflen) continue;
		if (memcmp(buf, cmph_hash_names[i], tag_len) == 0)
		{
			hashfunc = (CMPH_HASH)i;
			offset = (cmph_uint32)tag_len;
			break;
		}
	}
	if (hashfunc == CMPH_HASH_COUNT) return NULL;
	switch (hashfunc)
	{
		case CMPH_HASH_JENKINS:
			/* offset <= buflen here, so the subtraction cannot wrap. */
			return (hash_state_t *)jenkins_state_load(buf + offset, buflen - offset);
		default:
			return NULL;
	}
	return NULL;
}
|
||||||
|
/* Releases a hash state through its type-specific destructor.
 * An unknown function type trips an assert.
 */
void hash_state_destroy(hash_state_t *state)
{
	if (state->hashfunc != CMPH_HASH_JENKINS)
	{
		assert(0);
		return;
	}
	jenkins_state_destroy((jenkins_state_t *)state);
}
|
||||||
|
|
||||||
|
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed)
|
||||||
|
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||||
|
* \param state points to the hash function
|
||||||
|
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||||
|
*
|
||||||
|
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||||
|
* However, the hash function type must be packed outside.
|
||||||
|
*/
|
||||||
|
void hash_state_pack(hash_state_t *state, void *hash_packed)
|
||||||
|
{
|
||||||
|
switch (state->hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
// pack the jenkins hash function
|
||||||
|
jenkins_state_pack((jenkins_state_t *)state, hash_packed);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||||
|
* \brief Return the amount of space needed to pack a hash function.
|
||||||
|
* \param hashfunc function type
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||||
|
{
|
||||||
|
cmph_uint32 size = 0;
|
||||||
|
switch (hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
size += jenkins_state_packed_size();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
|
||||||
|
* \param hash_packed is a pointer to a contiguous memory area
|
||||||
|
* \param hashfunc is the type of the hash function packed in hash_packed
|
||||||
|
* \param key is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
switch (hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
return jenkins_hash_packed(hash_packed, k, keylen);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
assert(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
|
* \param hash_packed is a pointer to a contiguous memory area
|
||||||
|
* \param key is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
|
{
|
||||||
|
switch (hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
jenkins_hash_vector_packed(hash_packed, k, keylen, hashes);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
|
||||||
|
* \param state is a pointer to a hash_state_t structure
|
||||||
|
* \return the hash function type pointed by state
|
||||||
|
*/
|
||||||
|
CMPH_HASH hash_get_type(hash_state_t *state)
|
||||||
|
{
|
||||||
|
return state->hashfunc;
|
||||||
|
}
|
76
cmph/hash.h
Normal file
76
cmph/hash.h
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
#ifndef __CMPH_HASH_H__
#define __CMPH_HASH_H__

#include "cmph_types.h"

typedef union __hash_state_t hash_state_t;

/** \fn hash_state_t *hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize);
 *  \param hashfunc is the type of hash function to create
 *  \param hashsize is forwarded to the type-specific constructor
 *  \return a new hash state tagged with hashfunc
 */
hash_state_t *hash_state_new(CMPH_HASH, cmph_uint32 hashsize);

/** \fn cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen);
 *  \param state is a pointer to a hash_state_t structure
 *  \param key is a pointer to a key
 *  \param keylen is the key length
 *  \return an integer that represents a hash value of 32 bits.
 */
cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen);

/** \fn void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes);
 *  \param state is a pointer to a hash_state_t structure
 *  \param key is a pointer to a key
 *  \param keylen is the key length
 *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
 */
void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes);

/** \fn void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen);
 *  \param state is the hash state to serialise
 *  \param buf receives a newly allocated buffer that the caller frees
 *  \param buflen receives the buffer length; UINT_MAX signals failure
 */
void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen);

/** \fn hash_state_t * hash_state_copy(hash_state_t *src_state);
 *  \param src_state is the hash state to duplicate
 *  \return a new hash state with the same function type
 */
hash_state_t * hash_state_copy(hash_state_t *src_state);

/** \fn hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen);
 *  \param buf is a buffer produced by hash_state_dump()
 *  \param buflen is the buffer length
 *  \return the loaded hash state, or NULL if the buffer does not start with a known hash function name
 */
hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen);

/** \fn void hash_state_destroy(hash_state_t *state);
 *  \param state is the hash state to release
 */
void hash_state_destroy(hash_state_t *state);

/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
 *  \brief Pack a hash function into a preallocated contiguous memory area.
 *  \param state points to the hash function
 *  \param hash_packed pointer to the contiguous memory area that receives the packed function. Its size must be at least hash_state_packed_size()
 *
 * The hash function type itself is not written; it must be packed separately by the caller.
 */
void hash_state_pack(hash_state_t *state, void *hash_packed);

/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
 *  \param hash_packed is a pointer to a contiguous memory area
 *  \param hashfunc is the type of the hash function packed in hash_packed
 *  \param k is a pointer to a key
 *  \param keylen is the key length
 *  \return an integer that represents a hash value of 32 bits.
 */
cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);

/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
 *  \brief Return the amount of space needed to pack a hash function.
 *  \param hashfunc function type
 *  \return the size of the packed function or zero for failures
 */
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc);

/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
 *  \param hash_packed is a pointer to a contiguous memory area
 *  \param hashfunc is the type of the hash function packed in hash_packed
 *  \param k is a pointer to a key
 *  \param keylen is the key length
 *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
 */
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);

/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
 *  \param state is a pointer to a hash_state_t structure
 *  \return the hash function type pointed by state
 */
CMPH_HASH hash_get_type(hash_state_t *state);

#endif
|
12
cmph/hash_state.h
Normal file
12
cmph/hash_state.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef __HASH_STATE_H__
#define __HASH_STATE_H__

#include "hash.h"
#include "jenkins_hash.h"

/* Generic hash state.  Code in hash.c reads and writes `hashfunc` on pointers
 * that were cast from jenkins_state_t *, and casts back before calling the
 * jenkins_* routines.
 * NOTE(review): this presumably relies on jenkins_state_t beginning with a
 * CMPH_HASH member -- confirm against jenkins_hash.h.
 */
union __hash_state_t
{
	CMPH_HASH hashfunc;      /* type tag used for dispatch */
	jenkins_state_t jenkins; /* state for CMPH_HASH_JENKINS */
};

#endif
|
289
cmph/hashtree.c
Normal file
289
cmph/hashtree.c
Normal file
@ -0,0 +1,289 @@
|
|||||||
|
#include "graph.h"
|
||||||
|
#include "hashtree.h"
|
||||||
|
#include "cmph_structs.h"
|
||||||
|
#include "hastree_structs.h"
|
||||||
|
#include "hash.h"
|
||||||
|
#include "bitbool.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
hashtree_config_data_t *hashtree_config_new()
|
||||||
|
{
|
||||||
|
hashtree_config_data_t *hashtree;
|
||||||
|
hashtree = (hashtree_config_data_t *)malloc(sizeof(hashtree_config_data_t));
|
||||||
|
if (!hashtree) return NULL;
|
||||||
|
memset(hashtree, 0, sizeof(hashtree_config_data_t));
|
||||||
|
hashtree->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
|
hashtree->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
|
hashtree->hashfuncs[2] = CMPH_HASH_JENKINS;
|
||||||
|
hashtree->memory = 32 * 1024 * 1024;
|
||||||
|
return hashtree;
|
||||||
|
}
|
||||||
|
/* Frees the algorithm-specific configuration stored in mph->data.
 * Only the configuration struct itself is freed.
 */
void hashtree_config_destroy(cmph_config_t *mph)
{
	hashtree_config_data_t *cfg = (hashtree_config_data_t *)mph->data;

	DEBUGP("Destroying algorithm dependent data\n");
	free(cfg);
}
|
||||||
|
|
||||||
|
/* Copies up to three hash function types from `hashfuncs` into the
 * configuration.
 *
 * \param mph       configuration whose data is a hashtree_config_data_t
 * \param hashfuncs array terminated by CMPH_HASH_COUNT; entries beyond the
 *                  third are ignored
 *
 * The bound is tested before the terminator, so an array holding exactly
 * three entries is never read past its end.
 */
void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
	cmph_uint32 i;

	//hashtree only uses three hash functions
	for (i = 0; i < 3 && hashfuncs[i] != CMPH_HASH_COUNT; ++i)
	{
		hashtree->hashfuncs[i] = hashfuncs[i];
	}
}
|
||||||
|
|
||||||
|
cmph_t *hashtree_new(cmph_config_t *mph, double c)
|
||||||
|
{
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
hashtree_data_t *hashtreef = NULL;
|
||||||
|
|
||||||
|
cmph_uint32 i;
|
||||||
|
cmph_uint32 iterations = 20;
|
||||||
|
cmph_uint8 *visited = NULL;
|
||||||
|
hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
|
||||||
|
hashtree->m = mph->key_source->nkeys;
|
||||||
|
hashtree->n = ceil(c * mph->key_source->nkeys);
|
||||||
|
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c);
|
||||||
|
hashtree->graph = graph_new(hashtree->n, hashtree->m);
|
||||||
|
DEBUGP("Created graph\n");
|
||||||
|
|
||||||
|
hashtree->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
|
||||||
|
for(i = 0; i < 3; ++i) hashtree->hashes[i] = NULL;
|
||||||
|
//Mapping step
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", hashtree->m, hashtree->n);
|
||||||
|
}
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
int ok;
|
||||||
|
hashtree->hashes[0] = hash_state_new(hashtree->hashfuncs[0], hashtree->n);
|
||||||
|
hashtree->hashes[1] = hash_state_new(hashtree->hashfuncs[1], hashtree->n);
|
||||||
|
ok = hashtree_gen_edges(mph);
|
||||||
|
if (!ok)
|
||||||
|
{
|
||||||
|
--iterations;
|
||||||
|
hash_state_destroy(hashtree->hashes[0]);
|
||||||
|
hashtree->hashes[0] = NULL;
|
||||||
|
hash_state_destroy(hashtree->hashes[1]);
|
||||||
|
hashtree->hashes[1] = NULL;
|
||||||
|
DEBUGP("%u iterations remaining\n", iterations);
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
|
||||||
|
}
|
||||||
|
if (iterations == 0) break;
|
||||||
|
}
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
if (iterations == 0)
|
||||||
|
{
|
||||||
|
graph_destroy(hashtree->graph);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Assignment step
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Starting assignment step\n");
|
||||||
|
}
|
||||||
|
DEBUGP("Assignment step\n");
|
||||||
|
visited = (char *)malloc(hashtree->n/8 + 1);
|
||||||
|
memset(visited, 0, hashtree->n/8 + 1);
|
||||||
|
free(hashtree->g);
|
||||||
|
hashtree->g = (cmph_uint32 *)malloc(hashtree->n * sizeof(cmph_uint32));
|
||||||
|
assert(hashtree->g);
|
||||||
|
for (i = 0; i < hashtree->n; ++i)
|
||||||
|
{
|
||||||
|
if (!GETBIT(visited,i))
|
||||||
|
{
|
||||||
|
hashtree->g[i] = 0;
|
||||||
|
hashtree_traverse(hashtree, visited, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
graph_destroy(hashtree->graph);
|
||||||
|
free(visited);
|
||||||
|
hashtree->graph = NULL;
|
||||||
|
|
||||||
|
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||||
|
mphf->algo = mph->algo;
|
||||||
|
hashtreef = (hashtree_data_t *)malloc(sizeof(hashtree_data_t));
|
||||||
|
hashtreef->g = hashtree->g;
|
||||||
|
hashtree->g = NULL; //transfer memory ownership
|
||||||
|
hashtreef->hashes = hashtree->hashes;
|
||||||
|
hashtree->hashes = NULL; //transfer memory ownership
|
||||||
|
hashtreef->n = hashtree->n;
|
||||||
|
hashtreef->m = hashtree->m;
|
||||||
|
mphf->data = hashtreef;
|
||||||
|
mphf->size = hashtree->m;
|
||||||
|
DEBUGP("Successfully generated minimal perfect hash\n");
|
||||||
|
if (mph->verbosity)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||||
|
}
|
||||||
|
return mphf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Depth-first assignment of g values starting at vertex v.
 *
 * Marks v visited, then for every unvisited neighbour sets
 * g[neighbor] = edge_id(v, neighbor) - g[v] and recurses into it.
 */
static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visited, cmph_uint32 v)
{
	graph_iterator_t it = graph_neighbors_it(hashtree->graph, v);
	cmph_uint32 neighbor;

	SETBIT(visited,v);
	DEBUGP("Visiting vertex %u\n", v);

	for (neighbor = graph_next_neighbor(hashtree->graph, &it);
	     neighbor != GRAPH_NO_NEIGHBOR;
	     neighbor = graph_next_neighbor(hashtree->graph, &it))
	{
		DEBUGP("Visiting neighbor %u\n", neighbor);
		if (!GETBIT(visited,neighbor))
		{
			DEBUGP("Visiting neighbor %u\n", neighbor);
			DEBUGP("Visiting edge %u->%u with id %u\n", v, neighbor, graph_edge_id(hashtree->graph, v, neighbor));
			hashtree->g[neighbor] = graph_edge_id(hashtree->graph, v, neighbor) - hashtree->g[v];
			DEBUGP("g is %u (%u - %u mod %u)\n", hashtree->g[neighbor], graph_edge_id(hashtree->graph, v, neighbor), hashtree->g[v], hashtree->m);
			hashtree_traverse(hashtree, visited, neighbor);
		}
	}
}
|
||||||
|
|
||||||
|
/* Rebuilds the graph from the key source: each key becomes an edge between
 * its two hash values modulo n.
 *
 * When both hashes collide, h2 is moved to the next vertex (wrapping to 0);
 * if they still coincide the key is a self loop and generation fails.
 *
 * Returns 1 if all edges were added and graph_is_cyclic() reports no cycle,
 * 0 otherwise.
 */
static int hashtree_gen_edges(cmph_config_t *mph)
{
	cmph_uint32 e;
	hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
	int cycles = 0;

	DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", hashtree->n, cmph_hash_names[hashtree->hashfuncs[0]], cmph_hash_names[hashtree->hashfuncs[1]]);
	graph_clear_edges(hashtree->graph);
	mph->key_source->rewind(mph->key_source->data);
	for (e = 0; e < mph->key_source->nkeys; ++e)
	{
		cmph_uint32 h1, h2;
		cmph_uint32 keylen;
		char *key;
		int self_loop;

		mph->key_source->read(mph->key_source->data, &key, &keylen);
		h1 = hash(hashtree->hashes[0], key, keylen) % hashtree->n;
		h2 = hash(hashtree->hashes[1], key, keylen) % hashtree->n;
		if (h1 == h2 && ++h2 >= hashtree->n) h2 = 0;

		self_loop = (h1 == h2);
		if (self_loop)
		{
			if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
		}
		else
		{
			DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
		}
		/* The key is released on both paths before it is acted upon. */
		mph->key_source->dispose(mph->key_source->data, key, keylen);
		if (self_loop) return 0;
		graph_add_edge(hashtree->graph, h1, h2);
	}
	cycles = graph_is_cyclic(hashtree->graph);
	if (mph->verbosity && cycles) fprintf(stderr, "Cyclic graph generated\n");
	DEBUGP("Looking for cycles: %u\n", cycles);

	return ! cycles;
}
|
||||||
|
|
||||||
|
/* Writes the function to `fd`.
 *
 * Layout after the common header written by __cmph_dump(): the number of
 * hash functions (2), then for each one its dump length and dump bytes, then
 * n, m and the n entries of g.
 *
 * \return 1 on success, 0 if a hash state could not be serialised or a write
 *         failed
 */
int hashtree_dump(cmph_t *mphf, FILE *fd)
{
	char *buf = NULL;
	cmph_uint32 buflen;
	cmph_uint32 two = 2; //number of hash functions
	cmph_uint32 h;
	hashtree_data_t *data = (hashtree_data_t *)mphf->data;
#ifdef DEBUG
	cmph_uint32 i;
#endif
	__cmph_dump(mphf, fd);

	if (fwrite(&two, sizeof(cmph_uint32), 1, fd) != 1) return 0;
	for (h = 0; h < two; ++h)
	{
		int ok;

		/* Reset so a failed dump cannot leave a stale pointer behind. */
		buf = NULL;
		buflen = 0;
		hash_state_dump(data->hashes[h], &buf, &buflen);
		/* hash_state_dump() reports failure through UINT_MAX. */
		if (buflen == UINT_MAX || buf == NULL) return 0;
		DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
		ok = fwrite(&buflen, sizeof(cmph_uint32), 1, fd) == 1
		  && fwrite(buf, buflen, 1, fd) == 1;
		free(buf);
		if (!ok) return 0;
	}

	if (fwrite(&(data->n), sizeof(cmph_uint32), 1, fd) != 1) return 0;
	if (fwrite(&(data->m), sizeof(cmph_uint32), 1, fd) != 1) return 0;

	/* A zero-sized item makes fwrite return 0, so skip it for an empty g. */
	if (data->n > 0 && fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd) != 1) return 0;
#ifdef DEBUG
	fprintf(stderr, "G: ");
	for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
	fprintf(stderr, "\n");
#endif
	return 1;
}
|
||||||
|
|
||||||
|
/* Reads a function written by hashtree_dump() from `f` into mphf->data:
 * the hash function count, each length-prefixed hash state, then n, m and
 * the n entries of g.
 *
 * NOTE(review): no read or allocation result is checked, and the void return
 * gives no way to report a truncated or corrupt file -- worth revisiting
 * together with the callers.
 */
void hashtree_load(FILE *f, cmph_t *mphf)
{
	cmph_uint32 nhashes;
	cmph_uint32 statelen;
	cmph_uint32 i;
	char *raw = NULL;
	hashtree_data_t *loaded = (hashtree_data_t *)malloc(sizeof(hashtree_data_t));

	DEBUGP("Loading hashtree mphf\n");
	mphf->data = loaded;

	fread(&nhashes, sizeof(cmph_uint32), 1, f);
	/* One extra slot so the pointer array ends with NULL. */
	loaded->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
	loaded->hashes[nhashes] = NULL;
	DEBUGP("Reading %u hashes\n", nhashes);
	i = 0;
	while (i < nhashes)
	{
		fread(&statelen, sizeof(cmph_uint32), 1, f);
		DEBUGP("Hash state has %u bytes\n", statelen);
		raw = (char *)malloc(statelen);
		fread(raw, statelen, 1, f);
		loaded->hashes[i] = hash_state_load(raw, statelen);
		free(raw);
		++i;
	}

	DEBUGP("Reading m and n\n");
	fread(&(loaded->n), sizeof(cmph_uint32), 1, f);
	fread(&(loaded->m), sizeof(cmph_uint32), 1, f);

	loaded->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*loaded->n);
	fread(loaded->g, loaded->n*sizeof(cmph_uint32), 1, f);
#ifdef DEBUG
	fprintf(stderr, "G: ");
	for (i = 0; i < loaded->n; ++i) fprintf(stderr, "%u ", loaded->g[i]);
	fprintf(stderr, "\n");
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Evaluates the function for `key`: (g[h1] + g[h2]) mod m, where h1 and h2
 * are the two hash values modulo n and a collision moves h2 to the next
 * vertex, wrapping to 0.
 */
cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	hashtree_data_t *data = mphf->data;
	cmph_uint32 h1 = hash(data->hashes[0], key, keylen) % data->n;
	cmph_uint32 h2 = hash(data->hashes[1], key, keylen) % data->n;

	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
	if (h1 == h2)
	{
		++h2;
		if (h2 >= data->n) h2 = 0;
	}
	DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, data->g[h1], data->g[h2], data->m);
	return (data->g[h1] + data->g[h2]) % data->m;
}
|
||||||
|
/* Releases a function built by hashtree_new() or hashtree_load(): the g
 * array, the first two hash states, the hash pointer array, the data struct
 * and finally mphf itself.
 */
void hashtree_destroy(cmph_t *mphf)
{
	hashtree_data_t *data = (hashtree_data_t *)mphf->data;
	cmph_uint32 slot;

	free(data->g);
	for (slot = 0; slot < 2; ++slot)
	{
		hash_state_destroy(data->hashes[slot]);
	}
	free(data->hashes);
	free(data);
	free(mphf);
}
|
19
cmph/hashtree.h
Normal file
19
cmph/hashtree.h
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#ifndef __CMPH_HASHTREE_H__
#define __CMPH_HASHTREE_H__

#include <stdio.h> /* FILE, used by hashtree_load() and hashtree_dump() */
#include "cmph.h"

typedef struct __hashtree_data_t hashtree_data_t;
typedef struct __hashtree_config_data_t hashtree_config_data_t;

/* Configuration lifecycle. */
hashtree_config_data_t *hashtree_config_new(void);
/* `hashfuncs` is terminated by CMPH_HASH_COUNT; at most three are used. */
void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
/* NOTE(review): hashtree.c provides no definition of this function. */
void hashtree_config_set_leaf_algo(cmph_config_t *mph, CMPH_ALGO leaf_algo);
void hashtree_config_destroy(cmph_config_t *mph);

/* Builds the function; returns NULL on failure. */
cmph_t *hashtree_new(cmph_config_t *mph, double c);

/* Persistence and teardown. */
void hashtree_load(FILE *f, cmph_t *mphf);
int hashtree_dump(cmph_t *mphf, FILE *f);
void hashtree_destroy(cmph_t *mphf);

/* Evaluates the function for one key. */
cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
#endif
|
32
cmph/hashtree_structs.h
Normal file
32
cmph/hashtree_structs.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#ifndef __CMPH_HASHTREE_STRUCTS_H__
|
||||||
|
#define __CMPH_HASHTREE_STRUCTS_H__
|
||||||
|
|
||||||
|
#include "hash_state.h"
|
||||||
|
|
||||||
|
struct __hashtree_data_t
|
||||||
|
{
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
double c; //constant c
|
||||||
|
cmph_uint8 *size; //size[i] stores the number of edges represented by g[i]
|
||||||
|
cmph_uint32 **g;
|
||||||
|
cmph_uint32 k; //number of components
|
||||||
|
hash_state_t **h1;
|
||||||
|
hash_state_t **h2;
|
||||||
|
hash_state_t *h3;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __hashtree_config_data_t
|
||||||
|
{
|
||||||
|
CMPH_ALGO leaf_algo;
|
||||||
|
CMPH_HASH hashfuncs[3];
|
||||||
|
cmph_uint32 m; //edges (words) count
|
||||||
|
cmph_uint8 *size; //size[i] stores the number of edges represented by g[i]
|
||||||
|
cmph_uint32 *offset; //offset[i] stores the sum size[0] + ... size[i - 1]
|
||||||
|
cmph_uint32 k; //number of components
|
||||||
|
cmph_uint32 memory;
|
||||||
|
hash_state_t **h1;
|
||||||
|
hash_state_t **h2;
|
||||||
|
hash_state_t *h3;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
297
cmph/jenkins_hash.c
Normal file
297
cmph/jenkins_hash.c
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
#include "jenkins_hash.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef WIN32
|
||||||
|
#define _USE_MATH_DEFINES //For M_LOG2E
|
||||||
|
#endif
|
||||||
|
#include <math.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
#define hashsize(n) ((cmph_uint32)1<<(n))
|
||||||
|
#define hashmask(n) (hashsize(n)-1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//#define NM2 /* Define this if you do not want power of 2 table sizes*/
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
mix -- mix 3 32-bit values reversibly.
|
||||||
|
For every delta with one or two bits set, and the deltas of all three
|
||||||
|
high bits or all three low bits, whether the original value of a,b,c
|
||||||
|
is almost all zero or is uniformly distributed,
|
||||||
|
* If mix() is run forward or backward, at least 32 bits in a,b,c
|
||||||
|
have at least 1/4 probability of changing.
|
||||||
|
* If mix() is run forward, every bit of c will change between 1/3 and
|
||||||
|
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
||||||
|
mix() was built out of 36 single-cycle latency instructions in a
|
||||||
|
structure that could support 2x parallelism, like so:
|
||||||
|
a -= b;
|
||||||
|
a -= c; x = (c>>13);
|
||||||
|
b -= c; a ^= x;
|
||||||
|
b -= a; x = (a<<8);
|
||||||
|
c -= a; b ^= x;
|
||||||
|
c -= b; x = (b>>13);
|
||||||
|
...
|
||||||
|
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
|
||||||
|
of that parallelism. They've also turned some of those single-cycle
|
||||||
|
latency instructions into multi-cycle latency instructions. Still,
|
||||||
|
this is the fastest good hash I could find. There were about 2^^68
|
||||||
|
to choose from. I only looked at a billion or so.
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
/*
 * mix(a,b,c) -- mix three 32-bit lvalues in place.
 *
 * The body is wrapped in do { } while (0) so that `mix(x,y,z);` is a single
 * statement and can be used safely in an unbraced if/else.  Every argument
 * is parenthesized; each one is evaluated several times, so arguments must
 * be side-effect-free lvalues.
 */
#define mix(a,b,c) \
do { \
	(a) -= (b); (a) -= (c); (a) ^= ((c)>>13); \
	(b) -= (c); (b) -= (a); (b) ^= ((a)<<8); \
	(c) -= (a); (c) -= (b); (c) ^= ((b)>>13); \
	(a) -= (b); (a) -= (c); (a) ^= ((c)>>12); \
	(b) -= (c); (b) -= (a); (b) ^= ((a)<<16); \
	(c) -= (a); (c) -= (b); (c) ^= ((b)>>5); \
	(a) -= (b); (a) -= (c); (a) ^= ((c)>>3); \
	(b) -= (c); (b) -= (a); (b) ^= ((a)<<10); \
	(c) -= (a); (c) -= (b); (c) ^= ((b)>>15); \
} while (0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
hash() -- hash a variable-length key into a 32-bit value
|
||||||
|
k : the key (the unaligned variable-length array of bytes)
|
||||||
|
len : the length of the key, counting by bytes
|
||||||
|
initval : can be any 4-byte value
|
||||||
|
Returns a 32-bit value. Every bit of the key affects every bit of
|
||||||
|
the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
||||||
|
About 6*len+35 instructions.
|
||||||
|
|
||||||
|
The best hash table sizes are powers of 2. There is no need to do
|
||||||
|
mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
||||||
|
use a bitmask. For example, if you need only 10 bits, do
|
||||||
|
h = (h & hashmask(10));
|
||||||
|
In which case, the hash table should have hashsize(10) elements.
|
||||||
|
|
||||||
|
If you are hashing n strings (cmph_uint8 **)k, do it like this:
|
||||||
|
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
|
||||||
|
|
||||||
|
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
|
||||||
|
code any way you wish, private, educational, or commercial. It's free.
|
||||||
|
|
||||||
|
See http://burtleburtle.net/bob/hash/evahash.html
|
||||||
|
Use for hash table lookup, or anything where one collision in 2^^32 is
|
||||||
|
acceptable. Do NOT use for cryptographic purposes.
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
/* Allocate a Jenkins hash state whose seed is drawn with rand() from
 * [0, size).  Returns NULL if the allocation fails.  The caller releases the
 * state with jenkins_state_destroy(). */
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
{
	jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
	if (!state) return NULL;
	DEBUGP("Initializing jenkins hash\n");
	/* Tag the state the same way jenkins_state_load() does so that no
	 * member is left uninitialized. */
	state->hashfunc = CMPH_HASH_JENKINS;
	/* A modulo by zero is undefined; use seed 0 for an empty table size. */
	state->seed = size ? ((cmph_uint32)rand() % size) : 0;
	return state;
}
|
||||||
|
/* Release a state obtained from jenkins_state_new(), jenkins_state_copy()
 * or jenkins_state_load().  The state is a single allocation, so one free()
 * is all that is required. */
void jenkins_state_destroy(jenkins_state_t *state)
{
	free(state);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Core of the Jenkins hash: consumes `keylen` bytes of `k` and leaves the
 * three mixed 32-bit words in hashes[0..2].
 *
 * Declared `static inline`: with plain `inline` and no extern declaration,
 * C99/C11 provide no external definition, so any call the compiler chooses
 * not to inline would fail to link.
 *
 * NOTE: `k` is plain char.  The (cmph_uint32)k[i] conversions therefore
 * sign-extend bytes >= 0x80 where char is signed, while the two cmph_uint8
 * casts in the tail do not.  This is deliberately left untouched: changing
 * it would change the hash values of already generated functions.
 */
static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
{
	cmph_uint32 len = keylen;

	/* Set up the internal state */
	hashes[0] = hashes[1] = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
	hashes[2] = seed;                    /* the previous hash value - seed in our case */

	/*---------------------------------------- handle most of the key */
	while (len >= 12)
	{
		hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
		hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
		hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
		mix(hashes[0],hashes[1],hashes[2]);
		k += 12; len -= 12;
	}

	/*------------------------------------- handle the last 11 bytes */
	hashes[2] += keylen;
	switch(len)              /* all the case statements fall through */
	{
		case 11:
			hashes[2] +=((cmph_uint32)k[10]<<24);
			/* fall through */
		case 10:
			hashes[2] +=((cmph_uint32)k[9]<<16);
			/* fall through */
		case 9 :
			hashes[2] +=((cmph_uint32)k[8]<<8);
			/* the first byte of hashes[2] is reserved for the length */
			/* fall through */
		case 8 :
			hashes[1] +=((cmph_uint32)k[7]<<24);
			/* fall through */
		case 7 :
			hashes[1] +=((cmph_uint32)k[6]<<16);
			/* fall through */
		case 6 :
			hashes[1] +=((cmph_uint32)k[5]<<8);
			/* fall through */
		case 5 :
			hashes[1] +=(cmph_uint8) k[4];
			/* fall through */
		case 4 :
			hashes[0] +=((cmph_uint32)k[3]<<24);
			/* fall through */
		case 3 :
			hashes[0] +=((cmph_uint32)k[2]<<16);
			/* fall through */
		case 2 :
			hashes[0] +=((cmph_uint32)k[1]<<8);
			/* fall through */
		case 1 :
			hashes[0] +=(cmph_uint8)k[0];
			/* fall through */
		default:
			/* case 0: nothing left to add */
			break;
	}

	mix(hashes[0],hashes[1],hashes[2]);
}
|
||||||
|
|
||||||
|
/* Hash `keylen` bytes of `k` using the seed stored in `state`; the result is
 * the third word produced by __jenkins_hash_vector(). */
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
{
	cmph_uint32 mixed[3];

	__jenkins_hash_vector(state->seed, k, keylen, mixed);
	return mixed[2];
}
|
||||||
|
|
||||||
|
/* Same computation as jenkins_hash(), but hands back all three mixed words
 * through `hashes`, which must have room for three cmph_uint32 values. */
void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
{
	const cmph_uint32 seed = state->seed;

	__jenkins_hash_vector(seed, k, keylen, hashes);
}
|
||||||
|
|
||||||
|
/* Serialize `state` into a freshly malloc'ed buffer holding just the seed.
 * On success *buf points to the buffer and *buflen is its size in bytes; on
 * allocation failure *buf is NULL and *buflen is UINT_MAX. */
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
{
	char *out = (char *)malloc(sizeof(cmph_uint32));

	*buf = out;
	if (out == NULL)
	{
		*buflen = UINT_MAX;
		return;
	}
	*buflen = sizeof(cmph_uint32);
	memcpy(out, &(state->seed), sizeof(cmph_uint32));
	DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
}
|
||||||
|
|
||||||
|
/* Return a newly allocated duplicate of `src_state` (hash function tag and
 * seed), or NULL if the allocation fails.  The caller releases the copy
 * with jenkins_state_destroy(). */
jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state)
{
	jenkins_state_t *dest_state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
	if (!dest_state) return NULL;
	dest_state->hashfunc = src_state->hashfunc;
	dest_state->seed = src_state->seed;
	return dest_state;
}
|
||||||
|
|
||||||
|
/* Rebuild a state from the bytes written by jenkins_state_dump().
 *
 * `buf` must hold at least sizeof(cmph_uint32) bytes (the seed).  Returns
 * NULL when the buffer is missing or too short, or when the allocation
 * fails.  The seed is copied with memcpy because `buf` is a byte buffer
 * with no alignment guarantee. */
jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
{
	jenkins_state_t *state;

	if (!buf || buflen < sizeof(cmph_uint32)) return NULL;
	state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
	if (!state) return NULL;
	memcpy(&(state->seed), buf, sizeof(cmph_uint32));
	state->hashfunc = CMPH_HASH_JENKINS;
	DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
	return state;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||||
|
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
|
||||||
|
* \param state points to the jenkins function
|
||||||
|
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
|
||||||
|
*/
|
||||||
|
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
|
||||||
|
{
|
||||||
|
if (state && jenkins_packed)
|
||||||
|
{
|
||||||
|
memcpy(jenkins_packed, &(state->seed), sizeof(cmph_uint32));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state);
|
||||||
|
* \brief Return the amount of space needed to pack a jenkins function.
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_state_packed_size()
|
||||||
|
{
|
||||||
|
return sizeof(cmph_uint32);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param key is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
cmph_uint32 hashes[3];
|
||||||
|
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
|
||||||
|
return hashes[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param key is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
|
{
|
||||||
|
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
|
||||||
|
}
|
65
cmph/jenkins_hash.h
Normal file
65
cmph/jenkins_hash.h
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
#ifndef __JEKINS_HASH_H__
|
||||||
|
#define __JEKINS_HASH_H__
|
||||||
|
|
||||||
|
#include "hash.h"
|
||||||
|
|
||||||
|
typedef struct __jenkins_state_t
|
||||||
|
{
|
||||||
|
CMPH_HASH hashfunc;
|
||||||
|
cmph_uint32 seed;
|
||||||
|
} jenkins_state_t;
|
||||||
|
|
||||||
|
jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
|
* \param state is a pointer to a jenkins_state_t structure
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param state is a pointer to a jenkins_state_t structure
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
|
||||||
|
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
|
jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state);
|
||||||
|
jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
|
void jenkins_state_destroy(jenkins_state_t *state);
|
||||||
|
|
||||||
|
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||||
|
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
|
||||||
|
* \param state points to the jenkins function
|
||||||
|
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
|
||||||
|
*/
|
||||||
|
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_state_packed_size();
|
||||||
|
* \brief Return the amount of space needed to pack a jenkins function.
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_state_packed_size();
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
|
||||||
|
#endif
|
342
cmph/main.c
Normal file
342
cmph/main.c
Normal file
@ -0,0 +1,342 @@
|
|||||||
|
#ifdef WIN32
|
||||||
|
#include "wingetopt.h"
|
||||||
|
#else
|
||||||
|
#include <getopt.h>
|
||||||
|
#endif
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include "cmph.h"
|
||||||
|
#include "hash.h"
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
#define VERSION "0.8"
|
||||||
|
#else
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Print the one-line command synopsis to stderr; `prg` is the program name
 * shown in it. */
void usage(const char *prg)
{
	FILE *out = stderr;

	fprintf(out, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
}
|
||||||
|
void usage_long(const char *prg)
|
||||||
|
{
|
||||||
|
cmph_uint32 i;
|
||||||
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
|
||||||
|
fprintf(stderr, "Minimum perfect hashing tool\n\n");
|
||||||
|
fprintf(stderr, " -h\t print this help message\n");
|
||||||
|
fprintf(stderr, " -c\t c value determines:\n");
|
||||||
|
fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n");
|
||||||
|
fprintf(stderr, " \t * the number of bits per key required in the FCH algorithm\n");
|
||||||
|
fprintf(stderr, " \t * the load factor in the CHD_PH algorithm\n");
|
||||||
|
fprintf(stderr, " -a\t algorithm - valid values are\n");
|
||||||
|
for (i = 0; i < CMPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_names[i]);
|
||||||
|
fprintf(stderr, " -f\t hash function (may be used multiple times) - valid values are\n");
|
||||||
|
for (i = 0; i < CMPH_HASH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_hash_names[i]);
|
||||||
|
fprintf(stderr, " -V\t print version number and exit\n");
|
||||||
|
fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
|
||||||
|
fprintf(stderr, " -k\t number of keys\n");
|
||||||
|
fprintf(stderr, " -g\t generation mode\n");
|
||||||
|
fprintf(stderr, " -s\t random seed\n");
|
||||||
|
fprintf(stderr, " -m\t minimum perfect hash function file \n");
|
||||||
|
fprintf(stderr, " -M\t main memory availability (in MB) used in BRZ algorithm \n");
|
||||||
|
fprintf(stderr, " -d\t temporary directory used in BRZ algorithm \n");
|
||||||
|
fprintf(stderr, " -b\t the meaning of this parameter depends on the algorithm selected in the -a option:\n");
|
||||||
|
fprintf(stderr, " \t * For BRZ it is used to make the maximal number of keys in a bucket lower than 256.\n");
|
||||||
|
fprintf(stderr, " \t In this case its value should be an integer in the range [64,175]. Default is 128.\n\n");
|
||||||
|
fprintf(stderr, " \t * For BDZ it is used to determine the size of some precomputed rank\n");
|
||||||
|
fprintf(stderr, " \t information and its value should be an integer in the range [3,10]. Default\n");
|
||||||
|
fprintf(stderr, " \t is 7. The larger is this value, the more compact are the resulting functions\n");
|
||||||
|
fprintf(stderr, " \t and the slower are them at evaluation time.\n\n");
|
||||||
|
fprintf(stderr, " \t * For CHD and CHD_PH it is used to set the average number of keys per bucket\n");
|
||||||
|
fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n");
|
||||||
|
fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n");
|
||||||
|
fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n");
|
||||||
|
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
|
||||||
|
fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n");
|
||||||
|
fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n");
|
||||||
|
fprintf(stderr, " \t range [1,128]. Defaul is 1\n");
|
||||||
|
fprintf(stderr, " keysfile\t line separated file with keys\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
cmph_uint32 verbosity = 0;
|
||||||
|
char generate = 0;
|
||||||
|
char *mphf_file = NULL;
|
||||||
|
FILE *mphf_fd = stdout;
|
||||||
|
const char *keys_file = NULL;
|
||||||
|
FILE *keys_fd;
|
||||||
|
cmph_uint32 nkeys = UINT_MAX;
|
||||||
|
cmph_uint32 seed = UINT_MAX;
|
||||||
|
CMPH_HASH *hashes = NULL;
|
||||||
|
cmph_uint32 nhashes = 0;
|
||||||
|
cmph_uint32 i;
|
||||||
|
CMPH_ALGO mph_algo = CMPH_CHM;
|
||||||
|
double c = 0;
|
||||||
|
cmph_config_t *config = NULL;
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
char * tmp_dir = NULL;
|
||||||
|
cmph_io_adapter_t *source;
|
||||||
|
cmph_uint32 memory_availability = 0;
|
||||||
|
cmph_uint32 b = 0;
|
||||||
|
cmph_uint32 keys_per_bin = 1;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
char ch = (char)getopt(argc, argv, "hVvgc:k:a:M:b:t:f:m:d:s:");
|
||||||
|
if (ch == -1) break;
|
||||||
|
switch (ch)
|
||||||
|
{
|
||||||
|
case 's':
|
||||||
|
{
|
||||||
|
char *cptr;
|
||||||
|
seed = (cmph_uint32)strtoul(optarg, &cptr, 10);
|
||||||
|
if(*cptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid seed %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'c':
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
c = strtod(optarg, &endptr);
|
||||||
|
if(*endptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid c value %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'g':
|
||||||
|
generate = 1;
|
||||||
|
break;
|
||||||
|
case 'k':
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
nkeys = (cmph_uint32)strtoul(optarg, &endptr, 10);
|
||||||
|
if(*endptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid number of keys %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
mphf_file = strdup(optarg);
|
||||||
|
break;
|
||||||
|
case 'd':
|
||||||
|
tmp_dir = strdup(optarg);
|
||||||
|
break;
|
||||||
|
case 'M':
|
||||||
|
{
|
||||||
|
char *cptr;
|
||||||
|
memory_availability = (cmph_uint32)strtoul(optarg, &cptr, 10);
|
||||||
|
if(*cptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid memory availability %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'b':
|
||||||
|
{
|
||||||
|
char *cptr;
|
||||||
|
b = (cmph_uint32)strtoul(optarg, &cptr, 10);
|
||||||
|
if(*cptr != 0) {
|
||||||
|
fprintf(stderr, "Parameter b was not found: %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
{
|
||||||
|
char *cptr;
|
||||||
|
keys_per_bin = (cmph_uint32)strtoul(optarg, &cptr, 10);
|
||||||
|
if(*cptr != 0) {
|
||||||
|
fprintf(stderr, "Parameter t was not found: %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
++verbosity;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
|
printf("%s\n", VERSION);
|
||||||
|
return 0;
|
||||||
|
case 'h':
|
||||||
|
usage_long(argv[0]);
|
||||||
|
return 0;
|
||||||
|
case 'a':
|
||||||
|
{
|
||||||
|
char valid = 0;
|
||||||
|
for (i = 0; i < CMPH_COUNT; ++i)
|
||||||
|
{
|
||||||
|
if (strcmp(cmph_names[i], optarg) == 0)
|
||||||
|
{
|
||||||
|
mph_algo = i;
|
||||||
|
valid = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!valid)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
{
|
||||||
|
char valid = 0;
|
||||||
|
for (i = 0; i < CMPH_HASH_COUNT; ++i)
|
||||||
|
{
|
||||||
|
if (strcmp(cmph_hash_names[i], optarg) == 0)
|
||||||
|
{
|
||||||
|
hashes = (CMPH_HASH *)realloc(hashes, sizeof(CMPH_HASH) * ( nhashes + 2 ));
|
||||||
|
hashes[nhashes] = i;
|
||||||
|
hashes[nhashes + 1] = CMPH_HASH_COUNT;
|
||||||
|
++nhashes;
|
||||||
|
valid = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!valid)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Invalid hash function: %s\n", optarg);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optind != argc - 1)
|
||||||
|
{
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
keys_file = argv[optind];
|
||||||
|
|
||||||
|
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
||||||
|
srand(seed);
|
||||||
|
int ret = 0;
|
||||||
|
if (mphf_file == NULL)
|
||||||
|
{
|
||||||
|
mphf_file = (char *)malloc(strlen(keys_file) + 5);
|
||||||
|
memcpy(mphf_file, keys_file, strlen(keys_file));
|
||||||
|
memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
|
||||||
|
}
|
||||||
|
|
||||||
|
keys_fd = fopen(keys_file, "r");
|
||||||
|
|
||||||
|
if (keys_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
||||||
|
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
|
||||||
|
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
|
||||||
|
if (generate)
|
||||||
|
{
|
||||||
|
//Create mphf
|
||||||
|
mphf_fd = fopen(mphf_file, "w");
|
||||||
|
config = cmph_config_new(source);
|
||||||
|
cmph_config_set_algo(config, mph_algo);
|
||||||
|
if (nhashes) cmph_config_set_hashfuncs(config, hashes);
|
||||||
|
cmph_config_set_verbosity(config, verbosity);
|
||||||
|
cmph_config_set_tmp_dir(config, (cmph_uint8 *) tmp_dir);
|
||||||
|
cmph_config_set_mphf_fd(config, mphf_fd);
|
||||||
|
cmph_config_set_memory_availability(config, memory_availability);
|
||||||
|
cmph_config_set_b(config, b);
|
||||||
|
cmph_config_set_keys_per_bin(config, keys_per_bin);
|
||||||
|
|
||||||
|
//if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
|
||||||
|
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
|
||||||
|
if (c != 0) cmph_config_set_graphsize(config, c);
|
||||||
|
mphf = cmph_new(config);
|
||||||
|
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
if (mphf == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to create minimum perfect hashing function\n");
|
||||||
|
//cmph_config_destroy(config);
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mphf_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open output file %s: %s\n", mphf_file, strerror(errno));
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cmph_dump(mphf, mphf_fd);
|
||||||
|
cmph_destroy(mphf);
|
||||||
|
fclose(mphf_fd);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cmph_uint8 * hashtable = NULL;
|
||||||
|
mphf_fd = fopen(mphf_file, "r");
|
||||||
|
if (mphf_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
mphf = cmph_load(mphf_fd);
|
||||||
|
fclose(mphf_fd);
|
||||||
|
if (!mphf)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cmph_uint32 siz = cmph_size(mphf);
|
||||||
|
hashtable = (cmph_uint8*)calloc(siz, sizeof(cmph_uint8));
|
||||||
|
memset(hashtable, 0,(size_t) siz);
|
||||||
|
//check all keys
|
||||||
|
for (i = 0; i < source->nkeys; ++i)
|
||||||
|
{
|
||||||
|
cmph_uint32 h;
|
||||||
|
char *buf;
|
||||||
|
cmph_uint32 buflen = 0;
|
||||||
|
source->read(source->data, &buf, &buflen);
|
||||||
|
h = cmph_search(mphf, buf, buflen);
|
||||||
|
if (!(h < siz))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else if(hashtable[h] >= keys_per_bin)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "More than %u keys were mapped to bin %u\n", keys_per_bin, h);
|
||||||
|
fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else hashtable[h]++;
|
||||||
|
|
||||||
|
if (verbosity)
|
||||||
|
{
|
||||||
|
printf("%s -> %u\n", buf, h);
|
||||||
|
}
|
||||||
|
source->dispose(source->data, buf, buflen);
|
||||||
|
}
|
||||||
|
|
||||||
|
cmph_destroy(mphf);
|
||||||
|
free(hashtable);
|
||||||
|
}
|
||||||
|
fclose(keys_fd);
|
||||||
|
free(mphf_file);
|
||||||
|
free(tmp_dir);
|
||||||
|
cmph_io_nlfile_adapter_destroy(source);
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
67
cmph/miller_rabin.c
Normal file
67
cmph/miller_rabin.c
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#include "miller_rabin.h"
|
||||||
|
|
||||||
|
/* Modular exponentiation by repeated squaring: returns a^d mod n.
 * Products are formed in cmph_uint64 before the reduction, so they are only
 * exact while the operands stay below 2^32. */
static inline cmph_uint64 int_pow(cmph_uint64 a, cmph_uint64 d, cmph_uint64 n)
{
	cmph_uint64 base = a;
	cmph_uint64 acc = 1;

	for (; d > 0; d >>= 1)
	{
		/* the current low bit of the exponent selects this power of a */
		if (d & 1)
			acc = (acc * base) % n;
		base = (base * base) % n;
	}
	return acc;
}
|
||||||
|
|
||||||
|
/*
 * Miller-Rabin witness check.
 *
 * a_exp_d is a^d mod n, where n - 1 == 2^s * d. Returns 1 when the sequence
 * a^d, a^(2d), ..., a^(2^(s-1) d) (mod n) starts with 1 or contains n - 1,
 * i.e. when the base does not prove n composite; returns 0 otherwise.
 */
static inline cmph_uint8 check_witness(cmph_uint64 a_exp_d, cmph_uint64 n, cmph_uint64 s)
{
	const cmph_uint64 minus_one = n - 1;
	cmph_uint64 power = a_exp_d;
	cmph_uint64 remaining;

	if (power == 1 || power == minus_one)
		return 1;

	// At most s - 1 further squarings are examined.
	for (remaining = s; remaining > 1; remaining--)
	{
		power = (power * power) % n;
		if (power == minus_one)
			return 1;
	}
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Miller-Rabin primality test with the fixed bases 2, 7 and 61.
 *
 * Returns 1 when n is judged prime and 0 otherwise. This base set is reported
 * to be deterministic for n < 4,759,123,141 (see the Miller-Rabin literature);
 * int_pow/check_witness multiply in cmph_uint64 without overflow protection,
 * so much larger n are outside the reliable range.
 */
cmph_uint8 check_primality(cmph_uint64 n)
{
	static const cmph_uint64 small_primes[] = { 2, 3, 5, 7 };
	static const cmph_uint64 bases[] = { 2, 7, 61 };
	cmph_uint64 a, d, s;
	unsigned int i;

	// 0 and 1 are not prime. Rejecting 1 here is also required for
	// termination: n - 1 == 0 would never become odd in the loop below.
	if (n < 2)
		return 0;

	// Trial division; the small primes themselves must be accepted rather
	// than rejected as "divisible by a small prime".
	for (i = 0; i < sizeof(small_primes) / sizeof(small_primes[0]); i++)
	{
		if (n == small_primes[i])
			return 1;
		if ((n % small_primes[i]) == 0)
			return 0;
	}

	// We decompose the number n - 1 into 2^s * d with d odd. n is odd and
	// at least 11 here, so n - 1 is even and non-zero and the loop ends.
	s = 0;
	d = n - 1;
	do
	{
		s++;
		d /= 2;
	} while ((d % 2) == 0);

	for (i = 0; i < sizeof(bases) / sizeof(bases[0]); i++)
	{
		a = bases[i] % n;
		// A base that is a multiple of n (only n == 61 can get here) gives
		// a^d == 0 and would wrongly flag the prime as composite: skip it.
		if (a == 0)
			continue;
		if (check_witness(int_pow(a, d, n), n, s) == 0)
			return 0;
	}
	return 1;
}
|
||||||
|
|
||||||
|
|
5
cmph/miller_rabin.h
Normal file
5
cmph/miller_rabin.h
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
#ifndef _CMPH_MILLER_RABIN_H__
#define _CMPH_MILLER_RABIN_H__

#include "cmph_types.h"

/*
 * Miller-Rabin primality test (bases 2, 7 and 61).
 * Returns 1 when n is judged prime, 0 otherwise.
 */
cmph_uint8 check_primality(cmph_uint64 n);

#endif /* _CMPH_MILLER_RABIN_H__ */
|
49
cmph/sdbm_hash.c
Normal file
49
cmph/sdbm_hash.c
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#include "sdbm_hash.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
sdbm_state_t *sdbm_state_new()
|
||||||
|
{
|
||||||
|
sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
|
||||||
|
state->hashfunc = CMPH_HASH_SDBM;
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Releases a state obtained from sdbm_state_new/copy/load. */
void sdbm_state_destroy(sdbm_state_t *state)
{
	// The state owns no other resources, so a single free is enough.
	free(state);
}
|
||||||
|
|
||||||
|
/*
 * sdbm string hash.
 *
 * Folds the keylen bytes at k into a cmph_uint32 using the recurrence
 * hash = byte + (hash << 6) + (hash << 16) - hash, starting from 0.
 * The state argument is not consulted.
 */
cmph_uint32 sdbm_hash(sdbm_state_t *state, const char *k, cmph_uint32 keylen)
{
	const unsigned char *bytes = (unsigned char *)k;
	cmph_uint32 acc = 0;
	cmph_uint32 pos;

	for (pos = 0; pos < keylen; pos++)
		acc = bytes[pos] + (acc << 6) + (acc << 16) - acc;

	return acc;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Serializes the hash state. Nothing is written for sdbm: the call always
 * reports an empty dump (*buf == NULL, *buflen == 0).
 */
void sdbm_state_dump(sdbm_state_t *state, char **buf, cmph_uint32 *buflen)
{
	*buflen = 0;
	*buf = NULL;
}
|
||||||
|
|
||||||
|
/*
 * Duplicates a hash state.
 *
 * Returns a newly allocated state carrying the same hashfunc tag as
 * src_state, or NULL when the allocation fails. The caller releases the
 * copy with sdbm_state_destroy().
 */
sdbm_state_t *sdbm_state_copy(sdbm_state_t *src_state)
{
	sdbm_state_t *dest_state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));

	// Do not write through a failed allocation.
	if (dest_state == NULL)
		return NULL;
	dest_state->hashfunc = src_state->hashfunc;
	return dest_state;
}
|
||||||
|
|
||||||
|
/*
 * Rebuilds a hash state from a dump.
 *
 * buf and buflen are not read: a fresh state tagged CMPH_HASH_SDBM is
 * created. Returns the new state, or NULL when the allocation fails. The
 * caller releases it with sdbm_state_destroy().
 */
sdbm_state_t *sdbm_state_load(const char *buf, cmph_uint32 buflen)
{
	sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));

	// Do not write through a failed allocation.
	if (state == NULL)
		return NULL;
	state->hashfunc = CMPH_HASH_SDBM;
	return state;
}
|
18
cmph/sdbm_hash.h
Normal file
18
cmph/sdbm_hash.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef __SDBM_HASH_H__
#define __SDBM_HASH_H__

#include "hash.h"

/* State of the sdbm hash: only the tag identifying the hash function. */
typedef struct __sdbm_state_t
{
	CMPH_HASH hashfunc; // CMPH_HASH_SDBM for states made by sdbm_state_new/load
} sdbm_state_t;

/* Allocates a state tagged CMPH_HASH_SDBM; release with sdbm_state_destroy. */
sdbm_state_t *sdbm_state_new();

/* Hashes the keylen bytes at k; the state is not consulted. */
cmph_uint32 sdbm_hash(sdbm_state_t *state, const char *k, cmph_uint32 keylen);

/* Produces an empty dump: *buf is set to NULL and *buflen to 0. */
void sdbm_state_dump(sdbm_state_t *state, char **buf, cmph_uint32 *buflen);

/* Allocates a new state with the same hashfunc tag as src_state. */
sdbm_state_t *sdbm_state_copy(sdbm_state_t *src_state);

/* Allocates a state tagged CMPH_HASH_SDBM; buf and buflen are ignored. */
sdbm_state_t *sdbm_state_load(const char *buf, cmph_uint32 buflen);

/* Frees a state returned by the functions above. */
void sdbm_state_destroy(sdbm_state_t *state);

#endif /* __SDBM_HASH_H__ */
|
337
cmph/select.c
Normal file
337
cmph/select.c
Normal file
@ -0,0 +1,337 @@
|
|||||||
|
#include<stdlib.h>
|
||||||
|
#include<stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include "select_lookup_tables.h"
|
||||||
|
#include "select.h"
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
#ifndef STEP_SELECT_TABLE
|
||||||
|
#define STEP_SELECT_TABLE 128
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef NBITS_STEP_SELECT_TABLE
|
||||||
|
#define NBITS_STEP_SELECT_TABLE 7
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef MASK_STEP_SELECT_TABLE
|
||||||
|
#define MASK_STEP_SELECT_TABLE 0x7f // 0x7f = 127
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Appends a 0 bit to the staging word: the word is shifted right by one, so
 * the new (zero) bit enters at the most significant position.
 */
static inline void select_insert_0(cmph_uint32 * buffer)
{
	*buffer = *buffer >> 1;
}
|
||||||
|
|
||||||
|
/*
 * Appends a 1 bit to the staging word: the word is shifted right by one and
 * bit 31 (0x80000000) is set.
 */
static inline void select_insert_1(cmph_uint32 * buffer)
{
	*buffer = (*buffer >> 1) | 0x80000000;
}
|
||||||
|
|
||||||
|
/*
 * Puts a select structure into its empty state: no keys, no range and no
 * allocated vectors.
 */
void select_init(select_t * sel)
{
	sel->bits_vec = NULL;
	sel->select_table = NULL;
	sel->n = 0;
	sel->m = 0;
}
|
||||||
|
|
||||||
|
/*
 * Returns the size of the select structure in bits: the two counters n and
 * m, the bit vector (n + m bits rounded up to whole 32-bit words) and the
 * select table (one word per STEP_SELECT_TABLE keys, plus one).
 */
cmph_uint32 select_get_space_usage(select_t * sel)
{
	const cmph_uint32 word_bits = (cmph_uint32)sizeof(cmph_uint32) * 8;
	const cmph_uint32 total_bits = sel->n + sel->m;
	const cmph_uint32 vec_words = (total_bits + 31) >> 5;                       // ceil(total_bits / 32)
	const cmph_uint32 table_words = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1;    // n / STEP_SELECT_TABLE + 1

	return 2 * word_bits                 // n and m
	     + vec_words * word_bits
	     + table_words * word_bits;
}
|
||||||
|
|
||||||
|
/*
 * Frees the vectors owned by a select structure and clears the pointers so
 * that the structure can be destroyed again or regenerated safely. The
 * select_t object itself is not freed.
 */
void select_destroy(select_t * sel)
{
	free(sel->select_table);
	sel->select_table = NULL;

	free(sel->bits_vec);
	sel->bits_vec = NULL;
}
|
||||||
|
|
||||||
|
/*
 * Fills sel->select_table with one entry per STEP_SELECT_TABLE ones: entry k
 * receives the bit position, inside sel->bits_vec, of the one whose 0-based
 * rank is k * STEP_SELECT_TABLE. The bit vector is scanned a byte at a time
 * through the rank/select lookup tables, and the scan position is carried
 * over from one entry to the next.
 */
static inline void select_generate_sel_table(select_t * sel)
{
	cmph_uint8 * bytes = (cmph_uint8 *)sel->bits_vec;
	cmph_uint32 ones_seen = 0;      // ones contained in bytes [0, next_byte)
	cmph_uint32 ones_before = 0;    // value of ones_seen before the last byte was added
	cmph_uint32 next_byte = 0;
	cmph_uint32 slot = 0;
	cmph_uint32 target;             // rank of the one being located

	for (target = 0; target < sel->n; target += STEP_SELECT_TABLE)
	{
		// Advance until the byte holding the target one has been consumed.
		do
		{
			ones_before = ones_seen;
			ones_seen += rank_lookup_table[bytes[next_byte]];
			next_byte++;
		} while (ones_seen <= target);

		// Position inside that byte plus 8 * (index of the byte).
		sel->select_table[slot] = select_lookup_table[bytes[next_byte - 1]][target - ones_before] + ((next_byte - 1) << 3);
		slot++;
	}
}
|
||||||
|
|
||||||
|
/*
 * Builds the select structure for n keys taken from keys_vec.
 *
 * The keys are encoded in sel->bits_vec as follows: for each value i counted
 * up from 0, one 1 bit is emitted per key equal to i, then a 0 bit is emitted
 * to move on to i + 1. Encoding stops right after the 1 bit of the last key.
 * The scan only ever moves forward, so keys_vec is presumably expected to be
 * sorted in non-decreasing order with values in [0, m-1] -- TODO confirm with
 * the callers.
 *
 * Any vectors previously owned by sel are released first. With n == 0 no key
 * is read and the bit vector stays all zero. If an allocation fails the
 * structure is reset to the empty state (n == m == 0, no vectors) and the
 * function returns; the void signature gives no way to report the failure.
 */
void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m)
{
	cmph_uint32 i = 0;      // value currently being encoded
	cmph_uint32 j = 0;      // index of the next key to consume
	cmph_uint32 idx = 0;    // number of bits emitted so far
	cmph_uint32 buffer = 0; // staging word, flushed every 32 bits
	cmph_uint32 nbits;
	cmph_uint32 vec_size;
	cmph_uint32 sel_table_size;

	sel->n = n;
	sel->m = m; // n values in the range [0,m-1]

	nbits = sel->n + sel->m;
	vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
	sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)

	// free(NULL) is a no-op, so no guard is needed.
	free(sel->bits_vec);
	sel->bits_vec = (cmph_uint32 *)calloc(vec_size, sizeof(cmph_uint32));

	free(sel->select_table);
	sel->select_table = (cmph_uint32 *)calloc(sel_table_size, sizeof(cmph_uint32));

	// calloc may legitimately return NULL for a zero-sized request, so the
	// bit vector is only considered failed when words were actually needed.
	if ((vec_size != 0 && sel->bits_vec == NULL) || sel->select_table == NULL)
	{
		free(sel->bits_vec);
		free(sel->select_table);
		sel->bits_vec = NULL;
		sel->select_table = NULL;
		sel->n = 0;
		sel->m = 0;
		return;
	}

	// Without keys there is nothing to encode and keys_vec[0] must not be read.
	if (n == 0)
		goto loop_end;

	for (;;)
	{
		// One 1 bit per key equal to the current value.
		while (keys_vec[j] == i)
		{
			select_insert_1(&buffer);
			idx++;

			if ((idx & 0x1f) == 0) // (idx & 0x1f) = idx % 32
				sel->bits_vec[(idx >> 5) - 1] = buffer; // (idx >> 5) = idx/32
			j++;

			if (j == sel->n)
				goto loop_end;
		}

		if (i == sel->m)
			break;

		// One 0 bit per value skipped before the next key.
		while (keys_vec[j] > i)
		{
			select_insert_0(&buffer);
			idx++;

			if ((idx & 0x1f) == 0) // (idx & 0x1f) = idx % 32
				sel->bits_vec[(idx >> 5) - 1] = buffer; // (idx >> 5) = idx/32
			i++;
		}
	}

loop_end:
	// Flush a partially filled staging word, aligning its bits to bit 0.
	if ((idx & 0x1f) != 0) // (idx & 0x1f) = idx % 32
	{
		buffer >>= 32 - (idx & 0x1f);
		sel->bits_vec[(idx - 1) >> 5] = buffer;
	}

	select_generate_sel_table(sel);
}
|
||||||
|
|
||||||
|
/*
 * Core select query over a raw bit vector.
 *
 * bits_table is the bit vector viewed as bytes, select_table the sampled
 * positions of every STEP_SELECT_TABLE-th one. Returns the bit position of
 * the one whose 0-based rank is one_idx: the search starts at the sampled
 * position at or before it and then advances one byte at a time.
 */
static inline cmph_uint32 _select_query(cmph_uint8 * bits_table, cmph_uint32 * select_table, cmph_uint32 one_idx)
{
	// Sampled starting point: one_idx >> NBITS_STEP_SELECT_TABLE = one_idx / STEP_SELECT_TABLE
	const cmph_uint32 start_bit = select_table[one_idx >> NBITS_STEP_SELECT_TABLE];
	cmph_uint32 byte_pos = start_bit >> 3; // start_bit / 8
	cmph_uint32 wanted;
	cmph_uint32 ones_before = 0;
	cmph_uint32 ones_in_byte;

	// Rank still to be covered from the sample (one_idx % STEP_SELECT_TABLE),
	// plus the ones of the starting byte that lie below the sampled bit,
	// because counting restarts at the beginning of that byte.
	wanted = one_idx & MASK_STEP_SELECT_TABLE;
	wanted += rank_lookup_table[bits_table[byte_pos] & ((1 << (start_bit & 0x7)) - 1)];

	// Skip whole bytes until the one containing the wanted one is reached.
	ones_in_byte = rank_lookup_table[bits_table[byte_pos]];
	while (ones_before + ones_in_byte <= wanted)
	{
		ones_before += ones_in_byte;
		byte_pos++;
		ones_in_byte = rank_lookup_table[bits_table[byte_pos]];
	}

	return select_lookup_table[bits_table[byte_pos]][wanted - ones_before] + (byte_pos << 3);
}
|
||||||
|
|
||||||
|
/*
 * Returns the bit position, in sel's bit vector, of the one whose 0-based
 * rank is one_idx.
 */
cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx)
{
	cmph_uint8 * bytes = (cmph_uint8 *)sel->bits_vec;

	return _select_query(bytes, sel->select_table, one_idx);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Core "next one" query over a raw bit vector.
 *
 * Counts the ones below vec_bit_idx inside its byte, adds one, and returns
 * the position of the one with that 0-based rank counted from the start of
 * that byte. When the bit at vec_bit_idx is itself set (presumably the
 * intended use: a position returned by a previous select query), this is
 * the position of the following set bit.
 */
static inline cmph_uint32 _select_next_query(cmph_uint8 * bits_table, cmph_uint32 vec_bit_idx)
{
	cmph_uint32 byte_pos = vec_bit_idx >> 3;
	cmph_uint32 ones_before = 0;
	cmph_uint32 ones_in_byte;
	cmph_uint32 wanted;

	wanted = rank_lookup_table[bits_table[byte_pos] & ((1U << (vec_bit_idx & 0x7)) - 1U)] + 1U;

	// Skip whole bytes until the one containing the wanted one is reached.
	ones_in_byte = rank_lookup_table[bits_table[byte_pos]];
	while (ones_before + ones_in_byte <= wanted)
	{
		ones_before += ones_in_byte;
		byte_pos++;
		ones_in_byte = rank_lookup_table[bits_table[byte_pos]];
	}

	return select_lookup_table[bits_table[byte_pos]][wanted - ones_before] + (byte_pos << 3);
}
|
||||||
|
|
||||||
|
/*
 * Runs the "next one" query on sel's bit vector, starting from bit position
 * vec_bit_idx.
 */
cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx)
{
	cmph_uint8 * bytes = (cmph_uint8 *)sel->bits_vec;

	return _select_next_query(bytes, vec_bit_idx);
}
|
||||||
|
|
||||||
|
/*
 * Serializes a select structure into a newly allocated buffer.
 *
 * Layout: n, m, the bit vector words, then the select table words. On
 * success *buf points to the buffer (owned by the caller, release with
 * free) and *buflen holds its size in bytes. When the allocation fails
 * *buf is NULL and *buflen is set to UINT_MAX.
 */
void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen)
{
	const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
	const cmph_uint32 total_bits = sel->n + sel->m;
	const cmph_uint32 vec_bytes = ((total_bits + 31) >> 5) * word;                       // ceil(total_bits / 32) words
	const cmph_uint32 table_bytes = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * word;    // n / STEP_SELECT_TABLE + 1 words
	char *out;

	*buflen = 2 * word + vec_bytes + table_bytes;
	*buf = (char *)calloc(*buflen, sizeof(char));
	if (*buf == NULL)
	{
		*buflen = UINT_MAX;
		return;
	}

	out = *buf;
	memcpy(out, &(sel->n), sizeof(cmph_uint32));
	out += word;
	memcpy(out, &(sel->m), sizeof(cmph_uint32));
	out += word;
	memcpy(out, sel->bits_vec, vec_bytes);
	out += vec_bytes;
	memcpy(out, sel->select_table, table_bytes);

	DEBUGP("Dumped select structure with size %u bytes\n", *buflen);
}
|
||||||
|
|
||||||
|
/*
 * Rebuilds a select structure from a buffer in the layout written by
 * select_dump: n, m, the bit vector words, then the select table words.
 *
 * Vectors previously owned by sel are released first. If an allocation
 * fails the structure is reset to the empty state (n == m == 0, no vectors)
 * and the function returns; the void signature gives no way to report the
 * failure.
 *
 * NOTE(review): buflen is only used for the debug message; the amount of
 * data read is derived from the n and m stored in buf and is not checked
 * against buflen.
 */
void select_load(select_t * sel, const char *buf, cmph_uint32 buflen)
{
	const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
	cmph_uint32 pos = 0;
	cmph_uint32 nbits;
	cmph_uint32 vec_size;
	cmph_uint32 sel_table_size;

	memcpy(&(sel->n), buf, sizeof(cmph_uint32));
	pos += word;
	memcpy(&(sel->m), buf + pos, sizeof(cmph_uint32));
	pos += word;

	nbits = sel->n + sel->m;
	vec_size = ((nbits + 31) >> 5) * word; // (nbits + 31) >> 5 = (nbits + 31)/32
	sel_table_size = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * word; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)

	// free(NULL) is a no-op, so no guard is needed.
	free(sel->bits_vec);
	sel->bits_vec = (cmph_uint32 *)calloc(vec_size/sizeof(cmph_uint32), sizeof(cmph_uint32));

	free(sel->select_table);
	sel->select_table = (cmph_uint32 *)calloc(sel_table_size/sizeof(cmph_uint32), sizeof(cmph_uint32));

	// calloc may legitimately return NULL for a zero-sized request, so the
	// bit vector is only considered failed when bytes were actually needed.
	if ((vec_size != 0 && sel->bits_vec == NULL) || sel->select_table == NULL)
	{
		free(sel->bits_vec);
		free(sel->select_table);
		sel->bits_vec = NULL;
		sel->select_table = NULL;
		sel->n = 0;
		sel->m = 0;
		return;
	}

	// Skip the copy for an empty vector: the destination may be NULL then.
	if (vec_size != 0)
		memcpy(sel->bits_vec, buf + pos, vec_size);
	pos += vec_size;
	memcpy(sel->select_table, buf + pos, sel_table_size);

	DEBUGP("Loaded select structure with size %u bytes\n", buflen);
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void select_pack(select_t *sel, void *sel_packed);
|
||||||
|
* \brief Support the ability to pack a select structure function into a preallocated contiguous memory space pointed by sel_packed.
|
||||||
|
* \param sel points to the select structure
|
||||||
|
* \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size
|
||||||
|
*/
|
||||||
|
void select_pack(select_t *sel, void *sel_packed)
|
||||||
|
{
|
||||||
|
if (sel && sel_packed)
|
||||||
|
{
|
||||||
|
char *buf = NULL;
|
||||||
|
cmph_uint32 buflen = 0;
|
||||||
|
select_dump(sel, &buf, &buflen);
|
||||||
|
memcpy(sel_packed, buf, buflen);
|
||||||
|
free(buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 select_packed_size(select_t *sel);
|
||||||
|
* \brief Return the amount of space needed to pack a select structure.
|
||||||
|
* \return the size of the packed select structure or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 select_packed_size(select_t *sel)
|
||||||
|
{
|
||||||
|
register cmph_uint32 nbits = sel->n + sel->m;
|
||||||
|
register cmph_uint32 vec_size = ((nbits + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32); // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||||
|
register cmph_uint32 sel_table_size = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * (cmph_uint32)sizeof(cmph_uint32); // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||||
|
return 2*(cmph_uint32)sizeof(cmph_uint32) + vec_size + sel_table_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Select query on a packed structure (layout: n, m, bit vector words,
 * select table words). Returns the bit position of the one whose 0-based
 * rank is one_idx.
 */
cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx)
{
	cmph_uint32 *words = (cmph_uint32 *)sel_packed;
	const cmph_uint32 n = words[0];
	const cmph_uint32 m = words[1];
	const cmph_uint32 vec_words = ((n + m) + 31) >> 5; // ceil((n + m) / 32)
	cmph_uint32 *payload = words + 2;                  // first word after n and m

	return _select_query((cmph_uint8 *)payload, payload + vec_words, one_idx);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * "Next one" query on a packed structure: the bit vector starts 8 bytes into
 * the packed area, right after the counters n and m.
 */
cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx)
{
	cmph_uint8 * bytes = (cmph_uint8 *)sel_packed + 8; // skipping n and m

	return _select_next_query(bytes, vec_bit_idx);
}
|
61
cmph/select.h
Normal file
61
cmph/select.h
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#ifndef __CMPH_SELECT_H__
#define __CMPH_SELECT_H__

#include "cmph_types.h"

/* Select structure: a bit vector of n + m bits plus a sampled select table. */
struct _select_t
{
	cmph_uint32 n, m;           // number of keys and size of their value range
	cmph_uint32 * bits_vec;     // bit vector, (n + m + 31) / 32 words
	cmph_uint32 * select_table; // position of every STEP_SELECT_TABLE-th one
};

typedef struct _select_t select_t;

/* Puts sel into the empty state (no keys, no vectors). */
void select_init(select_t * sel);

/* Frees the vectors owned by sel; the select_t object itself is not freed. */
void select_destroy(select_t * sel);

/* Builds the structure from the n keys in keys_vec, with values in [0, m-1]. */
void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m);

/* Returns the bit position of the one whose 0-based rank is one_idx. */
cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx);

/* Returns the position of the set bit following the one at vec_bit_idx. */
cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx);

/* Returns the size of the structure in bits. */
cmph_uint32 select_get_space_usage(select_t * sel);

/* Serializes sel into a newly allocated buffer returned through buf/buflen. */
void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen);

/* Rebuilds sel from a buffer produced by select_dump. */
void select_load(select_t * sel, const char *buf, cmph_uint32 buflen);


/** \fn void select_pack(select_t *sel, void *sel_packed);
 *  \brief Support the ability to pack a select structure into a preallocated contiguous memory space pointed by sel_packed.
 *  \param sel points to the select structure
 *  \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size
 */
void select_pack(select_t *sel, void *sel_packed);

/** \fn cmph_uint32 select_packed_size(select_t *sel);
 *  \brief Return the amount of space needed to pack a select structure.
 *  \return the size in bytes of the packed select structure
 */
cmph_uint32 select_packed_size(select_t *sel);


/** \fn cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
 *  \param sel_packed is a pointer to a contiguous memory area
 *  \param one_idx is the rank for which we want to calculate the inverse function select
 *  \return an integer that represents the select value of rank one_idx.
 */
cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);


/** \fn cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
 *  \param sel_packed is a pointer to a contiguous memory area
 *  \param vec_bit_idx is a value previously computed by @see select_query_packed
 *  \return an integer that represents the next select value greater than vec_bit_idx.
 */
cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);

#endif /* __CMPH_SELECT_H__ */
|
170
cmph/select_lookup_tables.h
Normal file
170
cmph/select_lookup_tables.h
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
#ifndef SELECT_LOOKUP_TABLES
|
||||||
|
#define SELECT_LOOKUP_TABLES
|
||||||
|
|
||||||
|
#include "cmph_types.h"
|
||||||
|
|
||||||
|
/*
rank_lookup_table[i] simply gives the number of bits set to one in the byte of value i.
For example, if i = 01010101 in binary, then we have:
rank_lookup_table[i] = 4
*/
|
||||||
|
|
||||||
|
static cmph_uint8 rank_lookup_table[256] ={
|
||||||
|
0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4
|
||||||
|
, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
|
||||||
|
, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
|
||||||
|
, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
|
||||||
|
, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
|
||||||
|
, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
|
||||||
|
, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
|
||||||
|
, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
|
||||||
|
, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
|
||||||
|
, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
|
||||||
|
, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
|
||||||
|
, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
|
||||||
|
, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
|
||||||
|
, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
|
||||||
|
, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
|
||||||
|
, 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7 , 5 , 6 , 6 , 7 , 6 , 7 , 7 , 8
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
select_lookup_table[i][j] simply gives the index of the j'th bit set to one in the byte of value i.
For example, if i = 01010101 in binary, then we have:
select_lookup_table[i][0] = 0, the first bit set to one is at position 0
select_lookup_table[i][1] = 2, the second bit set to one is at position 2
select_lookup_table[i][2] = 4, the third bit set to one is at position 4
select_lookup_table[i][3] = 6, the fourth bit set to one is at position 6
select_lookup_table[i][4] = 255, there are no more than 4 bits set to one in i, so we return the escape value 255.
*/
|
||||||
|
static cmph_uint8 select_lookup_table[256][8]={
|
||||||
|
{ 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 4 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 255 , 255 , 255 } ,
|
||||||
|
{ 5 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 5 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 255 , 255 , 255 } ,
|
||||||
|
{ 4 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 5 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 5 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 5 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 5 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 255 , 255 } ,
|
||||||
|
{ 6 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 6 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 6 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 6 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 4 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 6 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 6 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 6 , 255 , 255 } ,
|
||||||
|
{ 5 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 5 , 6 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 6 , 255 , 255 } ,
|
||||||
|
{ 4 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 5 , 6 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 6 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 6 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 5 , 6 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 6 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 5 , 6 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 6 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 5 , 6 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 255 } ,
|
||||||
|
{ 7 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 7 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 7 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 7 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 4 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 7 , 255 , 255 } ,
|
||||||
|
{ 5 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 5 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 7 , 255 , 255 } ,
|
||||||
|
{ 4 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 5 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 7 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 5 , 7 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 7 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 5 , 7 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 7 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 5 , 7 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 7 , 255 } ,
|
||||||
|
{ 6 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 6 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 6 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 3 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 6 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 4 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 3 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 6 , 7 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 6 , 7 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 6 , 7 , 255 } ,
|
||||||
|
{ 5 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 2 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 3 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 2 , 3 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 5 , 6 , 7 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 6 , 7 , 255 } ,
|
||||||
|
{ 4 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } ,
|
||||||
|
{ 1 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 2 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 1 , 2 , 4 , 5 , 6 , 7 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 6 , 7 , 255 } ,
|
||||||
|
{ 3 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 6 , 7 , 255 , 255 } ,
|
||||||
|
{ 1 , 3 , 4 , 5 , 6 , 7 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 6 , 7 , 255 } ,
|
||||||
|
{ 2 , 3 , 4 , 5 , 6 , 7 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 6 , 7 , 255 } ,
|
||||||
|
{ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 } };
|
||||||
|
|
||||||
|
#endif
|
51
cmph/vqueue.c
Normal file
51
cmph/vqueue.c
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#include "vqueue.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
struct __vqueue_t
|
||||||
|
{
|
||||||
|
cmph_uint32 * values;
|
||||||
|
cmph_uint32 beg, end, capacity;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Creates an empty queue that can hold up to `capacity` values at once.
 *
 * One more slot than requested is allocated: vqueue_insert() treats
 * "end + 1 == beg" as the full condition, so one slot always stays unused.
 *
 * Returns the new queue (to be released with vqueue_destroy()), or NULL if
 * memory cannot be allocated or `capacity + 1` does not fit in a cmph_uint32.
 */
vqueue_t * vqueue_new(cmph_uint32 capacity)
{
	vqueue_t *q;
	cmph_uint32 slots = capacity + 1;

	/* The sum wrapped around: a slot count of 0 would later be used as a
	 * modulus by the insert/remove operations. */
	if (slots == 0) return NULL;

	q = (vqueue_t *)malloc(sizeof(vqueue_t));
	if (!q) return NULL;

	q->values = (cmph_uint32 *)calloc((size_t)slots, sizeof(cmph_uint32));
	if (!q->values)
	{
		/* Do not hand out a queue without storage. */
		free(q);
		return NULL;
	}
	q->beg = q->end = 0;
	q->capacity = slots;
	return q;
}
|
||||||
|
|
||||||
|
/* Returns 1 when the queue holds no values and 0 otherwise. */
cmph_uint8 vqueue_is_empty(vqueue_t * q)
{
	if (q->beg == q->end)
	{
		return 1;
	}
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Appends `val` at the tail of the queue.
 * The queue must not be full; this is only verified by an assertion.
 */
void vqueue_insert(vqueue_t * q, cmph_uint32 val)
{
	cmph_uint32 tail;

	assert((q->end + 1)%q->capacity != q->beg); // Is queue full?

	/* Advance the tail (wrapping at the end of the array) and store there. */
	tail = (q->end + 1)%q->capacity;
	q->values[tail] = val;
	q->end = tail;
}
|
||||||
|
|
||||||
|
/*
 * Removes and returns the oldest value in the queue.
 * The queue must not be empty; this is only verified by an assertion.
 */
cmph_uint32 vqueue_remove(vqueue_t * q)
{
	cmph_uint32 head;

	assert(!vqueue_is_empty(q)); // Is queue empty?

	/* `beg` trails the oldest value by one slot, so step first, then read. */
	head = (q->beg + 1)%q->capacity;
	q->beg = head;
	return q->values[head];
}
|
||||||
|
|
||||||
|
/* Writes the queued values to stderr, oldest first, one per line. */
void vqueue_print(vqueue_t * q)
{
	cmph_uint32 pos = q->beg;

	while (pos != q->end)
	{
		/* Step to the next occupied slot, wrapping at the end of the array. */
		pos = (pos + 1)%q->capacity;
		fprintf(stderr, "%u\n", q->values[pos]);
	}
}
|
||||||
|
|
||||||
|
/*
 * Releases a queue created by vqueue_new() together with its storage.
 * Passing NULL is allowed and does nothing, mirroring free().
 */
void vqueue_destroy(vqueue_t *q)
{
	if (!q) return;
	free(q->values);
	free(q);
}
|
18
cmph/vqueue.h
Normal file
18
cmph/vqueue.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef __CMPH_VQUEUE_H__
#define __CMPH_VQUEUE_H__

#include "cmph_types.h"

/* Opaque circular queue of cmph_uint32 values; defined in vqueue.c. */
typedef struct __vqueue_t vqueue_t;

/* Lifetime: create a queue with room for `capacity` values / release it. */
vqueue_t * vqueue_new(cmph_uint32 capacity);
void vqueue_destroy(vqueue_t * q);

/* Returns non-zero when the queue holds no values. */
cmph_uint8 vqueue_is_empty(vqueue_t * q);

/* Appends `val` at the tail; the queue must not be full. */
void vqueue_insert(vqueue_t * q, cmph_uint32 val);

/* Removes and returns the oldest value; the queue must not be empty. */
cmph_uint32 vqueue_remove(vqueue_t * q);

/* Dumps the queued values to stderr, one per line. */
void vqueue_print(vqueue_t * q);

#endif
|
79
cmph/vstack.c
Normal file
79
cmph/vstack.c
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
#include "vstack.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
struct __vstack_t
|
||||||
|
{
|
||||||
|
cmph_uint32 pointer;
|
||||||
|
cmph_uint32 *values;
|
||||||
|
cmph_uint32 capacity;
|
||||||
|
};
|
||||||
|
|
||||||
|
vstack_t *vstack_new()
|
||||||
|
{
|
||||||
|
vstack_t *stack = (vstack_t *)malloc(sizeof(vstack_t));
|
||||||
|
assert(stack);
|
||||||
|
stack->pointer = 0;
|
||||||
|
stack->values = NULL;
|
||||||
|
stack->capacity = 0;
|
||||||
|
return stack;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Releases a stack created by vstack_new() along with its value buffer. */
void vstack_destroy(vstack_t *stack)
{
	cmph_uint32 *buffer;

	assert(stack);
	buffer = stack->values;
	free(buffer);	/* free(NULL) is a no-op, so a never-used stack is fine */
	free(stack);
}
|
||||||
|
|
||||||
|
/* Pushes `val` on top of the stack, growing the buffer when needed. */
void vstack_push(vstack_t *stack, cmph_uint32 val)
{
	cmph_uint32 slot;

	assert(stack);
	slot = stack->pointer;
	/* Make sure slot `slot` exists before writing to it. */
	vstack_reserve(stack, slot + 1);
	stack->values[slot] = val;
	stack->pointer = slot + 1;
}
|
||||||
|
/*
 * Discards the value on top of the stack.
 * The stack must not be empty; this is only verified by an assertion.
 */
void vstack_pop(vstack_t *stack)
{
	assert(stack);
	assert(stack->pointer > 0);
	stack->pointer -= 1;
}
|
||||||
|
|
||||||
|
/*
 * Returns the value on top of the stack without removing it.
 * The stack must not be empty; this is only verified by an assertion.
 */
cmph_uint32 vstack_top(vstack_t *stack)
{
	cmph_uint32 last;

	assert(stack);
	assert(stack->pointer > 0);
	last = stack->pointer - 1;
	return stack->values[last];
}
|
||||||
|
/* Returns 1 when the stack holds no values and 0 otherwise. */
int vstack_empty(vstack_t *stack)
{
	assert(stack);
	if (stack->pointer == 0)
	{
		return 1;
	}
	return 0;
}
|
||||||
|
/* Returns the number of values currently stored in the stack. */
cmph_uint32 vstack_size(vstack_t *stack)
{
	/* Same precondition check as every other accessor in this file. */
	assert(stack);
	return stack->pointer;
}
|
||||||
|
/*
 * Ensures the stack has room for at least `size` values.
 *
 * When the buffer is too small its capacity is grown from `capacity + 1` by
 * repeated doubling until it reaches `size`. Stored values are preserved.
 *
 * The function has no way to report failure, and a caller such as
 * vstack_push() writes into the buffer right after it returns, so running
 * out of memory terminates the program with abort().
 */
void vstack_reserve(vstack_t *stack, cmph_uint32 size)
{
	assert(stack);
	if (stack->capacity < size)
	{
		const cmph_uint32 max_capacity = (cmph_uint32)-1;
		cmph_uint32 new_capacity = stack->capacity + 1;
		cmph_uint32 *new_values = NULL;
		DEBUGP("Increasing current capacity %u to %u\n", stack->capacity, size);
		while (new_capacity < size)
		{
			if (new_capacity > max_capacity / 2)
			{
				/* Doubling would wrap around to a smaller value (eventually 0,
				 * which never reaches `size`); settle for exactly `size`. */
				new_capacity = size;
				break;
			}
			new_capacity *= 2;
		}
		/* Only allocate if the byte count is representable in a size_t. */
		if (new_capacity <= ((size_t)-1) / sizeof(cmph_uint32))
		{
			/* Keep the old pointer until realloc is known to have succeeded. */
			new_values = (cmph_uint32 *)realloc(stack->values, sizeof(cmph_uint32)*new_capacity);
		}
		if (!new_values)
		{
			abort();
		}
		stack->values = new_values;
		stack->capacity = new_capacity;
		DEBUGP("Increased\n");
	}
}
|
||||||
|
|
18
cmph/vstack.h
Normal file
18
cmph/vstack.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef __CMPH_VSTACK_H__
#define __CMPH_VSTACK_H__

#include "cmph_types.h"

/* Opaque growable stack of cmph_uint32 values; defined in vstack.c. */
typedef struct __vstack_t vstack_t;

/* Creates an empty stack. Declared with (void) so that the compiler can
 * reject calls that pass arguments. */
vstack_t *vstack_new(void);

/* Releases a stack and its storage. */
void vstack_destroy(vstack_t *stack);

/* Pushes `val` on top of the stack, growing it when needed. */
void vstack_push(vstack_t *stack, cmph_uint32 val);

/* Returns the top value without removing it; the stack must not be empty. */
cmph_uint32 vstack_top(vstack_t *stack);

/* Discards the top value; the stack must not be empty. */
void vstack_pop(vstack_t *stack);

/* Returns non-zero when the stack holds no values. */
int vstack_empty(vstack_t *stack);

/* Returns the number of values stored. */
cmph_uint32 vstack_size(vstack_t *stack);

/* Ensures there is room for at least `size` values. */
void vstack_reserve(vstack_t *stack, cmph_uint32 size);

#endif
|
179
cmph/wingetopt.c
Normal file
179
cmph/wingetopt.c
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
#ifdef WIN32
|
||||||
|
/*****************************************************************************
|
||||||
|
*
|
||||||
|
* MODULE NAME : GETOPT.C
|
||||||
|
*
|
||||||
|
* COPYRIGHTS:
|
||||||
|
* This module contains code made available by IBM
|
||||||
|
* Corporation on an AS IS basis. Any one receiving the
|
||||||
|
* module is considered to be licensed under IBM copyrights
|
||||||
|
* to use the IBM-provided source code in any way he or she
|
||||||
|
* deems fit, including copying it, compiling it, modifying
|
||||||
|
* it, and redistributing it, with or without
|
||||||
|
* modifications. No license under any IBM patents or
|
||||||
|
* patent applications is to be implied from this copyright
|
||||||
|
* license.
|
||||||
|
*
|
||||||
|
* A user of the module should understand that IBM cannot
|
||||||
|
* provide technical support for the module and will not be
|
||||||
|
* responsible for any consequences of use of the program.
|
||||||
|
*
|
||||||
|
* Any notices, including this one, are not to be removed
|
||||||
|
* from the module without the prior written consent of
|
||||||
|
* IBM.
|
||||||
|
*
|
||||||
|
* AUTHOR: Original author:
|
||||||
|
* G. R. Blair (BOBBLAIR at AUSVM1)
|
||||||
|
* Internet: bobblair@bobblair.austin.ibm.com
|
||||||
|
*
|
||||||
|
* Extensively revised by:
|
||||||
|
* John Q. Walker II, Ph.D. (JOHHQ at RALVM6)
|
||||||
|
* Internet: johnq@ralvm6.vnet.ibm.com
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
* getopt()
|
||||||
|
*
|
||||||
|
* The getopt() function is a command line parser. It returns the next
|
||||||
|
* option character in argv that matches an option character in opstring.
|
||||||
|
*
|
||||||
|
* The argv argument points to an array of argc+1 elements containing argc
|
||||||
|
* pointers to character strings followed by a null pointer.
|
||||||
|
*
|
||||||
|
* The opstring argument points to a string of option characters; if an
|
||||||
|
* option character is followed by a colon, the option is expected to have
|
||||||
|
* an argument that may or may not be separated from it by white space.
|
||||||
|
* The external variable optarg is set to point to the start of the option
|
||||||
|
* argument on return from getopt().
|
||||||
|
*
|
||||||
|
* The getopt() function places in optind the argv index of the next argument
|
||||||
|
* to be processed. The system initializes the external variable optind to
|
||||||
|
* 1 before the first call to getopt().
|
||||||
|
*
|
||||||
|
* When all options have been processed (that is, up to the first nonoption
|
||||||
|
* argument), getopt() returns EOF. The special option "--" may be used to
|
||||||
|
* delimit the end of the options; EOF will be returned, and "--" will be
|
||||||
|
* skipped.
|
||||||
|
*
|
||||||
|
* The getopt() function returns a question mark (?) when it encounters an
|
||||||
|
* option character not included in opstring. This error message can be
|
||||||
|
* disabled by setting opterr to zero. Otherwise, it returns the option
|
||||||
|
* character that was detected.
|
||||||
|
*
|
||||||
|
* If the special option "--" is detected, or all options have been
|
||||||
|
* processed, EOF is returned.
|
||||||
|
*
|
||||||
|
* Options are marked by either a minus sign (-) or a slash (/).
|
||||||
|
*
|
||||||
|
* No errors are defined.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h> /* for EOF */
|
||||||
|
#include <string.h> /* for strchr() */
|
||||||
|
|
||||||
|
/* static (global) variables that are specified as exported by getopt() */
extern char *optarg;    /* pointer to the start of the option argument */
extern int   optind;    /* number of the next argv[] to be evaluated */
extern int   opterr;    /* non-zero if a question mark should be returned
                           when a non-valid option character is detected */

/* handle possible future character set concerns by putting this in a macro */
#define _next_char(string)  (char)(*(string+1))

/*
 * Returns the next option letter found in argv, or EOF once option
 * processing is over.
 *
 *   argc, argv  the command line; scanning resumes at argv[optind]
 *   opstring    accepted option letters; a letter followed by ':' takes an
 *               argument, which is returned through optarg
 *
 * Options start with '-' or '/' and several letters may share one argv
 * element. The position inside such an element is kept in a static pointer
 * between calls, so the function is not reentrant.
 *
 * Return value:
 *   - the option letter, with optarg pointing at its argument (or NULL when
 *     the option takes none);
 *   - '?' for a letter that is not in opstring, for a ':' on the command
 *     line, or for a missing option argument when opterr is non-zero;
 *     the offending character itself when opterr is zero;
 *   - EOF when argv is exhausted, when the next element does not start an
 *     option (optind is left pointing at it), or when "-" or "--" is found
 *     (optind is advanced past it).
 *
 * A lone "/" carries no option letter and is treated as a non-option
 * argument.
 */
int getopt(int argc, char *argv[], char *opstring)
{
  static char *pIndexPosition = NULL; /* place inside current argv string */
  char *pArgString = NULL;            /* where to start from next */
  char *pOptString;                   /* the string in our program */

  if (pIndexPosition != NULL) {
    /* we last left off inside an argv string */
    if (*(++pIndexPosition)) {
      /* there is more to come in the most recent argv */
      pArgString = pIndexPosition;
    }
  }

  if (pArgString == NULL) {
    /* we didn't leave off in the middle of an argv string */
    if (optind >= argc) {
      /* every command-line argument has already been consumed */
      pIndexPosition = NULL;  /* not in the middle of anything */
      return EOF;             /* used up all command-line arguments */
    }

    /*---------------------------------------------------------------------
     * If the next argv[] is not an option, there can be no more options.
     *-------------------------------------------------------------------*/
    pArgString = argv[optind++];  /* set this to the next argument ptr */

    /*
     * Not an option: the element does not start with a slash or a dash, or
     * it is a bare "/". Without the second test the empty remainder of "/"
     * would be looked up with strchr(), which matches the terminator of
     * opstring and leads to reads past the end of both strings.
     */
    if ((('/' != *pArgString) && ('-' != *pArgString)) ||
        (strcmp(pArgString, "/") == 0)) {
      --optind;               /* point to current arg once we're done */
      optarg = NULL;          /* no argument follows the option */
      pIndexPosition = NULL;  /* not in the middle of anything */
      return EOF;             /* used up all the command-line flags */
    }

    /* check for special end-of-flags markers */
    if ((strcmp(pArgString, "-") == 0) ||
        (strcmp(pArgString, "--") == 0)) {
      optarg = NULL;          /* no argument follows the option */
      pIndexPosition = NULL;  /* not in the middle of anything */
      return EOF;             /* encountered the special flag */
    }

    pArgString++;             /* look past the / or - */
  }

  if (':' == *pArgString) {   /* is it a colon? */
    /*---------------------------------------------------------------------
     * Rare case: if opterr is non-zero, return a question mark;
     * otherwise, just return the colon we're on.
     *-------------------------------------------------------------------*/
    return (opterr ? (int)'?' : (int)':');
  }
  else if ((pOptString = strchr(opstring, *pArgString)) == NULL) {
    /*---------------------------------------------------------------------
     * The letter on the command-line wasn't any good.
     *-------------------------------------------------------------------*/
    optarg = NULL;            /* no argument follows the option */
    pIndexPosition = NULL;    /* not in the middle of anything */
    return (opterr ? (int)'?' : (int)*pArgString);
  }
  else {
    /*---------------------------------------------------------------------
     * The letter on the command-line matches one we expect to see
     *-------------------------------------------------------------------*/
    if (':' == _next_char(pOptString)) {  /* is the next letter a colon? */
      /* It is a colon.  Look for an argument string. */
      if ('\0' != _next_char(pArgString)) {  /* argument in this argv? */
        optarg = &pArgString[1];             /* Yes, it is */
      }
      else {
        /*-------------------------------------------------------------
         * The argument string must be in the next argv.
         * But, what if there is none (bad input from the user)?
         * In that case optarg is set to NULL and '?' is returned
         * (or the letter itself when opterr is zero).
         *-----------------------------------------------------------*/
        if (optind < argc)
          optarg = argv[optind++];
        else {
          optarg = NULL;
          return (opterr ? (int)'?' : (int)*pArgString);
        }
      }
      pIndexPosition = NULL;  /* not in the middle of anything */
    }
    else {
      /* it's not a colon, so just return the letter */
      optarg = NULL;                /* no argument follows the option */
      pIndexPosition = pArgString;  /* point to the letter we're on */
    }
    return (int)*pArgString;  /* return the letter that matched */
  }
}
|
||||||
|
|
||||||
|
#endif //WIN32
|
25
cmph/wingetopt.h
Normal file
25
cmph/wingetopt.h
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#ifdef __cplusplus
extern "C" {
#endif

#ifdef WIN32
#ifndef _GETOPT_
#define _GETOPT_

#include <stdio.h>                  /* for EOF */
#include <string.h>                 /* for strchr() */

/*
 * State shared with getopt().
 * NOTE(review): these are definitions, not extern declarations, so a WIN32
 * build that includes this header from more than one translation unit would
 * define each variable more than once. wingetopt.c only declares them
 * extern, so moving the definitions there needs a matching change in that
 * file.
 */
char *optarg = NULL;    /* pointer to the start of the option argument */
int   optind = 1;       /* number of the next argv[] to be evaluated */
int   opterr = 1;       /* non-zero if a question mark should be returned */

int getopt(int argc, char *argv[], char *opstring);

#endif //_GETOPT_
#else
/* Non-WIN32 builds use <getopt.h> instead of the bundled implementation. */
#include <getopt.h>
#endif //WIN32

#ifdef __cplusplus
}
#endif
|
||||||
|
|
Loading…
Reference in New Issue
Block a user