Crossings Minimization  1.0
check_edge_duplication.c
Go to the documentation of this file.
1 /**
2  * @file check_edge_duplication.c
3  * @brief utility that uses hashing to check whether an edge already exists
4  *
5  * $Id: check_edge_duplication.c 2 2011-06-07 19:50:41Z mfms $
6  */
7 
9 #include<stdbool.h>
10 #include<stdlib.h>
11 #include<stdio.h>
12 #include<string.h>
13 #include<math.h>
14 
15 /**
16  * Load factor for the bit vector, roughly the likelihood of a collision
17  * between distinct edges; also determines the size of the bit vector; small
18  * load factor implies large bit vector
19  */
20 #define LOAD_FACTOR 0.1
21 /** each pair corresponds to a single bit */
22 
23 static unsigned char * pairs_bit_vector;
24 static size_t bit_vector_length;
25 static int collisions; /*< Number of collisions during hashing */
26 static size_t bits_per_pair;
27 static size_t bytes_per_pair;
28 
29 /** two prime numbers that give good results for polynomial hashing */
30 #define HASH_VALUE_ONE 37
31 #define HASH_VALUE_TWO 113
32 
33 /**
34  * Uses a polynomial hash function to compute a hash value for a sequence of
35  * bytes. If the k bytes are b_{k-1}, ..., b_0 the polynomial P(x) is
36  * (b_{k-1} x^{k-1} + ... + b_0 x^0) mod z. The hash function evaluates P(x)
37  * at x = some suitably chosen prime number and maps the result to an integer
38  * in the range [0,max-1]; the modulus z should be >= max and relatively
39  * prime to x (otherwise some integers in the range will never be mapped to)
40  *
41  * @param start_ptr pointer to b_{k-1}, the first byte in the sequence
42  * @param length number of bytes in the sequence, i.e., k
43  * @param x the prime number at which P(x) is evaluated
44  * @param range_size the number of integers in the range, i.e., max
45  * @return an (somewhat random) integer in the interval [0,max-1]; a
46  * particular sequence of bytes always gives the same return value
47  */
48 static unsigned long polynomial_hash( const char * start_ptr,
49  unsigned int length,
50  unsigned int x,
51  unsigned long range_size )
52 {
53  unsigned int modulus = range_size;
54  /* adjust the modulus to ensure that it's relatively prime to evaluate at */
55  if ( modulus % x == 0 || x % modulus == 0 )
56  modulus++;
57  /* evaluate the polynomial using Horner's rule */
58  const char * current_byte = start_ptr;
59  unsigned long hash_value = 0;
60  while ( current_byte < start_ptr + length ) {
61  unsigned int coefficient = (unsigned int) (* current_byte);
62  hash_value = (x * hash_value + coefficient) % modulus;
63  current_byte++;
64  }
65  return hash_value % range_size;
66 }
67 
68 void create_hash_table_for_pairs( int expected_number_of_pairs )
69 {
70  bits_per_pair = (size_t) round( 1 / LOAD_FACTOR );
71  bytes_per_pair = bits_per_pair / sizeof( unsigned char ) + 1;
72  bit_vector_length = expected_number_of_pairs * bytes_per_pair;
73  pairs_bit_vector = calloc( expected_number_of_pairs, bytes_per_pair );
74 }
75 
76 /**
77  * Deallocates the hash table
78  */
80 {
81  free( pairs_bit_vector );
82  pairs_bit_vector = NULL;
83  printf( "Done adding edges: collisions = %d\n", collisions );
84 }
85 
86 /**
87  * @param first_int index of a node in the master_node_list
88  * @param second_int index of another node in the master_node_list
89  * @return true if an edge between the given nodes already exists
90  * <em>This function is oblivious to the fact that we are dealing with a
91  * random dag - it applies to any collection of pairs of integers</em>
92  */
93 bool pair_already_exists( int first_int, int second_int )
94 {
95 #ifdef DEBUG
96  printf( "-> pair_already_exists (%d,%d)\n", first_int, second_int );
97 #endif
98  unsigned int hashstr[2];
99  // make sure that the hash string will not be 0
100  first_int++; second_int++;
101  hashstr[0] = (first_int > second_int) ? first_int : second_int;
102  hashstr[1] = (first_int < second_int) ? first_int : second_int;
103  /* get the bit position and the byte using polynomial hashing*/
104  unsigned char bit_position
105  = polynomial_hash( (char *) hashstr,
106  2 * sizeof(unsigned int),
107  HASH_VALUE_ONE, sizeof(char) );
108  unsigned int byte_position
109  = polynomial_hash( (char *) hashstr,
110  2 * sizeof(unsigned int),
112 
113  /* check for collision */
114  unsigned char bit = 1 << bit_position;
115  if ( ( pairs_bit_vector[ byte_position ] & bit ) )
116  {
117  collisions++;
118 #ifdef DEBUG
119  printf( "<- pair_already_exists, return true\n" );
120 #endif
121  return true;
122  }
123 
124  /* Mark the bit in the bit vector and add the edge */
125  pairs_bit_vector[ byte_position ] |= bit;
126 #ifdef DEBUG
127  printf( "<- pair_already_exists, return false\n" );
128 #endif
129  return false;
130 }
131 
132 /* [Last modified: 2011 06 02 at 13:56:09 GMT] */
#define HASH_VALUE_TWO
static size_t bits_per_pair
static size_t bytes_per_pair
#define HASH_VALUE_ONE
static unsigned long polynomial_hash(const char *start_ptr, unsigned int length, unsigned int x, unsigned long range_size)
bool pair_already_exists(int first_int, int second_int)
void create_hash_table_for_pairs(int expected_number_of_pairs)
static size_t bit_vector_length
static unsigned int modulus
Definition: hash.c:25
void destroy_hash_table_for_pairs(void)
utility that uses hashing to check whether an edge already exists
#define LOAD_FACTOR
static unsigned char * pairs_bit_vector
static int collisions