1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2    	// vim: ts=8 sw=2 smarttab
3    	
4    	/*
5    	 *******************************************************************
6    	 *                                                                 *
7    	 *                        Open Bloom Filter                        *
8    	 *                                                                 *
9    	 * Author: Arash Partow - 2000                                     *
10   	 * URL: http://www.partow.net/programming/hashfunctions/index.html *
11   	 *                                                                 *
12   	 * Copyright notice:                                               *
13   	 * Free use of the Open Bloom Filter Library is permitted under    *
14   	 * the guidelines and in accordance with the most current version  *
15   	 * of the Boost Software License, Version 1.0                      *
16   	 * http://www.opensource.org/licenses/bsl1.0.html                  *
17   	 *                                                                 *
18   	 *******************************************************************
19   	*/
20   	
21   	
22   	#ifndef COMMON_BLOOM_FILTER_HPP
23   	#define COMMON_BLOOM_FILTER_HPP
24   	
25   	#include <cmath>
26   	
27   	#include "include/mempool.h"
28   	#include "include/encoding.h"
29   	
30   	static const std::size_t bits_per_char = 0x08;    // 8 bits in 1 char(unsigned)
31   	static const unsigned char bit_mask[bits_per_char] = {
32   	  0x01,  //00000001
33   	  0x02,  //00000010
34   	  0x04,  //00000100
35   	  0x08,  //00001000
36   	  0x10,  //00010000
37   	  0x20,  //00100000
38   	  0x40,  //01000000
39   	  0x80   //10000000
40   	};
41   	
42   	MEMPOOL_DECLARE_FACTORY(unsigned char, byte, bloom_filter);
43   	
44   	class bloom_filter
45   	{
46   	protected:
47   	
48   	  typedef unsigned int bloom_type;
49   	  typedef unsigned char cell_type;
50   	
51   	  unsigned char*       bit_table_;   ///< pointer to bit map
52   	  std::vector<bloom_type> salt_;     ///< vector of salts
53   	  std::size_t         salt_count_;   ///< number of salts
54   	  std::size_t         table_size_;   ///< bit table size in bytes
55   	  std::size_t         insert_count_;  ///< insertion count
56   	  std::size_t         target_element_count_;  ///< target number of unique insertions
57   	  std::size_t         random_seed_;  ///< random seed
58   	
59   	public:
60   	
61   	  bloom_filter()
62   	    : bit_table_(0),
63   	      salt_count_(0),
64   	      table_size_(0),
65   	      insert_count_(0),
66   	      target_element_count_(0),
67   	      random_seed_(0)
68   	  {}
69   	
70   	  bloom_filter(const std::size_t& predicted_inserted_element_count,
71   		       const double& false_positive_probability,
72   		       const std::size_t& random_seed)
73   	    : bit_table_(0),
74   	      insert_count_(0),
75   	      target_element_count_(predicted_inserted_element_count),
76   	      random_seed_((random_seed) ? random_seed : 0xA5A5A5A5)
77   	  {
78   	    ceph_assert(false_positive_probability > 0.0);
79   	    find_optimal_parameters(predicted_inserted_element_count, false_positive_probability,
80   				    &salt_count_, &table_size_);
81   	    init();
82   	  }
83   	
84   	  bloom_filter(const std::size_t& salt_count,
85   		       std::size_t table_size,
86   		       const std::size_t& random_seed,
87   		       std::size_t target_element_count)
88   	    : bit_table_(0),
89   	      salt_count_(salt_count),
90   	      table_size_(table_size),
91   	      insert_count_(0),
92   	      target_element_count_(target_element_count),
93   	      random_seed_((random_seed) ? random_seed : 0xA5A5A5A5)
94   	  {
95   	    init();
96   	  }
97   	
98   	  void init() {
99   	    generate_unique_salt();
100  	    if (table_size_) {
101  	      bit_table_ = mempool::bloom_filter::alloc_byte.allocate(table_size_);
102  	      std::fill_n(bit_table_, table_size_, 0x00);
103  	    } else {
104  	      bit_table_ = NULL;
105  	    }
106  	  }
107  	
108  	  bloom_filter(const bloom_filter& filter)
109  	    : bit_table_(0)
110  	  {
111  	    this->operator=(filter);
112  	  }
113  	
114  	  bloom_filter& operator = (const bloom_filter& filter)
115  	  {
116  	    if (this != &filter) {
117  	      if (bit_table_) {
118  		mempool::bloom_filter::alloc_byte.deallocate(bit_table_, table_size_);
119  	      }
120  	      salt_count_ = filter.salt_count_;
121  	      table_size_ = filter.table_size_;
122  	      insert_count_ = filter.insert_count_;
123  	      target_element_count_ = filter.target_element_count_;
124  	      random_seed_ = filter.random_seed_;
125  	      bit_table_ = mempool::bloom_filter::alloc_byte.allocate(table_size_);
126  	      std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_);
127  	      salt_ = filter.salt_;
128  	    }
129  	    return *this;
130  	  }
131  	
132  	  virtual ~bloom_filter()
133  	  {
134  	    mempool::bloom_filter::alloc_byte.deallocate(bit_table_, table_size_);
135  	  }
136  	
137  	  inline bool operator!() const
138  	  {
139  	    return (0 == table_size_);
140  	  }
141  	
142  	  inline void clear()
143  	  {
144  	    if (bit_table_)
145  	      std::fill_n(bit_table_, table_size_, 0x00);
146  	    insert_count_ = 0;
147  	  }
148  	
149  	  /**
150  	   * insert a u32 into the set
151  	   *
152  	   * NOTE: the internal hash is weak enough that consecutive inputs do
153  	   * not achieve the desired fpp.  Well-mixed values should be used
154  	   * here (e.g., put rjhash(x) into the filter instead of just x).
155  	   *
156  	   * @param val integer value to insert
157  	   */
158  	  inline void insert(uint32_t val) {
159  	    ceph_assert(bit_table_);
160  	    std::size_t bit_index = 0;
161  	    std::size_t bit = 0;
162  	    for (std::size_t i = 0; i < salt_.size(); ++i)
163  	    {
164  	      compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
165  	      bit_table_[bit_index >> 3] |= bit_mask[bit];
166  	    }
167  	    ++insert_count_;
168  	  }
169  	
170  	  inline void insert(const unsigned char* key_begin, const std::size_t& length)
171  	  {
172  	    ceph_assert(bit_table_);
173  	    std::size_t bit_index = 0;
174  	    std::size_t bit = 0;
175  	    for (std::size_t i = 0; i < salt_.size(); ++i)
176  	    {
177  	      compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit);
178  	      bit_table_[bit_index >> 3] |= bit_mask[bit];
179  	    }
180  	    ++insert_count_;
181  	  }
182  	
183  	  inline void insert(const std::string& key)
184  	  {
185  	    insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
186  	  }
187  	
188  	  inline void insert(const char* data, const std::size_t& length)
189  	  {
190  	    insert(reinterpret_cast<const unsigned char*>(data),length);
191  	  }
192  	
193  	  template<typename InputIterator>
194  	  inline void insert(const InputIterator begin, const InputIterator end)
195  	  {
196  	    InputIterator itr = begin;
197  	    while (end != itr)
198  	    {
199  	      insert(*(itr++));
200  	    }
201  	  }
202  	
203  	  /**
204  	   * check if a u32 is contained by set
205  	   *
206  	   * NOTE: the internal hash is weak enough that consecutive inputs do
207  	   * not achieve the desired fpp.  Well-mixed values should be used
208  	   * here (e.g., put rjhash(x) into the filter instead of just x).
209  	   *
210  	   * @param val integer value to query
211  	   * @returns true if value is (probably) in the set, false if it definitely is not
212  	   */
213  	  inline virtual bool contains(uint32_t val) const
214  	  {
215  	    if (!bit_table_)
216  	      return false;
217  	    std::size_t bit_index = 0;
218  	    std::size_t bit = 0;
219  	    for (std::size_t i = 0; i < salt_.size(); ++i)
220  	    {
221  	      compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
222  	      if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit])
223  	      {
224  	        return false;
225  	      }
226  	    }
227  	    return true;
228  	  }
229  	
230  	  inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const
231  	  {
232  	    if (!bit_table_)
233  	      return false;
234  	    std::size_t bit_index = 0;
235  	    std::size_t bit = 0;
236  	    for (std::size_t i = 0; i < salt_.size(); ++i)
237  	    {
238  	      compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit);
239  	      if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit])
240  	      {
241  	        return false;
242  	      }
243  	    }
244  	    return true;
245  	  }
246  	
247  	  inline bool contains(const std::string& key) const
248  	  {
249  	    return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
250  	  }
251  	
252  	  inline bool contains(const char* data, const std::size_t& length) const
253  	  {
254  	    return contains(reinterpret_cast<const unsigned char*>(data),length);
255  	  }
256  	
257  	  template<typename InputIterator>
258  	  inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const
259  	  {
260  	    InputIterator itr = begin;
261  	    while (end != itr)
262  	    {
263  	      if (!contains(*itr))
264  	      {
265  	        return itr;
266  	      }
267  	      ++itr;
268  	    }
269  	    return end;
270  	  }
271  	
272  	  template<typename InputIterator>
273  	  inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const
274  	  {
275  	    InputIterator itr = begin;
276  	    while (end != itr)
277  	    {
278  	      if (contains(*itr))
279  	      {
280  	        return itr;
281  	      }
282  	      ++itr;
283  	    }
284  	    return end;
285  	  }
286  	
287  	  inline virtual std::size_t size() const
288  	  {
289  	    return table_size_ * bits_per_char;
290  	  }
291  	
292  	  inline std::size_t element_count() const
293  	  {
294  	    return insert_count_;
295  	  }
296  	
297  	  inline bool is_full() const
298  	  {
299  	    return insert_count_ >= target_element_count_;
300  	  }
301  	
302  	  /*
303  	   * density of bits set.  inconvenient units, but:
304  	   *    .3  = ~50% target insertions
305  	   *    .5  = 100% target insertions, "perfectly full"
306  	   *    .75 = 200% target insertions
307  	   *   1.0  = all bits set... infinite insertions
308  	   */
309  	  inline double density() const
310  	  {
311  	    if (!bit_table_)
312  	      return 0.0;
313  	    size_t set = 0;
314  	    uint8_t *p = bit_table_;
315  	    size_t left = table_size_;
316  	    while (left-- > 0) {
317  	      uint8_t c = *p;
318  	      for (; c; ++set)
319  		c &= c - 1;
320  	      ++p;
321  	    }
322  	    return (double)set / (double)(table_size_ << 3);
323  	  }
324  	
325  	  virtual inline double approx_unique_element_count() const {
326  	    // this is not a very good estimate; a better solution should have
327  	    // some asymptotic behavior as density() approaches 1.0.
328  	    return (double)target_element_count_ * 2.0 * density();
329  	  }
330  	
331  	  inline double effective_fpp() const
332  	  {
333  	    /*
334  	      Note:
335  	      The effective false positive probability is calculated using the
336  	      designated table size and hash function count in conjunction with
337  	      the current number of inserted elements - not the user defined
338  	      predicated/expected number of inserted elements.
339  	    */
340  	    return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size());
341  	  }
342  	
343  	  inline const cell_type* table() const
344  	  {
345  	    return bit_table_;
346  	  }
347  	
348  	protected:
349  	
350  	  inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
351  	  {
352  	    bit_index = hash % (table_size_ << 3);
353  	    bit = bit_index & 7;
354  	  }
355  	
356  	  void generate_unique_salt()
357  	  {
358  	    /*
359  	      Note:
360  	      A distinct hash function need not be implementation-wise
361  	      distinct. In the current implementation "seeding" a common
362  	      hash function with different values seems to be adequate.
363  	    */
364  	    const unsigned int predef_salt_count = 128;
365  	    static const bloom_type predef_salt[predef_salt_count] = {
366  	      0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
367  	      0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
368  	      0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
369  	      0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
370  	      0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
371  	      0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
372  	      0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
373  	      0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
374  	      0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
375  	      0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
376  	      0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
377  	      0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
378  	      0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
379  	      0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
380  	      0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
381  	      0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
382  	      0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
383  	      0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
384  	      0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
385  	      0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
386  	      0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
387  	      0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
388  	      0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
389  	      0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
390  	      0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
391  	      0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
392  	      0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
393  	      0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
394  	      0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
395  	      0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
396  	      0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
397  	      0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
398  	    };
399  	
400  	    if (salt_count_ <= predef_salt_count)
401  	    {
402  	      std::copy(predef_salt,
403  			predef_salt + salt_count_,
404  			std::back_inserter(salt_));
405  	       for (unsigned int i = 0; i < salt_.size(); ++i)
406  	       {
407  	        /*
408  	          Note:
409  	          This is done to integrate the user defined random seed,
410  	          so as to allow for the generation of unique bloom filter
411  	          instances.
412  	        */
413  	        salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + random_seed_;
414  	       }
415  	    }
416  	    else
417  	    {
418  	      std::copy(predef_salt,predef_salt + predef_salt_count,
419  			std::back_inserter(salt_));
420  	      srand(static_cast<unsigned int>(random_seed_));
421  	      while (salt_.size() < salt_count_)
422  	      {
(1) Event dont_call: "rand" should not be used for security-related applications, because linear congruential algorithms are too easy to break.
(2) Event remediation: Use a compliant random number generator, such as "/dev/random" or "/dev/urandom" on Unix-like systems, and CNG (Cryptography API: Next Generation) on Windows.
423  	        bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand());
424  	        if (0 == current_salt)
425  		  continue;
426  	        if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
427  	        {
428  	          salt_.push_back(current_salt);
429  	        }
430  	      }
431  	    }
432  	  }
433  	
434  	  static void find_optimal_parameters(std::size_t target_insert_count,
435  					      double target_fpp,
436  					      std::size_t *salt_count,
437  					      std::size_t *table_size)
438  	  {
439  	    /*
440  	      Note:
441  	      The following will attempt to find the number of hash functions
442  	      and minimum amount of storage bits required to construct a bloom
443  	      filter consistent with the user defined false positive probability
444  	      and estimated element insertion count.
445  	    */
446  	
447  	    double min_m = std::numeric_limits<double>::infinity();
448  	    double min_k = 0.0;
449  	    double curr_m = 0.0;
450  	    double k = 1.0;
451  	    while (k < 1000.0)
452  	    {
453  	      double numerator  = (- k * target_insert_count);
454  	      double denominator = std::log(1.0 - std::pow(target_fpp, 1.0 / k));
455  	      curr_m = numerator / denominator;
456  	
457  	      if (curr_m < min_m)
458  	      {
459  	        min_m = curr_m;
460  	        min_k = k;
461  	      }
462  	      k += 1.0;
463  	    }
464  	
465  	    *salt_count = static_cast<std::size_t>(min_k);
466  	    size_t t = static_cast<std::size_t>(min_m);
467  	    t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0);
468  	    *table_size = t >> 3;
469  	  }
470  	
471  	  inline bloom_type hash_ap(uint32_t val, bloom_type hash) const
472  	  {
473  	    hash ^=    (hash <<  7) ^  ((val & 0xff000000) >> 24) * (hash >> 3);
474  	    hash ^= (~((hash << 11) + (((val & 0xff0000) >> 16) ^ (hash >> 5))));
475  	    hash ^=    (hash <<  7) ^  ((val & 0xff00) >> 8) * (hash >> 3);
476  	    hash ^= (~((hash << 11) + (((val & 0xff)) ^ (hash >> 5))));
477  	    return hash;
478  	  }
479  	
480  	  inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
481  	  {
482  	    const unsigned char* itr = begin;
483  	
484  	    while (remaining_length >= 4)
485  	    {
486  	      hash ^=    (hash <<  7) ^  (*itr++) * (hash >> 3);
487  	      hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5))));
488  	      hash ^=    (hash <<  7) ^  (*itr++) * (hash >> 3);
489  	      hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5))));
490  	      remaining_length -= 4;
491  	    }
492  	
493  	    while (remaining_length >= 2)
494  	    {
495  	      hash ^=    (hash <<  7) ^  (*itr++) * (hash >> 3);
496  	      hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5))));
497  	      remaining_length -= 2;
498  	    }
499  	
500  	    if (remaining_length)
501  	    {
502  	      hash ^= (hash <<  7) ^ (*itr) * (hash >> 3);
503  	    }
504  	
505  	    return hash;
506  	  }
507  	
508  	public:
509  	  void encode(ceph::buffer::list& bl) const;
510  	  void decode(ceph::buffer::list::const_iterator& bl);
511  	  void dump(ceph::Formatter *f) const;
512  	  static void generate_test_instances(std::list<bloom_filter*>& ls);
513  	};
514  	WRITE_CLASS_ENCODER(bloom_filter)
515  	
516  	
517  	class compressible_bloom_filter : public bloom_filter
518  	{
519  	public:
520  	
521  	  compressible_bloom_filter() : bloom_filter() {}
522  	
523  	  compressible_bloom_filter(const std::size_t& predicted_element_count,
524  				    const double& false_positive_probability,
525  				    const std::size_t& random_seed)
526  	    : bloom_filter(predicted_element_count, false_positive_probability, random_seed)
527  	  {
528  	    size_list.push_back(table_size_);
529  	  }
530  	
531  	  compressible_bloom_filter(const std::size_t& salt_count,
532  				    std::size_t table_size,
533  				    const std::size_t& random_seed,
534  				    std::size_t target_count)
535  	    : bloom_filter(salt_count, table_size, random_seed, target_count)
536  	  {
537  	    size_list.push_back(table_size_);
538  	  }
539  	
540  	  inline std::size_t size() const override
541  	  {
542  	    return size_list.back() * bits_per_char;
543  	  }
544  	
545  	  inline bool compress(const double& target_ratio)
546  	  {
547  	    if (!bit_table_)
548  	      return false;
549  	
550  	    if ((0.0 >= target_ratio) || (target_ratio >= 1.0))
551  	    {
552  	      return false;
553  	    }
554  	
555  	    std::size_t original_table_size = size_list.back();
556  	    std::size_t new_table_size = static_cast<std::size_t>(size_list.back() * target_ratio);
557  	
558  	    if ((!new_table_size) || (new_table_size >= original_table_size))
559  	    {
560  	      return false;
561  	    }
562  	
563  	    cell_type* tmp = mempool::bloom_filter::alloc_byte.allocate(new_table_size);
564  	    std::copy(bit_table_, bit_table_ + (new_table_size), tmp);
565  	    cell_type* itr = bit_table_ + (new_table_size);
566  	    cell_type* end = bit_table_ + (original_table_size);
567  	    cell_type* itr_tmp = tmp;
568  	    cell_type* itr_end = tmp + (new_table_size);
569  	    while (end != itr)
570  	    {
571  	      *(itr_tmp++) |= (*itr++);
572  	      if (itr_tmp == itr_end)
573  		itr_tmp = tmp;
574  	    }
575  	
576  	    mempool::bloom_filter::alloc_byte.deallocate(bit_table_, table_size_);
577  	    bit_table_ = tmp;
578  	    size_list.push_back(new_table_size);
579  	    table_size_ = new_table_size;
580  	
581  	    return true;
582  	  }
583  	
584  	  inline double approx_unique_element_count() const override {
585  	    // this is not a very good estimate; a better solution should have
586  	    // some asymptotic behavior as density() approaches 1.0.
587  	    //
588  	    // the compress() correction is also bad; it tends to under-estimate.
589  	    return (double)target_element_count_ * 2.0 * density() * (double)size_list.back() / (double)size_list.front();
590  	  }
591  	
592  	private:
593  	
594  	  inline void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const override
595  	  {
596  	    bit_index = hash;
597  	    for (std::size_t i = 0; i < size_list.size(); ++i)
598  	    {
599  	      bit_index %= size_list[i] << 3;
600  	    }
601  	    bit = bit_index & 7;
602  	  }
603  	
604  	  std::vector<std::size_t> size_list;
605  	public:
606  	  void encode(ceph::bufferlist& bl) const;
607  	  void decode(ceph::bufferlist::const_iterator& bl);
608  	  void dump(ceph::Formatter *f) const;
609  	  static void generate_test_instances(std::list<compressible_bloom_filter*>& ls);
610  	};
611  	WRITE_CLASS_ENCODER(compressible_bloom_filter)
612  	
613  	#endif
614  	
615  	
616  	/*
617  	  Note 1:
618  	  If it can be guaranteed that bits_per_char will be of the form 2^n then
619  	  the following optimization can be used:
620  	
621  	  hash_table[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)];
622  	
623  	  Note 2:
624  	  For performance reasons where possible when allocating memory it should
625  	  be aligned (aligned_alloc) according to the architecture being used.
626  	*/
627