Sierra Toolkit (Version of the Day)
hash_map_rdestl.h
#ifndef RDESTL_HASH_MAP_H
#define RDESTL_HASH_MAP_H

#include <stk_util/util/algorithm_rdestl.h>
#include <stk_util/util/allocator_rdestl.h>
#include <stk_util/util/functional_rdestl.h>
#include <stk_util/util/hash_rdestl.h>
#include <stk_util/util/pair_rdestl.h>

namespace rde
{

// TLoadFactor4 - controls hash map load, expressed in quarters: the map grows
// when numUsed * TLoadFactor4 >= capacity * 4, i.e. at a load factor of
// 4/TLoadFactor4. A value of 4 means 100% load (grow when the number of used
// buckets == capacity); the default of 6 grows at 2/3 (4/6) load. Lower values
// give tighter maps, but a bigger risk of collisions.
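// Example: a 64-bucket table doubles on the insert() call that starts with
// 43 buckets in use (occupied plus deleted), since 43 * 6 = 258 >= 256 = 64 * 4.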
template<typename TKey, typename TValue,
         class THashFunc = rde::hash<TKey>,
         int TLoadFactor4 = 6,
         class TKeyEqualFunc = rde::equal_to<TKey>,
         class TAllocator = rde::allocator>
class hash_map
{
public:
  typedef rde::pair<TKey, TValue> value_type;

private:
  struct node
  {
    static const hash_value_t kUnusedHash  = 0xFFFFFFFF;
    static const hash_value_t kDeletedHash = 0xFFFFFFFE;

    node(): hash(kUnusedHash) {}

    RDE_FORCEINLINE bool is_unused() const   { return hash == kUnusedHash; }
    RDE_FORCEINLINE bool is_deleted() const  { return hash == kDeletedHash; }
    RDE_FORCEINLINE bool is_occupied() const { return hash < kDeletedHash; }

    hash_value_t hash;
    value_type   data;
  };
  template<typename TNodePtr, typename TPtr, typename TRef>
  class node_iterator
  {
    friend class hash_map;
  public:
    typedef forward_iterator_tag iterator_category;

    explicit node_iterator(TNodePtr node, const hash_map* map)
      : m_node(node),
        m_map(map)
    {}
    template<typename UNodePtr, typename UPtr, typename URef>
    node_iterator(const node_iterator<UNodePtr, UPtr, URef>& rhs)
      : m_node(rhs.node()),
        m_map(rhs.get_map())
    {}

    TRef operator*() const
    {
      RDE_ASSERT(m_node != 0);
      return m_node->data;
    }
    TPtr operator->() const
    {
      return &m_node->data;
    }
    RDE_FORCEINLINE TNodePtr node() const
    {
      return m_node;
    }

    node_iterator& operator++()
    {
      RDE_ASSERT(m_node != 0);
      ++m_node;
      move_to_next_occupied_node();
      return *this;
    }
    node_iterator operator++(int)
    {
      node_iterator copy(*this);
      ++(*this);
      return copy;
    }

    RDE_FORCEINLINE bool operator==(const node_iterator& rhs) const
    {
      return rhs.m_node == m_node;
    }
    bool operator!=(const node_iterator& rhs) const
    {
      return !(rhs == *this);
    }

    const hash_map* get_map() const { return m_map; }
  private:
    void move_to_next_occupied_node()
    {
      // @todo: save nodeEnd in constructor?
      TNodePtr nodeEnd = m_map->m_nodes + m_map->bucket_count();
      for (; m_node < nodeEnd; ++m_node)
      {
        if (m_node->is_occupied())
          break;
      }
    }
    TNodePtr m_node;
    const hash_map* m_map;
  };

public:
  typedef TKey key_type;
  typedef TValue mapped_type;
  typedef TAllocator allocator_type;
  typedef node_iterator<node*, value_type*, value_type&> iterator;
  typedef node_iterator<const node*, const value_type*, const value_type&> const_iterator;
  typedef int size_type;
  static const size_type kNodeSize = sizeof(node);
  static const size_type kInitialCapacity = 64;

  hash_map()
    : m_nodes(&ms_emptyNode),
      m_size(0),
      m_capacity(0),
      m_capacityMask(0),
      m_numUsed(0)
  {
    RDE_ASSERT((kInitialCapacity & (kInitialCapacity - 1)) == 0);  // Must be power-of-two
  }
  explicit hash_map(const allocator_type& allocator)
    : m_nodes(&ms_emptyNode),
      m_size(0),
      m_capacity(0),
      m_capacityMask(0),
      m_numUsed(0),
      m_allocator(allocator)
  {
  }
  explicit hash_map(size_type initial_bucket_count,
                    const allocator_type& allocator = allocator_type())
    : m_nodes(&ms_emptyNode),
      m_size(0),
      m_capacity(0),
      m_capacityMask(0),
      m_numUsed(0),
      m_allocator(allocator)
  {
    reserve(initial_bucket_count);
  }
  hash_map(size_type initial_bucket_count,
           const THashFunc& hashFunc,
           const allocator_type& allocator = allocator_type())
    : m_nodes(&ms_emptyNode),
      m_size(0),
      m_capacity(0),
      m_capacityMask(0),
      m_numUsed(0),
      m_hashFunc(hashFunc),
      m_allocator(allocator)
  {
    reserve(initial_bucket_count);
  }
  hash_map(const hash_map& rhs, const allocator_type& allocator = allocator_type())
    : m_nodes(&ms_emptyNode),
      m_size(0),
      m_capacity(0),
      m_capacityMask(0),
      m_numUsed(0),
      m_allocator(allocator)
  {
    *this = rhs;
  }
  explicit hash_map(e_noinitialize)
  {
  }
  ~hash_map()
  {
    delete_nodes();
  }

  iterator begin()
  {
    iterator it(m_nodes, this);
    it.move_to_next_occupied_node();
    return it;
  }
  const_iterator begin() const
  {
    const_iterator it(m_nodes, this);
    it.move_to_next_occupied_node();
    return it;
  }
  iterator end() { return iterator(m_nodes + m_capacity, this); }
  const_iterator end() const { return const_iterator(m_nodes + m_capacity, this); }

  // @note: Added for compatibility's sake.
  // Personally, I consider it "risky". Use find/insert for more
  // explicit operations.
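  // Example: m[key] default-constructs and inserts TValue() when the key is
  // absent, so even a plain read of a missing key mutates the map; use find()
  // first when that side effect is unwanted.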
  mapped_type& operator[](const key_type& key)
  {
    hash_value_t hash;
    node* n = find_for_insert(key, &hash);
    if (n == 0 || !n->is_occupied())
    {
      return insert_at(value_type(key, TValue()), n, hash).first->second;
    }
    return n->data.second;
  }
  // @note: Doesn't copy allocator.
  hash_map& operator=(const hash_map& rhs)
  {
    RDE_ASSERT(invariant());
    if (&rhs != this)
    {
      clear();
      if (m_capacity < rhs.bucket_count())
      {
        delete_nodes();
        m_nodes = allocate_nodes(rhs.bucket_count());
        m_capacity = rhs.bucket_count();
        m_capacityMask = m_capacity - 1;
      }
      rehash(m_capacity, m_nodes, rhs.m_capacity, rhs.m_nodes, false);
      m_size = rhs.size();
      m_numUsed = rhs.m_numUsed;
    }
    RDE_ASSERT(invariant());
    return *this;
  }
  void swap(hash_map& rhs)
  {
    if (&rhs != this)
    {
      RDE_ASSERT(invariant());
      RDE_ASSERT(m_allocator == rhs.m_allocator);
      rde::swap(m_nodes, rhs.m_nodes);
      rde::swap(m_size, rhs.m_size);
      rde::swap(m_capacity, rhs.m_capacity);
      rde::swap(m_capacityMask, rhs.m_capacityMask);
      rde::swap(m_numUsed, rhs.m_numUsed);
      rde::swap(m_hashFunc, rhs.m_hashFunc);
      rde::swap(m_keyEqualFunc, rhs.m_keyEqualFunc);
      RDE_ASSERT(invariant());
    }
  }

  rde::pair<iterator, bool> insert(const value_type& v)
  {
    typedef rde::pair<iterator, bool> ret_type_t;
    RDE_ASSERT(invariant());
    if (m_numUsed * TLoadFactor4 >= m_capacity * 4)
      grow();

    hash_value_t hash = 0xFFFFFFFF;

    node* n = find_for_insert(v.first, &hash);
    if (n->is_occupied())
    {
      RDE_ASSERT(hash == n->hash && m_keyEqualFunc(v.first, n->data.first));
      return ret_type_t(iterator(n, this), false);
    }
    if (n->is_unused())
      ++m_numUsed;
    rde::copy_construct(&n->data, v);
    n->hash = hash;
    ++m_size;
    RDE_ASSERT(invariant());
    return ret_type_t(iterator(n, this), true);
  }

  size_type erase(const key_type& key)
  {
    node* n = lookup(key);
    if (n != (m_nodes + m_capacity) && n->is_occupied())
    {
      erase_node(n);
      return 1;
    }
    return 0;
  }
  void erase(iterator it)
  {
    RDE_ASSERT(it.get_map() == this);
    if (it != end())
    {
      RDE_ASSERT(!empty());
      erase_node(it.node());
    }
  }
  void erase(iterator from, iterator to)
  {
    for (; from != to; ++from)
    {
      node* n = from.node();
      if (n->is_occupied())
        erase_node(n);
    }
  }

  iterator find(const key_type& key)
  {
    node* n = lookup(key);
    return iterator(n, this);
  }
  const_iterator find(const key_type& key) const
  {
    const node* n = lookup(key);
    return const_iterator(n, this);
  }

  void clear()
  {
    node* endNode = m_nodes + m_capacity;
    for (node* iter = m_nodes; iter != endNode; ++iter)
    {
      if (iter->is_occupied())
      {
        rde::destruct(&iter->data);
      }
      // We can make them unused, because we clear the whole hash_map,
      // so we can guarantee there'll be no holes.
      iter->hash = node::kUnusedHash;
    }
    m_size = 0;
    m_numUsed = 0;
  }

  // Grows the bucket array so it holds at least min_size buckets
  // (capacity is always rounded up to a power of two).
  void reserve(size_type min_size)
  {
    size_type newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity);
    while (newCapacity < min_size)
      newCapacity *= 2;
    if (newCapacity > m_capacity)
      grow(newCapacity);
  }

  size_type bucket_count() const { return m_capacity; }
  size_type size() const { return m_size; }
  bool empty() const { return size() == 0; }
  size_type nonempty_bucket_count() const { return m_numUsed; }
  size_type used_memory() const
  {
    return bucket_count() * kNodeSize;
  }

  const allocator_type& get_allocator() const { return m_allocator; }
  void set_allocator(const allocator_type& allocator)
  {
    m_allocator = allocator;
  }

private:
  void grow()
  {
    const int newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity * 2);
    grow(newCapacity);
  }
  void grow(int new_capacity)
  {
    RDE_ASSERT((new_capacity & (new_capacity - 1)) == 0);  // Must be power-of-two
    node* newNodes = allocate_nodes(new_capacity);
    rehash(new_capacity, newNodes, m_capacity, m_nodes, true);
    if (m_nodes != &ms_emptyNode)
      m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity);
    m_capacity = new_capacity;
    m_capacityMask = new_capacity - 1;
    m_nodes = newNodes;
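    // Rehashing copies only occupied nodes, so deleted tombstones are dropped
    // and the number of used buckets collapses back to the live entry count.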
    m_numUsed = m_size;
    RDE_ASSERT(m_numUsed < m_capacity);
  }
  rde::pair<iterator, bool> insert_at(const value_type& v, node* n,
                                      hash_value_t hash)
  {
    RDE_ASSERT(invariant());
    if (n == 0 || m_numUsed * TLoadFactor4 >= m_capacity * 4)
      return insert(v);

    RDE_ASSERT(!n->is_occupied());
    if (n->is_unused())
      ++m_numUsed;
    rde::copy_construct(&n->data, v);
    n->hash = hash;
    ++m_size;
    RDE_ASSERT(invariant());
    return rde::pair<iterator, bool>(iterator(n, this), true);
  }
  node* find_for_insert(const key_type& key, hash_value_t* out_hash)
  {
    if (m_capacity == 0)
      return 0;

    const hash_value_t hash = hash_func(key);
    *out_hash = hash;
    uint32 i = hash & m_capacityMask;

    node* n = m_nodes + i;
    if (n->hash == hash && m_keyEqualFunc(key, n->data.first))
      return n;

    node* freeNode(0);
    if (n->is_deleted())
      freeNode = n;
    uint32 numProbes(0);
    // Guarantees loop termination.
    RDE_ASSERT(m_numUsed < m_capacity);
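    // Probe offsets from the home bucket are the triangular numbers
    // (1, 3, 6, 10, ...), which visit every bucket exactly once when the
    // capacity is a power of two.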
    while (!n->is_unused())
    {
      ++numProbes;
      i = (i + numProbes) & m_capacityMask;
      n = m_nodes + i;
      if (compare_key(n, key, hash))
        return n;
      if (n->is_deleted() && freeNode == 0)
        freeNode = n;
    }
    return freeNode ? freeNode : n;
  }
  node* lookup(const key_type& key) const
  {
    const hash_value_t hash = hash_func(key);
    uint32 i = hash & m_capacityMask;
    node* n = m_nodes + i;
    // In theory, we could try to use compare_key here; it would
    // be a little bit faster for keys with cheap_compare. However, if keys are
    // not totally destroyed on removal (erase_node), this could result in returning
    // unused nodes. By testing hashes, we make sure it does not happen.
    // This could also be solved by doing what Google does -- set_empty_key/set_deleted_key,
    // but for the time being it doesn't look to me like it's worth it.
    if (n->hash == hash && m_keyEqualFunc(key, n->data.first))
      return n;

    uint32 numProbes(0);
    // Guarantees loop termination.
    RDE_ASSERT(m_capacity == 0 || m_numUsed < m_capacity);
    while (!n->is_unused())
    {
      ++numProbes;
      i = (i + numProbes) & m_capacityMask;
      n = m_nodes + i;

      if (compare_key(n, key, hash))
        return n;
    }
    return m_nodes + m_capacity;
  }

  static void rehash(int new_capacity, node* new_nodes,
                     int capacity, const node* nodes, bool destruct_original)
  {
    //if (nodes == &ms_emptyNode || new_nodes == &ms_emptyNode)
    //  return;

    const node* it = nodes;
    const node* itEnd = nodes + capacity;
    const uint32 mask = new_capacity - 1;
    while (it != itEnd)
    {
      if (it->is_occupied())
      {
        const hash_value_t hash = it->hash;
        uint32 i = hash & mask;

        node* n = new_nodes + i;
        uint32 numProbes(0);
        while (!n->is_unused())
        {
          ++numProbes;
          i = (i + numProbes) & mask;
          n = new_nodes + i;
        }
        rde::copy_construct(&n->data, it->data);
        n->hash = hash;
        if (destruct_original)
          rde::destruct(&it->data);
      }
      ++it;
    }
  }

  node* allocate_nodes(int n)
  {
    node* buckets = static_cast<node*>(m_allocator.allocate(n * sizeof(node)));
    node* iterBuckets(buckets);
    node* end = iterBuckets + n;
    for (; iterBuckets != end; ++iterBuckets)
      iterBuckets->hash = node::kUnusedHash;

    return buckets;
  }
  void delete_nodes()
  {
    node* it = m_nodes;
    node* itEnd = it + m_capacity;
    while (it != itEnd)
    {
      if (it->is_occupied())
        rde::destruct(&it->data);
      ++it;
    }
    if (m_nodes != &ms_emptyNode)
      m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity);

    m_capacity = 0;
    m_capacityMask = 0;
    m_size = 0;
  }
  void erase_node(node* n)
  {
    RDE_ASSERT(!empty());
    RDE_ASSERT(n->is_occupied());
    rde::destruct(&n->data);
    n->hash = node::kDeletedHash;
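    // The bucket becomes a "deleted" tombstone rather than unused, so probe
    // sequences that pass through it can still find entries stored beyond it.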
    --m_size;
  }

  RDE_FORCEINLINE hash_value_t hash_func(const key_type& key) const
  {
    const hash_value_t h = m_hashFunc(key) & 0xFFFFFFFD;
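    // Masking clears bit 1, so h can never equal the kUnusedHash/kDeletedHash
    // sentinels (0xFFFFFFFF / 0xFFFFFFFE).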
    //RDE_ASSERT(h < node::kDeletedHash);
    return h;
  }
  bool invariant() const
  {
    RDE_ASSERT((m_capacity & (m_capacity - 1)) == 0);
    RDE_ASSERT(m_numUsed >= m_size);
    return true;
  }

  RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash,
                                   int_to_type<false>) const
  {
    return (n->hash == hash && m_keyEqualFunc(key, n->data.first));
  }
  RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t,
                                   int_to_type<true>) const
  {
    return m_keyEqualFunc(key, n->data.first);
  }
  RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash) const
  {
    return compare_key(n, key, hash, int_to_type<has_cheap_compare<TKey>::value>());
  }

  node* m_nodes;
  int m_size;
  int m_capacity;
  uint32 m_capacityMask;
  int m_numUsed;
  THashFunc m_hashFunc;
  TKeyEqualFunc m_keyEqualFunc;
  TAllocator m_allocator;

  static node ms_emptyNode;
};


// Holy ... (out-of-class definition of the shared empty sentinel node).
template<typename TKey, typename TValue,
         class THashFunc,
         int TLoadFactor4,
         class TKeyEqualFunc,
         class TAllocator>
typename hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::node hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::ms_emptyNode;

}

#endif
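
A minimal usage sketch, assuming the stk_util headers above are on the include
path, that hash_rdestl.h provides rde::hash<int>, and that rde::pair has a
two-argument constructor:

  #include <stk_util/util/hash_map_rdestl.h>

  int main()
  {
    rde::hash_map<int, float> prices;                 // defaults: rde::hash<int>, 2/3 max load
    prices.insert(rde::pair<int, float>(7, 1.25f));   // explicit insert
    prices[42] = 2.50f;                               // operator[] inserts TValue() when absent

    rde::hash_map<int, float>::iterator it = prices.find(7);
    if (it != prices.end())
      prices.erase(it);                               // leaves a kDeletedHash tombstone

    return prices.size();                             // 1
  }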