ahash.h 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. #pragma once
  2. #include <string.h> /* memcpy */
  3. #include <stddef.h> /* size_t */
  4. /* simplest possible insert-only single-linked hash table */
  5. typedef void *(*AHashAllocFunc)(void *alloc_param, size_t size);
  6. typedef unsigned long (*AHashKeyHashFunc)(const void *key);
  7. typedef int (*AHashKeyCompareFunc)(const void *left, const void *right);
  8. struct AHashBucket_;
  9. typedef struct {
  10. /* must be set before aHashInit */
  11. long nbuckets;
  12. /* note-to-self: it is actually trivially possible to have variable-size keys and items */
  13. long key_size;
  14. long value_size;
  15. /* must return properly aligned (e.g. 16) regions */
  16. void *alloc_param;
  17. AHashAllocFunc alloc;
  18. AHashKeyHashFunc key_hash;
  19. AHashKeyCompareFunc key_compare;
  20. struct {
  21. long item_size;
  22. long value_offset;
  23. long next_ptr_offset;
  24. struct AHashBucket_ *buckets;
  25. } impl_;
  26. struct {
  27. long items;
  28. long empty_buckets;
  29. long worst_bucket_items;
  30. } stat;
  31. } AHash;
  32. void aHashInit(AHash *hash);
  33. void *aHashInsert(AHash *hash, const void *key, const void *value);
  34. void *aHashGet(const AHash *hash, const void *key);
  35. /* helper for hashing memory contents, e.g. strings and binary data
  36. * DON'T use it to hash structs, as those might have holes due to alignment,
  37. * and actual memory contents of these holes is not specified (UB?) */
  38. unsigned long aHashBytesHash(const void *data, size_t size);
  39. /* can be directly used if key is a zero-terminated string */
  40. unsigned long aHashStringHash(const void *string);
  41. #define AHASH_IMPLEMENT
  42. #ifdef AHASH_IMPLEMENT
  43. #ifndef AHASH_VALUE_ALIGNMENT
  44. #define AHASH_VALUE_ALIGNMENT 16 /* worst case for SSE & friends */
  45. #endif
  46. struct AHashBucket_ {
  47. void *items;
  48. long count;
  49. };
  50. struct AHashItemData_ {
  51. void *key;
  52. void *value;
  53. void **next;
  54. };
  55. static struct AHashItemData_ a__hashGetItemData(const AHash *hash, void *item) {
  56. char *bytes = item;
  57. return (struct AHashItemData_){
  58. .key = bytes,
  59. .value = bytes + hash->impl_.value_offset,
  60. .next = (void**)(bytes + hash->impl_.next_ptr_offset)
  61. };
  62. }
  63. void aHashInit(AHash *hash) {
  64. #define AHASH_ALIGNED_SIZE(S,A) (((S+A-1)/A)*A)
  65. hash->impl_.value_offset = AHASH_ALIGNED_SIZE(hash->key_size, AHASH_VALUE_ALIGNMENT);
  66. hash->impl_.next_ptr_offset = hash->impl_.value_offset + AHASH_ALIGNED_SIZE(hash->value_size, sizeof(void*));
  67. hash->impl_.item_size = hash->impl_.next_ptr_offset + sizeof(void*);
  68. hash->impl_.buckets = hash->alloc(hash->alloc_param, sizeof(struct AHashBucket_) * hash->nbuckets);
  69. for (long i = 0; i < hash->nbuckets; ++i)
  70. hash->impl_.buckets[i] = (struct AHashBucket_){
  71. .items = 0,
  72. .count = 0
  73. };
  74. hash->stat.items = 0;
  75. hash->stat.empty_buckets = hash->nbuckets;
  76. hash->stat.worst_bucket_items = 0;
  77. }
  78. void *aHashInsert(AHash *hash, const void *key, const void *value) {
  79. const long index = hash->key_hash(key) % hash->nbuckets;
  80. struct AHashBucket_ *const bucket = hash->impl_.buckets + index;
  81. void *item = bucket->items;
  82. struct AHashItemData_ prev_item_data;
  83. prev_item_data.next = &bucket->items;
  84. while(item != 0) {
  85. prev_item_data = a__hashGetItemData(hash, item);
  86. item = *prev_item_data.next;
  87. }
  88. void *const new_item = hash->alloc(hash->alloc_param, hash->impl_.item_size);
  89. const struct AHashItemData_ item_data = a__hashGetItemData(hash, new_item);
  90. memcpy(item_data.key, key, hash->key_size);
  91. memcpy(item_data.value, value, hash->value_size);
  92. item_data.next[0] = 0;
  93. *prev_item_data.next = new_item;
  94. if (!bucket->count) hash->stat.empty_buckets--;
  95. ++bucket->count;
  96. if (bucket->count > hash->stat.worst_bucket_items) hash->stat.worst_bucket_items = bucket->count;
  97. ++hash->stat.items;
  98. return new_item;
  99. }
  100. void *aHashGet(const AHash *hash, const void *key) {
  101. const long index = hash->key_hash(key) % hash->nbuckets;
  102. const struct AHashBucket_ *const bucket = hash->impl_.buckets + index;
  103. void *item = bucket->items;
  104. while (item) {
  105. const struct AHashItemData_ data = a__hashGetItemData(hash, item);
  106. if (0 == hash->key_compare(key, data.key))
  107. return data.value;
  108. item = *data.next;
  109. }
  110. return 0;
  111. }
  112. /* FNV-1a */
  113. unsigned long aHashBytesHash(const void *data, size_t size) {
  114. #if __LP64__
  115. unsigned long hash = 0xcbf29ce484222325ul;
  116. const unsigned long mul = 1099511628211ul;
  117. #else
  118. unsigned long hash = 0x811c9dc5ul;
  119. const unsigned long mul = 16777619ul;
  120. #endif
  121. for (size_t i = 0; i < size; ++i) {
  122. hash ^= ((const unsigned char*)data)[i];
  123. hash *= mul;
  124. }
  125. return hash;
  126. }
  127. unsigned long aHashStringHash(const void *string) {
  128. return aHashBytesHash(string, strlen(string));
  129. }
  130. #endif