// Review notes for this patch. They sit ahead of the first `diff --git`
// header so that no hunk body and no hunk line count is altered.
//
// What the patch does:
//   * .gitignore / Makefile: the `test` entry and target are replaced by
//     `benchmark`, which is built from benchmark.cpp with g++ -Wall -Wextra.
//   * benchmark.cpp (new file): times insertion, lookup and deletion of
//     NUM_ELEMENTS (1000000) entries for hm.h's HashMap and for
//     std::unordered_map, first with integer keys (test_int) and then with
//     string keys (test_str). print_row() prints both timings and the
//     hm_ms / std_ms ratio.
//   * hm.h (v1.1.2 -> v1.2.0): adds the HmHashFunc typedef, a hash_func
//     member in HashMap, the HM_DEFAULT_HASH_FUNCTION macro (hm__fnv1a unless
//     overridden) and hm_create_ex(). hm_put, hm_get, hm_remove and
//     hm__rehash now call hm->hash_func instead of calling hm__fnv1a directly.
//   * test.c: deleted.
//
// NOTE(review): items to confirm before merging
//   * test_str() creates its map with
//     hm_create_ex(sizeof(const char *), 0, hash_str), yet the comment right
//     above that call says a size of 0 is what selects NUL-terminated string
//     handling. The key size passed is non-zero, so the keys are presumably
//     treated as fixed-size buffers rather than strings; verify against
//     hm__key_size(), which is not part of this diff.
//   * hash_str() casts the buffer pointer to uint64_t and returns it, i.e. the
//     hash is the address of the key and not its contents. Confirm that this
//     is intended for the string benchmark.
//   * hash_int() dereferences a reinterpret_cast of the buffer whose target
//     type is not visible in this view, while the map is created with
//     sizeof(int) keys. TODO confirm the pointee type is not wider than int.
//   * time_diff_nano() divides the duration count by 1e6 and every caller
//     prints the result with an " ms" suffix; the name suggests nanoseconds.
//   * Makefile: the benchmark target lists only benchmark.cpp as prerequisite
//     although benchmark.cpp includes "hm.h", and the g++ command carries no
//     optimisation flag. Both look unintended for a benchmark; confirm.
//   * In test_int() and test_str() the custom-map lookup loop returns on error
//     before hm_free(&hm) is reached and before any results are printed. The
//     std::unordered_map lookup loops only log and continue.
//   * hm_create_ex(): hm_create() already assigns HM_DEFAULT_HASH_FUNCTION, so
//     the hash_func == NULL branch re-assigns the same value. The NULL
//     fallback is not mentioned in the hm_create_ex() documentation comment.
//   * The patch deletes test.c and the Makefile `test` target and adds no
//     replacement test within this diff.
diff --git a/.gitignore b/.gitignore index 9daeafb..d83a1b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -test +benchmark diff --git a/Makefile b/Makefile index 504c916..7c02f1e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -test: test.c hm.h - cc -Wall -Wextra -ggdb -o test test.c +benchmark: benchmark.cpp + g++ -Wall -Wextra -o benchmark benchmark.cpp clean: - rm -rf test + rm -rf benchmark diff --git a/benchmark.cpp b/benchmark.cpp new file mode 100644 index 0000000..b93ca17 --- /dev/null +++ b/benchmark.cpp @@ -0,0 +1,241 @@ +#include +#include +#include +#include +#include +#include + +#define HM_IMPLEMENTATION +#include "hm.h" + +using namespace std::chrono; + +typedef std::chrono::time_point time_p; + +// Configuration +const int NUM_ELEMENTS = 1000000; + +time_p now() +{ + return high_resolution_clock::now(); +} + +double time_diff_nano(time_p end, time_p start) +{ + return duration_cast(end - start).count() / 1e6; +} + +void print_div() +{ + std::cout << "---------------------------------------------------------" << std::endl; +} + +void print_head() +{ + std::cout << std::left << std::setw(10) << "Operation" + << std::right << std::setw(22) << "std::unordered_map" + << std::right << std::setw(13) << "hm.h" + << std::right << std::setw(11) << "Ratio" << std::endl; +} + +void print_row(const std::string& label, double hm_ms, double std_ms) +{ + std::cout << std::left << std::setw(10) << label + << std::right << std::setw(19) << std::fixed << std::setprecision(2) << std_ms << " ms" + << std::right << std::setw(10) << std::fixed << std::setprecision(2) << hm_ms << " ms" + << std::right << std::setw(10) << (hm_ms / std_ms) << "x" << std::endl; +} + +// Test with string key type +uint64_t hash_str(const void *buffer, size_t size) +{ + (void)size; + const char *str = reinterpret_cast(buffer); + uint64_t hash = (uint64_t)str; + return hash; +} + +void test_str() +{ + std::vector key_pool; + std::vector value_pool; + + // Generate random strings to 
ensure we aren't just hashing "1", "2", "3" + for (int i = 0; i < NUM_ELEMENTS; ++i) { + key_pool.push_back("key_prefix_" + std::to_string(i) + "_suffix_" + std::to_string(rand() % 1000)); + value_pool.push_back("value_" + std::to_string(i)); + } + + // --- Custom implementation --- + // Using 0 for sizes tells your library to treat them as NULL-terminated strings + HashMap hm = hm_create_ex(sizeof(const char *), 0, hash_str); + + // Insertion + auto start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + hm_put(&hm, key_pool[i].c_str(), value_pool[i].c_str()); + } + auto end = now(); + double custom_put = time_diff_nano(end, start); + + // Lookup + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + char* val = (char*)hm_get(&hm, key_pool[i].c_str()); + if (!val) { + std::cerr << "Lookup error!" << std::endl; + return; + } + } + end = now(); + double custom_get = time_diff_nano(end, start); + + // Deletion + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + hm_remove(&hm, key_pool[i].c_str()); + } + end = now(); + double custom_del = time_diff_nano(end, start); + + hm_free(&hm); + + // --- Benchmark std::unordered_map --- + std::unordered_map std_map; + + // Insertion + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + std_map[key_pool[i].c_str()] = value_pool[i].c_str(); + } + end = now(); + double std_put = time_diff_nano(end, start); + + // Lookup + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + auto it = std_map.find(key_pool[i].c_str()); + if (it == std_map.end()) std::cerr << "Lookup error!" 
<< std::endl; + } + end = now(); + double std_get = time_diff_nano(end, start); + + // Deletion + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + std_map.erase(key_pool[i].c_str()); + } + end = now(); + double std_del = time_diff_nano(end, start); + + // Output Results + print_head(); + print_div(); + print_row("Insert", custom_put, std_put); + print_row("Lookup", custom_get, std_get); + print_row("Delete", custom_del, std_del); + print_div(); +} + +// Test with integer key type +uint64_t hash_int(const void *buffer, size_t size) +{ + (void)size; + return *reinterpret_cast(buffer); +} + +void test_int() +{ + // Prepare test data (random integers) to avoid string allocation overhead in the loops + std::vector keys(NUM_ELEMENTS); + std::vector values(NUM_ELEMENTS); + + for (int i = 0; i < NUM_ELEMENTS; ++i) { + keys[i] = i; + values[i] = i * 2; + } + + // --- Custom implementation --- + HashMap hm = hm_create_ex(sizeof(int), sizeof(int), hash_int); + + // Insertion + auto start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + hm_put(&hm, &keys[i], &values[i]); + } + auto end = now(); + double custom_put = time_diff_nano(end, start); + + // Lookup + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + int* val = (int*)hm_get(&hm, &keys[i]); + if (!val || *val != values[i]) { + std::cerr << "Error in custom lookup!" 
<< std::endl; + return; + } + } + end = now(); + double custom_get = time_diff_nano(end, start); + + // Deletion + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + hm_remove(&hm, &keys[i]); + } + end = now(); + double custom_del = time_diff_nano(end, start); + + hm_free(&hm); + + // --- Benchmark std::unordered_map --- + std::unordered_map std_map; + + // Insertion + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + std_map[keys[i]] = values[i]; + } + end = now(); + double std_put = time_diff_nano(end, start); + + // Lookup + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + auto it = std_map.find(keys[i]); + if (it == std_map.end() || it->second != values[i]) { + std::cerr << "Error in std lookup!" << std::endl; + } + } + end = now(); + double std_get = time_diff_nano(end, start); + + // Deletion + start = now(); + for (int i = 0; i < NUM_ELEMENTS; ++i) { + std_map.erase(keys[i]); + } + end = now(); + double std_del = time_diff_nano(end, start); + + // Output Results + print_head(); + print_div(); + print_row("Insert", custom_put, std_put); + print_row("Lookup", custom_get, std_get); + print_row("Delete", custom_del, std_del); + print_div(); +} + +int main() +{ + std::cout << "Benchmarking " << NUM_ELEMENTS << " (integer keys)" << std::endl << std::endl; + test_int(); + + std::cout << std::endl; + + std::cout << "Benchmarking " << NUM_ELEMENTS << " (string keys)" << std::endl << std::endl; + test_str(); + + return 0; +} diff --git a/hm.h b/hm.h index 6243a3e..fe29173 100644 --- a/hm.h +++ b/hm.h @@ -1,4 +1,4 @@ -// hm.h - v1.1.2 - MIT License +// hm.h - v1.2.0 - MIT License // chained hash table implementation as a single header library. // // [License and changelog] @@ -26,6 +26,13 @@ // This macro defines the initial amount of buckets a HashMap will // have when allocated. 
// +// #define HM_DEFAULT_HASH_FUNCTION new_hash_function (hm__fnv1a) +// +// This macro defines the hash function that will be used by default +// by every new HashMap except the ones (created with 'hm_create_ex'). +// The specified function has to respect the HmHashFunc type +// definition signature. +// // #define HM_MAX_LOAD_FACTOR new_load_factor (0.75f) // // This macro defines when a HashMap will be rehashed by surpassing @@ -66,7 +73,12 @@ // as a NULL terminated buffer for that HashMap. Allocation of the internal // buckets is done only at the first hm_put() call. // -// void hm_free(HashMap *hm) +// HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func) +// +// This function is the same as hm_create() but with additional parameters +// for the configuration of the HashMap. +// +// void hm_free(HashMap *hm) // // This functions frees and invalidates all the keys and values stored in the // specified HashMap. @@ -138,6 +150,10 @@ # define HM_INITIAL_CAPACITY 256 #endif // HM_INITIAL_CAPACITY +#ifndef HM_DEFAULT_HASH_FUNCTION +# define HM_DEFAULT_HASH_FUNCTION hm__fnv1a +#endif // HM_DEFAULT_HASH_FUNCTION + #ifndef HM_MAX_LOAD_FACTOR # define HM_MAX_LOAD_FACTOR 0.75f #endif // HM_MAX_LOAD_FACTOR @@ -169,6 +185,9 @@ extern "C" { // Prevent name mangling #endif // __cplusplus +// Type definition for a hash function returning a 64-bit hash +typedef uint64_t (*HmHashFunc)(const void *buffer, size_t size); + typedef struct Hm__Bucket { struct Hm__Bucket *next; struct Hm__Bucket *prev; @@ -179,6 +198,7 @@ typedef struct Hm__Bucket { typedef struct { Hm__Bucket **map; + HmHashFunc hash_func; size_t count; size_t capacity; size_t key_size; @@ -194,6 +214,7 @@ typedef struct { // Create and free HashMap hm_create(size_t key_size, size_t value_size); +HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func); void hm_free(HashMap *hm); // Modify, access and remove @@ -233,12 +254,26 @@ HashMap hm_create(size_t key_size, 
size_t value_size) { HashMap hm; memset(&hm, 0, sizeof(hm)); + hm.hash_func = HM_DEFAULT_HASH_FUNCTION; hm.capacity = HM_INITIAL_CAPACITY; hm.key_size = key_size; hm.value_size = value_size; return hm; } +HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func) +{ + HashMap hm = hm_create(key_size, value_size); + + if (hash_func == NULL) { + hm.hash_func = HM_DEFAULT_HASH_FUNCTION; + } else { + hm.hash_func = hash_func; + } + + return hm; +} + void hm_free(HashMap *hm) { if (hm == NULL) { @@ -297,7 +332,7 @@ bool hm_put(HashMap *hm, const void *key, const void *value) size_t value_size = hm__value_size(hm, value); // Get the head bucket - size_t idx = hm__fnv1a(key, key_size) % hm->capacity; + size_t idx = hm->hash_func(key, key_size) % hm->capacity; Hm__Bucket *head = hm->map[idx]; // Check if key already exists @@ -330,7 +365,7 @@ bool hm_put(HashMap *hm, const void *key, const void *value) if (new_bucket->prev != NULL) { new_bucket->prev->next = new_bucket; } else { - size_t idx = hm__fnv1a(key, key_size) % hm->capacity; + size_t idx = hm->hash_func(key, key_size) % hm->capacity; hm->map[idx] = new_bucket; } } @@ -369,7 +404,7 @@ void *hm_get(const HashMap *hm, const void *key) } size_t key_size = hm__key_size(hm, key); - size_t idx = hm__fnv1a(key, key_size) % hm->capacity; + size_t idx = hm->hash_func(key, key_size) % hm->capacity; Hm__Bucket *head = hm->map[idx]; if (head == NULL) { @@ -392,7 +427,7 @@ bool hm_remove(HashMap *hm, const void *key) } size_t key_size = hm__key_size(hm, key); - size_t idx = hm__fnv1a(key, key_size) % hm->capacity; + size_t idx = hm->hash_func(key, key_size) % hm->capacity; Hm__Bucket *head = hm->map[idx]; if (head == NULL) { @@ -608,7 +643,7 @@ bool hm__rehash(HashMap *hm) Hm__Bucket *next = cur->next; size_t key_size = hm__key_size(hm, cur->key); - uint64_t idx = hm__fnv1a(cur->key, key_size) % new_capacity; + uint64_t idx = hm->hash_func(cur->key, key_size) % new_capacity; // Insert new bucket cur->prev = 
NULL; @@ -667,6 +702,8 @@ Hm__Bucket *hm__bucket_create(const void *key, size_t key_size, const void *valu /* * Revision history: * + * 1.2.0 (2026-04-08) New HmHashFunc type, configurable by the user with + * hm_create_ex() or HM_DEFAULT_HASH_FUNCTION * 1.1.2 (2026-04-08) Optimized Hm__Bucket creation by only allocating one * buffer instead of three per bucket * 1.1.1 (2026-02-13) Added new hm_of() helper macro diff --git a/test.c b/test.c deleted file mode 100644 index 3dab858..0000000 --- a/test.c +++ /dev/null @@ -1,58 +0,0 @@ -#include - -#define HM_IMPLEMENTATION -#define HM_INITIAL_CAPACITY 1 -#include "hm.h" - -void print_hm(const HashMap *hm) -{ - HashMapIterator it = hm_iterate(hm); - while (hm_next(&it) != NULL) { - printf("%-15s: ", (const char*)hm_key(&it)); - printf("%d\n", *(int*)hm_value(&it)); - } -} - -const char *words[49] = { - "apple", "banana", "cherry", "date", "elderberry", "fig", "grape", - "honeydew", "apple", "banana", "kiwi", "lemon", "mango", "nectarine", - "orange", "papaya", "quince", "raspberry", "strawberry", "tangerine", - "ugli", "voavanga", "watermelon", "xigua", "yellowfruit", "zucchini", - "apple", "banana", "cherry", "mango", "kiwi", "lemon", "lemon", "orange", - "papaya", "papaya", "grape", "grape", "grape", "strawberry", "strawberry", - "date", "date", "date", "fig", "fig", "elderberry", "nectarine", "plum" -}; -const size_t N = sizeof(words)/sizeof(words[0]); -const int ONE = 1; - -int main(void) -{ - HashMap freq = hm_create(0, sizeof(int)); - - for (size_t i = 0; i < N; ++i) { - const char *word = words[i]; - - int *f = hm_get(&freq, word); - if (f == NULL) { - hm_put(&freq, word, &ONE); - } else { - *f += 1; - } - } - - print_hm(&freq); - printf("count = %zu\n", freq.count); - - HashMapIterator it = hm_iterate(&freq); - hm_next(&it); - while (hm_value(&it) != NULL) { - const void *key = hm_key(&it); - hm_next(&it); - hm_remove(&freq, key); - } - - printf("count = %zu\n", freq.count); - hm_free(&freq); - - return 0; -}