v1.2.0

2026-04-09 01:29:56 +02:00
parent 89bdf7bbdf
commit d2343ab8f8
5 changed files with 289 additions and 69 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-test
+benchmark
--- a/6
+++ b/6
@@ -1,6 +1,6 @@

-test: test.c hm.h
-	cc -Wall -Wextra -ggdb -o test test.c
+# hm.h is a prerequisite because benchmark.cpp includes it; -O2 so that the
+# timings describe optimized code rather than an unoptimized build.
+benchmark: benchmark.cpp hm.h
+	g++ -Wall -Wextra -O2 -o benchmark benchmark.cpp

 clean:
-	rm -rf test
+	rm -rf benchmark
--- a/benchmark.cpp
+++ b/benchmark.cpp
@@ -0,0 +1,241 @@
+#include <iostream>
+#include <vector>
+#include <string>
+#include <chrono>
+#include <unordered_map>
+#include <iomanip>
+
+#define HM_IMPLEMENTATION
+#include "hm.h"
+
+using namespace std::chrono;
+
+// Time point type returned by now() and taken by time_diff_nano().
+typedef std::chrono::time_point<std::chrono::high_resolution_clock> time_p;
+
+// Configuration
+// Number of entries inserted, looked up and removed by each benchmark.
+const int NUM_ELEMENTS = 1000000;
+
+// Returns the current reading of std::chrono::high_resolution_clock.
+time_p now()
+{
+    const time_p current = high_resolution_clock::now();
+    return current;
+}
+
+// Returns the time elapsed between start and end.
+//
+// Despite the name, the result is expressed in milliseconds: the interval is
+// measured in whole nanoseconds and then divided by 1e6.
+double time_diff_nano(time_p end, time_p start)
+{
+    const auto elapsed_ns = duration_cast<nanoseconds>(end - start);
+    return elapsed_ns.count() / 1e6;
+}
+
+// Prints the horizontal rule that frames the result table.
+void print_div()
+{
+    static const char *const divider =
+        "---------------------------------------------------------";
+    std::cout << divider << std::endl;
+}
+
+// Prints the table header: a left-aligned operation column followed by three
+// right-aligned columns (std::unordered_map, hm.h, ratio).
+void print_head()
+{
+    std::cout << std::left << std::setw(10) << "Operation";
+    std::cout << std::right;
+    std::cout << std::setw(22) << "std::unordered_map";
+    std::cout << std::setw(13) << "hm.h";
+    std::cout << std::setw(11) << "Ratio";
+    std::cout << std::endl;
+}
+
+// Prints one result row.
+//
+// label:  name of the measured operation.
+// hm_ms:  time taken by hm.h, in milliseconds.
+// std_ms: time taken by std::unordered_map, in milliseconds.
+//
+// The std::unordered_map column is printed first, then the hm.h column, then
+// the ratio hm_ms / std_ms. Times are printed with two decimals.
+void print_row(const std::string& label, double hm_ms, double std_ms)
+{
+    const double ratio = hm_ms / std_ms;
+
+    std::cout << std::left << std::setw(10) << label;
+    std::cout << std::right << std::fixed << std::setprecision(2);
+    std::cout << std::setw(19) << std_ms << " ms";
+    std::cout << std::setw(10) << hm_ms << " ms";
+    std::cout << std::setw(10) << ratio << "x" << std::endl;
+}
+
+// Test with string key type
+
+// Hash callback for NUL-terminated string keys (64-bit FNV-1a).
+//
+// The hash is computed from the characters of the key, never from the address
+// of the buffer: hm.h invokes the hash function with the caller's pointer
+// (hm_put, hm_get, hm_remove) and also with the key held in a bucket
+// (hm__rehash). A hash derived from the address would therefore place the
+// same key in different slots depending on who asks.
+//
+// buffer: pointer to the NUL-terminated key.
+// size:   unused; the terminator marks the end of the key.
+// Returns the 64-bit hash of the key's characters.
+uint64_t hash_str(const void *buffer, size_t size)
+{
+    (void)size;
+    const unsigned char *str = reinterpret_cast<const unsigned char *>(buffer);
+    uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a 64-bit offset basis
+    for (; *str != '\0'; ++str) {
+        hash ^= *str;
+        hash *= 0x100000001b3ULL;          // FNV-1a 64-bit prime
+    }
+    return hash;
+}
+
+// Benchmarks insertion, lookup and deletion of NUM_ELEMENTS string entries,
+// first with hm.h and then with std::unordered_map, and prints both sets of
+// timings as one table.
+void test_str()
+{
+    std::vector<std::string> key_pool;
+    std::vector<std::string> value_pool;
+    key_pool.reserve(NUM_ELEMENTS);
+    value_pool.reserve(NUM_ELEMENTS);
+
+    // Keys embed the index, so they are unique; the random suffix only makes
+    // sure we aren't just hashing "1", "2", "3"
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        key_pool.push_back("key_prefix_" + std::to_string(i) + "_suffix_" + std::to_string(rand() % 1000));
+        value_pool.push_back("value_" + std::to_string(i));
+    }
+
+    // --- Custom implementation ---
+    // Using 0 for both sizes tells the library to treat keys and values as
+    // NULL-terminated strings
+    HashMap hm = hm_create_ex(0, 0, hash_str);
+
+    // Insertion
+    auto start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        hm_put(&hm, key_pool[i].c_str(), value_pool[i].c_str());
+    }
+    auto end = now();
+    double custom_put = time_diff_nano(end, start);
+
+    // Lookup
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        const char *val = static_cast<const char *>(hm_get(&hm, key_pool[i].c_str()));
+        if (!val) {
+            std::cerr << "Lookup error!" << std::endl;
+            hm_free(&hm); // do not leak the map when bailing out early
+            return;
+        }
+    }
+    end = now();
+    double custom_get = time_diff_nano(end, start);
+
+    // Deletion
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        hm_remove(&hm, key_pool[i].c_str());
+    }
+    end = now();
+    double custom_del = time_diff_nano(end, start);
+
+    hm_free(&hm);
+
+    // --- Benchmark std::unordered_map ---
+    // Keyed by std::string so that this map also hashes and compares the
+    // contents of the keys instead of pointer values
+    std::unordered_map<std::string, std::string> std_map;
+
+    // Insertion
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        std_map[key_pool[i]] = value_pool[i];
+    }
+    end = now();
+    double std_put = time_diff_nano(end, start);
+
+    // Lookup
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        auto it = std_map.find(key_pool[i]);
+        if (it == std_map.end()) std::cerr << "Lookup error!" << std::endl;
+    }
+    end = now();
+    double std_get = time_diff_nano(end, start);
+
+    // Deletion
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        std_map.erase(key_pool[i]);
+    }
+    end = now();
+    double std_del = time_diff_nano(end, start);
+
+    // Output Results
+    print_head();
+    print_div();
+    print_row("Insert", custom_put, std_put);
+    print_row("Lookup", custom_get, std_get);
+    print_row("Delete", custom_del, std_del);
+    print_div();
+}
+
+// Test with integer key type
+
+// Hash callback for int keys: the hash is the key's own value converted to
+// uint64_t.
+//
+// buffer: pointer to the int key.
+// size:   unused.
+uint64_t hash_int(const void *buffer, size_t size)
+{
+    static_cast<void>(size);
+    const int *key = reinterpret_cast<const int *>(buffer);
+    return *key;
+}
+
+// Benchmarks insertion, lookup and deletion of NUM_ELEMENTS int -> int
+// entries, first with hm.h and then with std::unordered_map, and prints both
+// sets of timings as one table.
+void test_int()
+{
+    // Prepare the test data up front (keys 0..NUM_ELEMENTS-1, values 2 * key)
+    // so the timed loops contain nothing but the map operations
+    std::vector<int> keys(NUM_ELEMENTS);
+    std::vector<int> values(NUM_ELEMENTS);
+
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        keys[i] = i;
+        values[i] = i * 2;
+    }
+
+    // --- Custom implementation ---
+    HashMap hm = hm_create_ex(sizeof(int), sizeof(int), hash_int);
+
+    // Insertion
+    auto start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        hm_put(&hm, &keys[i], &values[i]);
+    }
+    auto end = now();
+    double custom_put = time_diff_nano(end, start);
+
+    // Lookup
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        const int *val = static_cast<const int *>(hm_get(&hm, &keys[i]));
+        if (!val || *val != values[i]) {
+            std::cerr << "Error in custom lookup!" << std::endl;
+            hm_free(&hm); // do not leak the map when bailing out early
+            return;
+        }
+    }
+    end = now();
+    double custom_get = time_diff_nano(end, start);
+
+    // Deletion
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        hm_remove(&hm, &keys[i]);
+    }
+    end = now();
+    double custom_del = time_diff_nano(end, start);
+
+    hm_free(&hm);
+
+    // --- Benchmark std::unordered_map ---
+    std::unordered_map<int, int> std_map;
+
+    // Insertion
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        std_map[keys[i]] = values[i];
+    }
+    end = now();
+    double std_put = time_diff_nano(end, start);
+
+    // Lookup
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        auto it = std_map.find(keys[i]);
+        if (it == std_map.end() || it->second != values[i]) {
+            std::cerr << "Error in std lookup!" << std::endl;
+        }
+    }
+    end = now();
+    double std_get = time_diff_nano(end, start);
+
+    // Deletion
+    start = now();
+    for (int i = 0; i < NUM_ELEMENTS; ++i) {
+        std_map.erase(keys[i]);
+    }
+    end = now();
+    double std_del = time_diff_nano(end, start);
+
+    // Output Results
+    print_head();
+    print_div();
+    print_row("Insert", custom_put, std_put);
+    print_row("Lookup", custom_get, std_get);
+    print_row("Delete", custom_del, std_del);
+    print_div();
+}
+
+// Runs the integer-key benchmark, then the string-key benchmark, each one
+// preceded by a banner line.
+int main()
+{
+    // Prints the banner of one benchmark followed by an empty line
+    const auto banner = [](const char *key_kind) {
+        std::cout << "Benchmarking " << NUM_ELEMENTS << key_kind << std::endl << std::endl;
+    };
+
+    banner(" (integer keys)");
+    test_int();
+
+    std::cout << std::endl;
+
+    banner(" (string keys)");
+    test_str();
+
+    return 0;
+}
--- a/hm.h
+++ b/hm.h
@@ -1,4 +1,4 @@
-// hm.h - v1.1.2 - MIT License
+// hm.h - v1.2.0 - MIT License
 // chained hash table implementation as a single header library.
 //
 // [License and changelog]
@@ -26,6 +26,13 @@
 //         This macro defines the initial amount of buckets a HashMap will
 //         have when allocated.
 //
+//     #define HM_DEFAULT_HASH_FUNCTION new_hash_function (hm__fnv1a)
+//
+//         This macro defines the hash function that will be used by default
+//         by every new HashMap, except those given a non-NULL hash function
+//         through 'hm_create_ex'. The specified function has to match the
+//         HmHashFunc type definition signature.
+//
 //     #define HM_MAX_LOAD_FACTOR new_load_factor (0.75f)
 //
 //         This macro defines when a HashMap will be rehashed by surpassing
@@ -66,6 +73,11 @@
 // as a NULL terminated buffer for that HashMap. Allocation of the internal
 // buckets is done only at the first hm_put() call.
 //
+//     HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func)
+//
+// This function is the same as hm_create() but also takes the hash function
+// the HashMap will use. Passing NULL selects HM_DEFAULT_HASH_FUNCTION.
+//
 //     void hm_free(HashMap *hm)
 //
 // This functions frees and invalidates all the keys and values stored in the
@@ -138,6 +150,10 @@
 #    define HM_INITIAL_CAPACITY 256
 #endif // HM_INITIAL_CAPACITY

+#ifndef HM_DEFAULT_HASH_FUNCTION
+#    define HM_DEFAULT_HASH_FUNCTION hm__fnv1a
+#endif // HM_DEFAULT_HASH_FUNCTION
+
 #ifndef HM_MAX_LOAD_FACTOR
 #    define HM_MAX_LOAD_FACTOR 0.75f
 #endif // HM_MAX_LOAD_FACTOR
@@ -169,6 +185,9 @@
 extern "C" { // Prevent name mangling
 #endif // __cplusplus

+// Type definition for a hash function returning a 64-bit hash
+typedef uint64_t (*HmHashFunc)(const void *buffer, size_t size);
+
 typedef struct Hm__Bucket {
    struct Hm__Bucket *next;
    struct Hm__Bucket *prev;
@@ -179,6 +198,7 @@ typedef struct Hm__Bucket {

 typedef struct {
    Hm__Bucket **map;
+    HmHashFunc hash_func;
    size_t count;
    size_t capacity;
    size_t key_size;
@@ -194,6 +214,7 @@ typedef struct {

 // Create and free
 HashMap hm_create(size_t key_size, size_t value_size);
+HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func);
 void hm_free(HashMap *hm);

 // Modify, access and remove
@@ -233,12 +254,26 @@ HashMap hm_create(size_t key_size, size_t value_size)
 {
    HashMap hm;
    memset(&hm, 0, sizeof(hm));
+    hm.hash_func = HM_DEFAULT_HASH_FUNCTION;
    hm.capacity = HM_INITIAL_CAPACITY;
    hm.key_size = key_size;
    hm.value_size = value_size;
    return hm;
 }

+// Creates a HashMap like hm_create(), additionally selecting its hash
+// function. A NULL hash_func selects HM_DEFAULT_HASH_FUNCTION.
+HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func)
+{
+    HashMap hm = hm_create(key_size, value_size);
+    hm.hash_func = (hash_func != NULL) ? hash_func : HM_DEFAULT_HASH_FUNCTION;
+    return hm;
+}
+
 void hm_free(HashMap *hm)
 {
    if (hm == NULL) {
@@ -297,7 +332,7 @@ bool hm_put(HashMap *hm, const void *key, const void *value)
    size_t value_size = hm__value_size(hm, value);

    // Get the head bucket
-    size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+    size_t idx = hm->hash_func(key, key_size) % hm->capacity;
    Hm__Bucket *head = hm->map[idx];

    // Check if key already exists
@@ -330,7 +365,7 @@ bool hm_put(HashMap *hm, const void *key, const void *value)
            if (new_bucket->prev != NULL) {
                new_bucket->prev->next = new_bucket;
            } else {
-                size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+                size_t idx = hm->hash_func(key, key_size) % hm->capacity;
                hm->map[idx] = new_bucket;
            }
        }
@@ -369,7 +404,7 @@ void *hm_get(const HashMap *hm, const void *key)
    }

    size_t key_size = hm__key_size(hm, key);
-    size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+    size_t idx = hm->hash_func(key, key_size) % hm->capacity;

    Hm__Bucket *head = hm->map[idx];
    if (head == NULL) {
@@ -392,7 +427,7 @@ bool hm_remove(HashMap *hm, const void *key)
    }

    size_t key_size = hm__key_size(hm, key);
-    size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+    size_t idx = hm->hash_func(key, key_size) % hm->capacity;

    Hm__Bucket *head = hm->map[idx];
    if (head == NULL) {
@@ -608,7 +643,7 @@ bool hm__rehash(HashMap *hm)
            Hm__Bucket *next = cur->next;

            size_t key_size = hm__key_size(hm, cur->key);
-            uint64_t idx = hm__fnv1a(cur->key, key_size) % new_capacity;
+            uint64_t idx = hm->hash_func(cur->key, key_size) % new_capacity;

            // Insert new bucket
            cur->prev = NULL;
@@ -667,6 +702,8 @@ Hm__Bucket *hm__bucket_create(const void *key, size_t key_size, const void *valu
 /*
 * Revision history:
 *
+ *     1.2.0 (2026-04-08) New HmHashFunc type, configurable by the user with
+ *                        hm_create_ex() or HM_DEFAULT_HASH_FUNCTION
 *     1.1.2 (2026-04-08) Optimized Hm__Bucket creation by only allocating one
 *                        buffer instead of three per bucket
 *     1.1.1 (2026-02-13) Added new hm_of() helper macro
--- a/test.c
+++ b/test.c
@@ -1,58 +0,0 @@
-#include <stdio.h>
-
-#define HM_IMPLEMENTATION
-#define HM_INITIAL_CAPACITY 1
-#include "hm.h"
-
-void print_hm(const HashMap *hm)
-{
-    HashMapIterator it = hm_iterate(hm);
-    while (hm_next(&it) != NULL) {
-        printf("%-15s: ", (const char*)hm_key(&it));
-        printf("%d\n", *(int*)hm_value(&it));
-    }
-}
-
-const char *words[49] = {
-    "apple", "banana", "cherry", "date", "elderberry", "fig", "grape",
-    "honeydew", "apple", "banana", "kiwi", "lemon", "mango", "nectarine",
-    "orange", "papaya", "quince", "raspberry", "strawberry", "tangerine",
-    "ugli", "voavanga", "watermelon", "xigua", "yellowfruit", "zucchini",
-    "apple", "banana", "cherry", "mango", "kiwi", "lemon", "lemon", "orange",
-    "papaya", "papaya", "grape", "grape", "grape", "strawberry", "strawberry",
-    "date", "date", "date", "fig", "fig", "elderberry", "nectarine", "plum"
-};
-const size_t N = sizeof(words)/sizeof(words[0]);
-const int ONE = 1;
-
-int main(void)
-{
-    HashMap freq = hm_create(0, sizeof(int));
-
-    for (size_t i = 0; i < N; ++i) {
-        const char *word = words[i];
-
-        int *f = hm_get(&freq, word);
-        if (f == NULL) {
-            hm_put(&freq, word, &ONE);
-        } else {
-            *f += 1;
-        }
-    }
-
-    print_hm(&freq);
-    printf("count = %zu\n", freq.count);
-
-    HashMapIterator it = hm_iterate(&freq);
-    hm_next(&it);
-    while (hm_value(&it) != NULL) {
-        const void *key = hm_key(&it);
-        hm_next(&it);
-        hm_remove(&freq, key);
-    }
-
-    printf("count = %zu\n", freq.count);
-    hm_free(&freq);
-
-    return 0;
-}