This commit is contained in:
2026-04-09 01:29:56 +02:00
parent 89bdf7bbdf
commit d2343ab8f8
5 changed files with 289 additions and 69 deletions

2
.gitignore vendored
View File

@@ -1 +1 @@
test
benchmark

View File

@@ -1,6 +1,6 @@
test: test.c hm.h
cc -Wall -Wextra -ggdb -o test test.c
benchmark: benchmark.cpp
g++ -Wall -Wextra -o benchmark benchmark.cpp
clean:
rm -rf test
rm -rf benchmark

241
benchmark.cpp Normal file
View File

@@ -0,0 +1,241 @@
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#define HM_IMPLEMENTATION
#include "hm.h"
using namespace std::chrono;

// Timestamp type produced by the benchmark clock
using time_p = std::chrono::time_point<std::chrono::high_resolution_clock>;

// Configuration: number of entries inserted, looked up and removed per run
constexpr int NUM_ELEMENTS = 1000 * 1000;
// Samples the benchmark clock and returns the current timestamp.
time_p now()
{
    return std::chrono::high_resolution_clock::now();
}
// Elapsed time between two timestamps.
//
// end:   timestamp taken after the measured work
// start: timestamp taken before the measured work
//
// Returns the difference in MILLISECONDS: the interval is measured with
// nanosecond resolution (hence the name) and then divided by 1e6.
double time_diff_nano(time_p end, time_p start)
{
    const nanoseconds elapsed = duration_cast<nanoseconds>(end - start);
    const double nanos_per_milli = 1e6;
    return elapsed.count() / nanos_per_milli;
}
// Prints the horizontal rule that separates the table header from the rows
// (and closes the table), then flushes stdout.
void print_div()
{
    static const char rule[] = "---------------------------------------------------------";
    std::cout << rule << std::endl;
}
// Prints the header line of the result table. Column widths match the ones
// used by print_row(): 10 for the label, then 22, 13 and 11 for the three
// right-aligned columns.
void print_head()
{
    std::ostream &out = std::cout;
    out << std::left << std::setw(10) << "Operation";
    out << std::right << std::setw(22) << "std::unordered_map";
    out << std::setw(13) << "hm.h"; // right alignment set above is still active
    out << std::setw(11) << "Ratio";
    out << std::endl;
}
// Prints one line of the result table.
//
// label:  name of the measured operation (left-aligned, 10 columns)
// hm_ms:  time taken by hm.h, in milliseconds
// std_ms: time taken by std::unordered_map, in milliseconds
//
// The std::unordered_map time is printed first, then the hm.h time, then
// the ratio hm_ms / std_ms; all with two decimals.
void print_row(const std::string& label, double hm_ms, double std_ms)
{
    const double ratio = hm_ms / std_ms;

    std::ostream &out = std::cout;
    out << std::left << std::setw(10) << label;
    out << std::right << std::fixed << std::setprecision(2);
    out << std::setw(19) << std_ms << " ms";
    out << std::setw(10) << hm_ms << " ms";
    out << std::setw(10) << ratio << "x" << std::endl;
}
// Test with string key type
uint64_t hash_str(const void *buffer, size_t size)
{
(void)size;
const char *str = reinterpret_cast<const char *>(buffer);
uint64_t hash = (uint64_t)str;
return hash;
}
void test_str()
{
std::vector<std::string> key_pool;
std::vector<std::string> value_pool;
// Generate random strings to ensure we aren't just hashing "1", "2", "3"
for (int i = 0; i < NUM_ELEMENTS; ++i) {
key_pool.push_back("key_prefix_" + std::to_string(i) + "_suffix_" + std::to_string(rand() % 1000));
value_pool.push_back("value_" + std::to_string(i));
}
// --- Custom implementation ---
// Using 0 for sizes tells your library to treat them as NULL-terminated strings
HashMap hm = hm_create_ex(sizeof(const char *), 0, hash_str);
// Insertion
auto start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
hm_put(&hm, key_pool[i].c_str(), value_pool[i].c_str());
}
auto end = now();
double custom_put = time_diff_nano(end, start);
// Lookup
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
char* val = (char*)hm_get(&hm, key_pool[i].c_str());
if (!val) {
std::cerr << "Lookup error!" << std::endl;
return;
}
}
end = now();
double custom_get = time_diff_nano(end, start);
// Deletion
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
hm_remove(&hm, key_pool[i].c_str());
}
end = now();
double custom_del = time_diff_nano(end, start);
hm_free(&hm);
// --- Benchmark std::unordered_map ---
std::unordered_map<const char*, const char*> std_map;
// Insertion
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
std_map[key_pool[i].c_str()] = value_pool[i].c_str();
}
end = now();
double std_put = time_diff_nano(end, start);
// Lookup
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
auto it = std_map.find(key_pool[i].c_str());
if (it == std_map.end()) std::cerr << "Lookup error!" << std::endl;
}
end = now();
double std_get = time_diff_nano(end, start);
// Deletion
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
std_map.erase(key_pool[i].c_str());
}
end = now();
double std_del = time_diff_nano(end, start);
// Output Results
print_head();
print_div();
print_row("Insert", custom_put, std_put);
print_row("Lookup", custom_get, std_get);
print_row("Delete", custom_del, std_del);
print_div();
}
// Test with integer key type

// Custom hash used by the integer benchmark: the key's own value.
//
// buffer: points at the bytes of an `int` key
// size:   key size passed in by the map; unused, the key is always an int
//
// The key is copied out with memcpy instead of dereferencing an `int *`,
// because nothing guarantees that the buffer handed to a hash function is
// suitably aligned for an int. Negative keys convert to uint64_t the same way
// an implicit int -> uint64_t conversion does.
uint64_t hash_int(const void *buffer, size_t size)
{
    (void)size;
    int key = 0;
    std::memcpy(&key, buffer, sizeof(key));
    return static_cast<uint64_t>(key);
}
void test_int()
{
// Prepare test data (random integers) to avoid string allocation overhead in the loops
std::vector<int> keys(NUM_ELEMENTS);
std::vector<int> values(NUM_ELEMENTS);
for (int i = 0; i < NUM_ELEMENTS; ++i) {
keys[i] = i;
values[i] = i * 2;
}
// --- Custom implementation ---
HashMap hm = hm_create_ex(sizeof(int), sizeof(int), hash_int);
// Insertion
auto start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
hm_put(&hm, &keys[i], &values[i]);
}
auto end = now();
double custom_put = time_diff_nano(end, start);
// Lookup
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
int* val = (int*)hm_get(&hm, &keys[i]);
if (!val || *val != values[i]) {
std::cerr << "Error in custom lookup!" << std::endl;
return;
}
}
end = now();
double custom_get = time_diff_nano(end, start);
// Deletion
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
hm_remove(&hm, &keys[i]);
}
end = now();
double custom_del = time_diff_nano(end, start);
hm_free(&hm);
// --- Benchmark std::unordered_map ---
std::unordered_map<int, int> std_map;
// Insertion
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
std_map[keys[i]] = values[i];
}
end = now();
double std_put = time_diff_nano(end, start);
// Lookup
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
auto it = std_map.find(keys[i]);
if (it == std_map.end() || it->second != values[i]) {
std::cerr << "Error in std lookup!" << std::endl;
}
}
end = now();
double std_get = time_diff_nano(end, start);
// Deletion
start = now();
for (int i = 0; i < NUM_ELEMENTS; ++i) {
std_map.erase(keys[i]);
}
end = now();
double std_del = time_diff_nano(end, start);
// Output Results
print_head();
print_div();
print_row("Insert", custom_put, std_put);
print_row("Lookup", custom_get, std_get);
print_row("Delete", custom_del, std_del);
print_div();
}
// Runs the integer-key benchmark followed by the string-key benchmark, each
// preceded by a banner naming the element count and the key type.
int main()
{
    // Prints one section banner followed by an empty line
    const auto banner = [](const char *key_kind) {
        std::cout << "Benchmarking " << NUM_ELEMENTS << key_kind << std::endl << std::endl;
    };

    banner(" (integer keys)");
    test_int();

    std::cout << std::endl;

    banner(" (string keys)");
    test_str();

    return 0;
}

49
hm.h
View File

@@ -1,4 +1,4 @@
// hm.h - v1.1.2 - MIT License
// hm.h - v1.2.0 - MIT License
// chained hash table implementation as a single header library.
//
// [License and changelog]
@@ -26,6 +26,13 @@
// This macro defines the initial amount of buckets a HashMap will
// have when allocated.
//
// #define HM_DEFAULT_HASH_FUNCTION new_hash_function (hm__fnv1a)
//
// This macro defines the hash function used by every new HashMap, except
// the ones created with 'hm_create_ex' and a non-NULL 'hash_func'.
// The specified function has to match the HmHashFunc type definition
// signature.
//
// #define HM_MAX_LOAD_FACTOR new_load_factor (0.75f)
//
// This macro defines when a HashMap will be rehashed by surpassing
@@ -66,6 +73,11 @@
// as a NULL terminated buffer for that HashMap. Allocation of the internal
// buckets is done only at the first hm_put() call.
//
// HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func)
//
// This function is the same as hm_create() but also lets the caller choose
// the hash function used by the HashMap. Passing NULL as 'hash_func' selects
// HM_DEFAULT_HASH_FUNCTION.
//
// void hm_free(HashMap *hm)
//
// This function frees and invalidates all the keys and values stored in the
@@ -138,6 +150,10 @@
# define HM_INITIAL_CAPACITY 256
#endif // HM_INITIAL_CAPACITY
// Hash function installed by hm_create(), and by hm_create_ex() when it is
// given a NULL hash_func. Define it before including this header to override
// the built-in hm__fnv1a.
#ifndef HM_DEFAULT_HASH_FUNCTION
# define HM_DEFAULT_HASH_FUNCTION hm__fnv1a
#endif // HM_DEFAULT_HASH_FUNCTION
#ifndef HM_MAX_LOAD_FACTOR
# define HM_MAX_LOAD_FACTOR 0.75f
#endif // HM_MAX_LOAD_FACTOR
@@ -169,6 +185,9 @@
extern "C" { // Prevent name mangling
#endif // __cplusplus
// Type definition for a hash function returning a 64-bit hash.
// The map calls it with a pointer to a key ('buffer') and that key's size in
// bytes ('size'), and reduces the result modulo its capacity to pick a slot.
typedef uint64_t (*HmHashFunc)(const void *buffer, size_t size);
typedef struct Hm__Bucket {
struct Hm__Bucket *next;
struct Hm__Bucket *prev;
@@ -179,6 +198,7 @@ typedef struct Hm__Bucket {
typedef struct {
Hm__Bucket **map;
HmHashFunc hash_func;
size_t count;
size_t capacity;
size_t key_size;
@@ -194,6 +214,7 @@ typedef struct {
// Create and free
HashMap hm_create(size_t key_size, size_t value_size);
HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func);
void hm_free(HashMap *hm);
// Modify, access and remove
@@ -233,12 +254,26 @@ HashMap hm_create(size_t key_size, size_t value_size)
{
HashMap hm;
memset(&hm, 0, sizeof(hm));
hm.hash_func = HM_DEFAULT_HASH_FUNCTION;
hm.capacity = HM_INITIAL_CAPACITY;
hm.key_size = key_size;
hm.value_size = value_size;
return hm;
}
// Creates a HashMap like hm_create(key_size, value_size) and then selects its
// hash function: 'hash_func' when it is not NULL, HM_DEFAULT_HASH_FUNCTION
// otherwise.
HashMap hm_create_ex(size_t key_size, size_t value_size, HmHashFunc hash_func)
{
    HashMap hm = hm_create(key_size, value_size);
    hm.hash_func = (hash_func != NULL) ? hash_func : HM_DEFAULT_HASH_FUNCTION;
    return hm;
}
void hm_free(HashMap *hm)
{
if (hm == NULL) {
@@ -297,7 +332,7 @@ bool hm_put(HashMap *hm, const void *key, const void *value)
size_t value_size = hm__value_size(hm, value);
// Get the head bucket
size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
size_t idx = hm->hash_func(key, key_size) % hm->capacity;
Hm__Bucket *head = hm->map[idx];
// Check if key already exists
@@ -330,7 +365,7 @@ bool hm_put(HashMap *hm, const void *key, const void *value)
if (new_bucket->prev != NULL) {
new_bucket->prev->next = new_bucket;
} else {
size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
size_t idx = hm->hash_func(key, key_size) % hm->capacity;
hm->map[idx] = new_bucket;
}
}
@@ -369,7 +404,7 @@ void *hm_get(const HashMap *hm, const void *key)
}
size_t key_size = hm__key_size(hm, key);
size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
size_t idx = hm->hash_func(key, key_size) % hm->capacity;
Hm__Bucket *head = hm->map[idx];
if (head == NULL) {
@@ -392,7 +427,7 @@ bool hm_remove(HashMap *hm, const void *key)
}
size_t key_size = hm__key_size(hm, key);
size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
size_t idx = hm->hash_func(key, key_size) % hm->capacity;
Hm__Bucket *head = hm->map[idx];
if (head == NULL) {
@@ -608,7 +643,7 @@ bool hm__rehash(HashMap *hm)
Hm__Bucket *next = cur->next;
size_t key_size = hm__key_size(hm, cur->key);
uint64_t idx = hm__fnv1a(cur->key, key_size) % new_capacity;
uint64_t idx = hm->hash_func(cur->key, key_size) % new_capacity;
// Insert new bucket
cur->prev = NULL;
@@ -667,6 +702,8 @@ Hm__Bucket *hm__bucket_create(const void *key, size_t key_size, const void *valu
/*
* Revision history:
*
* 1.2.0 (2026-04-08) New HmHashFunc type, configurable by the user with
* hm_create_ex() or HM_DEFAULT_HASH_FUNCTION
* 1.1.2 (2026-04-08) Optimized Hm__Bucket creation by only allocating one
* buffer instead of three per bucket
* 1.1.1 (2026-02-13) Added new hm_of() helper macro

58
test.c
View File

@@ -1,58 +0,0 @@
#include <stdio.h>
#define HM_IMPLEMENTATION
#define HM_INITIAL_CAPACITY 1
#include "hm.h"
void print_hm(const HashMap *hm)
{
HashMapIterator it = hm_iterate(hm);
while (hm_next(&it) != NULL) {
printf("%-15s: ", (const char*)hm_key(&it));
printf("%d\n", *(int*)hm_value(&it));
}
}
// Sample input for the frequency count: 49 words, many of them repeated
const char *words[] = {
    "apple", "banana", "cherry", "date", "elderberry", "fig", "grape",
    "honeydew", "apple", "banana", "kiwi", "lemon", "mango", "nectarine",
    "orange", "papaya", "quince", "raspberry", "strawberry", "tangerine",
    "ugli", "voavanga", "watermelon", "xigua", "yellowfruit", "zucchini",
    "apple", "banana", "cherry", "mango", "kiwi", "lemon", "lemon", "orange",
    "papaya", "papaya", "grape", "grape", "grape", "strawberry", "strawberry",
    "date", "date", "date", "fig", "fig", "elderberry", "nectarine", "plum"
};

// Number of entries in 'words'
const size_t N = sizeof(words) / sizeof(*words);

// Initial count stored for a word seen for the first time
const int ONE = 1;
int main(void)
{
HashMap freq = hm_create(0, sizeof(int));
for (size_t i = 0; i < N; ++i) {
const char *word = words[i];
int *f = hm_get(&freq, word);
if (f == NULL) {
hm_put(&freq, word, &ONE);
} else {
*f += 1;
}
}
print_hm(&freq);
printf("count = %zu\n", freq.count);
HashMapIterator it = hm_iterate(&freq);
hm_next(&it);
while (hm_value(&it) != NULL) {
const void *key = hm_key(&it);
hm_next(&it);
hm_remove(&freq, key);
}
printf("count = %zu\n", freq.count);
hm_free(&freq);
return 0;
}