commit b13e0f09c12809f6799c1615194932c7638d7510
Author: seajee
Date:   Tue Nov 18 21:33:06 2025 +0100

    Initial commit

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1cc8e11
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+
+test: main.c hm.h
+	cc -Wall -Wextra -ggdb -o test main.c
+
+clean:
+	rm -rf test
diff --git a/hm.h b/hm.h
new file mode 100644
index 0000000..47cdc01
--- /dev/null
+++ b/hm.h
@@ -0,0 +1,570 @@
+// hm.h - v1.0.0 - MIT License
+// chained hash table implementation as a single header library.
+//
+// Define HM_IMPLEMENTATION in exactly one translation unit before including
+// this header to emit the function definitions.
+
+#ifndef _HM_H_
+#define _HM_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+// Number of slots allocated by the first hm_put(). Must be greater than zero.
+#ifndef HM_INITIAL_CAPACITY
+#  define HM_INITIAL_CAPACITY 256
+#endif // HM_INITIAL_CAPACITY
+
+// The table grows once count + 1 would exceed capacity * HM_MAX_LOAD_FACTOR.
+#ifndef HM_MAX_LOAD_FACTOR
+#  define HM_MAX_LOAD_FACTOR 0.75f
+#endif // HM_MAX_LOAD_FACTOR
+
+// Multiplier applied to the capacity on every rehash.
+#ifndef HM_GROWTH_FACTOR
+#  define HM_GROWTH_FACTOR 2
+#endif // HM_GROWTH_FACTOR
+
+#ifndef HM_NO_ASSERT
+#  ifndef HM_ASSERT
+#    include <assert.h>
+#    define HM_ASSERT assert
+#  endif // HM_ASSERT
+#else
+#  define HM_ASSERT(...) ((void)0)
+#endif // HM_NO_ASSERT
+
+#ifndef HM_REALLOC
+#  include <stdlib.h>
+#  define HM_REALLOC realloc
+#endif // HM_REALLOC
+
+#ifndef HM_FREE
+#  include <stdlib.h>
+#  define HM_FREE free
+#endif // HM_FREE
+
+#ifdef __cplusplus
+extern "C" { // Prevent name mangling
+#endif // __cplusplus
+
+// One entry of a chain. Key and value are heap copies owned by the bucket.
+typedef struct Hm__Bucket {
+    struct Hm__Bucket *next;
+    struct Hm__Bucket *prev;
+    void *key;
+    void *value;
+    size_t value_size;
+} Hm__Bucket;
+
+// key_size / value_size of 0 mean "NUL-terminated string"; any other value is
+// the fixed size in bytes of every key / value.
+typedef struct {
+    Hm__Bucket **map;
+    size_t count;
+    size_t capacity;
+    size_t key_size;
+    size_t value_size;
+} HashMap;
+
+// Cursor over all entries. index is the slot of the current bucket.
+typedef struct {
+    const HashMap *hm;
+    Hm__Bucket *bucket;
+    size_t index;
+    bool end;
+} HashMapIterator;
+
+// Create and free
+HashMap hm_create(size_t key_size, size_t value_size);
+void hm_free(HashMap *hm);
+
+// Modify, access and remove
+bool hm_put(HashMap *hm, const void *key, const void *value);
+void *hm_get(const HashMap *hm, const void *key);
+bool hm_remove(HashMap *hm, const void *key);
+
+// Iterator functions
+HashMapIterator hm_iterate(const HashMap *hm);
+const void *hm_key(const HashMapIterator *it);
+void *hm_value(const HashMapIterator *it);
+void *hm_next(HashMapIterator *it);
+
+// Private functions
+uint64_t hm__fnv1a(const void *buffer, size_t size);
+size_t hm__key_size(const HashMap *hm, const void *key);
+size_t hm__value_size(const HashMap *hm, const void *value);
+bool hm__keycmp(const HashMap *hm, const void *key1, const void *key2);
+bool hm__rehash(HashMap *hm);
+Hm__Bucket *hm__bucket_create(const void *key, size_t key_size, const void *value, size_t value_size);
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // _HM_H_
+
+#ifdef HM_IMPLEMENTATION
+
+#ifdef __cplusplus
+extern "C" { // Prevent name mangling
+#endif // __cplusplus
+
+// Returns an empty map. No memory is allocated until the first hm_put().
+// Pass 0 as key_size / value_size for NUL-terminated strings.
+HashMap hm_create(size_t key_size, size_t value_size)
+{
+    HashMap hm;
+    memset(&hm, 0, sizeof(hm));
+    hm.capacity = HM_INITIAL_CAPACITY;
+    hm.key_size = key_size;
+    hm.value_size = value_size;
+    return hm;
+}
+
+// Releases every bucket and the table, then zeroes *hm. NULL is a no-op.
+void hm_free(HashMap *hm)
+{
+    if (hm == NULL) {
+        return;
+    }
+
+    if (hm->map != NULL) {
+        for (size_t i = 0; i < hm->capacity; ++i) {
+            Hm__Bucket *cur = hm->map[i];
+            while (cur != NULL) {
+                Hm__Bucket *next = cur->next;
+                HM_FREE(cur->key);
+                HM_FREE(cur->value);
+                HM_FREE(cur);
+                cur = next;
+            }
+        }
+        HM_FREE(hm->map);
+    }
+
+    memset(hm, 0, sizeof(*hm));
+}
+
+// Inserts a copy of key/value, or overwrites the value of an existing key.
+// Returns false on invalid parameters or allocation failure, in which case
+// no entry is added or modified.
+bool hm_put(HashMap *hm, const void *key, const void *value)
+{
+    if (hm == NULL || key == NULL || value == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return false;
+    }
+
+    // Ensure that HashMap is initialised
+    if (hm->map == NULL) {
+        hm->map = (Hm__Bucket**)HM_REALLOC(NULL, sizeof(*hm->map) * HM_INITIAL_CAPACITY);
+        if (hm->map == NULL) {
+            HM_ASSERT(false && "Reallocation failed");
+            return false;
+        }
+        hm->capacity = HM_INITIAL_CAPACITY;
+        memset(hm->map, 0, sizeof(*hm->map) * hm->capacity);
+    }
+
+    // Rehash if necessary
+    if ((float)hm->count + 1 > (float)hm->capacity * HM_MAX_LOAD_FACTOR) {
+        if (!hm__rehash(hm)) {
+            return false;
+        }
+    }
+
+    // Key-value sizes
+    size_t key_size = hm__key_size(hm, key);
+    size_t value_size = hm__value_size(hm, value);
+
+    // Get the head bucket
+    size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+    Hm__Bucket *head = hm->map[idx];
+
+    // Check if key already exists
+    for (Hm__Bucket *cur = head; cur != NULL; cur = cur->next) {
+        if (!hm__keycmp(hm, key, cur->key)) {
+            continue;
+        }
+
+        // Fixed or equal value_size: overwrite in place. memmove because the
+        // caller may pass a pointer obtained from hm_get().
+        if (hm->value_size != 0 || value_size == cur->value_size) {
+            memmove(cur->value, value, value_size);
+            return true;
+        }
+
+        // Different size: build the replacement first so that `value` may
+        // alias the old buffer and a failed allocation changes nothing.
+        void *new_value = HM_REALLOC(NULL, value_size);
+        if (new_value == NULL) {
+            HM_ASSERT(false && "Reallocation failed");
+            return false;
+        }
+        memcpy(new_value, value, value_size);
+
+        HM_FREE(cur->value);
+        cur->value = new_value;
+        cur->value_size = value_size;
+
+        return true;
+    }
+
+    // Insert the new value
+    Hm__Bucket *new_bucket = hm__bucket_create(key, key_size, value, value_size);
+    if (new_bucket == NULL) {
+        return false;
+    }
+
+    new_bucket->next = head;
+    if (head != NULL) {
+        head->prev = new_bucket;
+    }
+    hm->map[idx] = new_bucket;
+    ++hm->count;
+
+    return true;
+}
+
+// Returns a pointer to the stored value for key, or NULL when absent. The
+// pointer stays valid until the entry is overwritten with a value of a
+// different size, removed, or the map is freed.
+void *hm_get(const HashMap *hm, const void *key)
+{
+    if (hm == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return NULL;
+    }
+
+    if (hm->map == NULL || key == NULL) {
+        return NULL;
+    }
+
+    size_t key_size = hm__key_size(hm, key);
+    size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+
+    for (Hm__Bucket *cur = hm->map[idx]; cur != NULL; cur = cur->next) {
+        if (hm__keycmp(hm, key, cur->key)) {
+            return cur->value;
+        }
+    }
+
+    return NULL;
+}
+
+// Unlinks and frees the entry for key. Returns false when it does not exist.
+bool hm_remove(HashMap *hm, const void *key)
+{
+    if (hm == NULL || hm->map == NULL || key == NULL) {
+        return false;
+    }
+
+    size_t key_size = hm__key_size(hm, key);
+    size_t idx = hm__fnv1a(key, key_size) % hm->capacity;
+
+    for (Hm__Bucket *cur = hm->map[idx]; cur != NULL; cur = cur->next) {
+        if (!hm__keycmp(hm, key, cur->key)) {
+            continue;
+        }
+
+        if (cur == hm->map[idx]) {
+            hm->map[idx] = cur->next;
+        } else {
+            cur->prev->next = cur->next;
+        }
+
+        if (cur->next != NULL) {
+            cur->next->prev = cur->prev;
+        }
+
+        HM_FREE(cur->key);
+        HM_FREE(cur->value);
+        HM_FREE(cur);
+        --hm->count;
+        return true;
+    }
+
+    return false;
+}
+
+// Returns an iterator positioned before the first entry; call hm_next() to
+// advance onto it.
+HashMapIterator hm_iterate(const HashMap *hm)
+{
+    HashMapIterator it;
+    memset(&it, 0, sizeof(it));
+
+    if (hm == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+    }
+
+    it.hm = hm;
+
+    return it;
+}
+
+// Key of the current entry, or NULL before the first hm_next() and at the end.
+const void *hm_key(const HashMapIterator *it)
+{
+    if (it == NULL || it->hm == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return NULL;
+    }
+
+    // Not started yet, or reached the end
+    if (it->end || it->bucket == NULL) {
+        return NULL;
+    }
+
+    return it->bucket->key;
+}
+
+// Value of the current entry, or NULL before the first hm_next() and at the end.
+void *hm_value(const HashMapIterator *it)
+{
+    if (it == NULL || it->hm == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return NULL;
+    }
+
+    // Not started yet, or reached the end
+    if (it->end || it->bucket == NULL) {
+        return NULL;
+    }
+
+    return it->bucket->value;
+}
+
+// Advances to the next entry and returns its value, or NULL once every entry
+// has been visited. Removing the entry the iterator currently points at
+// invalidates the iterator; advance first, then remove.
+void *hm_next(HashMapIterator *it)
+{
+    if (it == NULL || it->hm == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return NULL;
+    }
+
+    // Reached the end
+    if (it->end) {
+        return NULL;
+    }
+
+    // Slot at which the scan for the next non-empty chain starts
+    size_t idx = 0;
+    if (it->bucket != NULL) {
+        // Next bucket in the current chain
+        if (it->bucket->next != NULL) {
+            it->bucket = it->bucket->next;
+            return it->bucket->value;
+        }
+        idx = it->index + 1;
+    }
+
+    // Find next list of buckets; map is NULL until the first hm_put()
+    if (it->hm->map != NULL) {
+        for (; idx < it->hm->capacity; ++idx) {
+            if (it->hm->map[idx] != NULL) {
+                it->index = idx;
+                it->bucket = it->hm->map[idx];
+                return it->bucket->value;
+            }
+        }
+    }
+
+    // Reached the end
+    it->bucket = NULL;
+    it->end = true;
+    return NULL;
+}
+
+// 64-bit FNV-1a hash of `size` bytes at `buffer`.
+uint64_t hm__fnv1a(const void *buffer, size_t size)
+{
+    if (buffer == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return 0;
+    }
+
+    const uint8_t *b = (const uint8_t*)buffer;
+    uint64_t hash = 14695981039346656037ULL; // FNV offset basis
+
+    for (size_t i = 0; i < size; ++i) {
+        hash ^= b[i];
+        hash *= 1099511628211ULL; // FNV prime
+    }
+
+    return hash;
+}
+
+// Size in bytes of key: strlen + 1 for string keys, hm->key_size otherwise.
+size_t hm__key_size(const HashMap *hm, const void *key)
+{
+    if (hm == NULL || key == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return 0;
+    }
+
+    return (hm->key_size == 0 ? strlen((const char*)key) + 1 : hm->key_size);
+}
+
+// Size in bytes of value: strlen + 1 for string values, hm->value_size otherwise.
+size_t hm__value_size(const HashMap *hm, const void *value)
+{
+    if (hm == NULL || value == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return 0;
+    }
+
+    return (hm->value_size == 0 ? strlen((const char*)value) + 1 : hm->value_size);
+}
+
+// True when both keys are equal (strcmp for string keys, memcmp otherwise).
+bool hm__keycmp(const HashMap *hm, const void *key1, const void *key2)
+{
+    if (hm == NULL || key1 == NULL || key2 == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return false;
+    }
+
+    if (hm->key_size == 0) {
+        return strcmp((const char*)key1, (const char*)key2) == 0;
+    }
+
+    return memcmp(key1, key2, hm->key_size) == 0;
+}
+
+// Grows the table by HM_GROWTH_FACTOR and relinks every bucket into it.
+// Buckets are moved, not copied, so value pointers stay valid.
+bool hm__rehash(HashMap *hm)
+{
+    if (hm == NULL || hm->map == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return false;
+    }
+
+    // Refuse to grow if the new capacity or its byte size would overflow
+    if (hm->capacity == 0 || hm->capacity > SIZE_MAX / sizeof(*hm->map) / HM_GROWTH_FACTOR) {
+        HM_ASSERT(false && "Capacity overflow");
+        return false;
+    }
+
+    // Allocate new table
+    size_t new_capacity = hm->capacity * HM_GROWTH_FACTOR;
+    Hm__Bucket **new_map = (Hm__Bucket**)HM_REALLOC(NULL, sizeof(*new_map) * new_capacity);
+    if (new_map == NULL) {
+        HM_ASSERT(false && "Reallocation failed");
+        return false;
+    }
+    memset(new_map, 0, sizeof(*new_map) * new_capacity);
+
+    // Iterate old table
+    for (size_t i = 0; i < hm->capacity; ++i) {
+        Hm__Bucket *cur = hm->map[i];
+        while (cur != NULL) {
+            Hm__Bucket *next = cur->next;
+
+            size_t key_size = hm__key_size(hm, cur->key);
+            size_t idx = hm__fnv1a(cur->key, key_size) % new_capacity;
+
+            // Push the bucket onto the front of its new chain
+            cur->prev = NULL;
+            cur->next = new_map[idx];
+            if (new_map[idx] != NULL) {
+                new_map[idx]->prev = cur;
+            }
+            new_map[idx] = cur;
+
+            cur = next;
+        }
+    }
+
+    HM_FREE(hm->map);
+    hm->map = new_map;
+    hm->capacity = new_capacity;
+
+    return true;
+}
+
+// Allocates a bucket holding private copies of key and value. Returns NULL on
+// invalid parameters or allocation failure (nothing is leaked).
+Hm__Bucket *hm__bucket_create(const void *key, size_t key_size, const void *value, size_t value_size)
+{
+    if (key == NULL || value == NULL) {
+        HM_ASSERT(false && "Invalid parameters");
+        return NULL;
+    }
+
+    // Allocate bucket
+    Hm__Bucket *bucket = (Hm__Bucket*)HM_REALLOC(NULL, sizeof(*bucket));
+    if (bucket == NULL) {
+        HM_ASSERT(false && "Reallocation failed");
+        return NULL;
+    }
+    memset(bucket, 0, sizeof(*bucket));
+
+    // Allocate key
+    bucket->key = HM_REALLOC(NULL, key_size);
+    if (bucket->key == NULL) {
+        HM_ASSERT(false && "Reallocation failed");
+        HM_FREE(bucket);
+        return NULL;
+    }
+
+    // Allocate value
+    bucket->value = HM_REALLOC(NULL, value_size);
+    if (bucket->value == NULL) {
+        HM_ASSERT(false && "Reallocation failed");
+        HM_FREE(bucket->key);
+        HM_FREE(bucket);
+        return NULL;
+    }
+
+    // Copy Key-value data
+    memcpy(bucket->key, key, key_size);
+    memcpy(bucket->value, value, value_size);
+    bucket->value_size = value_size;
+
+    return bucket;
+}
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // HM_IMPLEMENTATION
+
+// TODO: Find a way to reduce calls to malloc
+
+/*
+ * Revision history:
+ *
+ *     1.0.0 (2025-11-18) Initial release
+ */
+
+/*
+ * MIT License
+ *
+ * Copyright (c) 2025 seajee
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..7f87a18
--- /dev/null
+++ b/main.c
@@ -0,0 +1,61 @@
+#include <stdio.h>
+
+#define HM_IMPLEMENTATION
+#define HM_INITIAL_CAPACITY 1
+#include "hm.h"
+
+// Prints every word with its frequency, one entry per line.
+void print_hm(const HashMap *hm)
+{
+    HashMapIterator it = hm_iterate(hm);
+    while (hm_next(&it) != NULL) {
+        printf("%-15s: ", (const char*)hm_key(&it));
+        printf("%d\n", *(int*)hm_value(&it));
+    }
+}
+
+const char *words[49] = {
+    "apple", "banana", "cherry", "date", "elderberry", "fig", "grape",
+    "honeydew", "apple", "banana", "kiwi", "lemon", "mango", "nectarine",
+    "orange", "papaya", "quince", "raspberry", "strawberry", "tangerine",
+    "ugli", "voavanga", "watermelon", "xigua", "yellowfruit", "zucchini",
+    "apple", "banana", "cherry", "mango", "kiwi", "lemon", "lemon", "orange",
+    "papaya", "papaya", "grape", "grape", "grape", "strawberry", "strawberry",
+    "date", "date", "date", "fig", "fig", "elderberry", "nectarine", "plum"
+};
+const size_t N = sizeof(words)/sizeof(words[0]);
+const int ONE = 1;
+
+int main(void)
+{
+    HashMap freq = hm_create(0, sizeof(int));
+
+    for (size_t i = 0; i < N; ++i) {
+        const char *word = words[i];
+
+        int *f = hm_get(&freq, word);
+        if (f == NULL) {
+            hm_put(&freq, word, &ONE);
+        } else {
+            *f += 1;
+        }
+    }
+
+    print_hm(&freq);
+    printf("count = %zu\n", freq.count);
+
+    // Remove every entry while iterating: advance first, because hm_remove()
+    // frees the bucket (and the key) the iterator was pointing at.
+    HashMapIterator it = hm_iterate(&freq);
+    hm_next(&it);
+    while (hm_value(&it) != NULL) {
+        const void *key = hm_key(&it);
+        hm_next(&it);
+        hm_remove(&freq, key);
+    }
+
+    printf("count = %zu\n", freq.count);
+    hm_free(&freq);
+
+    return 0;
+}