-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathht.h
330 lines (289 loc) · 7.68 KB
/
ht.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/* Simple hash table implemented in C.
* ©2023 Yuichiro Nakada
*
* This software is released under the MIT License.
* https://github.com/benhoyt/ht
* */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#ifndef _HT_H
#define _HT_H
// Hash table structure: create with ht_create, free with ht_destroy.
typedef struct ht ht;
// Hash table iterator: create with ht_iterator, iterate with ht_next.
typedef struct {
const char* key; // current key
void* value; // current value
// Don't use these fields directly.
ht* _table; // reference to hash table being iterated
size_t _index; // current index into ht._entries
} hti;
#endif // _HT_H
// Hash table entry (slot may be filled or empty).
typedef struct {
const char* key; // key is NULL if this slot is empty
void* value;
} ht_entry;
// Hash table structure: create with ht_create, free with ht_destroy.
struct ht {
ht_entry* entries; // hash slots
size_t capacity; // size of _entries array
size_t length; // number of items in hash table
};
#define INITIAL_CAPACITY 16 // must not be zero
ht* ht_create(void)
{
// Allocate space for hash table struct.
ht* table = malloc(sizeof(ht));
if (table == NULL) {
return NULL;
}
table->length = 0;
table->capacity = INITIAL_CAPACITY;
// Allocate (zero'd) space for entry buckets.
table->entries = calloc(table->capacity, sizeof(ht_entry));
if (table->entries == NULL) {
free(table); // error, free table before we return!
return NULL;
}
return table;
}
void ht_destroy(ht* table)
{
// First free allocated keys.
for (size_t i = 0; i < table->capacity; i++) {
free((void*)table->entries[i].key);
}
// Then free entries array and table itself.
free(table->entries);
free(table);
}
#define FNV_OFFSET 14695981039346656037UL
#define FNV_PRIME 1099511628211UL
// Return 64-bit FNV-1a hash for key (NUL-terminated). See description:
// https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
static uint64_t hash_key(const char* key)
{
uint64_t hash = FNV_OFFSET;
for (const char* p = key; *p; p++) {
hash ^= (uint64_t)(unsigned char)(*p);
hash *= FNV_PRIME;
}
return hash;
}
void* ht_get(ht* table, const char* key)
{
// AND hash with capacity-1 to ensure it's within entries array.
uint64_t hash = hash_key(key);
size_t index = (size_t)(hash & (uint64_t)(table->capacity - 1));
// Loop till we find an empty entry.
while (table->entries[index].key != NULL) {
if (strcmp(key, table->entries[index].key) == 0) {
// Found key, return value.
return table->entries[index].value;
}
// Key wasn't in this slot, move to next (linear probing).
index++;
if (index >= table->capacity) {
// At end of entries array, wrap around.
index = 0;
}
}
return NULL;
}
size_t ht_get_index(ht* this, const char* key)
{
// AND hash with capacity-1 to ensure it's within entries array.
uint64_t hash = hash_key(key);
size_t index = (size_t)(hash & (uint64_t)(this->capacity - 1));
// Loop till we find an empty entry.
while (this->entries[index].key != NULL) {
if (strcmp(key, this->entries[index].key) == 0) {
// Found key, return value.
return index;
}
// Key wasn't in this slot, move to next (linear probing).
index++;
if (index >= this->capacity) {
// At end of entries array, wrap around.
index = 0;
}
}
return -1;
}
int ht_del(ht *this, const char *key)
{
size_t i = ht_get_index(this, key);
if (i<0) return 0;
// free((void*)this->entries[i].key);
char *p = (char*)this->entries[i].key;
*p = 0;
int *v = this->entries[i].value;
*v = -1;
this->length--;
return 1;
}
// Internal function to set an entry (without expanding table).
static const char* ht_set_entry(ht_entry* entries, size_t capacity,
const char* key, void* value, size_t* plength)
{
// AND hash with capacity-1 to ensure it's within entries array.
uint64_t hash = hash_key(key);
size_t index = (size_t)(hash & (uint64_t)(capacity - 1));
// Loop till we find an empty entry.
while (entries[index].key != NULL) {
if (entries[index].key[0]==0) break; // add
if (strcmp(key, entries[index].key) == 0) {
// Found key (it already exists), update value.
entries[index].value = value;
return entries[index].key;
}
// Key wasn't in this slot, move to next (linear probing).
index++;
if (index >= capacity) {
// At end of entries array, wrap around.
index = 0;
}
}
// Didn't find key, allocate+copy if needed, then insert it.
if (plength != NULL) {
key = strdup(key);
if (key == NULL) {
return NULL;
}
(*plength)++;
}
entries[index].key = (char*)key;
entries[index].value = value;
return key;
}
// Expand hash table to twice its current size. Return true on success,
// false if out of memory.
static bool ht_expand(ht* table)
{
// Allocate new entries array.
size_t new_capacity = table->capacity * 2;
if (new_capacity < table->capacity) {
return false; // overflow (capacity would be too big)
}
ht_entry* new_entries = calloc(new_capacity, sizeof(ht_entry));
if (new_entries == NULL) {
return false;
}
// Iterate entries, move all non-empty ones to new table's entries.
for (size_t i = 0; i < table->capacity; i++) {
ht_entry entry = table->entries[i];
if (entry.key != NULL) {
ht_set_entry(new_entries, new_capacity, entry.key,
entry.value, NULL);
}
}
// Free old entries array and update this table's details.
free(table->entries);
table->entries = new_entries;
table->capacity = new_capacity;
return true;
}
const char* ht_set(ht* table, const char* key, void* value)
{
assert(value != NULL);
if (value == NULL) return NULL;
// If length will exceed half of current capacity, expand it.
if (table->length >= table->capacity / 2) {
if (!ht_expand(table)) return NULL;
}
// Set entry and update length.
return ht_set_entry(table->entries, table->capacity, key, value, &table->length);
}
size_t ht_length(ht* table)
{
return table->length;
}
hti ht_iterator(ht* table)
{
hti it;
it._table = table;
it._index = 0;
return it;
}
bool ht_next(hti* it)
{
// Loop till we've hit end of entries array.
ht* table = it->_table;
while (it->_index < table->capacity) {
size_t i = it->_index;
it->_index++;
if (table->entries[i].key != NULL) {
// Found next non-empty item, update iterator key and value.
ht_entry entry = table->entries[i];
it->key = entry.key;
it->value = entry.value;
return true;
}
}
return false;
}
void ht_print(ht* this)
{
hti it = ht_iterator(this);
while (ht_next(&it)) {
printf("%s %d\n", it.key, *(int*)it.value);
}
}
#if 0
// Example:
// $ echo 'foo bar the bar bar bar the' | ./demo
// foo 1
// bar 4
// the 2
// 3
void exit_nomem(void)
{
fprintf(stderr, "out of memory\n");
exit(1);
}
int main(void)
{
ht* counts = ht_create();
if (counts == NULL) {
exit_nomem();
}
// Read next word from stdin (at most 100 chars long).
char word[101];
while (scanf("%100s", word) != EOF) {
// Look up word.
void* value = ht_get(counts, word);
if (value != NULL) {
// Already exists, increment int that value points to.
int* pcount = (int*)value;
(*pcount)++;
continue;
}
// Word not found, allocate space for new int and set to 1.
int* pcount = malloc(sizeof(int));
if (pcount == NULL) {
exit_nomem();
}
*pcount = 1;
if (ht_set(counts, word, pcount) == NULL) {
exit_nomem();
}
}
// ht_del(counts, "the");
// ht_print(counts);
// Print out words and frequencies, freeing values as we go.
hti it = ht_iterator(counts);
while (ht_next(&it)) {
printf("%s %d\n", it.key, *(int*)it.value);
free(it.value);
}
// Show the number of unique words.
printf("Unique words: %d\n", (int)ht_length(counts));
ht_destroy(counts);
return 0;
}
#endif