Graphviz 13.0.0~dev.20250121.0651
Loading...
Searching...
No Matches
refstr.c
Go to the documentation of this file.
1
6/*************************************************************************
7 * Copyright (c) 2011 AT&T Intellectual Property
8 * All rights reserved. This program and the accompanying materials
9 * are made available under the terms of the Eclipse Public License v1.0
10 * which accompanies this distribution, and is available at
11 * https://www.eclipse.org/legal/epl-v10.html
12 *
13 * Contributors: Details at https://graphviz.org
14 *************************************************************************/
15
16#include <assert.h>
17#include <cgraph/cghdr.h>
18#include <stdbool.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <util/alloc.h>
24#include <util/unreachable.h>
25
26/*
27 * reference counted strings.
28 */
29
30typedef struct {
31 uint64_t refcnt: sizeof(uint64_t) * 8 - 1;
32 uint64_t is_html: 1;
33 char s[];
34} refstr_t;
35
36_Static_assert(
37 offsetof(refstr_t, s) % 2 == 0,
38 "refstr_t.s is not at an even offset, breaking lib/cgraph/id.c code");
39
46static bool refstr_eq(const char *a, bool is_html, const refstr_t *b) {
47 if (is_html != b->is_html) {
48 return false;
49 }
50 return strcmp(a, b->s) == 0;
51}
52
54typedef struct {
56 size_t size;
57 size_t capacity_exp;
58} strdict_t;
59
61
68static uint64_t hash(const void *key, size_t len, uint8_t extra) {
69 assert(key != NULL || len == 0);
70
71 // The following implementation is based on the `MurmurHash64A` variant of the
72 // public domain MurmurHash by Austin Appleby. More information on this at
73 // https://github.com/aappleby/smhasher/. Relevant changes made to Austin’s
74 // original implementation:
75 // • Our implementation is alignment-agnostic. No assumption is made about
76 // the initial alignment of `key`.
77 // • Our implementation uses `unsigned char` pointers, avoiding Undefined
78 // Behavior when the input pointer originated from a non-`uint64_t`
79 // object. This is written in a style that allows contemporary compilers
80 // to optimize code back into wider 8-byte accesses where possible.
81 // • Our implementation supports an extra byte to be considered to have
82 // followed the main data. See calls to this function for why this `extra`
83 // parameter exists.
84
85 static const uint64_t seed = 0;
86
87 const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
88 const unsigned r = 47;
89
90 uint64_t h = seed ^ (len * m);
91
92 const unsigned char *data = key;
93 const unsigned char *end = data + len / sizeof(uint64_t) * sizeof(uint64_t);
94
95 while (data != end) {
96
97 uint64_t k;
98 memcpy(&k, data, sizeof(k));
99 data += sizeof(k);
100
101 k *= m;
102 k ^= k >> r;
103 k *= m;
104
105 h ^= k;
106 h *= m;
107 }
108
109 const unsigned char *data2 = data;
110
111 // accumulate extra byte
112 h ^= (uint64_t)extra << 56;
113
114 switch (len & 7) {
115 case 7:
116 h ^= (uint64_t)data2[6] << 48; // fall through
117 case 6:
118 h ^= (uint64_t)data2[5] << 40; // fall through
119 case 5:
120 h ^= (uint64_t)data2[4] << 32; // fall through
121 case 4:
122 h ^= (uint64_t)data2[3] << 24; // fall through
123 case 3:
124 h ^= (uint64_t)data2[2] << 16; // fall through
125 case 2:
126 h ^= (uint64_t)data2[1] << 8; // fall through
127 case 1:
128 h ^= (uint64_t)data2[0];
129 break;
130 default:
131 // nothing required
132 break;
133 }
134 h *= m;
135
136 h ^= h >> r;
137 h *= m;
138 h ^= h >> r;
139
140 return h;
141}
142
144static strdict_t *strdict_new(void) { return gv_alloc(sizeof(strdict_t)); }
145
151static size_t strdict_hash(const char *s, bool is_html) {
152 assert(s != NULL);
153 return (size_t)hash(s, strlen(s), is_html);
154}
155
158static refstr_t *const TOMBSTONE = (refstr_t *)-1;
159
161static void strdict_add(strdict_t *dict, refstr_t *r) {
162 assert(dict != NULL);
163 assert(r != NULL);
164 assert(r != TOMBSTONE);
165
166 // a watermark ratio at which the set capacity should be expanded
167 static const size_t OCCUPANCY_THRESHOLD_PERCENT = 70;
168
169 // do we need to expand the backing store?
170 size_t capacity = dict->buckets == NULL ? 0 : 1ul << dict->capacity_exp;
171 const bool grow = 100 * dict->size >= OCCUPANCY_THRESHOLD_PERCENT * capacity;
172
173 if (grow) {
174 const size_t new_c = capacity == 0 ? 10 : dict->capacity_exp + 1;
175 refstr_t **new_b = gv_calloc(1ul << new_c, sizeof(refstr_t *));
176
177 // Construct a new dictionary and copy everything into it. Note we need to
178 // rehash because capacity (and hence modulo wraparound behavior) has
179 // changed. This conveniently flushes out the tombstones too.
180 strdict_t new_d = {.buckets = new_b, .capacity_exp = new_c};
181 for (size_t i = 0; i < capacity; ++i) {
182 // skip empty buckets
183 if (dict->buckets[i] == NULL) {
184 continue;
185 }
186 // skip deleted buckets
187 if (dict->buckets[i] == TOMBSTONE) {
188 continue;
189 }
190 strdict_add(&new_d, dict->buckets[i]);
191 }
192
193 // replace ourselves with this new dictionary
194 free(dict->buckets);
195 *dict = new_d;
196 }
197
198 assert(dict->buckets != NULL);
199 capacity = 1ul << dict->capacity_exp;
200 assert(capacity > dict->size);
201
202 const size_t h = strdict_hash(r->s, r->is_html);
203
204 for (size_t i = 0; i < capacity; ++i) {
205 const size_t candidate = (h + i) % capacity;
206
207 // if we found an empty bucket or a previously deleted bucket, we can insert
208 if (dict->buckets[candidate] == NULL ||
209 dict->buckets[candidate] == TOMBSTONE) {
210 dict->buckets[candidate] = r;
211 ++dict->size;
212 return;
213 }
214 }
215
216 UNREACHABLE();
217}
218
225static refstr_t *strdict_find(strdict_t *dict, const char *s, bool is_html) {
226 assert(dict != NULL);
227 assert(s != NULL);
228
229 const size_t h = strdict_hash(s, is_html);
230 const size_t capacity = dict->buckets == NULL ? 0 : 1ul << dict->capacity_exp;
231
232 for (size_t i = 0; i < capacity; ++i) {
233 const size_t candidate = (h + i) % capacity;
234
235 // if we found an empty bucket, the sought item does not exist
236 if (dict->buckets[candidate] == NULL) {
237 return NULL;
238 }
239
240 // if we found a previously deleted slot, skip over it
241 if (dict->buckets[candidate] == TOMBSTONE) {
242 continue;
243 }
244
245 // is this the string we are searching for?
246 if (refstr_eq(s, is_html, dict->buckets[candidate])) {
247 return dict->buckets[candidate];
248 }
249 }
250
251 // not found
252 return NULL;
253}
254
256static void strdict_remove(strdict_t *dict, const refstr_t *key) {
257 assert(dict != NULL);
258 assert(key != NULL);
259 assert(key != TOMBSTONE);
260
261 const size_t h = strdict_hash(key->s, key->is_html);
262 const size_t capacity = dict->buckets == NULL ? 0 : 1ul << dict->capacity_exp;
263
264 for (size_t i = 0; i < capacity; ++i) {
265 const size_t candidate = (h + i) % capacity;
266
267 // if we found an empty bucket, the sought item does not exist
268 if (dict->buckets[candidate] == NULL) {
269 return;
270 }
271
272 // if we found a previously deleted bucket, skip over it
273 if (dict->buckets[candidate] == TOMBSTONE) {
274 continue;
275 }
276
277 // is this the string we are searching for?
278 if (refstr_eq(key->s, key->is_html, dict->buckets[candidate])) {
279 assert(dict->size > 0);
280 free(dict->buckets[candidate]);
281 dict->buckets[candidate] = TOMBSTONE;
282 --dict->size;
283 return;
284 }
285 }
286}
287
289static void strdict_free(strdict_t **dict) {
290 assert(dict != NULL);
291
292 if (*dict != NULL && (*dict)->buckets != NULL) {
293 for (size_t i = 0; i < 1ul << (*dict)->capacity_exp; ++i) {
294 if ((*dict)->buckets[i] != TOMBSTONE) {
295 free((*dict)->buckets[i]);
296 }
297 }
298 free((*dict)->buckets);
299 }
300
301 free(*dict);
302 *dict = NULL;
303}
304
305/* refdict:
306 * Return a pointer to the string dictionary associated with g.
307 * If necessary, create it.
308 */
310 strdict_t **dictref;
311
312 if (g)
313 dictref = (strdict_t **)&g->clos->strdict;
314 else
315 dictref = &Refdict_default;
316 if (*dictref == NULL) {
317 *dictref = strdict_new();
318 }
319 return dictref;
320}
321
323{
325 return 0;
326}
327
328static char *refstrbind(strdict_t *strdict, const char *s) {
329 refstr_t *r;
330 r = strdict_find(strdict, s, false);
331 if (r)
332 return r->s;
333 else
334 return NULL;
335}
336
337char *agstrbind(Agraph_t * g, const char *s)
338{
339 return refstrbind(*refdict(g), s);
340}
341
342static char *agstrdup_internal(Agraph_t *g, const char *s, bool is_html) {
343 refstr_t *r;
344 size_t sz;
345
346 if (s == NULL)
347 return NULL;
348 strdict_t *strdict = *refdict(g);
349 r = strdict_find(strdict, s, is_html);
350 if (r)
351 r->refcnt++;
352 else {
353 sz = sizeof(refstr_t) + strlen(s) + 1;
354 if (g)
355 r = gv_calloc(sz, sizeof(char));
356 else {
357 r = malloc(sz);
358 if (sz > 0 && r == NULL) {
359 return NULL;
360 }
361 }
362 r->refcnt = 1;
363 r->is_html = is_html;
364 strcpy(r->s, s);
365 strdict_add(strdict, r);
366 }
367 return r->s;
368}
369
370char *agstrdup(Agraph_t *g, const char *s) {
371 return agstrdup_internal(g, s, false);
372}
373
374char *agstrdup_html(Agraph_t *g, const char *s) {
375 return agstrdup_internal(g, s, true);
376}
377
378int agstrfree(Agraph_t *g, const char *s, bool is_html) {
379 refstr_t *r;
380
381 if (s == NULL)
382 return FAILURE;
383
384 strdict_t *strdict = *refdict(g);
385 r = strdict_find(strdict, s, is_html);
386 if (r && r->s == s) {
387 r->refcnt--;
388 if (r->refcnt == 0) {
389 strdict_remove(strdict, r);
390 }
391 }
392 if (r == NULL)
393 return FAILURE;
394 return SUCCESS;
395}
396
397/* aghtmlstr:
398 * Return true if s is an HTML string.
399 * We assume s is within a refstr.
400 */
401int aghtmlstr(const char *s)
402{
403 const refstr_t *key;
404
405 if (s == NULL)
406 return 0;
407 key = (const refstr_t *)(s - offsetof(refstr_t, s));
408 return key->is_html;
409}
410
411#ifdef DEBUG
412static int refstrprint(const refstr_t *r) {
413 fprintf(stderr, "%s\n", r->s);
414 return 0;
415}
416
417void agrefstrdump(Agraph_t * g)
418{
419 const strdict_t *d = *refdict(g);
420 for (size_t i = 0;
421 d != NULL && d->buckets != NULL && i < 1ul << d->capacity_exp; ++i) {
422 refstrprint(d->buckets[i]);
423 }
424}
425#endif
Memory allocation wrappers that exit on failure.
static void * gv_calloc(size_t nmemb, size_t size)
Definition alloc.h:26
static void * gv_alloc(size_t size)
Definition alloc.h:47
cgraph.h additions
#define FAILURE
Definition cghdr.h:45
#define SUCCESS
Definition cghdr.h:44
#define hash
Definition dthdr.h:13
static long seed
Definition exeval.c:1035
static double len(glCompPoint p)
Definition glutils.c:150
void * malloc(YYSIZE_T)
void free(void *)
node NULL
Definition grammar.y:163
int aghtmlstr(const char *s)
Definition refstr.c:401
int agstrfree(Agraph_t *g, const char *s, bool is_html)
Definition refstr.c:378
char * agstrdup(Agraph_t *g, const char *s)
returns a pointer to a reference-counted copy of the argument string, creating one if necessary
Definition refstr.c:370
char * agstrbind(Agraph_t *g, const char *s)
Definition refstr.c:337
char * agstrdup_html(Agraph_t *g, const char *s)
Definition refstr.c:374
static refstr_t * strdict_find(strdict_t *dict, const char *s, bool is_html)
Definition refstr.c:225
int agstrclose(Agraph_t *g)
Definition refstr.c:322
static void strdict_add(strdict_t *dict, refstr_t *r)
add a reference-counted string to a dictionary
Definition refstr.c:161
static strdict_t * Refdict_default
Definition refstr.c:60
static void strdict_remove(strdict_t *dict, const refstr_t *key)
remove a reference-counted string from a dictionary
Definition refstr.c:256
static char * agstrdup_internal(Agraph_t *g, const char *s, bool is_html)
Definition refstr.c:342
static strdict_t * strdict_new(void)
create a new string dictionary
Definition refstr.c:144
static strdict_t ** refdict(Agraph_t *g)
Definition refstr.c:309
static void strdict_free(strdict_t **dict)
destroy a string dictionary
Definition refstr.c:289
static bool refstr_eq(const char *a, bool is_html, const refstr_t *b)
Definition refstr.c:46
static char * refstrbind(strdict_t *strdict, const char *s)
Definition refstr.c:328
static size_t strdict_hash(const char *s, bool is_html)
Definition refstr.c:151
static refstr_t *const TOMBSTONE
Definition refstr.c:158
void * strdict
shared string dict
Definition cgraph.h:413
graph or subgraph
Definition cgraph.h:424
Agclos_t * clos
shared resources
Definition cgraph.h:439
Definition legal.c:50
uint64_t refcnt
Definition refstr.c:31
char s[]
Definition refstr.c:33
uint64_t is_html
Definition refstr.c:32
a string dictionary
Definition refstr.c:54
size_t capacity_exp
log₂ size of buckets
Definition refstr.c:57
refstr_t ** buckets
backing store of elements
Definition refstr.c:55
size_t size
number of elements in the dictionary
Definition refstr.c:56
Definition grammar.c:93
#define UNREACHABLE()
Definition unreachable.h:30