Graphviz 13.0.0~dev.20250424.1043
Loading...
Searching...
No Matches
refstr.c
Go to the documentation of this file.
1
6/*************************************************************************
7 * Copyright (c) 2011 AT&T Intellectual Property
8 * All rights reserved. This program and the accompanying materials
9 * are made available under the terms of the Eclipse Public License v1.0
10 * which accompanies this distribution, and is available at
11 * https://www.eclipse.org/legal/epl-v10.html
12 *
13 * Contributors: Details at https://graphviz.org
14 *************************************************************************/
15
16#include <assert.h>
17#include <cgraph/cghdr.h>
18#include <stdbool.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <util/alloc.h>
24#include <util/unreachable.h>
25
26/*
27 * reference counted strings.
28 */
29
30typedef struct {
31 uint64_t refcnt: sizeof(uint64_t) * 8 - 1;
32 uint64_t is_html: 1;
33 char s[];
34} refstr_t;
35
36_Static_assert(
37 offsetof(refstr_t, s) % 2 == 0,
38 "refstr_t.s is not at an even offset, breaking lib/cgraph/id.c code");
39
46static bool refstr_eq(const char *a, bool is_html, const refstr_t *b) {
47 if (is_html != b->is_html) {
48 return false;
49 }
50 return strcmp(a, b->s) == 0;
51}
52
54typedef struct {
56 size_t size;
57 size_t capacity_exp;
58} strdict_t;
59
61
68static uint64_t hash(const void *key, size_t len, uint8_t extra) {
69 assert(key != NULL || len == 0);
70
71 // The following implementation is based on the `MurmurHash64A` variant of the
72 // public domain MurmurHash by Austin Appleby. More information on this at
73 // https://github.com/aappleby/smhasher/. Relevant changes made to Austin’s
74 // original implementation:
75 // • Our implementation is alignment-agnostic. No assumption is made about
76 // the initial alignment of `key`.
77 // • Our implementation uses `unsigned char` pointers, avoiding Undefined
78 // Behavior when the input pointer originated from a non-`uint64_t`
79 // object. This is written in a style that allows contemporary compilers
80 // to optimize code back into wider 8-byte accesses where possible.
81 // • Our implementation supports an extra byte to be considered to have
82 // followed the main data. See calls to this function for why this `extra`
83 // parameter exists.
84
85 static const uint64_t seed = 0;
86
87 const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
88 const unsigned r = 47;
89
90 uint64_t h = seed ^ (len * m);
91
92 const unsigned char *data = key;
93 const unsigned char *end = data + len / sizeof(uint64_t) * sizeof(uint64_t);
94
95 while (data != end) {
96
97 uint64_t k;
98 memcpy(&k, data, sizeof(k));
99 data += sizeof(k);
100
101 k *= m;
102 k ^= k >> r;
103 k *= m;
104
105 h ^= k;
106 h *= m;
107 }
108
109 const unsigned char *data2 = data;
110
111 // accumulate extra byte
112 h ^= (uint64_t)extra << 56;
113
114 switch (len & 7) {
115 case 7:
116 h ^= (uint64_t)data2[6] << 48; // fall through
117 case 6:
118 h ^= (uint64_t)data2[5] << 40; // fall through
119 case 5:
120 h ^= (uint64_t)data2[4] << 32; // fall through
121 case 4:
122 h ^= (uint64_t)data2[3] << 24; // fall through
123 case 3:
124 h ^= (uint64_t)data2[2] << 16; // fall through
125 case 2:
126 h ^= (uint64_t)data2[1] << 8; // fall through
127 case 1:
128 h ^= (uint64_t)data2[0];
129 break;
130 default:
131 // nothing required
132 break;
133 }
134 h *= m;
135
136 h ^= h >> r;
137 h *= m;
138 h ^= h >> r;
139
140 return h;
141}
142
144static strdict_t *strdict_new(void) { return gv_alloc(sizeof(strdict_t)); }
145
151static size_t strdict_hash(const char *s, bool is_html) {
152 assert(s != NULL);
153 return (size_t)hash(s, strlen(s), is_html);
154}
155
158static refstr_t *const TOMBSTONE = (refstr_t *)-1;
159
161static void strdict_add(strdict_t *dict, refstr_t *r) {
162 assert(dict != NULL);
163 assert(r != NULL);
164 assert(r != TOMBSTONE);
165
166 // a watermark ratio at which the set capacity should be expanded
167 static const size_t OCCUPANCY_THRESHOLD_PERCENT = 70;
168
169 // do we need to expand the backing store?
170 size_t capacity = dict->buckets == NULL ? 0 : (size_t)1 << dict->capacity_exp;
171 const bool grow = 100 * dict->size >= OCCUPANCY_THRESHOLD_PERCENT * capacity;
172
173 if (grow) {
174 const size_t new_c = capacity == 0 ? 10 : dict->capacity_exp + 1;
175 refstr_t **new_b = gv_calloc((size_t)1 << new_c, sizeof(refstr_t *));
176
177 // Construct a new dictionary and copy everything into it. Note we need to
178 // rehash because capacity (and hence modulo wraparound behavior) has
179 // changed. This conveniently flushes out the tombstones too.
180 strdict_t new_d = {.buckets = new_b, .capacity_exp = new_c};
181 for (size_t i = 0; i < capacity; ++i) {
182 // skip empty buckets
183 if (dict->buckets[i] == NULL) {
184 continue;
185 }
186 // skip deleted buckets
187 if (dict->buckets[i] == TOMBSTONE) {
188 continue;
189 }
190 strdict_add(&new_d, dict->buckets[i]);
191 }
192
193 // replace ourselves with this new dictionary
194 free(dict->buckets);
195 *dict = new_d;
196 }
197
198 assert(dict->buckets != NULL);
199 capacity = (size_t)1 << dict->capacity_exp;
200 assert(capacity > dict->size);
201
202 const size_t h = strdict_hash(r->s, r->is_html);
203
204 for (size_t i = 0; i < capacity; ++i) {
205 const size_t candidate = (h + i) % capacity;
206
207 // if we found an empty bucket or a previously deleted bucket, we can insert
208 if (dict->buckets[candidate] == NULL ||
209 dict->buckets[candidate] == TOMBSTONE) {
210 dict->buckets[candidate] = r;
211 ++dict->size;
212 return;
213 }
214 }
215
216 UNREACHABLE();
217}
218
225static refstr_t *strdict_find(strdict_t *dict, const char *s, bool is_html) {
226 assert(dict != NULL);
227 assert(s != NULL);
228
229 const size_t h = strdict_hash(s, is_html);
230 const size_t capacity = dict->buckets == NULL
231 ? 0 : (size_t)1 << dict->capacity_exp;
232
233 for (size_t i = 0; i < capacity; ++i) {
234 const size_t candidate = (h + i) % capacity;
235
236 // if we found an empty bucket, the sought item does not exist
237 if (dict->buckets[candidate] == NULL) {
238 return NULL;
239 }
240
241 // if we found a previously deleted slot, skip over it
242 if (dict->buckets[candidate] == TOMBSTONE) {
243 continue;
244 }
245
246 // is this the string we are searching for?
247 if (refstr_eq(s, is_html, dict->buckets[candidate])) {
248 return dict->buckets[candidate];
249 }
250 }
251
252 // not found
253 return NULL;
254}
255
257static void strdict_remove(strdict_t *dict, const refstr_t *key) {
258 assert(dict != NULL);
259 assert(key != NULL);
260 assert(key != TOMBSTONE);
261
262 const size_t h = strdict_hash(key->s, key->is_html);
263 const size_t capacity = dict->buckets == NULL
264 ? 0 : (size_t)1 << dict->capacity_exp;
265
266 for (size_t i = 0; i < capacity; ++i) {
267 const size_t candidate = (h + i) % capacity;
268
269 // if we found an empty bucket, the sought item does not exist
270 if (dict->buckets[candidate] == NULL) {
271 return;
272 }
273
274 // if we found a previously deleted bucket, skip over it
275 if (dict->buckets[candidate] == TOMBSTONE) {
276 continue;
277 }
278
279 // is this the string we are searching for?
280 if (refstr_eq(key->s, key->is_html, dict->buckets[candidate])) {
281 assert(dict->size > 0);
282 free(dict->buckets[candidate]);
283 dict->buckets[candidate] = TOMBSTONE;
284 --dict->size;
285 return;
286 }
287 }
288}
289
291static void strdict_free(strdict_t **dict) {
292 assert(dict != NULL);
293
294 if (*dict != NULL && (*dict)->buckets != NULL) {
295 for (size_t i = 0; i < (size_t)1 << (*dict)->capacity_exp; ++i) {
296 if ((*dict)->buckets[i] != TOMBSTONE) {
297 free((*dict)->buckets[i]);
298 }
299 }
300 free((*dict)->buckets);
301 }
302
303 free(*dict);
304 *dict = NULL;
305}
306
307/* refdict:
308 * Return a pointer to the string dictionary associated with g.
309 * If necessary, create it.
310 */
312 strdict_t **dictref;
313
314 if (g)
315 dictref = (strdict_t **)&g->clos->strdict;
316 else
317 dictref = &Refdict_default;
318 if (*dictref == NULL) {
319 *dictref = strdict_new();
320 }
321 return dictref;
322}
323
325{
327 return 0;
328}
329
330static char *refstrbind(strdict_t *strdict, const char *s, bool is_html) {
331 refstr_t *r;
332 r = strdict_find(strdict, s, is_html);
333 if (r)
334 return r->s;
335 else
336 return NULL;
337}
338
339char *agstrbind(Agraph_t *g, const char *s) {
340
341 // did this string originate from `agstrdup_html(g, …)`?
342 if (s != NULL) {
343 strdict_t *const strdict = *refdict(g);
344 refstr_t *const ref = strdict_find(strdict, s, true);
345 if (ref != NULL && ref->s == s) {
346 // create this copy as HTML-like
347 return agstrbind_html(g, s);
348 }
349 }
350
351 return agstrbind_text(g, s);
352}
353
354char *agstrbind_html(Agraph_t *g, const char *s) {
355 return refstrbind(*refdict(g), s, true);
356}
357
358char *agstrbind_text(Agraph_t * g, const char *s)
359{
360 return refstrbind(*refdict(g), s, false);
361}
362
363static char *agstrdup_internal(Agraph_t *g, const char *s, bool is_html) {
364 refstr_t *r;
365 size_t sz;
366
367 if (s == NULL)
368 return NULL;
369 strdict_t *strdict = *refdict(g);
370 r = strdict_find(strdict, s, is_html);
371 if (r)
372 r->refcnt++;
373 else {
374 sz = sizeof(refstr_t) + strlen(s) + 1;
375 if (g)
376 r = gv_calloc(sz, sizeof(char));
377 else {
378 r = malloc(sz);
379 if (sz > 0 && r == NULL) {
380 return NULL;
381 }
382 }
383 r->refcnt = 1;
384 r->is_html = is_html;
385 strcpy(r->s, s);
386 strdict_add(strdict, r);
387 }
388 return r->s;
389}
390
391char *agstrdup_text(Agraph_t *g, const char *s) {
392 return agstrdup_internal(g, s, false);
393}
394
395char *agstrdup_html(Agraph_t *g, const char *s) {
396 return agstrdup_internal(g, s, true);
397}
398
399char *agstrdup(Agraph_t *g, const char *s) {
400
401 // did this string originate from `agstrdup_html(g, …)`?
402 if (s != NULL) {
403 strdict_t *const strdict = *refdict(g);
404 refstr_t *const ref = strdict_find(strdict, s, true);
405 if (ref != NULL && ref->s == s) {
406 // create this copy as HTML-like
407 return agstrdup_html(g, s);
408 }
409 }
410
411 // otherwise, create the copy as regular text
412 return agstrdup_text(g, s);
413}
414
415int agstrfree(Agraph_t *g, const char *s, bool is_html) {
416 refstr_t *r;
417
418 if (s == NULL)
419 return FAILURE;
420
421 strdict_t *strdict = *refdict(g);
422 r = strdict_find(strdict, s, is_html);
423 if (r && r->s == s) {
424 r->refcnt--;
425 if (r->refcnt == 0) {
426 strdict_remove(strdict, r);
427 }
428 }
429 if (r == NULL)
430 return FAILURE;
431 return SUCCESS;
432}
433
434/* aghtmlstr:
435 * Return true if s is an HTML string.
436 * We assume s is within a refstr.
437 */
438int aghtmlstr(const char *s)
439{
440 const refstr_t *key;
441
442 if (s == NULL)
443 return 0;
444 key = (const refstr_t *)(s - offsetof(refstr_t, s));
445 return key->is_html;
446}
447
448#ifdef DEBUG
449static int refstrprint(const refstr_t *r) {
450 fprintf(stderr, "%s\n", r->s);
451 return 0;
452}
453
454void agrefstrdump(Agraph_t * g)
455{
456 const strdict_t *d = *refdict(g);
457 for (size_t i = 0;
458 d != NULL && d->buckets != NULL && i < (size_t)1 << d->capacity_exp;
459 ++i) {
460 refstrprint(d->buckets[i]);
461 }
462}
463#endif
Memory allocation wrappers that exit on failure.
static void * gv_calloc(size_t nmemb, size_t size)
Definition alloc.h:26
static void * gv_alloc(size_t size)
Definition alloc.h:47
cgraph.h additions
#define FAILURE
Definition cghdr.h:45
#define SUCCESS
Definition cghdr.h:44
#define hash
Definition dthdr.h:13
static long seed
Definition exeval.c:1043
static double len(glCompPoint p)
Definition glutils.c:150
void * malloc(YYSIZE_T)
void free(void *)
node NULL
Definition grammar.y:163
char * agstrbind_html(Agraph_t *g, const char *s)
Definition refstr.c:354
char * agstrbind_text(Agraph_t *g, const char *s)
Definition refstr.c:358
int aghtmlstr(const char *s)
Definition refstr.c:438
int agstrfree(Agraph_t *g, const char *s, bool is_html)
Definition refstr.c:415
char * agstrdup_text(Agraph_t *g, const char *s)
returns a pointer to a reference-counted regular text copy of the argument string,...
Definition refstr.c:391
char * agstrdup(Agraph_t *g, const char *s)
returns a pointer to a reference-counted copy of the argument string, creating one if necessary
Definition refstr.c:399
char * agstrbind(Agraph_t *g, const char *s)
Definition refstr.c:339
char * agstrdup_html(Agraph_t *g, const char *s)
returns a pointer to a reference-counted HTML-like copy of the argument string, creating one if neces...
Definition refstr.c:395
static refstr_t * strdict_find(strdict_t *dict, const char *s, bool is_html)
Definition refstr.c:225
int agstrclose(Agraph_t *g)
Definition refstr.c:324
static void strdict_add(strdict_t *dict, refstr_t *r)
add a reference-counted string to a dictionary
Definition refstr.c:161
static strdict_t * Refdict_default
Definition refstr.c:60
static void strdict_remove(strdict_t *dict, const refstr_t *key)
remove a reference-counted string from a dictionary
Definition refstr.c:257
static char * agstrdup_internal(Agraph_t *g, const char *s, bool is_html)
Definition refstr.c:363
static strdict_t * strdict_new(void)
create a new string dictionary
Definition refstr.c:144
static strdict_t ** refdict(Agraph_t *g)
Definition refstr.c:311
static void strdict_free(strdict_t **dict)
destroy a string dictionary
Definition refstr.c:291
static char * refstrbind(strdict_t *strdict, const char *s, bool is_html)
Definition refstr.c:330
static bool refstr_eq(const char *a, bool is_html, const refstr_t *b)
Definition refstr.c:46
static size_t strdict_hash(const char *s, bool is_html)
Definition refstr.c:151
static refstr_t *const TOMBSTONE
Definition refstr.c:158
void ref(Site *v)
Definition site.c:59
void * strdict
shared string dict
Definition cgraph.h:413
graph or subgraph
Definition cgraph.h:424
Agclos_t * clos
shared resources
Definition cgraph.h:434
Definition legal.c:50
uint64_t refcnt
Definition refstr.c:31
char s[]
Definition refstr.c:33
uint64_t is_html
Definition refstr.c:32
a string dictionary
Definition refstr.c:54
size_t capacity_exp
log₂ size of buckets
Definition refstr.c:57
refstr_t ** buckets
backing store of elements
Definition refstr.c:55
size_t size
number of elements in the dictionary
Definition refstr.c:56
Definition grammar.c:93
#define UNREACHABLE()
Definition unreachable.h:30