Graphviz 14.1.2~dev.20260118.1035
Loading...
Searching...
No Matches
refstr.c
Go to the documentation of this file.
1
6/*************************************************************************
7 * Copyright (c) 2011 AT&T Intellectual Property
8 * All rights reserved. This program and the accompanying materials
9 * are made available under the terms of the Eclipse Public License v1.0
10 * which accompanies this distribution, and is available at
11 * https://www.eclipse.org/legal/epl-v10.html
12 *
13 * Contributors: Details at https://graphviz.org
14 *************************************************************************/
15
16#include "config.h"
17
18#include <assert.h>
19#include <cgraph/cghdr.h>
20#include <stdbool.h>
21#include <stdint.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <util/alloc.h>
26#include <util/unreachable.h>
27
28/*
29 * reference counted strings.
30 */
31
32typedef struct {
33 uint64_t refcnt: sizeof(uint64_t) * 8 - 1;
34 uint64_t is_html: 1;
35 char s[];
36} refstr_t;
37
38_Static_assert(
39 offsetof(refstr_t, s) % 2 == 0,
40 "refstr_t.s is not at an even offset, breaking lib/cgraph/id.c code");
41
48static bool refstr_eq(const char *a, bool is_html, const refstr_t *b) {
49 if (is_html != b->is_html) {
50 return false;
51 }
52 return strcmp(a, b->s) == 0;
53}
54
56typedef struct {
58 size_t size;
59 size_t capacity_exp;
60} strdict_t;
61
63
70static uint64_t hash(const void *key, size_t len, uint8_t extra) {
71 assert(key != NULL || len == 0);
72
73 // The following implementation is based on the `MurmurHash64A` variant of the
74 // public domain MurmurHash by Austin Appleby. More information on this at
75 // https://github.com/aappleby/smhasher/. Relevant changes made to Austin’s
76 // original implementation:
77 // • Our implementation is alignment-agnostic. No assumption is made about
78 // the initial alignment of `key`.
79 // • Our implementation uses `unsigned char` pointers, avoiding Undefined
80 // Behavior when the input pointer originated from a non-`uint64_t`
81 // object. This is written in a style that allows contemporary compilers
82 // to optimize code back into wider 8-byte accesses where possible.
83 // • Our implementation supports an extra byte to be considered to have
84 // followed the main data. See calls to this function for why this `extra`
85 // parameter exists.
86
87 static const uint64_t seed = 0;
88
89 const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
90 const unsigned r = 47;
91
92 uint64_t h = seed ^ (len * m);
93
94 const unsigned char *data = key;
95 const unsigned char *end = data + len / sizeof(uint64_t) * sizeof(uint64_t);
96
97 while (data != end) {
98
99 uint64_t k;
100 memcpy(&k, data, sizeof(k));
101 data += sizeof(k);
102
103 k *= m;
104 k ^= k >> r;
105 k *= m;
106
107 h ^= k;
108 h *= m;
109 }
110
111 const unsigned char *data2 = data;
112
113 // accumulate extra byte
114 h ^= (uint64_t)extra << 56;
115
116 switch (len & 7) {
117 case 7:
118 h ^= (uint64_t)data2[6] << 48; // fall through
119 case 6:
120 h ^= (uint64_t)data2[5] << 40; // fall through
121 case 5:
122 h ^= (uint64_t)data2[4] << 32; // fall through
123 case 4:
124 h ^= (uint64_t)data2[3] << 24; // fall through
125 case 3:
126 h ^= (uint64_t)data2[2] << 16; // fall through
127 case 2:
128 h ^= (uint64_t)data2[1] << 8; // fall through
129 case 1:
130 h ^= (uint64_t)data2[0];
131 break;
132 default:
133 // nothing required
134 break;
135 }
136 h *= m;
137
138 h ^= h >> r;
139 h *= m;
140 h ^= h >> r;
141
142 return h;
143}
144
146static strdict_t *strdict_new(void) { return gv_alloc(sizeof(strdict_t)); }
147
153static size_t strdict_hash(const char *s, bool is_html) {
154 assert(s != NULL);
155 return (size_t)hash(s, strlen(s), is_html);
156}
157
160static refstr_t *const TOMBSTONE = (refstr_t *)-1;
161
163static void strdict_add(strdict_t *dict, refstr_t *r) {
164 assert(dict != NULL);
165 assert(r != NULL);
166 assert(r != TOMBSTONE);
167
168 // a watermark ratio at which the set capacity should be expanded
169 static const size_t OCCUPANCY_THRESHOLD_PERCENT = 70;
170
171 // do we need to expand the backing store?
172 size_t capacity = dict->buckets == NULL ? 0 : (size_t)1 << dict->capacity_exp;
173 const bool grow = 100 * dict->size >= OCCUPANCY_THRESHOLD_PERCENT * capacity;
174
175 if (grow) {
176 const size_t new_c = capacity == 0 ? 10 : dict->capacity_exp + 1;
177 refstr_t **new_b = gv_calloc((size_t)1 << new_c, sizeof(refstr_t *));
178
179 // Construct a new dictionary and copy everything into it. Note we need to
180 // rehash because capacity (and hence modulo wraparound behavior) has
181 // changed. This conveniently flushes out the tombstones too.
182 strdict_t new_d = {.buckets = new_b, .capacity_exp = new_c};
183 for (size_t i = 0; i < capacity; ++i) {
184 // skip empty buckets
185 if (dict->buckets[i] == NULL) {
186 continue;
187 }
188 // skip deleted buckets
189 if (dict->buckets[i] == TOMBSTONE) {
190 continue;
191 }
192 strdict_add(&new_d, dict->buckets[i]);
193 }
194
195 // replace ourselves with this new dictionary
196 free(dict->buckets);
197 *dict = new_d;
198 }
199
200 assert(dict->buckets != NULL);
201 capacity = (size_t)1 << dict->capacity_exp;
202 assert(capacity > dict->size);
203
204 const size_t h = strdict_hash(r->s, r->is_html != 0);
205
206 for (size_t i = 0; i < capacity; ++i) {
207 const size_t candidate = (h + i) % capacity;
208
209 // if we found an empty bucket or a previously deleted bucket, we can insert
210 if (dict->buckets[candidate] == NULL ||
211 dict->buckets[candidate] == TOMBSTONE) {
212 dict->buckets[candidate] = r;
213 ++dict->size;
214 return;
215 }
216 }
217
218 UNREACHABLE();
219}
220
227static refstr_t *strdict_find(strdict_t *dict, const char *s, bool is_html) {
228 assert(dict != NULL);
229 assert(s != NULL);
230
231 const size_t h = strdict_hash(s, is_html);
232 const size_t capacity = dict->buckets == NULL
233 ? 0 : (size_t)1 << dict->capacity_exp;
234
235 for (size_t i = 0; i < capacity; ++i) {
236 const size_t candidate = (h + i) % capacity;
237
238 // if we found an empty bucket, the sought item does not exist
239 if (dict->buckets[candidate] == NULL) {
240 return NULL;
241 }
242
243 // if we found a previously deleted slot, skip over it
244 if (dict->buckets[candidate] == TOMBSTONE) {
245 continue;
246 }
247
248 // is this the string we are searching for?
249 if (refstr_eq(s, is_html, dict->buckets[candidate])) {
250 return dict->buckets[candidate];
251 }
252 }
253
254 // not found
255 return NULL;
256}
257
259static void strdict_remove(strdict_t *dict, const refstr_t *key) {
260 assert(dict != NULL);
261 assert(key != NULL);
262 assert(key != TOMBSTONE);
263
264 const size_t h = strdict_hash(key->s, key->is_html != 0);
265 const size_t capacity = dict->buckets == NULL
266 ? 0 : (size_t)1 << dict->capacity_exp;
267
268 for (size_t i = 0; i < capacity; ++i) {
269 const size_t candidate = (h + i) % capacity;
270
271 // if we found an empty bucket, the sought item does not exist
272 if (dict->buckets[candidate] == NULL) {
273 return;
274 }
275
276 // if we found a previously deleted bucket, skip over it
277 if (dict->buckets[candidate] == TOMBSTONE) {
278 continue;
279 }
280
281 // is this the string we are searching for?
282 if (refstr_eq(key->s, key->is_html != 0, dict->buckets[candidate])) {
283 assert(dict->size > 0);
284 free(dict->buckets[candidate]);
285 dict->buckets[candidate] = TOMBSTONE;
286 --dict->size;
287 return;
288 }
289 }
290}
291
293static void strdict_free(strdict_t **dict) {
294 assert(dict != NULL);
295
296 if (*dict != NULL && (*dict)->buckets != NULL) {
297 for (size_t i = 0; i < (size_t)1 << (*dict)->capacity_exp; ++i) {
298 if ((*dict)->buckets[i] != TOMBSTONE) {
299 free((*dict)->buckets[i]);
300 }
301 }
302 free((*dict)->buckets);
303 }
304
305 free(*dict);
306 *dict = NULL;
307}
308
309/* refdict:
310 * Return a pointer to the string dictionary associated with g.
311 * If necessary, create it.
312 */
314 strdict_t **dictref;
315
316 if (g)
317 dictref = (strdict_t **)&g->clos->strdict;
318 else
319 dictref = &Refdict_default;
320 if (*dictref == NULL) {
321 *dictref = strdict_new();
322 }
323 return dictref;
324}
325
327{
329 return 0;
330}
331
332static char *refstrbind(strdict_t *strdict, const char *s, bool is_html) {
333 refstr_t *r;
334 r = strdict_find(strdict, s, is_html);
335 if (r)
336 return r->s;
337 else
338 return NULL;
339}
340
341char *agstrbind(Agraph_t *g, const char *s) {
342
343 // did this string originate from `agstrdup_html(g, …)`?
344 if (s != NULL) {
345 strdict_t *const strdict = *refdict(g);
346 refstr_t *const ref = strdict_find(strdict, s, true);
347 if (ref != NULL && ref->s == s) {
348 // create this copy as HTML-like
349 return agstrbind_html(g, s);
350 }
351 }
352
353 return agstrbind_text(g, s);
354}
355
356char *agstrbind_html(Agraph_t *g, const char *s) {
357 return refstrbind(*refdict(g), s, true);
358}
359
360char *agstrbind_text(Agraph_t * g, const char *s)
361{
362 return refstrbind(*refdict(g), s, false);
363}
364
365static char *agstrdup_internal(Agraph_t *g, const char *s, bool is_html) {
366 refstr_t *r;
367
368 if (s == NULL)
369 return NULL;
370 strdict_t *strdict = *refdict(g);
371 r = strdict_find(strdict, s, is_html);
372 if (r)
373 r->refcnt++;
374 else {
375 const size_t s_size = strlen(s) + 1;
376 const size_t sz = sizeof(refstr_t) + s_size;
377 if (g)
378 r = gv_calloc(sz, sizeof(char));
379 else {
380 r = malloc(sz);
381 if (r == NULL) {
382 return NULL;
383 }
384 }
385 r->refcnt = 1;
386 r->is_html = is_html;
387 memcpy(r->s, s, s_size);
388 strdict_add(strdict, r);
389 }
390 return r->s;
391}
392
393char *agstrdup_text(Agraph_t *g, const char *s) {
394 return agstrdup_internal(g, s, false);
395}
396
397char *agstrdup_html(Agraph_t *g, const char *s) {
398 return agstrdup_internal(g, s, true);
399}
400
401char *agstrdup(Agraph_t *g, const char *s) {
402
403 // did this string originate from `agstrdup_html(g, …)`?
404 if (s != NULL) {
405 strdict_t *const strdict = *refdict(g);
406 refstr_t *const ref = strdict_find(strdict, s, true);
407 if (ref != NULL && ref->s == s) {
408 // create this copy as HTML-like
409 return agstrdup_html(g, s);
410 }
411 }
412
413 // otherwise, create the copy as regular text
414 return agstrdup_text(g, s);
415}
416
417int agstrfree(Agraph_t *g, const char *s, bool is_html) {
418 refstr_t *r;
419
420 if (s == NULL)
421 return FAILURE;
422
423 strdict_t *strdict = *refdict(g);
424 r = strdict_find(strdict, s, is_html);
425 if (r && r->s == s) {
426 r->refcnt--;
427 if (r->refcnt == 0) {
428 strdict_remove(strdict, r);
429 }
430 }
431 if (r == NULL)
432 return FAILURE;
433 return SUCCESS;
434}
435
436/* aghtmlstr:
437 * Return true if s is an HTML string.
438 * We assume s is within a refstr.
439 */
440int aghtmlstr(const char *s)
441{
442 const refstr_t *key;
443
444 if (s == NULL)
445 return 0;
446 key = (const refstr_t *)(s - offsetof(refstr_t, s));
447 return key->is_html != 0;
448}
449
450#ifdef DEBUG
451static int refstrprint(const refstr_t *r) {
452 fprintf(stderr, "%s\n", r->s);
453 return 0;
454}
455
456void agrefstrdump(Agraph_t * g)
457{
458 const strdict_t *d = *refdict(g);
459 for (size_t i = 0;
460 d != NULL && d->buckets != NULL && i < (size_t)1 << d->capacity_exp;
461 ++i) {
462 refstrprint(d->buckets[i]);
463 }
464}
465#endif
Memory allocation wrappers that exit on failure.
static void * gv_calloc(size_t nmemb, size_t size)
Definition alloc.h:26
static void * gv_alloc(size_t size)
Definition alloc.h:47
cgraph.h additions
#define FAILURE
Definition cghdr.h:45
#define SUCCESS
Definition cghdr.h:44
#define hash
Definition dthdr.h:13
static long seed
Definition exeval.c:1008
static double len(glCompPoint p)
Definition glutils.c:138
void * malloc(YYSIZE_T)
void free(void *)
node NULL
Definition grammar.y:181
char * agstrbind_html(Agraph_t *g, const char *s)
Definition refstr.c:356
char * agstrbind_text(Agraph_t *g, const char *s)
Definition refstr.c:360
int aghtmlstr(const char *s)
Definition refstr.c:440
int agstrfree(Agraph_t *g, const char *s, bool is_html)
Definition refstr.c:417
char * agstrdup_text(Agraph_t *g, const char *s)
returns a pointer to a reference-counted regular text copy of the argument string,...
Definition refstr.c:393
char * agstrdup(Agraph_t *g, const char *s)
returns a pointer to a reference-counted copy of the argument string, creating one if necessary
Definition refstr.c:401
char * agstrbind(Agraph_t *g, const char *s)
Definition refstr.c:341
char * agstrdup_html(Agraph_t *g, const char *s)
returns a pointer to a reference-counted HTML-like copy of the argument string, creating one if neces...
Definition refstr.c:397
static refstr_t * strdict_find(strdict_t *dict, const char *s, bool is_html)
Definition refstr.c:227
int agstrclose(Agraph_t *g)
Definition refstr.c:326
static void strdict_add(strdict_t *dict, refstr_t *r)
add a reference-counted string to a dictionary
Definition refstr.c:163
static strdict_t * Refdict_default
Definition refstr.c:62
static void strdict_remove(strdict_t *dict, const refstr_t *key)
remove a reference-counted string from a dictionary
Definition refstr.c:259
static char * agstrdup_internal(Agraph_t *g, const char *s, bool is_html)
Definition refstr.c:365
static strdict_t * strdict_new(void)
create a new string dictionary
Definition refstr.c:146
static strdict_t ** refdict(Agraph_t *g)
Definition refstr.c:313
static void strdict_free(strdict_t **dict)
destroy a string dictionary
Definition refstr.c:293
static char * refstrbind(strdict_t *strdict, const char *s, bool is_html)
Definition refstr.c:332
static bool refstr_eq(const char *a, bool is_html, const refstr_t *b)
Definition refstr.c:48
static size_t strdict_hash(const char *s, bool is_html)
Definition refstr.c:153
static refstr_t *const TOMBSTONE
Definition refstr.c:160
void * strdict
shared string dict
Definition cgraph.h:413
graph or subgraph
Definition cgraph.h:424
Agclos_t * clos
shared resources
Definition cgraph.h:434
uint64_t refcnt
Definition refstr.c:33
char s[]
Definition refstr.c:35
uint64_t is_html
Definition refstr.c:34
a string dictionary
Definition refstr.c:56
size_t capacity_exp
log₂ size of buckets
Definition refstr.c:59
refstr_t ** buckets
backing store of elements
Definition refstr.c:57
size_t size
number of elements in the dictionary
Definition refstr.c:58
Definition grammar.c:90
#define UNREACHABLE()
Definition unreachable.h:30