Graphviz 12.0.1~dev.20240715.2254
Loading...
Searching...
No Matches
htmlparse.y
Go to the documentation of this file.
1
3/*************************************************************************
4 * Copyright (c) 2011 AT&T Intellectual Property
5 * All rights reserved. This program and the accompanying materials
6 * are made available under the terms of the Eclipse Public License v1.0
7 * which accompanies this distribution, and is available at
8 * https://www.eclipse.org/legal/epl-v10.html
9 *
10 * Contributors: Details at https://graphviz.org
11 *************************************************************************/
12
13%require "3.0"
14
15 /* By default, Bison emits a parser using symbols prefixed with "yy". Graphviz
16 * contains multiple Bison-generated parsers, so we alter this prefix to avoid
17 * symbol clashes.
18 */
19%define api.prefix {html}
20
21%{
22
23#include <cgraph/alloc.h>
24#include <common/render.h>
25#include <common/htmltable.h>
26#include <common/htmllex.h>
27
28extern int htmlparse(void);
29
30typedef struct sfont_t {
32 struct sfont_t *pfont;
33} sfont_t;
34
35static struct {
36 htmllabel_t* lbl; /* Generated label */
37 htmltbl_t* tblstack; /* Stack of tables maintained during parsing */
38 Dt_t* fitemList; /* Dictionary for font text items */
40 agxbuf* str; /* Buffer for text */
42 GVC_t* gvc;
43} HTMLstate;
44
45/* free_ritem:
46 * Free row. This closes and frees row's list, then
47 * the pitem itself is freed.
48 */
49static void free_ritem(void *item) {
50 pitem *p = item;
51 dtclose (p->u.rp);
52 free (p);
53}
54
55/* cleanTbl:
56 * Clean up table if error in parsing.
57 */
58static void
60{
61 dtclose (tp->u.p.rows);
62 free_html_data (&tp->data);
63 free (tp);
64}
65
66/* cleanCell:
67 * Clean up cell if error in parsing.
68 */
69static void
71{
72 if (cp->child.kind == HTML_TBL) cleanTbl (cp->child.u.tbl);
73 else if (cp->child.kind == HTML_TEXT) free_html_text (cp->child.u.txt);
74 free_html_data (&cp->data);
75 free (cp);
76}
77
78/* free_citem:
79 * Free cell item during parsing. This frees cell and pitem.
80 */
81static void free_citem(void *item) {
82 pitem *p = item;
83 cleanCell (p->u.cp);
84 free (p);
85}
86
87static Dtdisc_t rowDisc = {
88 .key = offsetof(pitem, u),
89 .size = sizeof(void *),
90 .link = offsetof(pitem, link),
91 .freef = free_ritem,
92};
93static Dtdisc_t cellDisc = {
94 .key = offsetof(pitem, u),
95 .size = sizeof(void *),
96 .link = offsetof(pitem, link),
97 .freef = free,
98};
99
100typedef struct {
101 Dtlink_t link;
102 textspan_t ti;
103} fitem;
104
105typedef struct {
106 Dtlink_t link;
107 htextspan_t lp;
108} fspan;
109
110static void free_fitem(void *item) {
111 fitem *p = item;
112 free (p->ti.str);
113 free (p);
114}
115
116static void free_fspan(void *span) {
117 fspan *p = span;
118 textspan_t* ti;
119
120 if (p->lp.nitems) {
121 ti = p->lp.items;
122 for (size_t i = 0; i < p->lp.nitems; i++) {
123 free (ti->str);
124 ti++;
125 }
126 free (p->lp.items);
127 }
128 free (p);
129}
130
131static Dtdisc_t fstrDisc = {
132 .link = offsetof(fitem, link),
133 .freef = free,
134};
135
136static Dtdisc_t fspanDisc = {
137 .link = offsetof(fspan, link),
138 .freef = free,
139};
140
141/* appendFItemList:
142 * Append a new fitem to the list.
143 */
144static void
146{
147 fitem *fi = gv_alloc(sizeof(fitem));
148
149 fi->ti.str = agxbdisown(ag);
150 fi->ti.font = HTMLstate.fontstack->cfont;
151 dtinsert(HTMLstate.fitemList, fi);
152}
153
154/* appendFLineList:
155 */
156static void
157appendFLineList (int v)
158{
159 fspan *ln = gv_alloc(sizeof(fspan));
160 fitem *fi;
161 Dt_t *ilist = HTMLstate.fitemList;
162
163 size_t cnt = (size_t)dtsize(ilist);
164 ln->lp.just = v;
165 if (cnt) {
166 int i = 0;
167 ln->lp.nitems = cnt;
168 ln->lp.items = gv_calloc(cnt, sizeof(textspan_t));
169
170 fi = (fitem*)dtflatten(ilist);
171 for (; fi; fi = (fitem*)dtlink(fitemList, fi)) {
172 /* NOTE: When fitemList is closed, it uses free, which only frees the container,
173 * not the contents, so this copy is safe.
174 */
175 ln->lp.items[i] = fi->ti;
176 i++;
177 }
178 }
179 else {
180 ln->lp.items = gv_alloc(sizeof(textspan_t));
181 ln->lp.nitems = 1;
182 ln->lp.items[0].str = gv_strdup("");
183 ln->lp.items[0].font = HTMLstate.fontstack->cfont;
184 }
185
186 dtclear(ilist);
187
188 dtinsert(HTMLstate.fspanList, ln);
189}
190
191static htmltxt_t*
192mkText(void)
193{
194 Dt_t * ispan = HTMLstate.fspanList;
195 fspan *fl ;
196 htmltxt_t *hft = gv_alloc(sizeof(htmltxt_t));
197
198 if (dtsize (HTMLstate.fitemList))
200
201 size_t cnt = (size_t)dtsize(ispan);
202 hft->nspans = cnt;
203
204 if (cnt) {
205 int i = 0;
206 hft->spans = gv_calloc(cnt, sizeof(htextspan_t));
207 for(fl=dtfirst(ispan); fl; fl=dtnext(ispan,fl)) {
208 hft->spans[i] = fl->lp;
209 i++;
210 }
211 }
212
213 dtclear(ispan);
214
215 return hft;
216}
217
218static pitem* lastRow (void)
219{
220 htmltbl_t* tbl = HTMLstate.tblstack;
221 pitem* sp = dtlast (tbl->u.p.rows);
222 return sp;
223}
224
225/* addRow:
226 * Add new cell row to current table.
227 */
228static pitem* addRow (void)
229{
230 Dt_t* dp = dtopen(&cellDisc, Dtqueue);
231 htmltbl_t* tbl = HTMLstate.tblstack;
232 pitem* sp = gv_alloc(sizeof(pitem));
233 sp->u.rp = dp;
234 if (tbl->hrule)
235 sp->ruled = 1;
236 dtinsert (tbl->u.p.rows, sp);
237 return sp;
238}
239
240/* setCell:
241 * Set cell body and type and attach to row
242 */
243static void setCell(htmlcell_t *cp, void *obj, char kind) {
244 pitem* sp = gv_alloc(sizeof(pitem));
245 htmltbl_t* tbl = HTMLstate.tblstack;
246 pitem* rp = dtlast (tbl->u.p.rows);
247 Dt_t* row = rp->u.rp;
248 sp->u.cp = cp;
249 dtinsert (row, sp);
250 cp->child.kind = kind;
251 if (tbl->vrule)
252 cp->ruled = HTML_VRULE;
253
254 if(kind == HTML_TEXT)
255 cp->child.u.txt = obj;
256 else if (kind == HTML_IMAGE)
257 cp->child.u.img = obj;
258 else
259 cp->child.u.tbl = obj;
260}
261
262/* mkLabel:
263 * Create label, given body and type.
264 */
265static htmllabel_t *mkLabel(void *obj, char kind) {
266 htmllabel_t* lp = gv_alloc(sizeof(htmllabel_t));
267
268 lp->kind = kind;
269 if (kind == HTML_TEXT)
270 lp->u.txt = obj;
271 else
272 lp->u.tbl = obj;
273 return lp;
274}
275
276/* freeFontstack:
277 * Free all stack items but the last, which is
278 * put on artificially during in parseHTML.
279 */
280static void
281freeFontstack(void)
282{
283 sfont_t* s;
284 sfont_t* next;
285
286 for (s = HTMLstate.fontstack; (next = s->pfont); s = next) {
287 free(s);
288 }
289}
290
291/* cleanup:
292 * Called on error. Frees resources allocated during parsing.
293 * This includes a label, plus a walk down the stack of
294 * tables. Note that we use the free_citem function to actually
295 * free cells.
296 */
297static void cleanup (void)
298{
299 htmltbl_t* tp = HTMLstate.tblstack;
300 htmltbl_t* next;
301
302 if (HTMLstate.lbl) {
304 HTMLstate.lbl = NULL;
305 }
307 while (tp) {
308 next = tp->u.p.prev;
309 cleanTbl (tp);
310 tp = next;
311 }
313
315 dtclear (HTMLstate.fitemList);
317
319 dtclear (HTMLstate.fspanList);
321
323}
324
/* nonSpace:
 * Return 1 if s contains any character other than ' ', else 0.
 */
static int nonSpace (char* s)
{
  for (const char *p = s; *p != '\0'; p++) {
    if (*p != ' ') {
      return 1;
    }
  }
  return 0;
}
337
338/* pushFont:
339 * Fonts are allocated in the lexer.
340 */
341static void
343{
344 sfont_t *ft = gv_alloc(sizeof(sfont_t));
345 textfont_t* curfont = HTMLstate.fontstack->cfont;
346 textfont_t f = *fp;
347
348 if (curfont) {
349 if (!f.color && curfont->color)
350 f.color = curfont->color;
351 if ((f.size < 0.0) && (curfont->size >= 0.0))
352 f.size = curfont->size;
353 if (!f.name && curfont->name)
354 f.name = curfont->name;
355 if (curfont->flags)
356 f.flags |= curfont->flags;
357 }
358
359 ft->cfont = dtinsert(HTMLstate.gvc->textfont_dt, &f);
360 ft->pfont = HTMLstate.fontstack;
361 HTMLstate.fontstack = ft;
362}
363
364/* popFont:
365 */
366static void
367popFont (void)
368{
369 sfont_t* curfont = HTMLstate.fontstack;
370 sfont_t* prevfont = curfont->pfont;
371
372 free (curfont);
373 HTMLstate.fontstack = prevfont;
374}
375
376%}
377
378%union {
379 int i;
380 htmltxt_t* txt;
382 htmltbl_t* tbl;
384 htmlimg_t* img;
385 pitem* p;
386}
387
391%token T_HR T_hr T_end_hr
392%token T_VR T_vr T_end_vr
393%token <i> T_BR T_br
394%token <img> T_IMG T_img
395%token <tbl> T_table
396%token <cell> T_cell
398
399%type <txt> fonttext
400%type <cell> cell cells
401%type <i> br
402%type <tbl> table fonttable
403%type <img> image
404%type <p> row rows
405
406%start html
407
408%%
409
411 | T_html fonttable T_end_html { HTMLstate.lbl = mkLabel($2,HTML_TBL); }
412 | error { cleanup(); YYABORT; }
413 ;
414
415fonttext : text { $$ = mkText(); }
416 ;
417
418text : text textitem
419 | textitem
420 ;
421
422textitem : string { appendFItemList(HTMLstate.str);}
424 | font text n_font
425 | italic text n_italic
426 | underline text n_underline
427 | overline text n_overline
428 | bold text n_bold
429 | sup text n_sup
430 | sub text n_sub
431 | strike text n_strike
432 ;
433
435 ;
436
438 ;
439
440italic : T_italic {pushFont($1);}
441 ;
442
444 ;
445
447 ;
448
450 ;
451
452strike : T_s {pushFont($1);}
453 ;
454
456 ;
457
458underline : T_underline {pushFont($1);}
459 ;
460
462 ;
463
464overline : T_overline {pushFont($1);}
465 ;
466
468 ;
469
471 ;
472
473n_sup : T_n_sup {popFont();}
474 ;
475
477 ;
478
479n_sub : T_n_sub {popFont();}
480 ;
481
483 | T_BR { $$ = $1; }
484 ;
485
486string : T_string
487 | string T_string
488 ;
489
491 if (nonSpace(agxbuse(HTMLstate.str))) {
492 htmlerror ("Syntax error: non-space string used before <TABLE>");
494 }
495 $2->u.p.prev = HTMLstate.tblstack;
496 $2->u.p.rows = dtopen(&rowDisc, Dtqueue);
497 HTMLstate.tblstack = $2;
498 $2->font = HTMLstate.fontstack->cfont;
499 $<tbl>$ = $2;
500 }
502 if (nonSpace(agxbuse(HTMLstate.str))) {
503 htmlerror ("Syntax error: non-space string used after </TABLE>");
504 cleanup(); YYABORT;
505 }
506 $$ = HTMLstate.tblstack;
507 HTMLstate.tblstack = HTMLstate.tblstack->u.p.prev;
508 }
509 ;
510
511fonttable : table { $$ = $1; }
513 | italic table n_italic { $$=$2; }
514 | underline table n_underline { $$=$2; }
515 | overline table n_overline { $$=$2; }
516 | bold table n_bold { $$=$2; }
517 ;
518
519opt_space : string
520 | /* empty*/
521 ;
522
523rows : row { $$ = $1; }
524 | rows row { $$ = $2; }
525 | rows HR row { $1->ruled = 1; $$ = $3; }
526 ;
527
528row : T_row { addRow (); } cells T_end_row { $$ = lastRow(); }
529 ;
530
531cells : cell { $$ = $1; }
532 | cells cell { $$ = $2; }
533 | cells VR cell { $1->ruled |= HTML_VRULE; $$ = $3; }
534 ;
535
536cell : T_cell fonttable { setCell($1,$2,HTML_TBL); } T_end_cell { $$ = $1; }
540 ;
541
543 | T_IMG { $$ = $1; }
544 ;
545
547 | T_HR
548 ;
549
551 | T_VR
552 ;
553
554
555%%
556
557/* parseHTML:
558 * Return parsed label or NULL if failure.
559 * Set warn to 0 on success; 1 for warning message; 2 if no expat; 3 for error
560 * message.
561 */
563parseHTML (char* txt, int* warn, htmlenv_t *env)
564{
565 agxbuf str = {0};
566 htmllabel_t* l;
567 sfont_t dfltf;
568
569 dfltf.cfont = NULL;
570 dfltf.pfont = NULL;
571 HTMLstate.fontstack = &dfltf;
572 HTMLstate.tblstack = 0;
573 HTMLstate.lbl = 0;
574 HTMLstate.gvc = GD_gvc(env->g);
575 HTMLstate.fitemList = dtopen(&fstrDisc, Dtqueue);
576 HTMLstate.fspanList = dtopen(&fspanDisc, Dtqueue);
577
578 HTMLstate.str = &str;
579
580 if (initHTMLlexer (txt, &str, env)) {/* failed: no libexpat - give up */
581 *warn = 2;
582 l = NULL;
583 }
584 else {
585 htmlparse();
586 *warn = clearHTMLlexer ();
587 l = HTMLstate.lbl;
588 }
589
590 dtclose (HTMLstate.fitemList);
591 dtclose (HTMLstate.fspanList);
592
593 HTMLstate.fitemList = NULL;
594 HTMLstate.fspanList = NULL;
595 HTMLstate.fontstack = NULL;
596
597 agxbfree (&str);
598
599 return l;
600}
601
static void agxbfree(agxbuf *xb)
free any malloced resources
Definition agxbuf.h:77
static char * agxbuse(agxbuf *xb)
Definition agxbuf.h:286
static char * agxbdisown(agxbuf *xb)
Definition agxbuf.h:299
Memory allocation wrappers that exit on failure.
static char * gv_strdup(const char *original)
Definition alloc.h:101
static void * gv_calloc(size_t nmemb, size_t size)
Definition alloc.h:26
static void * gv_alloc(size_t size)
Definition alloc.h:47
CDT_API Dtlink_t * dtflatten(Dt_t *)
Definition dtflatten.c:10
#define dtclear(d)
Definition cdt.h:196
CDT_API int dtsize(Dt_t *)
Definition dtsize.c:12
#define dtlink(d, e)
Definition cdt.h:183
#define dtinsert(d, o)
Definition cdt.h:193
CDT_API int dtclose(Dt_t *)
Definition dtclose.c:8
#define dtlast(d)
Definition cdt.h:189
CDT_API Dtmethod_t * Dtqueue
queue: insert at top, delete at tail
Definition dtlist.c:134
CDT_API Dt_t * dtopen(Dtdisc_t *, Dtmethod_t *)
Definition dtopen.c:9
#define dtnext(d, o)
Definition cdt.h:188
#define dtfirst(d)
Definition cdt.h:187
#define sub(h, i)
Definition closest.c:65
void error(int level, const char *s,...)
Definition error.c:83
void free(void *)
glistitem $1
Definition gmlparse.y:278
node NULL
Definition grammar.y:149
atom $3
Definition grammar.y:150
static int cnt(Dict_t *d, Dtlink_t **set)
Definition graph.c:199
#define GD_gvc(g)
Definition types.h:355
static Agdesc_t kind
Definition gvpack.cpp:88
void htmlerror(const char *msg)
Definition htmllex.c:78
int initHTMLlexer(char *src, agxbuf *xb, htmlenv_t *env)
Definition htmllex.c:764
int clearHTMLlexer(void)
Definition htmllex.c:794
#define T_n_sup
Definition htmlparse.c:546
#define T_end_table
Definition htmlparse.c:537
Dt_t * fitemList
Definition htmlparse.c:95
#define T_br
Definition htmlparse.c:556
static struct @74 HTMLstate
static htmllabel_t * mkLabel(void *obj, char kind)
Definition htmlparse.c:322
#define T_vr
Definition htmlparse.c:553
#define T_error
Definition htmlparse.c:541
#define T_n_s
Definition htmlparse.c:548
Dt_t * fspanList
Definition htmlparse.c:96
static htmltxt_t * mkText(void)
Definition htmlparse.c:249
#define T_n_sub
Definition htmlparse.c:547
static void appendFItemList(agxbuf *ag)
Definition htmlparse.c:202
#define T_n_bold
Definition htmlparse.c:543
#define T_html
Definition htmlparse.c:535
static void cleanTbl(htmltbl_t *tp)
Definition htmlparse.c:116
#define T_underline
Definition htmlparse.c:564
#define T_sup
Definition htmlparse.c:566
#define T_row
Definition htmlparse.c:533
static void appendFLineList(int v)
Definition htmlparse.c:214
GVC_t * gvc
Definition htmlparse.c:99
#define T_table
Definition htmlparse.c:559
#define T_end_vr
Definition htmlparse.c:554
#define T_end_html
Definition htmlparse.c:536
static void setCell(htmlcell_t *cp, void *obj, char kind)
Definition htmlparse.c:300
static pitem * addRow(void)
Definition htmlparse.c:285
static void cleanCell(htmlcell_t *cp)
Definition htmlparse.c:127
sfont_t * fontstack
Definition htmlparse.c:98
static Dtdisc_t rowDisc
Definition htmlparse.c:144
static Dtdisc_t cellDisc
Definition htmlparse.c:150
#define T_VR
Definition htmlparse.c:552
static int nonSpace(char *s)
Definition htmlparse.c:385
static void free_citem(void *item)
Definition htmlparse.c:138
static Dtdisc_t fspanDisc
Definition htmlparse.c:193
#define T_bold
Definition htmlparse.c:563
static void popFont(void)
Definition htmlparse.c:424
#define T_end_img
Definition htmlparse.c:532
#define T_sub
Definition htmlparse.c:567
agxbuf * str
Definition htmlparse.c:97
int htmlparse(void)
#define T_s
Definition htmlparse.c:568
static Dtdisc_t fstrDisc
Definition htmlparse.c:188
static pitem * lastRow(void)
Definition htmlparse.c:275
static void free_ritem(void *item)
Definition htmlparse.c:106
#define T_n_italic
Definition htmlparse.c:542
static void freeFontstack(void)
Definition htmlparse.c:338
htmllabel_t * lbl
Definition htmlparse.c:93
#define T_end_font
Definition htmlparse.c:539
#define T_overline
Definition htmlparse.c:565
#define T_hr
Definition htmlparse.c:550
#define T_font
Definition htmlparse.c:561
#define T_italic
Definition htmlparse.c:562
htmltbl_t * tblstack
Definition htmlparse.c:94
static void free_fitem(void *item)
Definition htmlparse.c:167
#define T_end_br
Definition htmlparse.c:531
#define T_n_underline
Definition htmlparse.c:544
static void free_fspan(void *span)
Definition htmlparse.c:173
#define T_end_hr
Definition htmlparse.c:551
#define T_string
Definition htmlparse.c:540
#define T_img
Definition htmlparse.c:558
#define T_HR
Definition htmlparse.c:549
#define T_n_overline
Definition htmlparse.c:545
static void pushFont(textfont_t *fp)
Definition htmlparse.c:399
YYABORT
Definition htmlparse.y:493
italic table n_italic
Definition htmlparse.y:513
font text n_font italic text n_italic underline text n_underline overline text n_overline bold text n_bold sup text n_sup sub text n_sub strike text n_strike
Definition htmlparse.y:432
underline table n_underline
Definition htmlparse.y:514
cell $2
Definition htmlparse.y:536
htmllabel_t * parseHTML(char *txt, int *warn, htmlenv_t *env)
Definition htmlparse.y:563
T_BR
Definition htmlparse.y:483
T_cell fonttext
Definition htmlparse.y:537
overline table n_overline
Definition htmlparse.y:515
T_cell
Definition htmlparse.y:539
rows T_end_table opt_space
Definition htmlparse.y:501
bold table n_bold
Definition htmlparse.y:516
br
Definition htmlparse.y:423
font table n_font
Definition htmlparse.y:512
cells T_end_row
Definition htmlparse.y:528
cells cell
Definition htmlparse.y:532
$2 font
Definition htmlparse.y:498
$< tbl > $
Definition htmlparse.y:499
$2 u p rows
Definition htmlparse.y:496
rows row
Definition htmlparse.y:524
cell HTML_TBL
Definition htmlparse.y:536
T_IMG
Definition htmlparse.y:543
T_end_cell
Definition htmlparse.y:536
$$
Definition htmlparse.y:525
T_cell image
Definition htmlparse.y:538
cleanup()
void free_html_text(htmltxt_t *t)
Definition htmltable.c:814
void free_html_label(htmllabel_t *lp, int root)
Definition htmltable.c:876
void free_html_data(htmldata_t *dp)
Definition htmltable.c:803
#define HTML_TEXT
Definition htmltable.h:102
#define HTML_VRULE
Definition htmltable.h:105
#define HTML_IMAGE
Definition htmltable.h:103
#define UNSET_ALIGN
Definition htmltable.h:43
static int table[NTYPES][NTYPES]
Definition mincross.c:1752
Definition gvcint.h:80
Definition cdt.h:104
Dtfree_f freef
Definition cdt.h:93
int key
Definition cdt.h:89
int link
Definition cdt.h:91
result of partitioning available space, part of maze
Definition grid.h:33
gridpt p
Definition grid.h:34
textspan_t ti
Definition htmlparse.c:159
htextspan_t lp
Definition htmlparse.c:164
size_t nitems
Definition htmltable.h:53
textspan_t * items
Definition htmltable.h:52
htmllabel_t child
Definition htmltable.h:148
unsigned char ruled
Definition htmltable.h:150
htmldata_t data
Definition htmltable.h:143
graph_t * g
Definition htmltable.h:171
htmltxt_t * txt
Definition htmltable.h:136
union htmllabel_t::@78 u
htmltbl_t * tbl
Definition htmltable.h:135
htmlimg_t * img
Definition htmltable.h:137
bool vrule
vertical rule
Definition htmltable.h:130
union htmltbl_t::@75 u
bool hrule
horizontal rule
Definition htmltable.h:129
htmltbl_t * prev
Definition htmltable.h:119
struct htmltbl_t::@75::@77 p
htmldata_t data
Definition htmltable.h:112
Dt_t * rows
Definition htmltable.h:120
size_t nspans
Definition htmltable.h:61
htextspan_t * spans
Definition htmltable.h:60
Definition utils.c:748
unsigned char ruled
Definition htmltable.h:164
Dt_t * rp
Definition htmltable.h:161
union pitem::@79 u
htmlcell_t * cp
Definition htmltable.h:162
struct sfont_t * pfont
Definition htmlparse.c:89
textfont_t * cfont
Definition htmlparse.c:88
char * color
Definition textspan.h:55
char * name
Definition textspan.h:54
unsigned int flags
Definition textspan.h:58
double size
Definition textspan.h:57
char * str
Definition textspan.h:65
textfont_t * font
Definition textspan.h:66
Definition grammar.c:93