Graphviz: lib/util/tokenize.h Source File

Searching...
No Matches
 
 
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <util/strview.h>
 
/// State of an in-progress tokenization of a NUL-terminated string.
///
/// Created by `tok`, advanced by `tok_next`, and inspected through `tok_end`
/// and `tok_get`. The struct only holds pointers into the caller's strings; it
/// neither copies nor modifies them.
typedef struct {
  const char *start;      ///< beginning of the string being scanned
  const char *separators; ///< characters that delimit tokens
  strview_t next;         ///< current token; `.data == NULL` once exhausted
} tok_t;
 
static inline tok_t tok(const char *input, const char *separators) {
 
  assert(input != NULL);
  assert(separators != NULL);
  assert(strcmp(separators, "") != 0 &&
         "at least one separator must be provided");
 
#ifndef NDEBUG
  for (const char *s1 = separators; *s1 != '\0'; ++s1) {
    for (const char *s2 = s1 + 1; *s2 != '\0'; ++s2) {
      assert(*s1 != *s2 && "duplicate separator characters");
    }
  }
#endif
 
  tok_t t = {.start = input, .separators = separators};
 
  // find the end of the first token
  size_t size = strcspn(input, separators);
  t.next = (strview_t){.data = input, .size = size};
 
  return t;
}
 
/// Has this tokenizer run out of tokens?
///
/// \param t Tokenizer to query; must not be NULL
/// \return True if the current token's data pointer is NULL, i.e. there is
///   nothing left to retrieve with `tok_get`
static inline bool tok_end(const tok_t *t) {

  assert(t != NULL);

  // a NULL data pointer is the exhaustion marker
  const char *const pending = t->next.data;
  return pending == NULL;
}
 
/// Retrieve the token the tokenizer is currently positioned on.
///
/// Does not advance the tokenizer; repeated calls return the same view.
///
/// \param t Tokenizer to read from; must not be NULL and must not be exhausted
/// \return A view of the current token, pointing into the scanned string
static inline strview_t tok_get(const tok_t *t) {

  assert(t != NULL);
  assert(t->next.data != NULL && "extracting from an exhausted tokenizer");

  const strview_t current = t->next;
  return current;
}
 
/// Advance the tokenizer to the next token.
///
/// A run of consecutive separator characters following the current token is
/// skipped as a whole. If the current token already ends at the end of the
/// string, the tokenizer becomes exhausted (its token view is zeroed, so its
/// data pointer is NULL). If the string ends with separators, one final empty
/// token located at the end of the string is produced before exhaustion.
///
/// \param t Tokenizer to advance; must not be NULL and must not already be
///   exhausted
static inline void tok_next(tok_t *t) {

  assert(t != NULL);
  assert(t->start != NULL);
  assert(t->separators != NULL);
  assert(t->next.data != NULL && "advancing an exhausted tokenizer");

  // resume from where the previous token ended
  const char *start = t->next.data + t->next.size;

  // If we are at the end of the string, we are done. The current token was
  // delimited by `strcspn`, so (assuming `t` has only been written by `tok`
  // and `tok_next`) `start` points at either a separator or the terminating
  // NUL. Testing the character directly avoids re-measuring the whole input
  // with `strlen` on every advance.
  if (*start == '\0') {
    t->next = (strview_t){0};
    return;
  }

  // skip the run of separator characters that ended the previous token
  start += strspn(start, t->separators);

  // find the end of the next token
  size_t size = strcspn(start, t->separators);

  t->next = (strview_t){.data = start, .size = size};
}