99 # define PARAMS(args) args
101 # define PARAMS(args) ()
108 #define BOUNDED_TO_ALLOCA(beg, end, place) do { \
109 const char *BTA_beg = (beg); \
110 int BTA_len = (end) - BTA_beg; \
111 char **BTA_dest = &(place); \
112 *BTA_dest = alloca (BTA_len + 1); \
113 memcpy (*BTA_dest, BTA_beg, BTA_len); \
114 (*BTA_dest)[BTA_len] = '\0'; \
/* Numeric value of the hexadecimal digit character X.
   A character at or above 'A' is upper-cased and measured from 'A' (giving
   10 and up); anything below 'A' is measured from '0'.  No validation is
   performed, and X is expanded more than once, so pass a side-effect-free
   expression.  */
#define XDIGIT_TO_NUM(x) \
  ((x) >= 'A' ? TOUPPER (x) - 'A' + 10 : (x) - '0')
/* Number of elements in ARRAY, computed at compile time.  Only meaningful
   when ARRAY is a real array object; applied to a pointer the quotient has
   nothing to do with any element count.  */
#define countof(array) (sizeof (array) / sizeof ((array)[0]))
/* Map the x-prefixed allocator names straight onto the C library
   allocators.  NOTE(review): the indented `# define' suggests these sit
   inside a conditional section that is not visible here -- confirm which
   configuration they apply to.  malloc/realloc may return NULL and these
   aliases add no check for that.  */
140 # define xmalloc malloc
141 # define xrealloc realloc
/* Character classification and case-conversion wrappers around <ctype.h>.

   The argument is converted to unsigned char before the call: the
   <ctype.h> functions are only defined for EOF and for values representable
   as unsigned char, so handing them a plain `char' that holds a byte >= 0x80
   (a negative value where char is signed) is undefined behavior.  The
   classification macros yield nonzero/zero; TOLOWER and TOUPPER yield the
   converted character as an int in the unsigned char range.  */
# define ISSPACE(x) isspace ((unsigned char) (x))
# define ISDIGIT(x) isdigit ((unsigned char) (x))
# define ISXDIGIT(x) isxdigit ((unsigned char) (x))
# define ISALPHA(x) isalpha ((unsigned char) (x))
# define ISALNUM(x) isalnum ((unsigned char) (x))
# define TOLOWER(x) tolower ((unsigned char) (x))
# define TOUPPER(x) toupper ((unsigned char) (x))
164 hash_table_get (
const struct hash_table *ht,
void *ptr)
199 #define POOL_INIT(p, initial_storage, initial_size) do { \
200 struct pool *P = (p); \
201 P->contents = (initial_storage); \
202 P->size = (initial_size); \
205 P->orig_contents = P->contents; \
206 P->orig_size = P->size; \
212 #define POOL_GROW(p, increase) \
213 GROW_ARRAY ((p)->contents, (p)->size, (p)->tail + (increase), \
219 #define POOL_APPEND(p, beg, end) do { \
220 const char *PA_beg = (beg); \
221 int PA_size = (end) - PA_beg; \
222 POOL_GROW (p, PA_size); \
223 memcpy ((p)->contents + (p)->tail, PA_beg, PA_size); \
224 (p)->tail += PA_size; \
230 #define POOL_APPEND_CHR(p, ch) do { \
231 char PAC_char = (ch); \
233 (p)->contents[(p)->tail++] = PAC_char; \
/* Reset pool P's write position (`tail') to zero.  Only `tail' is assigned;
   the pool's storage is left as it is.  The whole expansion is
   parenthesized so the macro behaves as a single expression and cannot
   regroup with operators next to the call site.  */
#define POOL_REWIND(p) ((p)->tail = 0)
245 #define POOL_FREE(p) do { \
246 struct pool *P = p; \
248 xfree (P->contents); \
249 P->contents = P->orig_contents; \
250 P->size = P->orig_size; \
267 #define GROW_ARRAY(basevar, sizevar, needed_size, resized, type) do { \
268 long ga_needed_size = (needed_size); \
269 long ga_newsize = (sizevar); \
270 while (ga_newsize < ga_needed_size) \
272 if (ga_newsize != (sizevar)) \
275 basevar = (type *)xrealloc (basevar, ga_newsize * sizeof (type)); \
278 void *ga_new = xmalloc (ga_newsize * sizeof (type)); \
279 memcpy (ga_new, basevar, (sizevar) * sizeof (type)); \
280 (basevar) = ga_new; \
283 (sizevar) = ga_newsize; \
/* Independent single-bit option values (1, 2 and 4) meant to be OR-ed
   together into one flags word.
   NOTE(review): presumably these feed the `flags' argument of
   convert_and_copy -- confirm against its body.  */
#define AP_DOWNCASE         (1 << 0)
#define AP_PROCESS_ENTITIES (1 << 1)
#define AP_TRIM_BLANKS      (1 << 2)
308 int old_tail = pool->
tail;
316 while (beg < end &&
ISSPACE (*beg))
318 while (end > beg &&
ISSPACE (end[-1]))
332 const char *from = beg;
344 const char *save = from;
354 int numeric = 0, digits = 0;
359 for (; from < end &&
ISXDIGIT (*from); from++, digits++)
364 for (; from < end &&
ISDIGIT (*from); from++, digits++)
365 numeric = (numeric * 10) + (*from -
'0');
/* FROB (NAME) is nonzero when the string literal NAME occurs at `from' and
   is properly terminated: by the end of the remaining input, by ';', or by
   any non-alphanumeric character.  It uses `from' and `remain' from the
   scope where it is expanded.
   NOTE(review): `remain' presumably counts the bytes still available at
   `from' -- confirm at the expansion site.

   The end-of-input test is made BEFORE looking at the byte that follows
   the name, so that byte is never read when nothing but the name itself
   remains (it would lie one past the end of the input).  */
#define FROB(x) (remain >= (sizeof (x) - 1)                     \
                 && 0 == memcmp (from, x, sizeof (x) - 1)       \
                 && (remain == sizeof (x) - 1                   \
                     || *(from + sizeof (x) - 1) == ';'         \
                     || !ISALNUM (*(from + sizeof (x) - 1))))
377 else if (
FROB (
"lt"))
378 *to++ =
'<', from += 2;
379 else if (
FROB (
"gt"))
380 *to++ =
'>', from += 2;
381 else if (
FROB (
"amp"))
382 *to++ =
'&', from += 3;
383 else if (
FROB (
"quot"))
384 *to++ =
'\"', from += 4;
389 else if (
FROB (
"nbsp"))
390 *to++ = 160, from += 4;
397 if (from < end && *from ==
';')
409 assert (to - (pool->
contents + pool->
tail) <= end - beg);
425 char *p = pool->
contents + old_tail;
/* Nonzero when X is an acceptable name character: its code lies strictly
   between 32 and 127, and it is none of '=', '>' or '/'.  X may be expanded
   up to five times, so pass a side-effect-free expression.  */
#define NAME_CHAR_P(x) \
  (!((x) <= 32 || (x) >= 127 || (x) == '=' || (x) == '>' || (x) == '/'))
450 static int comment_backout_count;
476 char quote_char =
'\0';
502 while (state != AC_S_DONE && state != AC_S_BACKOUT)
505 state = AC_S_BACKOUT;
515 state = AC_S_DEFAULT;
518 state = AC_S_BACKOUT;
541 state = AC_S_DCLNAME;
543 state = AC_S_BACKOUT;
553 state = AC_S_DEFAULT;
558 assert (ch ==
'\'' || ch == 0x22);
563 state = AC_S_IN_QUOTE;
566 if (ch == quote_char)
572 assert (ch == quote_char);
574 state = AC_S_DEFAULT;
586 state = AC_S_COMMENT;
589 state = AC_S_BACKOUT;
613 state = AC_S_DEFAULT;
616 state = AC_S_COMMENT;
623 if (state == AC_S_BACKOUT)
626 ++comment_backout_count;
644 const char *p = beg - 1;
646 while ((p += 3) < end)
650 if (p[-1] ==
'-' && p[-2] ==
'-')
658 if (++p == end)
return NULL;
661 case '>':
return p + 1;
662 case '-':
goto at_dash_dash;
667 if ((p += 2) >= end)
return NULL;
692 return hash_table_get (ht, copy) != NULL;
698 #define ADVANCE(p) do { \
706 #define SKIP_WS(p) do { \
707 while (ISSPACE (*p)) { \
714 #define SKIP_NON_WS(p) do { \
715 while (!ISSPACE (*p)) { \
721 static int tag_backout_count;
742 void (*mapfun) (
struct taginfo *,
void *),
void *maparg,
744 const struct hash_table *allowed_tags,
745 const struct hash_table *allowed_attributes)
749 char pool_initial_storage[256];
752 const char *p = text;
753 const char *end = text +
size;
755 struct attr_pair attr_pair_initial_storage[8];
756 int attr_pair_size =
countof (attr_pair_initial_storage);
757 int attr_pair_resized = 0;
758 struct attr_pair *pairs = attr_pair_initial_storage;
767 const char *tag_name_begin, *tag_name_end;
768 const char *tag_start_position;
769 int uninteresting_tag;
779 p =
memchr (p,
'<', end - p);
783 tag_start_position = p;
791 && p < end + 3 && p[1] ==
'-' && p[2] ==
'-')
822 if (p == tag_name_begin)
826 if (end_tag && *p !=
'>')
829 if (!
name_allowed (allowed_tags, tag_name_begin, tag_name_end))
832 uninteresting_tag = 1;
835 uninteresting_tag = 0;
842 const char *attr_name_begin, *attr_name_end;
843 const char *attr_value_begin, *attr_value_end;
844 const char *attr_raw_value_begin, *attr_raw_value_end;
873 if (attr_name_begin == attr_name_end)
885 attr_raw_value_begin = attr_value_begin = attr_name_begin;
886 attr_raw_value_end = attr_value_end = attr_name_end;
892 if (*p ==
'\"' || *p ==
'\'')
894 int newline_seen = 0;
895 char quote_char = *p;
896 attr_raw_value_begin = p;
898 attr_value_begin = p;
900 while (*p != quote_char)
902 if (!newline_seen && *p ==
'\n')
911 p = attr_value_begin;
915 else if (newline_seen && *p ==
'>')
921 if (*p == quote_char)
925 attr_raw_value_end = p;
933 attr_value_begin = p;
940 while (!
ISSPACE (*p) && *p !=
'>')
944 if (attr_value_begin == attr_value_end)
948 attr_raw_value_begin = attr_value_begin;
949 attr_raw_value_end = attr_value_end;
964 if (uninteresting_tag)
970 if (!
name_allowed (allowed_attributes, attr_name_begin, attr_name_end))
973 GROW_ARRAY (pairs, attr_pair_size, nattrs + 1, attr_pair_resized,
983 - attr_raw_value_begin);
987 if (uninteresting_tag)
1006 for (i = 0; i <
nattrs; i++)
1011 taginfo.
attrs = pairs;
1015 (*mapfun) (&taginfo, maparg);
1022 ++tag_backout_count;
1026 p = tag_start_position + 1;
1032 if (attr_pair_resized)
1049 #define HTMLPATH "http://download.fedora.redhat.com/pub/fedora/linux/core/3/i386/os/Fedora/RPMS/"
1050 #define HTMLPATH "http://localhost/rawhide/test/"
1052 #define HTMLPATH "http://localhost/rawhide/"
1054 static const char * htmlpath = HTMLPATH;
1061 printf (
"%s%s", taginfo->
end_tag_p ?
"/" :
"", taginfo->
name);
1062 for (i = 0; i < taginfo->
nattrs; i++)
1071 char *x = (
char *)
xmalloc (size);
1074 int tag_counter = 0;
1076 struct hash_table *interesting_tags = (
struct hash_table *)1;
1077 struct hash_table *interesting_attributes = (
struct hash_table *)1;
1082 fd =
Fopen(htmlpath,
"r");
1083 while ((read_count =
Fread (x + length, 1, size - length, fd)))
1085 if (read_count <= 0)
1087 length += read_count;
1094 fprintf(stderr,
"============== %p[%d]\n%s\n", x, length, x);
1098 flags, interesting_tags, interesting_attributes);
1099 printf (
"TAGS: %d\n", tag_counter);
1100 printf (
"Tag backouts: %d\n", tag_backout_count);
1101 printf (
"Comment backouts: %d\n", comment_backout_count);
void map_html_tags(const char *text, int size, void(*mapfun)(struct taginfo *, void *), void *maparg, int flags, const struct hash_table *allowed_tags, const struct hash_table *allowed_attributes)
#define GROW_ARRAY(basevar, sizevar, needed_size, resized, type)
#define POOL_GROW(p, increase)
static const char * find_comment_end(const char *beg, const char *end)
FD_t Fopen(const char *path, const char *_fmode)
fopen(3) clone.
#define POOL_INIT(p, initial_storage, initial_size)
static void convert_and_copy(struct pool *pool, const char *beg, const char *end, int flags)
int main(int argc, const char **argv, char **envp)
#define BOUNDED_TO_ALLOCA(beg, end, place)
static const char * advance_declaration(const char *beg, const char *end)
const char * end_position
The FD_t File Handle data structure.
#define POOL_APPEND(p, beg, end)
size_t Fread(void *buf, size_t size, size_t nmemb, FD_t fd)
fread(3) clone.
const char const char int arg
int Fclose(FD_t fd)
fclose(3) clone.
const char const bson int mongo_write_concern int flags
const char * value_raw_beginning
#define AP_PROCESS_ENTITIES
const char const char size_t size
#define POOL_APPEND_CHR(p, ch)
static int name_allowed(const struct hash_table *ht, const char *b, const char *e)
const char * start_position
#define MHT_STRICT_COMMENTS