1 | /* |
---|
2 | * Copyright (C) 2001 - 2005 Mike Wray <mike.wray@hp.com> |
---|
3 | * |
---|
4 | * This library is free software; you can redistribute it and/or modify |
---|
5 | * it under the terms of the GNU Lesser General Public License as |
---|
6 | * published by the Free Software Foundation; either version 2.1 of the |
---|
7 | * License, or (at your option) any later version. This library is |
---|
8 | * distributed in the hope that it will be useful, but WITHOUT ANY |
---|
9 | * WARRANTY; without even the implied warranty of MERCHANTABILITY or |
---|
10 | * FITNESS FOR A PARTICULAR PURPOSE. |
---|
11 | * See the GNU Lesser General Public License for more details. |
---|
12 | * |
---|
13 | * You should have received a copy of the GNU Lesser General Public License |
---|
14 | * along with this library; if not, write to the Free Software Foundation, |
---|
15 | * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
16 | */ |
---|
17 | |
---|
18 | #ifdef __KERNEL__ |
---|
19 | # include <linux/config.h> |
---|
20 | # include <linux/module.h> |
---|
21 | # include <linux/kernel.h> |
---|
22 | # include <linux/string.h> |
---|
23 | # include <linux/errno.h> |
---|
24 | #else |
---|
25 | # include <stdlib.h> |
---|
26 | # include <errno.h> |
---|
27 | #endif |
---|
28 | |
---|
29 | #include "sys_net.h" |
---|
30 | |
---|
31 | #include "iostream.h" |
---|
32 | #include "lexis.h" |
---|
33 | #include "sxpr_parser.h" |
---|
34 | #include "sys_string.h" |
---|
35 | #include "enum.h" |
---|
36 | |
---|
37 | /** @file |
---|
38 | * Sxpr parsing. |
---|
39 | * |
---|
40 | * So that the parser does not leak memory, all sxprs constructed by |
---|
41 | * the parser must be freed on error. On successful parse the sxpr |
---|
42 | * returned becomes the responsibility of the caller. |
---|
43 | * |
---|
44 | * @author Mike Wray <mike.wray@hpl.hp.com> |
---|
45 | */ |
---|
46 | |
---|
47 | #ifdef DEBUG |
---|
48 | #define dprintf(fmt, args...) IOStream_print(iostdout, "[DEBUG] %s" fmt, __FUNCTION__, ##args) |
---|
49 | #else |
---|
50 | #define dprintf(fmt, args...) do{ }while(0) |
---|
51 | #endif |
---|
52 | |
---|
53 | #undef printf |
---|
54 | #define printf(fmt, args...) IOStream_print(iostdout, fmt, ##args) |
---|
55 | |
---|
56 | static int state_start(Parser *p, char c); |
---|
57 | static int begin_start(Parser *p, char c); |
---|
58 | |
---|
#if 0
/* Disabled helpers, kept for reference. */

/** Print a parse error on the parser's error stream.
 * Nothing is printed when the error stream is null.
 *
 * @param in parser
 * @param msg format followed by printf arguments
 */
static void eprintf(Parser *in, char *msg, ...){
    va_list ap;
    if(!in->error_out) return;
    va_start(ap, msg);
    IOStream_vprint(in->error_out, msg, ap);
    va_end(ap);
}

/** Print a parse warning on the parser's error stream.
 * Nothing is printed when the error stream is null.
 *
 * @param in parser
 * @param msg format followed by printf arguments
 */
static void wprintf(Parser *in, char *msg, ...){
    va_list ap;
    if(!in->error_out) return;
    va_start(ap, msg);
    IOStream_vprint(in->error_out, msg, ap);
    va_end(ap);
}
#endif
88 | |
---|
89 | |
---|
90 | /*============================================================================*/ |
---|
91 | |
---|
92 | /** Record defining the message for a parse error. */ |
---|
93 | typedef struct { |
---|
94 | ParseErrorId id; |
---|
95 | char *message; |
---|
96 | } ParseError; |
---|
97 | |
---|
98 | /** Format for printing parse error messages. */ |
---|
99 | #define PARSE_ERR_FMT "parse error> line %3d, column %2d: %s" |
---|
100 | |
---|
101 | /** Message catalog for the parse error codes. */ |
---|
102 | static ParseError catalog[] = { |
---|
103 | { PARSE_ERR_UNSPECIFIED, "unspecified error" }, |
---|
104 | { PARSE_ERR_NOMEM, "out of memory" }, |
---|
105 | { PARSE_ERR_UNEXPECTED_EOF, "unexpected end of input" }, |
---|
106 | { PARSE_ERR_TOKEN_TOO_LONG, "token too long" }, |
---|
107 | { PARSE_ERR_INVALID_SYNTAX, "syntax error" }, |
---|
108 | { PARSE_ERR_INVALID_ESCAPE, "invalid escape" }, |
---|
109 | { 0, NULL } |
---|
110 | }; |
---|
111 | |
---|
112 | /** Number of entries in the message catalog. */ |
---|
113 | const static int catalog_n = sizeof(catalog)/sizeof(ParseError); |
---|
114 | |
---|
115 | /** Set the parser error stream. |
---|
116 | * Parse errors are reported on the the error stream if it is non-null. |
---|
117 | * |
---|
118 | * @param z parser |
---|
119 | * @param error_out error stream |
---|
120 | */ |
---|
121 | void Parser_set_error_stream(Parser *z, IOStream *error_out){ |
---|
122 | z->error_out = error_out; |
---|
123 | } |
---|
124 | |
---|
125 | /** Get the parser error message for an error code. |
---|
126 | * |
---|
127 | * @param id error code |
---|
128 | * @return error message (empty string if the code is unknown) |
---|
129 | */ |
---|
130 | static char *get_message(ParseErrorId id){ |
---|
131 | int i; |
---|
132 | for(i = 0; i < catalog_n; i++){ |
---|
133 | if(id == catalog[i].id){ |
---|
134 | return catalog[i].message; |
---|
135 | } |
---|
136 | } |
---|
137 | return ""; |
---|
138 | } |
---|
139 | |
---|
#if 0
/* Disabled accessors for the current input position, kept for reference. */

/** Get the current line number.
 *
 * @param in parser
 * @return value of the parser's line counter
 */
static int get_line(Parser *in){
    int line = in->line_no;
    return line;
}

/** Get the current column number.
 *
 * @param in parser
 * @return value of the parser's character counter
 */
static int get_column(Parser *in){
    int column = in->char_no;
    return column;
}
#endif
157 | |
---|
158 | /** Get the line number the current token started on. |
---|
159 | * |
---|
160 | * @param in parser |
---|
161 | */ |
---|
162 | static int get_tok_line(Parser *in){ |
---|
163 | return in->tok_begin_line; |
---|
164 | } |
---|
165 | |
---|
166 | /** Get the column number the current token started on. |
---|
167 | * |
---|
168 | * @param in parser |
---|
169 | */ |
---|
170 | static int get_tok_column(Parser *in){ |
---|
171 | return in->tok_begin_char; |
---|
172 | } |
---|
173 | |
---|
174 | /** Return the current token. |
---|
175 | * The return value points at the internal buffer, so |
---|
176 | * it must not be modified (or freed). Use copy_token() if you need a copy. |
---|
177 | * |
---|
178 | * @param p parser |
---|
179 | * @return token |
---|
180 | */ |
---|
181 | char *peek_token(Parser *p){ |
---|
182 | return p->tok; |
---|
183 | } |
---|
184 | |
---|
185 | int token_len(Parser *p){ |
---|
186 | return p->tok_end - p->tok; |
---|
187 | } |
---|
188 | |
---|
189 | /** Return a copy of the current token. |
---|
190 | * The returned value should be freed when finished with. |
---|
191 | * |
---|
192 | * @param p parser |
---|
193 | * @return copy of token |
---|
194 | */ |
---|
195 | char *copy_token(Parser *p){ |
---|
196 | int n = token_len(p); |
---|
197 | char *buf = allocate(n + 1); |
---|
198 | if(buf){ |
---|
199 | memcpy(buf, peek_token(p), n); |
---|
200 | buf[n] = '\0'; |
---|
201 | } |
---|
202 | return buf; |
---|
203 | } |
---|
204 | |
---|
205 | void new_token(Parser *p){ |
---|
206 | memset(p->buf, 0, p->buf_end - p->buf); |
---|
207 | p->tok = p->buf; |
---|
208 | p->tok_end = p->tok; |
---|
209 | p->tok_begin_line = p->line_no; |
---|
210 | p->tok_begin_char = p->char_no; |
---|
211 | } |
---|
212 | |
---|
213 | /** Report a parse error. |
---|
214 | * Does nothing if the error stream is null or there is no error. |
---|
215 | * |
---|
216 | * @param in parser |
---|
217 | */ |
---|
218 | static void report_error(Parser *in){ |
---|
219 | if(in->error_out && in->err){ |
---|
220 | char *msg = get_message(in->err); |
---|
221 | char *tok = peek_token(in); |
---|
222 | IOStream_print(in->error_out, PARSE_ERR_FMT, |
---|
223 | get_tok_line(in), get_tok_column(in), msg); |
---|
224 | if(tok && tok[0]){ |
---|
225 | IOStream_print(in->error_out, " '%s'", tok); |
---|
226 | } |
---|
227 | IOStream_print(in->error_out, "\n"); |
---|
228 | } |
---|
229 | } |
---|
230 | |
---|
231 | /** Get the error message for the current parse error code. |
---|
232 | * Does nothing if there is no error. |
---|
233 | * |
---|
234 | * @param in parser |
---|
235 | * @param buf where to place the message |
---|
236 | * @param n maximum number of characters to place in buf |
---|
237 | * @return current error code (zero for no error) |
---|
238 | */ |
---|
239 | int Parser_error_message(Parser *in, char *buf, int n){ |
---|
240 | if(in->err){ |
---|
241 | char *msg = get_message(in->err); |
---|
242 | snprintf(buf, n, PARSE_ERR_FMT, get_tok_line(in), |
---|
243 | get_tok_column(in), msg); |
---|
244 | } |
---|
245 | return in->err; |
---|
246 | } |
---|
247 | |
---|
248 | /** Flag a parse error. All subsequent reads will fail. |
---|
249 | * Does not change the parser error code if it is already set. |
---|
250 | * |
---|
251 | * @param in parser |
---|
252 | * @param id error code |
---|
253 | */ |
---|
254 | int Parser_error_id(Parser *in, ParseErrorId id){ |
---|
255 | if(!in->err){ |
---|
256 | in->err = id; |
---|
257 | report_error(in); |
---|
258 | } |
---|
259 | return -EINVAL; |
---|
260 | } |
---|
261 | |
---|
262 | /** Flag an unspecified parse error. |
---|
263 | * |
---|
264 | * @param in parser |
---|
265 | */ |
---|
266 | int Parser_error(Parser *in){ |
---|
267 | return Parser_error_id(in, PARSE_ERR_INVALID_SYNTAX); |
---|
268 | } |
---|
269 | |
---|
270 | /** Test if the parser's error flag is set. |
---|
271 | * |
---|
272 | * @param in parser |
---|
273 | * @return 1 if set, 0 otherwise |
---|
274 | */ |
---|
275 | int Parser_has_error(Parser *in){ |
---|
276 | return (in->err > 0); |
---|
277 | } |
---|
278 | |
---|
279 | /** Test if the parser is at end of input. |
---|
280 | * |
---|
281 | * @param in parser |
---|
282 | * @return 1 if at EOF, 0 otherwise |
---|
283 | */ |
---|
284 | int Parser_at_eof(Parser *p){ |
---|
285 | return p->eof; |
---|
286 | } |
---|
287 | |
---|
288 | void ParserState_free(ParserState *z){ |
---|
289 | if(!z) return; |
---|
290 | objfree(z->val); |
---|
291 | deallocate(z); |
---|
292 | } |
---|
293 | |
---|
294 | int ParserState_new(ParserStateFn *fn, char *name, |
---|
295 | ParserState *parent, ParserState **val){ |
---|
296 | int err = -ENOMEM; |
---|
297 | ParserState *z; |
---|
298 | z = ALLOCATE(ParserState); |
---|
299 | if(!z) goto exit; |
---|
300 | z->name = name; |
---|
301 | z->fn = fn; |
---|
302 | z->parent = parent; |
---|
303 | z->val = ONULL; |
---|
304 | err = 0; |
---|
305 | exit: |
---|
306 | *val = (err ? NULL : z); |
---|
307 | return err; |
---|
308 | } |
---|
309 | |
---|
310 | void Parser_pop(Parser *p){ |
---|
311 | ParserState *s = p->state; |
---|
312 | if(!s) return; |
---|
313 | dprintf("Parser_pop> %s\n", s->name); |
---|
314 | p->state = s->parent; |
---|
315 | if (p->start_state == s) { |
---|
316 | p->start_state = NULL; |
---|
317 | } |
---|
318 | ParserState_free(s); |
---|
319 | } |
---|
320 | |
---|
321 | /** Free a parser. |
---|
322 | * No-op if the parser is null. |
---|
323 | * |
---|
324 | * @param z parser |
---|
325 | */ |
---|
326 | void Parser_free(Parser *z){ |
---|
327 | if(!z) return; |
---|
328 | // Hmmm. Need to free states, but careful about double free of values. |
---|
329 | while(z->state){ |
---|
330 | objfree(z->state->val); |
---|
331 | Parser_pop(z); |
---|
332 | } |
---|
333 | if(z->buf) deallocate(z->buf); |
---|
334 | objfree(z->val); |
---|
335 | z->val = ONONE; |
---|
336 | deallocate(z); |
---|
337 | } |
---|
338 | |
---|
339 | int Parser_push(Parser *p, ParserStateFn *fn, char *name){ |
---|
340 | dprintf("Parser_push> %s\n", name); |
---|
341 | return ParserState_new(fn, name, p->state, &p->state); |
---|
342 | } |
---|
343 | |
---|
344 | int Parser_return(Parser *p){ |
---|
345 | int err = 0; |
---|
346 | Sxpr val = ONONE; |
---|
347 | if(!p->state){ |
---|
348 | err = -EINVAL; |
---|
349 | goto exit; |
---|
350 | } |
---|
351 | val = p->state->val; |
---|
352 | p->state->val = ONONE; |
---|
353 | Parser_pop(p); |
---|
354 | if(p->state){ |
---|
355 | err = cons_push(&p->state->val, val); |
---|
356 | } else { |
---|
357 | val = nrev(val); |
---|
358 | p->val = val; |
---|
359 | } |
---|
360 | exit: |
---|
361 | if(err){ |
---|
362 | objfree(val); |
---|
363 | } |
---|
364 | return err; |
---|
365 | } |
---|
366 | |
---|
367 | /** Reset the fields of a parser to initial values. |
---|
368 | * |
---|
369 | * @param z parser |
---|
370 | */ |
---|
371 | static void reset(Parser *z){ |
---|
372 | // leave flags |
---|
373 | // leave error_out |
---|
374 | while(z->state){ |
---|
375 | Parser_pop(z); |
---|
376 | } |
---|
377 | z->val = ONONE; |
---|
378 | z->eof = 0; |
---|
379 | z->err = 0; |
---|
380 | z->line_no = 1; |
---|
381 | z->char_no = 0; |
---|
382 | memset(z->buf, 0, z->buf_end - z->buf); |
---|
383 | z->tok = z->buf; |
---|
384 | z->tok_end = z->tok; |
---|
385 | z->tok_begin_line = 0; |
---|
386 | z->tok_begin_char = 0; |
---|
387 | z->start_state = NULL; |
---|
388 | } |
---|
389 | |
---|
390 | /** Create a new parser. The error stream defaults to null. |
---|
391 | */ |
---|
392 | Parser * Parser_new(void){ |
---|
393 | Parser *z = ALLOCATE(Parser); |
---|
394 | int n = PARSER_BUF_SIZE; |
---|
395 | int err = -ENOMEM; |
---|
396 | |
---|
397 | if(!z) goto exit; |
---|
398 | z->buf = allocate(n); |
---|
399 | if(!z->buf) goto exit; |
---|
400 | err = 0; |
---|
401 | z->buf_end = z->buf + n; |
---|
402 | z->begin = begin_start; |
---|
403 | reset(z); |
---|
404 | exit: |
---|
405 | if(err){ |
---|
406 | Parser_free(z); |
---|
407 | z = NULL; |
---|
408 | } |
---|
409 | return z; |
---|
410 | } |
---|
411 | |
---|
412 | /** Get the next character. |
---|
413 | * Records the character read in the parser, |
---|
414 | * and sets the line and character counts. |
---|
415 | * |
---|
416 | * @param p parser |
---|
417 | * @return error flag: 0 on success, non-zero on error |
---|
418 | */ |
---|
419 | static int input_char(Parser *p, char c){ |
---|
420 | int err = 0; |
---|
421 | if(c=='\n'){ |
---|
422 | p->line_no++; |
---|
423 | p->char_no = 0; |
---|
424 | } else { |
---|
425 | p->char_no++; |
---|
426 | } |
---|
427 | return err; |
---|
428 | } |
---|
429 | |
---|
430 | int save_char(Parser *p, char c){ |
---|
431 | int err = 0; |
---|
432 | if(p->tok_end >= p->buf_end){ |
---|
433 | int buf_n = (p->buf_end - p->buf) + PARSER_BUF_INCREMENT; |
---|
434 | char *buf = allocate(buf_n); |
---|
435 | if(!buf){ |
---|
436 | err = -ENOMEM; |
---|
437 | goto exit; |
---|
438 | } |
---|
439 | memcpy(buf, p->buf, p->tok_end - p->buf); |
---|
440 | p->buf_end = buf + buf_n; |
---|
441 | p->tok = buf + (p->tok - p->buf); |
---|
442 | p->tok_end = buf + (p->tok_end - p->buf); |
---|
443 | deallocate(p->buf); |
---|
444 | p->buf = buf; |
---|
445 | } |
---|
446 | *p->tok_end++ = c; |
---|
447 | exit: |
---|
448 | return err; |
---|
449 | } |
---|
450 | |
---|
451 | /** Determine if a character is a separator. |
---|
452 | * |
---|
453 | * @param p parser |
---|
454 | * @param c character to test |
---|
455 | * @return 1 if a separator, 0 otherwise |
---|
456 | */ |
---|
457 | static int is_separator(Parser *p, char c){ |
---|
458 | return in_sep_class(c); |
---|
459 | } |
---|
460 | |
---|
461 | int Parser_set_value(Parser *p, Sxpr obj){ |
---|
462 | int err = 0; |
---|
463 | if(NOMEMP(obj)){ |
---|
464 | err = -ENOMEM; |
---|
465 | } else { |
---|
466 | p->state->val = obj; |
---|
467 | } |
---|
468 | return err; |
---|
469 | } |
---|
470 | |
---|
471 | int Parser_intern(Parser *p){ |
---|
472 | Sxpr obj = intern(peek_token(p)); |
---|
473 | return Parser_set_value(p, obj); |
---|
474 | } |
---|
475 | |
---|
476 | int Parser_atom(Parser *p){ |
---|
477 | Sxpr obj; |
---|
478 | long v; |
---|
479 | if(Parser_flags(p, PARSE_INT) && |
---|
480 | convert_atol(peek_token(p), &v) == 0){ |
---|
481 | obj = OINT(v); |
---|
482 | } else { |
---|
483 | obj = atom_new(peek_token(p)); |
---|
484 | } |
---|
485 | return Parser_set_value(p, obj); |
---|
486 | } |
---|
487 | |
---|
488 | int Parser_string(Parser *p){ |
---|
489 | Sxpr obj = string_new_n(peek_token(p), token_len(p)); |
---|
490 | return Parser_set_value(p, obj); |
---|
491 | } |
---|
492 | |
---|
493 | int Parser_data(Parser *p){ |
---|
494 | Sxpr obj = string_new_n(peek_token(p), token_len(p)); |
---|
495 | return Parser_set_value(p, obj); |
---|
496 | } |
---|
497 | |
---|
498 | int Parser_uint(Parser *p){ |
---|
499 | unsigned int x = htonl(*(unsigned int *)peek_token(p)); |
---|
500 | return Parser_set_value(p, OINT(x)); |
---|
501 | } |
---|
502 | |
---|
503 | static int get_escape(char c, char *d){ |
---|
504 | int err = 0; |
---|
505 | switch(c){ |
---|
506 | case 'a': *d = '\a'; break; |
---|
507 | case 'b': *d = '\b'; break; |
---|
508 | case 'f': *d = '\f'; break; |
---|
509 | case 'n': *d = '\n'; break; |
---|
510 | case 'r': *d = '\r'; break; |
---|
511 | case 't': *d = '\t'; break; |
---|
512 | case 'v': *d = '\v'; break; |
---|
513 | case c_escape: *d = c_escape; break; |
---|
514 | case c_single_quote: *d = c_single_quote; break; |
---|
515 | case c_double_quote: *d = c_double_quote; break; |
---|
516 | default: |
---|
517 | err = -EINVAL; |
---|
518 | } |
---|
519 | return err; |
---|
520 | } |
---|
521 | |
---|
522 | int Parser_ready(Parser *p){ |
---|
523 | return CONSP(p->val) || (p->start_state && CONSP(p->start_state->val)); |
---|
524 | } |
---|
525 | |
---|
526 | Sxpr Parser_get_val(Parser *p){ |
---|
527 | Sxpr v = ONONE, w = ONONE; |
---|
528 | if(CONSP(p->val)){ |
---|
529 | } else if (p->start_state && CONSP(p->start_state->val)){ |
---|
530 | p->val = p->start_state->val; |
---|
531 | p->val = nrev(p->val); |
---|
532 | p->start_state->val = ONULL; |
---|
533 | } else { |
---|
534 | goto exit; |
---|
535 | } |
---|
536 | w = p->val; |
---|
537 | v = CAR(w); |
---|
538 | p->val = CDR(w); |
---|
539 | hfree(w); |
---|
540 | exit: |
---|
541 | return v; |
---|
542 | } |
---|
543 | |
---|
544 | Sxpr Parser_get_all(Parser *p){ |
---|
545 | Sxpr v = ONULL; |
---|
546 | if(CONSP(p->val)){ |
---|
547 | v = p->val; |
---|
548 | p->val = ONONE; |
---|
549 | } else if(p->start_state && CONSP(p->start_state->val)){ |
---|
550 | v = p->start_state->val; |
---|
551 | p->start_state->val = ONULL; |
---|
552 | v = nrev(v); |
---|
553 | } |
---|
554 | return v; |
---|
555 | } |
---|
556 | |
---|
557 | static int state_comment(Parser *p, char c){ |
---|
558 | int err = 0; |
---|
559 | if(c == '\n' || Parser_at_eof(p)){ |
---|
560 | Parser_pop(p); |
---|
561 | } else { |
---|
562 | err = input_char(p, c); |
---|
563 | } |
---|
564 | return err; |
---|
565 | } |
---|
566 | |
---|
567 | static int begin_comment(Parser *p, char c){ |
---|
568 | int err = 0; |
---|
569 | err = Parser_push(p, state_comment, "comment"); |
---|
570 | if(err) goto exit; |
---|
571 | err = input_char(p, c); |
---|
572 | exit: |
---|
573 | return err; |
---|
574 | } |
---|
575 | |
---|
576 | static int end_string(Parser *p){ |
---|
577 | int err = 0; |
---|
578 | err = Parser_string(p); |
---|
579 | if(err) goto exit; |
---|
580 | err = Parser_return(p); |
---|
581 | exit: |
---|
582 | return err; |
---|
583 | } |
---|
584 | |
---|
585 | static int octaldone(Parser *p){ |
---|
586 | int err = 0; |
---|
587 | char d = (char)(p->state->ival & 0xff); |
---|
588 | Parser_pop(p); |
---|
589 | err = Parser_input_char(p, d); |
---|
590 | return err; |
---|
591 | } |
---|
592 | |
---|
593 | static int octaldigit(Parser *p, int d){ |
---|
594 | int err = 0; |
---|
595 | p->state->ival *= 8; |
---|
596 | p->state->ival += d; |
---|
597 | p->state->count++; |
---|
598 | if(err) goto exit; |
---|
599 | if(p->state->ival < 0 || p->state->ival > 0xff){ |
---|
600 | err = Parser_error(p); |
---|
601 | goto exit; |
---|
602 | } |
---|
603 | if(p->state->count == 3){ |
---|
604 | err = octaldone(p); |
---|
605 | } |
---|
606 | exit: |
---|
607 | return err; |
---|
608 | } |
---|
609 | |
---|
610 | static int state_octal(Parser *p, char c){ |
---|
611 | int err = 0; |
---|
612 | if(Parser_at_eof(p)){ |
---|
613 | err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); |
---|
614 | goto exit; |
---|
615 | } else if('0' <= c && c <= '7'){ |
---|
616 | err = octaldigit(p, c - '0'); |
---|
617 | } else { |
---|
618 | err = octaldone(p); |
---|
619 | if(err) goto exit; |
---|
620 | Parser_input_char(p, c); |
---|
621 | } |
---|
622 | exit: |
---|
623 | return err; |
---|
624 | } |
---|
625 | |
---|
626 | static int hexdone(Parser *p){ |
---|
627 | int err = 0; |
---|
628 | char d = (char)(p->state->ival & 0xff); |
---|
629 | Parser_pop(p); |
---|
630 | err = Parser_input_char(p, d); |
---|
631 | return err; |
---|
632 | } |
---|
633 | |
---|
634 | static int hexdigit(Parser *p, int d){ |
---|
635 | int err = 0; |
---|
636 | p->state->ival *= 16; |
---|
637 | p->state->ival += d; |
---|
638 | p->state->count++; |
---|
639 | if(err) goto exit; |
---|
640 | if(p->state->ival < 0 || p->state->ival > 0xff){ |
---|
641 | err = Parser_error(p); |
---|
642 | goto exit; |
---|
643 | } |
---|
644 | if(p->state->count == 2){ |
---|
645 | err = hexdone(p); |
---|
646 | } |
---|
647 | exit: |
---|
648 | return err; |
---|
649 | } |
---|
650 | |
---|
651 | static int state_hex(Parser *p, char c){ |
---|
652 | int err = 0; |
---|
653 | if(Parser_at_eof(p)){ |
---|
654 | err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); |
---|
655 | goto exit; |
---|
656 | } else if('0' <= c && c <= '9'){ |
---|
657 | err = hexdigit(p, c - '0'); |
---|
658 | } else if('A' <= c && c <= 'F'){ |
---|
659 | err = hexdigit(p, c - 'A' + 10); |
---|
660 | } else if('a' <= c && c <= 'f'){ |
---|
661 | err = hexdigit(p, c - 'a' + 10); |
---|
662 | } else if(p->state->count){ |
---|
663 | err = hexdone(p); |
---|
664 | if(err) goto exit; |
---|
665 | Parser_input_char(p, c); |
---|
666 | } |
---|
667 | exit: |
---|
668 | return err; |
---|
669 | } |
---|
670 | |
---|
671 | static int state_escape(Parser *p, char c){ |
---|
672 | int err = 0; |
---|
673 | char d; |
---|
674 | if(Parser_at_eof(p)){ |
---|
675 | err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); |
---|
676 | goto exit; |
---|
677 | } |
---|
678 | if(get_escape(c, &d) == 0){ |
---|
679 | err = save_char(p, d); |
---|
680 | if(err) goto exit; |
---|
681 | Parser_pop(p); |
---|
682 | } else if(c == 'x'){ |
---|
683 | p->state->fn = state_hex; |
---|
684 | p->state->ival = 0; |
---|
685 | p->state->count = 0; |
---|
686 | } else { |
---|
687 | p->state->fn = state_octal; |
---|
688 | p->state->ival = 0; |
---|
689 | p->state->count = 0; |
---|
690 | err = Parser_input_char(p, c); |
---|
691 | } |
---|
692 | exit: |
---|
693 | return err; |
---|
694 | } |
---|
695 | |
---|
696 | static int state_string(Parser *p, char c){ |
---|
697 | int err = 0; |
---|
698 | if(Parser_at_eof(p)){ |
---|
699 | err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); |
---|
700 | } else if(c == p->state->delim){ |
---|
701 | err = end_string(p); |
---|
702 | } else if(c == '\\'){ |
---|
703 | err = Parser_push(p, state_escape, "escape"); |
---|
704 | } else { |
---|
705 | err = save_char(p, c); |
---|
706 | } |
---|
707 | return err; |
---|
708 | } |
---|
709 | |
---|
710 | static int begin_string(Parser *p, char c){ |
---|
711 | int err = 0; |
---|
712 | err = Parser_push(p, state_string, "string"); |
---|
713 | if(err) goto exit; |
---|
714 | new_token(p); |
---|
715 | p->state->delim = c; |
---|
716 | exit: |
---|
717 | return err; |
---|
718 | } |
---|
719 | |
---|
720 | static int end_atom(Parser *p){ |
---|
721 | int err = 0; |
---|
722 | err = Parser_atom(p); |
---|
723 | if(err) goto exit; |
---|
724 | err = Parser_return(p); |
---|
725 | exit: |
---|
726 | return err; |
---|
727 | } |
---|
728 | |
---|
729 | static int state_atom(Parser *p, char c){ |
---|
730 | int err = 0; |
---|
731 | if(Parser_at_eof(p)){ |
---|
732 | err = end_atom(p); |
---|
733 | } else if(is_separator(p, c) || |
---|
734 | in_space_class(c) || |
---|
735 | in_comment_class(c)){ |
---|
736 | err = end_atom(p); |
---|
737 | if(err) goto exit; |
---|
738 | err = Parser_input_char(p, c); |
---|
739 | } else { |
---|
740 | err = save_char(p, c); |
---|
741 | } |
---|
742 | exit: |
---|
743 | return err; |
---|
744 | } |
---|
745 | |
---|
746 | static int begin_atom(Parser *p, char c){ |
---|
747 | int err = 0; |
---|
748 | err = Parser_push(p, state_atom, "atom"); |
---|
749 | if(err) goto exit; |
---|
750 | new_token(p); |
---|
751 | err = save_char(p, c); |
---|
752 | exit: |
---|
753 | return err; |
---|
754 | } |
---|
755 | |
---|
756 | static int end_data(Parser *p){ |
---|
757 | int err = 0; |
---|
758 | err = Parser_data(p); |
---|
759 | if(err) goto exit; |
---|
760 | err = Parser_return(p); |
---|
761 | exit: |
---|
762 | return err; |
---|
763 | } |
---|
764 | |
---|
765 | static int counted_data(Parser *p, char c){ |
---|
766 | int err = 0; |
---|
767 | err = save_char(p, c); |
---|
768 | if(err) goto exit; |
---|
769 | if(token_len(p) == p->state->count){ |
---|
770 | err = end_data(p); |
---|
771 | } |
---|
772 | exit: |
---|
773 | return err; |
---|
774 | } |
---|
775 | |
---|
776 | static int counted_data_count(Parser *p, char c){ |
---|
777 | int err = 0; |
---|
778 | if(c == p->state->delim){ |
---|
779 | new_token(p); |
---|
780 | p->state->count = p->state->ival; |
---|
781 | p->state->fn = counted_data; |
---|
782 | } else if('0' <= c && c <= '9'){ |
---|
783 | p->state->ival *= 10; |
---|
784 | p->state->ival += c - '0'; |
---|
785 | } else { |
---|
786 | err = -EINVAL; |
---|
787 | } |
---|
788 | return err; |
---|
789 | } |
---|
790 | |
---|
791 | static int quoted_data(Parser *p, char c){ |
---|
792 | int err = 0; |
---|
793 | int count = p->state->count; |
---|
794 | err = save_char(p, c); |
---|
795 | if(err) goto exit; |
---|
796 | // Check that buf is longer than delim and |
---|
797 | // ends with delim. If so, trim delim off and return. |
---|
798 | if((token_len(p) >= count) && |
---|
799 | !memcmp(p->tok_end - count, p->buf, count)){ |
---|
800 | p->tok_end -= count; |
---|
801 | end_data(p); |
---|
802 | } |
---|
803 | exit: |
---|
804 | return err; |
---|
805 | } |
---|
806 | |
---|
807 | static int quoted_data_delim(Parser *p, char c){ |
---|
808 | // Saves the delim in the token buffer. |
---|
809 | int err = 0; |
---|
810 | err = save_char(p, c); |
---|
811 | if(err) goto exit; |
---|
812 | if(c == p->state->delim){ |
---|
813 | p->state->fn = quoted_data; |
---|
814 | p->state->count = token_len(p); |
---|
815 | // Advance the token pointer past the delim. |
---|
816 | p->tok = p->tok_end; |
---|
817 | } |
---|
818 | exit: |
---|
819 | return err; |
---|
820 | } |
---|
821 | |
---|
822 | static int state_data(Parser *p, char c){ |
---|
823 | // Quoted data: |
---|
824 | // <<delim< anything not containing delimiter<delim< |
---|
825 | // Where 'delim' is anything not containing '<'. |
---|
826 | // Counted data: |
---|
827 | // <*nnn..* N bytes |
---|
828 | // Where nnn... is N in decimal ( |
---|
829 | int err = 0; |
---|
830 | switch(c){ |
---|
831 | case c_data_count: |
---|
832 | p->state->delim = c; |
---|
833 | p->state->fn = counted_data_count; |
---|
834 | p->state->ival = 0; |
---|
835 | new_token(p); |
---|
836 | break; |
---|
837 | case c_data_quote: |
---|
838 | p->state->delim = c; |
---|
839 | p->state->fn = quoted_data_delim; |
---|
840 | new_token(p); |
---|
841 | err = save_char(p, c); |
---|
842 | break; |
---|
843 | default: |
---|
844 | err = Parser_error(p); |
---|
845 | break; |
---|
846 | } |
---|
847 | return err; |
---|
848 | } |
---|
849 | |
---|
850 | static int begin_data(Parser *p, char c){ |
---|
851 | int err = 0; |
---|
852 | err = Parser_push(p, state_data, "data"); |
---|
853 | if(err) goto exit; |
---|
854 | new_token(p); |
---|
855 | exit: |
---|
856 | return err; |
---|
857 | } |
---|
858 | |
---|
859 | static int state_list(Parser *p, char c){ |
---|
860 | int err = 0; |
---|
861 | dprintf(">\n"); |
---|
862 | if(Parser_at_eof(p)){ |
---|
863 | err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); |
---|
864 | } else if(c == c_list_close){ |
---|
865 | p->state->val = nrev(p->state->val); |
---|
866 | err = Parser_return(p); |
---|
867 | } else { |
---|
868 | err = state_start(p, c); |
---|
869 | } |
---|
870 | dprintf("< err=%d\n", err); |
---|
871 | return err; |
---|
872 | |
---|
873 | } |
---|
874 | |
---|
875 | static int begin_list(Parser *p, char c){ |
---|
876 | return Parser_push(p, state_list, "list"); |
---|
877 | } |
---|
878 | |
---|
879 | static int state_start(Parser *p, char c){ |
---|
880 | int err = 0; |
---|
881 | dprintf(">\n"); |
---|
882 | if(Parser_at_eof(p)){ |
---|
883 | err = Parser_return(p); |
---|
884 | } else if(in_space_class(c)){ |
---|
885 | //skip |
---|
886 | } else if(in_comment_class(c)){ |
---|
887 | begin_comment(p, c); |
---|
888 | } else if(c == c_list_open){ |
---|
889 | begin_list(p, c); |
---|
890 | } else if(c == c_list_close){ |
---|
891 | err = Parser_error(p); |
---|
892 | } else if(in_string_quote_class(c)){ |
---|
893 | begin_string(p, c); |
---|
894 | } else if(c == c_data_open){ |
---|
895 | begin_data(p, c); |
---|
896 | } else if(in_printable_class(c)){ |
---|
897 | begin_atom(p, c); |
---|
898 | } else if(c == 0x04){ |
---|
899 | //ctrl-D, EOT: end-of-text. |
---|
900 | Parser_input_eof(p); |
---|
901 | } else { |
---|
902 | err = Parser_error(p); |
---|
903 | } |
---|
904 | dprintf("< err=%d\n", err); |
---|
905 | return err; |
---|
906 | } |
---|
907 | |
---|
908 | int begin_start(Parser *p, char c){ |
---|
909 | int err = 0; |
---|
910 | dprintf(">\n"); |
---|
911 | err = Parser_push(p, state_start, "start"); |
---|
912 | if(err) goto exit; |
---|
913 | p->start_state = p->state; |
---|
914 | exit: |
---|
915 | dprintf("< err=%d\n", err); |
---|
916 | return err; |
---|
917 | } |
---|
918 | |
---|
919 | int Parser_input_char(Parser *p, char c){ |
---|
920 | int err = 0; |
---|
921 | if(Parser_at_eof(p)){ |
---|
922 | //skip; |
---|
923 | } else { |
---|
924 | input_char(p, c); |
---|
925 | } |
---|
926 | if(!p->state){ |
---|
927 | err = p->begin(p, c); |
---|
928 | if(err) goto exit; |
---|
929 | } |
---|
930 | err = p->state->fn(p, c); |
---|
931 | exit: |
---|
932 | return err; |
---|
933 | } |
---|
934 | |
---|
935 | int Parser_input_eof(Parser *p){ |
---|
936 | int err = 0; |
---|
937 | p->eof = 1; |
---|
938 | err = Parser_input_char(p, IOSTREAM_EOF); |
---|
939 | return err; |
---|
940 | } |
---|
941 | |
---|
942 | int Parser_input(Parser *p, char *buf, int buf_n){ |
---|
943 | int err = 0; |
---|
944 | int i = 0; |
---|
945 | dprintf("> buf_n=%d\n", buf_n); |
---|
946 | if(buf_n <= 0){ |
---|
947 | buf_n = 0; |
---|
948 | err = Parser_input_eof(p); |
---|
949 | goto exit; |
---|
950 | } |
---|
951 | dprintf("> buf=|%*s|\n", buf_n, buf); |
---|
952 | for(i = 0; i < buf_n; i++){ |
---|
953 | err = Parser_input_char(p, buf[i]); |
---|
954 | if(err) goto exit; |
---|
955 | } |
---|
956 | exit: |
---|
957 | err = (err < 0 ? err : buf_n); |
---|
958 | dprintf("< err=%d\n", err); |
---|
959 | return err; |
---|
960 | } |
---|
961 | |
---|
#ifdef SXPR_PARSER_MAIN
/* Stuff for standalone testing. */

#include "file_stream.h"
//#include "string_stream.h"

/** Main program for testing.
 * Reads standard input in chunks, feeds it to a parser and prints each
 * object as soon as it is available. Parse errors are reported on
 * iostdout.
 *
 * @param argc number of arguments (unused)
 * @param argv arguments (unused)
 * @return 0 on success, 1 if the parser could not be created or the
 *         input failed to parse
 */
int main(int argc, char *argv[]){
    Parser *pin;
    int err = 0;
    char buf[1024];
    int k;
    Sxpr obj;
    int i = 0;

    pin = Parser_new();
    if(!pin){
        printf("out of memory\n");
        return 1;
    }
    Parser_set_error_stream(pin, iostdout);
    dprintf("> parse...\n");
    while(1){
        /* Leave room for the terminator, and put it directly after the
         * data: it used to be written one byte too far, at buf[k+1]. */
        k = fread(buf, 1, sizeof(buf) - 1, stdin);
        buf[k] = '\0';
        err = Parser_input(pin, buf, k);
        while(Parser_ready(pin)){
            obj = Parser_get_val(pin);
            printf("obj %d\n", i++);
            objprint(iostdout, obj, 0); printf("\n");
        }
        if(k <= 0 || err < 0) break;
    }
    dprintf("> err=%d\n", err);
    return (err < 0 ? 1 : 0);
}
#endif