4 * Copyright (c) Tuomo Valkonen 1999-2002.
6 * You may distribute and modify this library under the terms of either
7 * the Clarified Artistic License or the GNU LGPL, version 2.1 or later.
18 #include "tokenizer.h"
/* Human-readable messages for the tokenizer error codes.  tokz_warn_error()
 * indexes this table directly with the error code; the comment on each entry
 * names the E_TOKZ_* code the entry is meant to correspond to, and index 0
 * is the "no error" placeholder.
 * NOTE(review): "numberic" and "nestin" below look like typos.  The strings
 * are wrapped in DUMMY_TR() here and passed through TR() when printed, so
 * they are presumably translation keys -- confirm against the message
 * catalogs before changing them.
 */
24 static const char *errors[]={
25 DUMMY_TR("(no error)"),
26 DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */
27 DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */
28 DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */
29 DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR */
30 DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */
31 DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */
32 DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */
33 DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */
34 DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */
35 DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */
36 DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */
37 DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */
38 DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */
39 DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */
40 DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */
41 DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */
42 DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */
43 DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOKZ_MAX_NEST */
44 DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */
45 DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */
/*
 * Helpers for building a string one character at a time.
 *
 * STRING_DECL(X) and STRING_DECL_P(X, P) declare, in the current scope:
 *   err     -- 0, or -ENOMEM once an append/finish step has failed
 *   X       -- the result pointer, initially NULL
 *   X_tmp   -- a STRBLEN-byte staging buffer; STRING_DECL_P pre-loads it
 *              with P, which must be a string literal (sizeof(P) is used)
 *   X_tmpl  -- number of bytes of X_tmp currently in use
 * Because `err` has a fixed name, only one string can be declared per scope.
 *
 * STRING_APPEND(X, C) hands character C to _string_append() and records
 * -ENOMEM in err when that call reports failure.  The macro argument C is
 * what gets appended; it no longer depends on the caller happening to have
 * a variable named `c`.
 *
 * STRING_FINISH(X) returns err from the *enclosing function* if an earlier
 * step failed; otherwise it calls _string_finish() and records -ENOMEM in
 * err when that call reports failure.
 *
 * STRING_FREE(X) releases X.  free(NULL) is a no-op, so no guard is needed,
 * and the expansion is a single expression that is safe inside if/else.
 *
 * STRING_APPEND and STRING_FINISH keep their brace-block form so that
 * existing call sites, with or without a trailing semicolon, still compile.
 */
#define STRING_DECL(X) int err=0; char *X=NULL; char X##_tmp[STRBLEN]; int X##_tmpl=0
#define STRING_DECL_P(X, P) int err=0; char *X=NULL; char X##_tmp[STRBLEN]=P; int X##_tmpl=sizeof(P)-1
#define STRING_APPEND(X, C) {if(!_string_append(&X, X##_tmp, &X##_tmpl, (C))) err=-ENOMEM;}
#define STRING_FREE(X) free(X)
#define STRING_FINISH(X) {if(err!=0) return err; if(!_string_finish(&X, X##_tmp, X##_tmpl)) err=-ENOMEM;}
60 static bool _string_append(char **p, char *tmp, int *tmplen, char c)
64 if(*tmplen==STRBLEN-1){
83 static bool _string_finish(char **p, char *tmp, int tmplen)
/* Shorthands for the scanner code.  Each one expands to an expression that
 * refers to a variable named `tokz`, so they can only be used where such a
 * variable is in scope:
 *   INC_LINE()  -- bump the current line counter
 *   GETCH()     -- fetch the next character via _getch()
 *   UNGETCH(C)  -- hand character C back via _ungetch()
 */
#define INC_LINE()  (tokz->line++)
#define GETCH()     (_getch(tokz))
#define UNGETCH(C)  (_ungetch(tokz, (C)))
111 static int _getch(Tokenizer *tokz)
115 if(tokz->ungetc!=-1){
118 }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) {
119 assert(tokz->buffer.data!=NULL);
120 if (tokz->buffer.pos==tokz->buffer.len)
123 c=tokz->buffer.data[tokz->buffer.pos++];
132 static void _ungetch(Tokenizer *tokz, int c)
141 static int scan_line_comment(Token *tok, Tokenizer *tokz)
143 STRING_DECL_P(s, "#");
148 while(c!='\n' && c!=EOF){
157 TOK_SET_COMMENT(tok, s);
163 static int skip_line_comment(Tokenizer *tokz)
169 }while(c!='\n' && c!=EOF);
180 static int scan_c_comment(Token *tok, Tokenizer *tokz)
182 STRING_DECL_P(s, "/*");
191 return E_TOKZ_UNEXPECTED_EOF;
198 }else if(st==0 && c=='*'){
209 TOK_SET_COMMENT(tok, s);
215 static int skip_c_comment(Tokenizer *tokz)
224 return E_TOKZ_UNEXPECTED_EOF;
228 else if(st==0 && c=='*')
244 static int scan_char_escape(Tokenizer *tokz)
246 static char* special_chars="nrtbae";
247 static char* specials="\n\r\t\b\a\033";
253 for(i=0;special_chars[i];i++){
254 if(special_chars[i]==c)
258 if(c=='x' || c=='X'){
260 }else if(c=='d' || c=='D'){
262 }else if(c=='8' || c=='9'){
263 base=10;max=2;i=c-'0';
264 }else if('0'<=c && c<='7'){
265 base=8;max=2;i=c-'0';
321 static int scan_string(Token *tok, Tokenizer *tokz, bool escapes)
335 return E_TOKZ_UNEXPECTED_EOL;
338 if(c=='\\' && escapes){
339 c=scan_char_escape(tokz);
342 return E_TOKZ_UNEXPECTED_EOL;
348 return E_TOKZ_UNEXPECTED_EOF;
356 TOK_SET_STRING(tok, s);
365 static int scan_char(Token *tok, Tokenizer *tokz)
372 return E_TOKZ_UNEXPECTED_EOF;
375 return E_TOKZ_UNEXPECTED_EOL;
378 c=scan_char_escape(tokz);
381 return E_TOKZ_UNEXPECTED_EOF;
384 return E_TOKZ_UNEXPECTED_EOL;
390 return E_TOKZ_MULTICHAR;
392 TOK_SET_CHAR(tok, c);
/* True when X can begin an identifier: a letter according to isalpha(), an
 * underscore or a dollar sign.  X is parenthesised in the comparisons so
 * that an arbitrary expression (not just a plain variable) is classified
 * correctly.  X is still evaluated more than once, so it must not have side
 * effects. */
#define START_IDENT(X) (isalpha(X) || (X)=='_' || (X)=='$')
404 static int scan_identifier(Token *tok, Tokenizer *tokz, int c)
411 }while(isalnum(c) || c=='_' || c=='$');
417 TOK_SET_IDENT(tok, s);
422 #define NP_SIMPLE_IMPL
423 #include "np/numparser2.h"
424 #include "np/np-conv.h"
427 static int scan_number(Token *tok, Tokenizer *tokz, int c)
432 if((e=parse_number(&num, tokz, c)))
435 if(num.type==NPNUM_INT){
437 if((e=num_to_long(&l, &num, TRUE)))
440 TOK_SET_LONG(tok, l);
441 }else if(num.type==NPNUM_FLOAT){
443 if((e=num_to_double(&d, &num)))
446 TOK_SET_DOUBLE(tok, d);
448 return E_TOKZ_NUMFMT;
/* Bitmap of the character codes 0-127 that is_opch() reports as operator
 * characters.  Each byte covers eight consecutive codes and, as read by
 * map_isset(), bit (ch&7) of byte (ch>>3) belongs to code ch -- i.e. the
 * least significant bit is the lowest code of the group.  The picture in
 * each line's comment lists the eight codes in ascending order, with '_'
 * standing for "bit not set".
 */
458 static uchar op_map[]={
459 0x00, /* ________ 0-7 */
460 0x00, /* ________ 8-15 */
461 0x00, /* ________ 16-23 */
462 0x00, /* ________ 24-31 */
463 0x62, /* _!___%&_ 32-39 */
464 0xff, /* ()*+,-./ 40-47 */
465 0x00, /* ________ 48-55 */
466 0xfc, /* __:;<=>? 56-63 */
467 0x01, /* @_______ 64-71 */
468 0x00, /* ________ 72-79 */
469 0x00, /* ________ 80-87 */
470 0x78, /* ___[_]^_ 88-95 */
/* NOTE(review): 0x78 sets bits 3-6, i.e. codes 91-94, which includes code 92
 * (the backslash) although the picture above shows that position as '_'.
 * Confirm whether the backslash is meant to count as an operator character. */
471 0x00, /* ________ 96-103 */
472 0x00, /* ________ 104-111 */
473 0x00, /* ________ 112-119 */
474 0x38 /* ___{|}__ 120-127 */
478 static bool map_isset(uchar *map, uint ch)
483 return map[ch>>3]&(1<<(ch&7));
487 static bool is_opch(uint ch)
489 return map_isset(op_map, ch);
493 static int scan_op(Token *tok, Tokenizer *tokz, int c)
498 /* Quickly check it is an operator character */
500 return E_TOKZ_INVALID_CHAR;
506 /* case '/': Checked elsewhere */
516 }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){
517 if(c=='<' || c=='>'){
520 op=c|(c2<<8)|(c3<<16);
534 /* It is already known that it is an operator so these are not needed
559 void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...)
566 warn_obj_line_v(tokz->name, line, fmt, args);
574 void tokz_warn_error(const Tokenizer *tokz, int line, int e)
576 if(e==E_TOKZ_UNEXPECTED_EOF)
580 tokz_warn(tokz, line, "%s", strerror(-e));
582 tokz_warn(tokz, line, "%s", TR(errors[e]));
586 bool tokz_get_token(Tokenizer *tokz, Token *tok)
590 if (!(tokz->flags&TOKZ_READ_FROM_BUFFER))
591 assert(tokz->file!=NULL);
595 if(!TOK_IS_INVALID(&(tokz->ungettok))){
597 tokz->ungettok.type=TOK_INVALID;
607 }while(c!='\n' && c!=EOF && isspace(c));
609 tok->line=tokz->line;
613 TOK_SET_OP(tok, OP_EOF);
619 if(tokz->flags&TOKZ_IGNORE_NEXTLINE)
622 TOK_SET_OP(tok, OP_NEXTLINE);
630 TOK_SET_OP(tok, OP_EOF);
633 if(!isspace(c) && e==0){
634 e=E_TOKZ_EOL_EXPECTED;
635 tokz_warn_error(tokz, tokz->line, e);
636 if(!(tokz->flags&TOKZ_ERROR_TOLERANT))
645 if(tokz->flags&TOKZ_READ_COMMENTS){
646 e=scan_line_comment(tok, tokz);
648 }else if((e=skip_line_comment(tokz))){
658 TOK_SET_OP(tok, OP_AS_DIV);
664 TOK_SET_OP(tok, OP_DIV);
668 if(tokz->flags&TOKZ_READ_COMMENTS){
669 e=scan_c_comment(tok, tokz);
671 }else if((e=skip_c_comment(tokz))){
678 e=scan_string(tok, tokz, TRUE);
682 e=scan_char(tok, tokz);
686 if(('0'<=c && c<='9') || c=='-' || c=='+'){
687 e=scan_number(tok, tokz, c);
692 e=scan_identifier(tok, tokz, c);
694 e=scan_op(tok, tokz, c);
700 tokz_warn_error(tokz, tokz->line, e);
706 void tokz_unget_token(Tokenizer *tokz, Token *tok)
708 tok_free(&(tokz->ungettok));
710 tok->type=TOK_INVALID;
718 static bool do_tokz_pushf(Tokenizer *tokz)
720 Tokenizer_FInfo *finfo;
722 finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo,
723 tokz->filestack_n, tokz->filestack_n+1);
728 tokz->filestack=finfo;
729 finfo=&(finfo[tokz->filestack_n++]);
731 finfo->file=tokz->file;
732 finfo->name=tokz->name;
733 finfo->line=tokz->line;
734 finfo->ungetc=tokz->ungetc;
735 finfo->ungettok=tokz->ungettok;
741 bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname)
743 char *fname_copy=NULL;
749 fname_copy=scopy(fname);
750 if(fname_copy==NULL){
756 if(tokz->file!=NULL){
757 if(!do_tokz_pushf(tokz)){
766 tokz->name=fname_copy;
769 tokz->ungettok.type=TOK_INVALID;
775 bool tokz_pushf(Tokenizer *tokz, const char *fname)
779 file=fopen(fname, "r");
786 if(!tokz_pushf_file(tokz, file, fname)){
796 static Tokenizer *tokz_create()
800 tokz=ALLOC(Tokenizer);
811 tokz->ungettok.type=TOK_INVALID;
816 tokz->filestack=NULL;
825 Tokenizer *tokz_open(const char *fname)
831 if(!tokz_pushf(tokz, fname)){
840 Tokenizer *tokz_open_file(FILE *file, const char *fname)
846 if(!tokz_pushf_file(tokz, file, fname)){
854 Tokenizer *tokz_prepare_buffer(char *buffer, int len)
865 tokz->flags|=TOKZ_READ_FROM_BUFFER;
866 tokz->buffer.data=scopy(buffer);
867 tokz->buffer.len=(len>0 ? (uint)len : strlen(tokz->buffer.data));
880 static bool do_tokz_popf(Tokenizer *tokz, bool shrink)
882 Tokenizer_FInfo *finfo;
884 if(tokz->filestack_n<=0)
892 finfo=&(tokz->filestack[--tokz->filestack_n]);
894 tokz->file=finfo->file;
895 tokz->name=finfo->name;
896 tokz->line=finfo->line;
897 tokz->ungetc=finfo->ungetc;
898 tokz->ungettok=finfo->ungettok;
900 if(tokz->filestack_n==0){
901 free(tokz->filestack);
902 tokz->filestack=NULL;
904 finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo,
905 tokz->filestack_n+1, tokz->filestack_n);
909 tokz->filestack=finfo;
916 bool tokz_popf(Tokenizer *tokz)
918 return do_tokz_popf(tokz, TRUE);
922 void tokz_close(Tokenizer *tokz)
924 while(tokz->filestack_n>0)
925 do_tokz_popf(tokz, FALSE);
931 tok_free(&(tokz->ungettok));
941 void tok_free(Token *tok)
943 if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){
944 if(TOK_STRING_VAL(tok)!=NULL)
945 free(TOK_STRING_VAL(tok));
948 tok->type=TOK_INVALID;
952 void tok_init(Token *tok)
954 static Token dummy=TOK_INIT;
956 memcpy(tok, &dummy, sizeof(*tok));