/*
 * var_expand() - expand variable references in [in, end) and append the
 *                resulting strings to 'prefix'
 *
 * prefix     list that receives the results; the updated list is returned
 * in, end    half-open character range holding the text to expand
 * lol        argument lists consulted for $(<), $(>) and $(1)..$(9)
 * cancopyin  when non-zero and the text contains nothing to expand, the
 *            input pointer itself is handed to list_append() (with its
 *            third argument set to 1) instead of a copy built in the
 *            scratch buffer
 *
 * A reference is introduced by '$' followed by leftParen (and, with
 * OPT_EXPAND_LITERALS_EXT, by '@' or '@$' followed by leftParen, which
 * marks the name as a literal).  The text of the reference, the values of
 * each named variable and the remainder of the input may each be lists, so
 * the result is the product of the three.
 */
LIST *
var_expand(
    LIST *prefix,
    const char *in,
    const char *end,
    LOL *lol,
    int cancopyin )
{
    BUFFER buff;
    const char *inp = in;
    int depth;
    size_t save_buffer_pos, ov_save_buffer_pos;
    int literal = 0;

    /* The '*' precision of %.*s must be an int; end - in is a ptrdiff_t, */
    /* so convert explicitly instead of passing a mismatched argument. */

    if( DEBUG_VAREXP )
        printf( "expand '%.*s'\n", (int)( end - in ), in );

    /* This gets a lot of cases: $(<) and $(>) */

    if( end - in == 4 && in[0] == '$' && in[1] == leftParen && in[3] == rightParen )
    {
        switch( in[2] )
        {
        case '1':
        case '<':
            return list_copy( prefix, lol_get( lol, 0 ) );

        case '2':
        case '>':
            return list_copy( prefix, lol_get( lol, 1 ) );
        }
    }

    buffer_init( &buff );

    /* Just try simple copy of in to out. */

    while( in < end )
    {
        char ch = *in++;
        buffer_addchar( &buff, ch );
        if( ch == '$' && *in == leftParen )
            goto expand;
#ifdef OPT_EXPAND_LITERALS_EXT
        if( ch == '@' && *in == leftParen )
        {
            literal = 1;
            goto expand;
        }
        if( ch == '@' && in[0] == '$' && in[1] == leftParen )
        {
            ++in;
            literal = 1;
            goto expand;
        }
#endif
    }

    /* No variables expanded - just add copy of input string to list. */

    /* Cancopyin is an optimization: if the input was already a list */
    /* item, we can use the copystr() to put it on the new list. */
    /* Otherwise, we use the slower newstr(). */

    buffer_putchar( &buff, 0 );

    if( cancopyin )
    {
        LIST *new_list = list_append( prefix, inp, 1 );
        buffer_free( &buff );
        return new_list;
    }
    else
    {
        LIST *new_list = list_append( prefix, buffer_ptr( &buff ), 0 );
        buffer_free( &buff );
        return new_list;
    }

expand:
    /*
     * Input so far (ignore blanks):
     *
     *      stuff-in-outbuf $(variable) remainder
     *                       ^                   ^
     *                       in                  end
     * Output so far:
     *
     *      stuff-in-outbuf $
     *      ^                ^
     *      out_buf          out
     *
     *
     * We just copied the $ of $(...), so back up one on the output.
     * We now find the matching close paren, copying the variable and
     * modifiers between the $( and ) temporarily into out_buf, so that
     * we can replace :'s with MAGIC_COLON.  This is necessary to avoid
     * being confused by modifier values that are variables containing
     * :'s.  Ugly.
     */

    depth = 1;
    buffer_deltapos( &buff, -1 );
    save_buffer_pos = buffer_pos( &buff );
    in++;

    while( in < end && depth )
    {
        char ch = *in++;
        buffer_addchar( &buff, ch );
        if ( ch == leftParen )
        {
            depth++;
        }
        else if ( ch == rightParen )
        {
            depth--;
        }
        else
        {
            /* Overwrite the character just copied with its MAGIC_ marker. */
            switch( ch )
            {
            case ':':
                buffer_deltapos( &buff, -1 );
                buffer_addchar( &buff, MAGIC_COLON );
                break;

            case '[':
                buffer_deltapos( &buff, -1 );
                buffer_addchar( &buff, MAGIC_LEFT );
                break;

            case ']':
                buffer_deltapos( &buff, -1 );
                buffer_addchar( &buff, MAGIC_RIGHT );
                break;
            }
        }
    }

    /* Copied ) - back up. */

    buffer_deltapos( &buff, -1 );
    ov_save_buffer_pos = buffer_pos( &buff );
    buffer_setpos( &buff, save_buffer_pos );

    /*
     * Input so far (ignore blanks):
     *
     *      stuff-in-outbuf $(variable) remainder
     *                                  ^        ^
     *                                  in       end
     * Output so far:
     *
     *      stuff-in-outbuf variable
     *      ^               ^       ^
     *      out_buf         out     ov
     *
     * Later we will overwrite 'variable' in out_buf, but we'll be
     * done with it by then.  'variable' may be a multi-element list,
     * so may each value for '$(variable element)', and so may 'remainder'.
     * Thus we produce a product of three lists.
     */

    {
        LIST *variables = 0;
        LIST *remainder = 0;
        LISTITEM *vars;

        /* Recursively expand variable name & rest of input */

        if( save_buffer_pos < ov_save_buffer_pos )
            variables = var_expand( L0, buffer_posptr( &buff ),
                    buffer_ptr( &buff ) + ov_save_buffer_pos, lol, 0 );
        if( in < end )
            remainder = var_expand( L0, in, end, lol, 0 );

        /* Now produce the result chain */

        /* For each variable name */

        for( vars = list_first(variables); vars; vars = list_next( vars ) )
        {
            LIST *value, *evalue = 0;
            LISTITEM* valueSliceStart = NULL;
#ifdef OPT_EXPAND_LITERALS_EXT
            LIST *origvalue = 0;
#endif
            char *colon;
            char *bracket;
            /* NOTE(review): varnamebuff is never passed to buffer_free() */
            /* in this function - confirm BUFFER needs no explicit release. */
            BUFFER varnamebuff;
            int sub1 = 0, sub2 = -1;
            VAR_EDITS edits;
            memset(&edits, 0, sizeof(VAR_EDITS));
            /* With '{' as the opening delimiter an empty string is the */
            /* default replacement for an empty value. */
            if (leftParen == '{') {
                edits.empty.ptr = "";
                edits.empty.len = 0;
            }

            /* Look for a : modifier in the variable name */
            /* Must copy into varname so we can modify it */

            buffer_init( &varnamebuff );
            buffer_addstring( &varnamebuff, list_value(vars), strlen( list_value(vars) ) );
            buffer_addchar( &varnamebuff, 0 );

            if( ( colon = strchr( buffer_ptr( &varnamebuff ), MAGIC_COLON ) ) )
            {
                *colon = '\0';
                var_edit_parse( colon + 1, &edits );
            }

            /* Look for [x-y] and [x-] subscripting */
            /* sub1 is x (0 default) */
            /* sub2 is length (-1 means forever) */

            if( ( bracket = strchr( buffer_ptr( &varnamebuff ), MAGIC_LEFT ) ) )
            {
                char *dash;

                if( ( dash = strchr( bracket + 1, '-' ) ) )
                    *dash = '\0';

                sub1 = atoi( bracket + 1 ) - 1;

                if( !dash )
                    sub2 = 1;
                else if( !dash[1] || dash[1] == MAGIC_RIGHT )
                    sub2 = -1;
                else
                    sub2 = atoi( dash + 1 ) - sub1;

                *bracket = '\0';
            }

            /* Get variable value, specially handling $(<), $(>), $(n) */

#ifdef OPT_EXPAND_LITERALS_EXT
            if ( !literal )
#endif
            {
                const char* varname = buffer_ptr( &varnamebuff );
                if( varname[0] == '<' && !varname[1] )
                    value = lol_get( lol, 0 );
                else if( varname[0] == '>' && !varname[1] )
                    value = lol_get( lol, 1 );
                else if( varname[0] >= '1' && varname[0] <= '9' && !varname[1] )
                    value = lol_get( lol, varname[0] - '1' );
                else if ( edits.targetsetting ) {
                    /* Read the variable from the named target's settings. */
                    TARGET* t = bindtarget(edits.targetname.ptr);
                    SETTINGS* settings = quicksettingslookup(t, varname);
                    if (settings)
                        value = list_copy(L0, settings->value);
                    else
                        value = L0;
                } else
                    value = var_get( varname );
            }
#ifdef OPT_EXPAND_LITERALS_EXT
            else {
                /* Literal reference: the name itself is the value. */
                origvalue = value = list_append( L0, buffer_ptr( &varnamebuff ), 0 );
            }
#endif

            /* The fast path: $(x) - just copy the variable value. */
            /* This is only an optimization */

            if( buffer_isempty( &buff ) && !bracket && !colon && in == end )
            {
                prefix = list_copy( prefix, value );
                buffer_free( &buff );
                continue;
            }

            /* Handle start subscript */

            valueSliceStart = list_first(value);
            while(sub1 > 0 && valueSliceStart)
            {
                sub1 -= 1;
                valueSliceStart = list_next(valueSliceStart);
            }

            /* Empty w/ :E=default? */

            if( !valueSliceStart && (colon || leftParen == '{') && edits.empty.ptr ) {
                evalue = value = list_append( L0, edits.empty.ptr, 0 );
                valueSliceStart = list_first(value);
            }

#ifdef OPT_EXPAND_LITERALS_EXT
            if ( colon && edits.expandliteral ) {
                /* NOTE(review): this local LOL shadows the parameter and is */
                /* passed on with no visible initialization - confirm. */
                LOL lol;
                char const* string = list_value(list_first(value));
                LIST *newvalue = var_expand( L0, string, string + strlen( string ), &lol, 0 );
                if ( origvalue ) {
                    list_free( origvalue );
                    origvalue = 0;
                }
                value = newvalue;
                valueSliceStart = list_first(value);
                sub2 = -1;
            }
#endif

#ifdef OPT_EXPAND_FILEGLOB_EXT
            if ( edits.wildcard ) {
                /* Replace each selected value by the files it globs to. */
                LIST *newl = L0;
                for( ; valueSliceStart; valueSliceStart = list_next( valueSliceStart ) ) {
                    LIST *foundfiles = L0;
                    fileglob* glob;

                    /* Handle end subscript (length actually) */

                    if( sub2 >= 0 && --sub2 < 0 )
                        break;

                    glob = fileglob_Create( list_value(valueSliceStart) );
                    while ( fileglob_Next( glob ) ) {
                        foundfiles = list_append( foundfiles,
                                fileglob_FileName( glob ) + edits.wildcard_remove_prepend.len, 0 );
                    }
                    fileglob_Destroy( glob );

                    /* TODO: Efficiency: Just append to newl above? */
                    newl = list_copy( newl, foundfiles );
                    list_free( foundfiles );
                }
                if ( origvalue ) {
                    list_free( origvalue );
                    origvalue = 0;
                }

                value = newl;
                origvalue = value;
                valueSliceStart = list_first(value);
            }
#endif

            /* For each variable value */

            for( ; valueSliceStart; valueSliceStart = list_next( valueSliceStart ) )
            {
                LISTITEM *rem;
                size_t save_buffer_pos;
                size_t end_buffer_pos;
                const char *valuestring;

                /* Handle end subscript (length actually) */

                if( sub2 >= 0 && --sub2 < 0 )
                    break;

                /* Apply : mods, if present */

                save_buffer_pos = buffer_pos( &buff );

                valuestring = list_value(valueSliceStart);

#ifdef OPT_EXPAND_BINDING_EXT
                if( colon && edits.expandbinding )
                {
                    /* Use the target's EXPAND_TEXT setting if it has one, */
                    /* otherwise its bound name (binding it first if needed). */
                    SETTINGS *expandText;
                    TARGET *t = bindtarget( valuestring );
                    expandText = quicksettingslookup( t, "EXPAND_TEXT" );
                    if ( expandText && list_first(expandText->value) )
                    {
                        valuestring = list_value(list_first(expandText->value));
                    }
                    else
                    {
                        if( t->binding == T_BIND_UNBOUND )
                        {
                            t->boundname = search_using_target_settings( t, t->name, &t->time );
                            t->binding = t->time ? T_BIND_EXISTS : T_BIND_MISSING;
                        }
                        valuestring = t->boundname;
                    }
                }
#endif

                if( colon && edits.filemods ) {
                    var_edit_file( valuestring, &buff, &edits );
                } else {
                    buffer_addstring( &buff, valuestring, strlen( valuestring ) + 1 );
                }
                buffer_setpos( &buff, save_buffer_pos );

                if( colon && ( edits.upshift || edits.downshift ) )
                    var_edit_shift( buffer_posptr( &buff ), &edits );

#ifdef OPT_SLASH_MODIFIERS_EXT
                if( colon && ( edits.fslash || edits.bslash ) )
                    var_edit_slash( buffer_posptr( &buff ), &edits );
#endif

#ifdef OPT_EXPAND_ESCAPE_PATH_EXT
                if ( colon && edits.escapepath )
                {
                    const char* ptr = buffer_posptr( &buff );
                    const char* endptr = ptr + strlen( ptr );
                    BUFFER escapebuff;
                    buffer_init( &escapebuff );
                    save_buffer_pos = buffer_pos( &buff );

#ifdef NT
                    /* Wrap the value in double quotes if it holds a space or '/'. */
                    while ( ptr != endptr && *ptr != ' ' && *ptr != '/' )
                        ++ptr;
                    if (*ptr == ' ' || *ptr == '/' ) {
                        buffer_addchar( &escapebuff, '"' );
                        buffer_addstring( &escapebuff, buffer_posptr( &buff ),
                                endptr - buffer_posptr( &buff ) );
                        buffer_addchar( &escapebuff, '"' );
                        buffer_addchar( &escapebuff, 0 );
                        buffer_addstring( &buff, buffer_ptr( &escapebuff ),
                                buffer_pos( &escapebuff ) );
                    }
#else
                    /* Backslash-escape spaces, backslashes, parens and quotes. */
                    while ( ptr != endptr ) {
                        if ( *ptr == ' ' || *ptr == '\\' || *ptr == leftParen ||
                             *ptr == rightParen || *ptr == '"' ) {
                            buffer_addchar( &escapebuff, '\\' );
                        }
                        buffer_addchar( &escapebuff, *ptr );
                        ++ptr;
                    }
                    buffer_addchar( &escapebuff, 0 );
                    buffer_addstring( &buff, buffer_ptr( &escapebuff ),
                            buffer_pos( &escapebuff ) );
#endif

                    buffer_setpos( &buff, save_buffer_pos );
                    buffer_free( &escapebuff );
                }
#endif

                /* Handle :J=joinval */
                /* If we have more values for this var, just */
                /* keep appending them (with the join value) */
                /* rather than creating separate LIST elements. */

                if( colon && edits.join.ptr &&
                    ( list_next( valueSliceStart ) || list_next( vars ) ) )
                {
                    buffer_setpos( &buff, buffer_pos( &buff ) + strlen( buffer_posptr( &buff ) ) );
                    buffer_addstring( &buff, edits.join.ptr, strlen( edits.join.ptr ) + 1 );
                    buffer_deltapos( &buff, -1 );
                    continue;
                }

                /* If no remainder, append result to output chain. */

                if( in == end )
                {
                    prefix = list_append( prefix, buffer_ptr( &buff ), 0 );
                    continue;
                }

                /* For each remainder, append the complete string */
                /* to the output chain. */
                /* Remember the end of the variable expansion so */
                /* we can just tack on each instance of 'remainder' */

                save_buffer_pos = buffer_pos( &buff );
                end_buffer_pos = strlen( buffer_ptr( &buff ) );
                buffer_setpos( &buff, end_buffer_pos );

                for( rem = list_first(remainder); rem; rem = list_next( rem ) )
                {
                    buffer_addstring( &buff, list_value(rem), strlen( list_value(rem) ) + 1 );
                    buffer_setpos( &buff, end_buffer_pos );
                    prefix = list_append( prefix, buffer_ptr( &buff ), 0 );
                }

                buffer_setpos( &buff, save_buffer_pos );
            }

            /* Toss used empty */

            if( evalue )
                list_free( evalue );

#ifdef OPT_EXPAND_LITERALS_EXT
            if ( origvalue )
                list_free( origvalue );
#endif

#ifdef OPT_EXPAND_INCLUDES_EXCLUDES_EXT
            if ( edits.includes_excludes ) {
                /* Filter the accumulated output list through the I/X */
                /* (include/exclude) regular expressions. */
                LIST *newl = L0;
                LISTITEM* l;

                LIST *origprefix = prefix;
                int hasInclude = 0;

                if ( !regexhash )
                    regexhash = hashinit( sizeof(regexdata), "regex" );

                {
                    /* NOTE(review): this scan advances one item at a time, */
                    /* so it also inspects the pattern entries that the loop */
                    /* below treats as (modifier, pattern) pairs - confirm. */
                    LISTITEM *inex = list_first(edits.includes_excludes);
                    while ( inex ) {
                        char mod = list_value(inex)[0];
                        inex = list_next( inex );

                        if ( mod == 'I' ) {
                            hasInclude = 1;
                        }
                    }
                }

                for (l = list_first(prefix) ; l; l = list_next( l ) )
                {
                    LISTITEM *inex = list_first(edits.includes_excludes);
                    /* When any include is present, items start out removed. */
                    int remove = hasInclude;

                    while ( inex ) {
                        char mod = list_value(inex)[0];
                        regexp *re;
                        regexdata data, *d = &data;
                        inex = list_next( inex );
                        data.name = list_value(inex);
                        /* Compile each pattern once and cache it in regexhash. */
                        if( !hashcheck( regexhash, (HASHDATA **)&d ) )
                        {
                            d->re = jam_regcomp( list_value(inex) );
                            (void)hashenter( regexhash, (HASHDATA **)&d );
                        }
                        re = d->re;
                        inex = list_next( inex );

                        if ( mod == 'X' ) {
                            if( jam_regexec( re, list_value(l) ) )
                                remove = 1;
                        } else if ( mod == 'I' ) {
                            if( jam_regexec( re, list_value(l) ) )
                                remove = 0;
                        }
                    }

                    if ( !remove )
                        newl = list_append( newl, list_value(l), 1 );
                }

                /* TODO: Efficiency: Just modify prefix? */
                list_free( origprefix );
                prefix = newl;
            }
#endif

#ifdef OPT_EXPAND_INCLUDES_EXCLUDES_EXT
            list_free( edits.includes_excludes );
#endif
        }

        /* variables & remainder were gifts from var_expand */
        /* and must be freed */

        list_free( variables );
        list_free( remainder );

        if( DEBUG_VAREXP )
        {
            printf( "expanded to " );
            list_print( prefix );
            printf( "\n" );
        }

        buffer_free( &buff );
        return prefix;
    }
}
/**
	Advances the glob state machine held in self->context by one step.

	On the very first call (no context yet) a context is created for
	inPattern and 1 is returned without scanning anything.  On later calls
	the current context either resumes its directory scan, descends into a
	queued sub-directory by pushing a new context, or rewrites its pattern
	for a "**" recursion.

	\param self The glob object whose context stack is advanced.
	\param inPattern The pattern copied into a newly created context.
	\return 1 when a context was just created for the first time or the
		scan stopped on an accepted entry; 0 when the last context level
		has been released.
**/
int _fileglob_GlobHelper(fileglob* self, const char* inPattern) {
	fileglob_Context* context = self->context;
	int hasWildcard;
	int found;

Setup:
	if (!context) {
		// Push a fresh context level for inPattern.
		context = (fileglob_Context*)self->allocFunction(self->userData, NULL, sizeof(fileglob_Context));
		context->prev = self->context;
#if defined(WIN32)
		context->handle = INVALID_HANDLE_VALUE;
#else
		context->dirp = NULL;
		context->hasattr = 0;
		context->statted = 0;
#endif
		context->pattern = NULL;
		context->iterNode = NULL;
		context->directoryListHead = context->directoryListTail = 0;
		context->basePathLastSlashPos = 0;
		buffer_initwithalloc(&context->patternBuf, self->allocFunction, self->userData);
		buffer_addstring(&context->patternBuf, inPattern, strlen(inPattern) + 1);
		buffer_initwithalloc(&context->basePath, self->allocFunction, self->userData);
		buffer_initwithalloc(&context->matchPattern, self->allocFunction, self->userData);
		self->context = context;
		// The outermost context is only set up here; scanning starts on the next call.
		if (context->prev == NULL)
			return 1;
	}

DoRecursion:
	found = 1;

	if (!context->pattern) {
		char* pattern;

		context->basePathEndPos = context->basePathLastSlashPos = 0;
		context->recurseAtPos = (size_t)-1;

		// Split the path into base path and pattern to match against.
		hasWildcard = 0;

		for (pattern = buffer_ptr(&context->patternBuf); *pattern != '\0'; ++pattern) {
			char ch = *pattern;

			// Is it a '?' ?
			if (ch == '?')
				hasWildcard = 1;

			// Is it a '*' ?
			else if (ch == '*') {
				hasWildcard = 1;

				// Is there a '**'?
				if (pattern[1] == '*') {
					// If we're just starting the pattern or the characters immediately
					// preceding the pattern are a drive letter ':' or a directory path
					// '/', then set up the internals for later recursion.
					if (pattern == buffer_ptr(&context->patternBuf) || pattern[-1] == '/' || pattern[-1] == ':') {
						char ch2 = pattern[2];
						if (ch2 == '/') {
							context->recurseAtPos = pattern - buffer_ptr(&context->patternBuf);
							// Source and destination lie in the same buffer and
							// overlap, so memmove (not memcpy) is required.
							memmove(pattern, pattern + 3, strlen(pattern) - 2);
							buffer_deltapos(&context->patternBuf, -3);
						} else if (ch2 == '\0') {
							context->recurseAtPos = pattern - buffer_ptr(&context->patternBuf);
							*pattern = '\0';
						}
					}
				}
			}

			// Is there a '/' or ':' in the pattern at this location?
			if (ch == '/' || ch == ':') {
				if (hasWildcard)
					break;
				else {
					if (pattern[1])
						context->basePathLastSlashPos = pattern - buffer_ptr(&context->patternBuf) + 1;
					context->basePathEndPos = pattern - buffer_ptr(&context->patternBuf) + 1;
				}
			}
		}

		context->pattern = pattern;

		// Copy the directory down.
		context->basePathLen = context->basePathEndPos;
		buffer_reset(&context->basePath);
		buffer_addstring(&context->basePath, buffer_ptr(&context->patternBuf), context->basePathLen);
		buffer_addchar(&context->basePath, 0);

		if (context->iterNode) {
			context->matchFiles = *context->pattern == 0;
			goto NextDirectory;
		}
	}

#if defined(WIN32)
	if (context->handle == INVALID_HANDLE_VALUE) {
#else
	if (!context->dirp && !context->statted) {
#endif
		size_t matchLen;

		// Did we make it to the end of the pattern?  If so, we should match files,
		// since there were no slashes encountered.
		context->matchFiles = *context->pattern == 0;

		// Copy the wildcard matching string.
		matchLen = (context->pattern - buffer_ptr(&context->patternBuf)) - context->basePathLen;
		buffer_reset(&context->matchPattern);
		buffer_addstring(&context->matchPattern, buffer_ptr(&context->patternBuf) + context->basePathLen, matchLen + 1);
		buffer_deltapos(&context->matchPattern, -1);
		if (*buffer_posptr(&context->matchPattern) == '/') {
			buffer_deltapos(&context->matchPattern, 1);
			buffer_addchar(&context->matchPattern, 0);
		}

#if defined(WIN32)
		// Do the file search with *.* in the directory specified in basePattern.
		buffer_setpos(&context->basePath, context->basePathEndPos);
		buffer_addstring(&context->basePath, "*.*", 4);

		// Start the find.
		context->handle = FindFirstFile(buffer_ptr(&context->basePath), &context->fd);
		if (context->handle == INVALID_HANDLE_VALUE) {
			found = 0;
		}
#else
		// Start the find.
		buffer_setpos(&context->basePath, context->basePathEndPos);
		buffer_addchar(&context->basePath, 0);
		context->dirp = opendir(buffer_ptr(&context->basePath)[0] ? buffer_ptr(&context->basePath) : ".");
		if (!context->dirp) {
			found = 0;
		} else {
			context->dp = readdir(context->dirp);
			found = context->dp != NULL;
		}
#endif

		// Clear out the *.* so we can use the original basePattern string.
		buffer_setpos(&context->basePath, context->basePathEndPos);
		buffer_putchar(&context->basePath, 0);
	} else {
		// A scan is already in progress; resume with the next entry.
		goto NextFile;
	}

	// Any files found?
#if defined(WIN32)
	if (context->handle != INVALID_HANDLE_VALUE) {
#else
	if (context->dirp) {
#endif
		// Only enter the scan when there is a current entry: the first
		// readdir() may already have returned NULL, in which case
		// context->dp must not be dereferenced.  Resumed scans jump to
		// NextFile, which re-establishes 'found' before the next pass.
		while (found) {
#if defined(WIN32)
			char* filename = context->fd.cFileName;
#else
			char* filename = context->dp->d_name;
			context->hasattr = 0;
#endif

			// Is the file a directory?
#if defined(WIN32)
			if (context->fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
#else
			if (context->dp->d_type == DT_DIR) {
#endif
				// Knock out "." or ".."
				int ignore = filename[0] == '.' &&
						(filename[1] == 0 || (filename[1] == '.' && filename[2] == 0));

				// Should this file be ignored?
				int matches = 0;
				if (!ignore) {
					// Append a trailing '/' in place so directory patterns can match.
					// NOTE(review): this writes one byte past the existing
					// terminator of the entry's name buffer - confirm the
					// buffer always has room for it.
					size_t len = strlen(filename);
					filename[len] = '/';
					filename[len + 1] = '\0';
					matches = fileglob_WildMatch(buffer_ptr(&context->matchPattern), filename, 0);
				}

				// Do a wildcard match.
				if (!ignore && matches) {
					// It matched.  Let's see if the file should be ignored.

					// See if this is a directory to ignore.
					ignore = _fileglob_MatchIgnoreDirectoryPattern(self, filename);

					// Should this file be ignored?
					if (!ignore) {
						_fileglob_list_append(self, &context->directoryListHead, &context->directoryListTail, filename);

						// Is this pattern exclusive?
						if (self->exclusiveDirectoryPatternsHead) {
							if (_fileglob_MatchExclusiveDirectoryPattern(self, filename))
								break;
						} else {
							if ((!context->matchFiles && context->pattern[0] == '/' && context->pattern[1] == 0)
									|| (self->filesAndFolders))
								break;
						}
					}
				}
			} else {
				// Do a wildcard match.
				if (fileglob_WildMatch(buffer_ptr(&context->matchPattern), filename, 0)) {
					// It matched.  Let's see if the file should be ignored.
					int ignore = _fileglob_MatchIgnoreFilePattern(self, filename);

					// Is this pattern exclusive?
					if (!ignore && self->exclusiveFilePatternsHead) {
						ignore = !_fileglob_MatchExclusiveFilePattern(self, filename);
					}

					// Should this file be ignored?
					if (!ignore) {
						if (context->matchFiles)
							break;
					}
				}
			}

NextFile:
			// Look up the next file.
#if defined(WIN32)
			found = FindNextFile(context->handle, &context->fd) == TRUE;
#else
			if (context->dirp) {
				context->dp = readdir(context->dirp);
				found = context->dp != NULL;
			} else {
				found = 0;
			}
#endif
			if (!found)
				break;
		}

		if (!found) {
			// Close down the file find handle.
#if defined(WIN32)
			FindClose(context->handle);
			context->handle = INVALID_HANDLE_VALUE;
#else
			if (context->dirp) {
				closedir(context->dirp);
				context->dirp = NULL;
			}
#endif

			// The scan is finished; start walking the collected directories.
			context->iterNode = context->directoryListHead;
		}
	}

	// Iterate the file list and either recurse or add the file as a found
	// file.
	if (!context->matchFiles) {
		if (found) {
			return 1;
		}

NextDirectory:
		if (context->iterNode) {
			// Need more directories.
			SplicePath(&self->combinedName, buffer_ptr(&context->basePath), context->iterNode->buffer);
			buffer_deltapos(&self->combinedName, -2);
			buffer_addstring(&self->combinedName, context->pattern, strlen(context->pattern) + 1);

			context->iterNode = context->iterNode->next;

			// Push a new context level for the spliced path.
			context = NULL;
			inPattern = buffer_ptr(&self->combinedName);
			goto Setup;
		}
	} else {
		if (found)
			return 1;
	}

	// Do we need to recurse?
	if (context->recurseAtPos == (size_t)-1) {
		// No "**" in this level: pop it and continue with the parent.
		_fileglob_FreeContextLevel(self);

		context = self->context;
		if (!context)
			return 0;

		goto NextDirectory;
	}

	// Rewrite the pattern so that "*/**/" sits at the recursion point, in
	// front of the text that was there, then parse it again.
	buffer_reset(&context->matchPattern);
	buffer_setpos(&context->patternBuf, context->recurseAtPos);
	buffer_addstring(&context->matchPattern, buffer_posptr(&context->patternBuf), strlen(buffer_posptr(&context->patternBuf)));
	buffer_addstring(&context->patternBuf, "*/**/", 5);
	buffer_addstring(&context->patternBuf, buffer_ptr(&context->matchPattern), buffer_pos(&context->matchPattern) + 1);

	inPattern = buffer_ptr(&context->patternBuf);
	context->pattern = NULL;

	if (context->matchFiles) {
		context->iterNode = context->directoryListHead;
	} else {
		_fileglob_list_clear(self, &context->directoryListHead, &context->directoryListTail);
	}
	goto DoRecursion;
}
/* Lexer FSM main loop */ TokenList *lexer_split(const char *src) { LexerState state = ST_WHITESPACE; const size_t length = strlen(src); Buffer *tokens = buffer_new(); size_t i; /* terminating zero is handled like a normal character */ for (i=0; state!=ST_ERROR && i<length+1; i++) { const char c = src[i]; switch (state) { case ST_WHITESPACE: if (c=='"') /* start a quoted string, new token */ state = ST_QUOTE; else if (c=='\\') /* start escape-sequence, new token */ state = ST_ESCAPE; else if (c!='\0' && !isspace(c)) /* start a new token */ { state = ST_WORD; buffer_putchar(tokens, c); } /* else: whitespace -> ignore */ break; case ST_QUOTE: if (c=='"') /* end a quoted string */ state = ST_WORD; else if (c=='\\') /* start escape-sequence */ state = ST_QUOTE_ESCAPE; else if (c!='\0') /* append a char from inside quotes to token */ buffer_putchar(tokens, c); else /* '\0' -> set error flag */ state = ST_ERROR; break; case ST_WORD: if (c=='\0' || isspace(c)) /* end a token */ { state = ST_WHITESPACE; buffer_putchar(tokens, '\0'); } else if (c=='"') /* start a quoted string */ state = ST_QUOTE; else if (c=='\\') /* start escape-sequence */ state = ST_ESCAPE; else /* append a char to token */ buffer_putchar(tokens, c); break; case ST_ESCAPE: if (c!='\0') { state = ST_WORD; buffer_putchar(tokens, c); } else /* '\0' -> set error flag */ state = ST_ERROR; break; case ST_QUOTE_ESCAPE: if (c!='\0') { state = ST_QUOTE; buffer_putchar(tokens, c); } else /* '\0' -> set error flag */ state = ST_ERROR; break; default: break; } } /* Detect lexing errors */ if (state == ST_ERROR) { buffer_delete(tokens); return NULL; } else { TokenList *tl = (TokenList *) malloc(sizeof(TokenList)); tl->buf = tokens; tl->tokens = make_token_list(tokens); return tl; } }
/*
 * Lexer FSM main loop.
 *
 * Splits `src` into tokens with a small state machine and writes each
 * finished token to a Buffer followed by a '\0' byte.  Whitespace
 * separates tokens, double quotes group characters into the current token,
 * and a backslash copies the next character verbatim.  Characters accepted
 * by is_simple_operator() always form a one-character token; characters
 * accepted by is_double_operator() form a token of one or two identical
 * characters.
 *
 * On success sets diag->error to 0 and returns the token buffer.  When the
 * input ends inside a quoted string or directly after a backslash, frees
 * the buffer, sets diag->error to 1 and diag->error_message to a
 * description, and returns NULL.
 */
Buffer *lexer_split(const char *src, Diagnostic *diag)
{
    LexerState state = ST_WHITESPACE;
    /* State in effect when the most recent character was read; used to
     * pick the error message. */
    LexerState prev_state = ST_WHITESPACE;
    const size_t length = strlen(src);
    Buffer *tokens = buffer_alloc();
    size_t i;
    char op = 0;

    /* terminating zero is handled like a normal character */
    for (i = 0; state != ST_ERROR && i < length + 1; i++) {
        const char c = src[i];
        /* <ctype.h> functions require a value representable as unsigned
         * char (or EOF); a plain char may be negative. */
        const int is_blank = isspace((unsigned char) c);

        prev_state = state;
        switch (state) {
        case ST_OPERATOR:
            state = ST_WHITESPACE;
            if (c == op) {
                /* double operator */
                buffer_putchar(tokens, op);
                buffer_putchar(tokens, op);
                buffer_putchar(tokens, '\0');
                break;
            } else {
                /* single operator */
                buffer_putchar(tokens, op);
                buffer_putchar(tokens, '\0');
            }
            /* fallthrough: the current character still has to be handled
             * as if it followed whitespace */

        case ST_WHITESPACE:
            if (c == '"') {
                /* start a quoted string, new token */
                state = ST_QUOTE;
            } else if (c == '\\') {
                /* start escape-sequence, new token */
                state = ST_ESCAPE;
            } else if (is_simple_operator(c)) {
                /* a new token for operator */
                buffer_putchar(tokens, c);
                buffer_putchar(tokens, '\0');
            } else if (is_double_operator(c)) {
                /* remember it until the next character is known */
                state = ST_OPERATOR;
                op = c;
            } else if (c != '\0' && !is_blank) {
                /* start a new token */
                state = ST_WORD;
                buffer_putchar(tokens, c);
            }
            /* else: whitespace -> ignore */
            break;

        case ST_QUOTE:
            if (c == '"') {
                /* end a quoted string */
                state = ST_WORD;
            } else if (c == '\\') {
                /* start escape-sequence */
                state = ST_QUOTE_ESCAPE;
            } else if (c != '\0') {
                /* append a char from inside quotes to token */
                buffer_putchar(tokens, c);
            } else {
                /* '\0' -> set error flag */
                state = ST_ERROR;
            }
            break;

        case ST_WORD:
            if (c == '\0' || is_blank) {
                /* end a token */
                state = ST_WHITESPACE;
                buffer_putchar(tokens, '\0');
            } else if (c == '"') {
                /* start a quoted string */
                state = ST_QUOTE;
            } else if (c == '\\') {
                /* start escape-sequence */
                state = ST_ESCAPE;
            } else if (is_simple_operator(c)) {
                state = ST_WHITESPACE;
                /* end the word, then a new token for operator */
                buffer_putchar(tokens, '\0');
                buffer_putchar(tokens, c);
                buffer_putchar(tokens, '\0');
            } else if (is_double_operator(c)) {
                /* end the word and wait for the next character */
                state = ST_OPERATOR;
                buffer_putchar(tokens, '\0');
                op = c;
            } else {
                /* append a char to token */
                buffer_putchar(tokens, c);
            }
            break;

        case ST_ESCAPE:
            if (c != '\0') {
                state = ST_WORD;
                buffer_putchar(tokens, c);
            } else {
                /* '\0' -> set error flag */
                state = ST_ERROR;
            }
            break;

        case ST_QUOTE_ESCAPE:
            if (c != '\0') {
                state = ST_QUOTE;
                buffer_putchar(tokens, c);
            } else {
                /* '\0' -> set error flag */
                state = ST_ERROR;
            }
            break;

        default:
            break;
        }
    }

    /* Detect lexing errors */
    if (state == ST_ERROR) {
        buffer_free(tokens);
        diag->error = 1;
        switch (prev_state) {
        case ST_QUOTE:
            diag->error_message = "Unfinished quote string";
            break;
        case ST_ESCAPE:
        case ST_QUOTE_ESCAPE:
            diag->error_message = "Unfinished escape sequence";
            break;
        default:
            break;
        }
        return NULL;
    } else {
        diag->error = 0;
        return tokens;
    }
}