/// Returns a copy of `s` with leading and trailing whitespace removed.
///
/// Whitespace is whatever IsCharWhiteSpace() reports as whitespace. If `s`
/// is empty or consists only of such characters, an empty string is returned.
///
/// @param s  The string to trim; it is not modified.
/// @return   The trimmed substring of `s`.
std::string TrimLeadingAndTrailingSpaces(const std::string& s)
{
    // Index of the first character that is not whitespace (or s.size()).
    size_t start = 0;
    while (start < s.size() && IsCharWhiteSpace(s[start]))
        ++start;

    // One past the last character that is not whitespace. The scan never
    // moves below `start`, so `end - start` cannot underflow.
    size_t end = s.size();
    while (end > start && IsCharWhiteSpace(s[end - 1]))
        --end;

    return s.substr(start, end - start);
}
/// Splits a line into word-level tokens and appends them to `tokens`.
///
/// A token is a maximal run of characters that are all alphanumeric
/// (per IsCharAlphaNumeric) or all whitespace (per IsCharWhiteSpace).
/// Every other character becomes a token of its own.
///
/// @param line        Characters to tokenize; only the first `lineLength`
///                    characters are read.
/// @param lineLength  Number of characters in `line`.
/// @param tokens      Receives the tokens in order of appearance. Existing
///                    elements are kept; new tokens are appended.
void SVNLineDiff::ParseLineWords(
    LPCTSTR line, apr_size_t lineLength, std::vector<std::wstring> &tokens)
{
    tokens.reserve(lineLength / 2);

    std::wstring pending;
    int lastType = SVNLINEDIFF_CHARTYPE_NONE;

    for (apr_size_t pos = 0; pos < lineLength; ++pos)
    {
        // Classify the current character.
        int type;
        if (IsCharAlphaNumeric(line[pos]))
            type = SVNLINEDIFF_CHARTYPE_ALPHANUMERIC;
        else if (IsCharWhiteSpace(line[pos]))
            type = SVNLINEDIFF_CHARTYPE_SPACE;
        else
            type = SVNLINEDIFF_CHARTYPE_OTHER;

        // Only alphanumeric and whitespace characters may extend the run in
        // progress; "other" characters always start a new token.
        const bool continuesRun =
            (type != SVNLINEDIFF_CHARTYPE_OTHER) && (type == lastType);

        if (continuesRun)
        {
            pending += line[pos];
        }
        else
        {
            // Flush the finished token, then start a new one.
            if (!pending.empty())
                tokens.push_back(pending);
            pending = line[pos];
        }

        lastType = type;
    }

    // Flush whatever is left after the last character.
    if (!pending.empty())
        tokens.push_back(pending);
}
/// Tells whether `cChar` terminates a lexeme.
///
/// Delimiters are the punctuation characters  : , " [ ] { }  plus anything
/// IsCharWhiteSpace() or IsCharReturnOrNewline() accepts.
///
/// @param cChar  Character to classify.
/// @return       TRUE if `cChar` is a delimiter, FALSE otherwise.
int IsCharDelimiter(char cChar)
{
    switch (cChar)
    {
    case ':':
    case ',':
    case '"':
    case '[':
    case ']':
    case '{':
    case '}':
        return TRUE;
    default:
        break;
    }

    // Not punctuation: fall back to the whitespace / line-break helpers.
    if (IsCharWhiteSpace(cChar))
        return TRUE;
    if (IsCharReturnOrNewline(cChar))
        return TRUE;

    return FALSE;
}
void TrimWhiteSpace(char* sourceLine) { int strLength = strlen(sourceLine); int currentIndex = 0; int padIndex = 0; if (strLength > 1) { //calculate the number of white space at left side for (currentIndex = 0; currentIndex < strLength; currentIndex++) { if (!IsCharWhiteSpace(sourceLine[currentIndex])) break; } padIndex = currentIndex; if (padIndex) { for (currentIndex = padIndex; currentIndex < strLength; currentIndex++) { sourceLine[currentIndex - padIndex] = sourceLine[currentIndex]; } for (currentIndex = strLength - padIndex; currentIndex < strLength; currentIndex++) { sourceLine[currentIndex] = ' '; } } for (currentIndex = strLength - 1; currentIndex > 0; currentIndex--) { if (!IsCharWhiteSpace(sourceLine[currentIndex])) { sourceLine[currentIndex + 1] = '\0'; break; } } } }
/// Tells whether a C string contains nothing but whitespace.
///
/// A character counts as whitespace here if IsCharWhiteSpace() accepts it or
/// if it is '\n'.
///
/// @param str  NUL-terminated string to inspect; may be null.
/// @return     FALSE if `str` is null or contains any other character;
///             TRUE otherwise, including for the empty string.
int IsStringWhitespace(const char* str)
{
    if (!str)
        return FALSE;

    // Single pass up to the terminator; no repeated strlen() per iteration.
    for (const char* p = str; *p != '\0'; ++p)
    {
        if (!IsCharWhiteSpace(*p) && *p != '\n')
            return FALSE;
    }

    return TRUE;
}
/// Peeks at the next significant character without consuming any input.
///
/// Works on local copies of the lexer's current line and iIndex1, so the
/// lexer state in `sasm.lexer` is left untouched.
///
/// Outside of a string literal, characters accepted by IsCharWhiteSpace()
/// are skipped, moving on to following source lines when the end of a line
/// is reached. Inside a string literal (ASM_LEX_STATE_IN_STRING) nothing is
/// skipped and the character at iIndex1 is returned as is.
///
/// @return  The look-ahead character, or 0 when the remaining source holds
///          no further characters.
char GetLookAheadChar()
{
    int lineIndex = sasm.lexer.iCurrentSourceLine;
    unsigned int index = sasm.lexer.iIndex1;

    if (sasm.lexer.iState != ASM_LEX_STATE_IN_STRING)
    {
        while (TRUE)
        {
            // Advance past exhausted lines. This is a loop rather than a
            // single step so that an empty line is skipped instead of having
            // its terminator returned as the look-ahead character.
            while (index >= strlen(sasm.sourceCode[lineIndex]))
            {
                lineIndex++;
                if (lineIndex >= sasm.sourceCodeLines)
                    return 0;
                index = 0;
            }

            if (!IsCharWhiteSpace(sasm.sourceCode[lineIndex][index]))
                break;

            index++;
        }
    }

    return sasm.sourceCode[lineIndex][index];
}
/// Extracts the next lexeme from the source and classifies it.
///
/// The lexer keeps two indices into the current source line: iIndex0 marks
/// the start of the lexeme and iIndex1 marks one past its end. On entry
/// iIndex0 is moved up to iIndex1, i.e. to the end of the previous lexeme.
/// The lexeme text is copied into sasm.lexer.pCurrentLexeme, with escape
/// sequences translated while inside a string literal.
///
/// Side effects: updates sasm.lexer.iIndex0, iIndex1, iState,
/// pCurrentLexeme and currentToken, and may call SkipToNextLine().
///
/// @return  The token type, which is also stored in
///          sasm.lexer.currentToken. Two cases need care:
///          - ASM_END_OF_TOKEN_STREAM when SkipToNextLine() reports failure;
///            this function does not write currentToken in that case.
///          - ASM_TOKEN_TYPE_INVALID when a string literal runs off the end
///            of the line without a closing quote.
Token GetNextToken()
{
    // Start where the previous lexeme ended.
    sasm.lexer.iIndex0 = sasm.lexer.iIndex1;

    // The current line is used up: move on, or report the end of the stream.
    if (sasm.lexer.iIndex0 >= strlen(sasm.sourceCode[sasm.lexer.iCurrentSourceLine]))
    {
        if (!SkipToNextLine())
            return ASM_END_OF_TOKEN_STREAM;
    }

    // The closing quote was delivered by the previous call; leave string mode.
    if (sasm.lexer.iState == ASM_LEX_STATE_END_STRING)
        sasm.lexer.iState = ASM_LEX_STATE_NO_STRING;

    // The line does not change for the rest of this call, so look it up and
    // measure it once. This must happen after the possible SkipToNextLine().
    const char* line = sasm.sourceCode[sasm.lexer.iCurrentSourceLine];
    const size_t lineLength = strlen(line);

    // Outside of a string, skip leading whitespace. The length bound keeps
    // the scan inside the line regardless of how the terminator is classified.
    if (sasm.lexer.iState != ASM_LEX_STATE_IN_STRING)
    {
        while (sasm.lexer.iIndex0 < lineLength && IsCharWhiteSpace(line[sasm.lexer.iIndex0]))
            sasm.lexer.iIndex0++;
    }

    sasm.lexer.iIndex1 = sasm.lexer.iIndex0;

    // Move iIndex1 to the end of the lexeme.
    while (TRUE)
    {
        if (sasm.lexer.iState == ASM_LEX_STATE_IN_STRING)
        {
            // Ran off the line before the closing quote: unterminated string.
            if (sasm.lexer.iIndex1 >= lineLength)
            {
                sasm.lexer.currentToken = ASM_TOKEN_TYPE_INVALID;
                return sasm.lexer.currentToken;
            }

            // Step over the backslash and the escaped character together so
            // that an escaped quote does not end the string.
            if (line[sasm.lexer.iIndex1] == '\\')
            {
                sasm.lexer.iIndex1 += 2;
                continue;
            }

            if (line[sasm.lexer.iIndex1] == '"')
                break;

            sasm.lexer.iIndex1++;
        }
        else
        {
            if (sasm.lexer.iIndex1 >= lineLength)
                break;

            if (IsCharDelimiter(line[sasm.lexer.iIndex1]))
                break;

            sasm.lexer.iIndex1++;
        }
    }

    // A delimiter standing alone is itself a one-character lexeme.
    if (sasm.lexer.iIndex1 - sasm.lexer.iIndex0 == 0)
        sasm.lexer.iIndex1++;

    // Copy the lexeme, translating escape sequences inside string literals.
    unsigned int currentTargetIndex = 0;
    for (int i = sasm.lexer.iIndex0; i < sasm.lexer.iIndex1; i++)
    {
        if (sasm.lexer.iState == ASM_LEX_STATE_IN_STRING && line[i] == '\\')
        {
            const char escaped = line[i + 1];
            char translated;

            switch (escaped)
            {
            case 'a':  translated = '\a'; break;
            case 'b':  translated = '\b'; break;
            case 'f':  translated = '\f'; break;
            case 'n':  translated = '\n'; break;
            case 'r':  translated = '\r'; break;
            case 't':  translated = '\t'; break;
            case 'v':  translated = '\v'; break;
            case '\\': translated = '\\'; break;
            case '\'': translated = '\''; break;
            case '"':  translated = '\"'; break;
            case '0':  translated = '\0'; break;
            default:
                // Unknown escape: keep the escaped character itself so the
                // output byte is always written.
                translated = escaped;
                break;
            }

            sasm.lexer.pCurrentLexeme[currentTargetIndex] = translated;
            currentTargetIndex += 1;

            // Skip the escaped character as well as the backslash.
            i++;
            continue;
        }

        sasm.lexer.pCurrentLexeme[currentTargetIndex] = line[i];
        currentTargetIndex++;
    }
    sasm.lexer.pCurrentLexeme[currentTargetIndex] = '\0';

    // Note: the lexeme is not upper-cased; the call that did so is disabled.

    // Decide which token the lexeme is.
    sasm.lexer.currentToken = ASM_TOKEN_TYPE_INVALID;

    // Anything read in string mode is string content, except a lone quote,
    // which is handled as a quote token further down.
    if (strlen(sasm.lexer.pCurrentLexeme) > 1 || sasm.lexer.pCurrentLexeme[0] != '"')
    {
        if (sasm.lexer.iState == ASM_LEX_STATE_IN_STRING)
        {
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_STRING;
            return ASM_TOKEN_TYPE_STRING;
        }
    }

    // The checks below run in order and later matches override earlier ones.
    if (IsStringInt(sasm.lexer.pCurrentLexeme))
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_INT;

    if (IsStringFloat(sasm.lexer.pCurrentLexeme))
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_FLOAT;

    if (IsStringIdent(sasm.lexer.pCurrentLexeme))
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_IDENT;

    if (strcmp(sasm.lexer.pCurrentLexeme, ASM_KW_SET_STACK_SIZE) == 0)
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_SETSTACKSIZE;

    if (strcmp(sasm.lexer.pCurrentLexeme, ASM_KW_VAR) == 0)
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_VAR;

    if (strcmp(sasm.lexer.pCurrentLexeme, ASM_KW_FUNCTION) == 0)
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_FUNC;

    if (strcmp(sasm.lexer.pCurrentLexeme, ASM_KW_PARAM) == 0)
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_PARAM;

    if (strcmp(sasm.lexer.pCurrentLexeme, ASM_KW_RETVAL) == 0)
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_REG_RETVAL;

    // Single-character lexemes: delimiters and line breaks.
    if (strlen(sasm.lexer.pCurrentLexeme) == 1)
    {
        switch (sasm.lexer.pCurrentLexeme[0])
        {
        case '"':
            // A quote toggles string mode: it opens a string when none is
            // active and closes the active one otherwise.
            switch (sasm.lexer.iState)
            {
            case ASM_LEX_STATE_NO_STRING:
                sasm.lexer.iState = ASM_LEX_STATE_IN_STRING;
                break;
            case ASM_LEX_STATE_IN_STRING:
                sasm.lexer.iState = ASM_LEX_STATE_END_STRING;
            }
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_QUATE;
            break;
        case ',':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_COMMA;
            break;
        case ':':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_COLON;
            break;
        case '[':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_OPEN_BRACKET;
            break;
        case ']':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_CLOSE_BRACKET;
            break;
        case '{':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_OPEN_BRACE;
            break;
        case '}':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_CLOSE_BRACE;
            break;
        case '\n':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_NEWLINE;
            break;
        case '\r':
            sasm.lexer.currentToken = ASM_TOKEN_TYPE_NEWLINE;
            break;
        }
    }

    // An instruction mnemonic overrides every classification made above.
    InstrLookup instrLookup;
    if (GetInstruction(sasm.instrLookup, sasm.lexer.pCurrentLexeme, &instrLookup))
        sasm.lexer.currentToken = ASM_TOKEN_TYPE_INSTR;

    return sasm.lexer.currentToken;
}