/* Program entry point: print the program name, then read stdin one line
 * at a time, lex each line into a token array, print the tokens, and
 * release all per-line resources.  Returns 0 on normal termination.
 *
 * Fix: argv[0] may legally be NULL (exec with an empty argument vector);
 * passing NULL to printf("%s") is undefined behavior, so fall back to a
 * fixed name in that case. */
int main(int argc, char *argv[]) {
    char *pcLine;
    DynArray_T oTokens;

    pcPgmName = (argc > 0 && argv[0] != NULL) ? argv[0] : "a.out";
    printf("%s \n", pcPgmName);
    printf("------------------------------------\n");
    while ((pcLine = readLine(stdin)) != NULL) {
        printf("Line: %s\n", pcLine);
        fflush(stdout);
        oTokens = lexLine(pcLine);
        if (oTokens != NULL) {
            writeTokens(oTokens);
            freeTokens(oTokens);    /* free each token's payload first */
            DynArray_free(oTokens); /* then the array itself */
        }
        printf("------------------------------------\n");
        free(pcLine); /* readLine hands ownership of the line buffer to us */
    }
    return 0;
}
/* Load this kind's persisted property-name -> token map and rebuild the
 * in-memory token table, then allocate tokens for any names in `strings`
 * that are not mapped yet, persisting the map once if anything was added.
 *
 * req     - current DB request; its transaction is used for the read.
 * strings - property names that must have tokens after this call.
 * Returns MojErrNone on success; MojErrDbInvalidToken if a stored token
 * value is out of range; otherwise propagates the first failing step
 * (MojErrCheck returns early on error).
 * Caller must hold m_lock (asserted below). */
MojErr MojDbKindState::initTokens(MojDbReq& req, const StringSet& strings)
{
    // TODO: bug inside this function. (latest strace step)
    MojAssertMutexLocked(m_lock);
    // TODO: filing load tokens. Go inside readObj
    // load tokens: read the stored token object into m_tokensObj
    MojErr err = readObj(TokensKey, m_tokensObj, m_kindEngine->kindDb(), req.txn(), m_oldTokensItem);
    MojErrCheck(err);
    // populate token vec: invert the map so m_tokenVec[token - marker] == name
    MojUInt8 maxToken = 0;
    err = m_tokenVec.resize(m_tokensObj.size());
    MojErrCheck(err);
    for (MojObject::ConstIterator i = m_tokensObj.begin(); i != m_tokensObj.end(); ++i) {
        MojString key = i.key();
        MojInt64 value = i.value().intValue();
        // tokens are stored biased by TokenStartMarker; idx is the vec slot
        MojSize idx = (MojSize) (value - MojObjectWriter::TokenStartMarker);
        // reject tokens below the marker, at/above the MojUInt8 ceiling,
        // or whose slot falls outside the vector sized above
        if (value < MojObjectWriter::TokenStartMarker || value >= MojUInt8Max || idx >= m_tokenVec.size()) {
            MojErrThrow(MojErrDbInvalidToken);
        }
        if (value > maxToken) {
            maxToken = (MojUInt8) value;
        }
        err = m_tokenVec.setAt(idx, key);
        MojErrCheck(err);
    }
    // next token continues after the highest one seen (only if any existed)
    if (maxToken > 0) {
        m_nextToken = (MojUInt8) (maxToken + 1);
    }
    // add strings: allocate tokens (in memory only, write=false) for any
    // requested names not already present
    bool updated = false;
    for (StringSet::ConstIterator i = strings.begin(); i != strings.end(); ++i) {
        if (!m_tokensObj.contains(*i)) {
            updated = true;
            MojUInt8 token = 0;
            TokenVec tokenVec;
            MojObject tokenObj;
            err = addPropImpl(*i, false, token, tokenVec, tokenObj);
            MojErrCheck(err);
        }
    }
    // persist once after the loop rather than per-addition
    if (updated) {
        err = writeTokens(m_tokensObj);
        MojErrCheck(err);
    }
    return MojErrNone;
}
/* Map a property name to a token, allocating the next free token if the
 * name is new.  Updates are made on copies of the token object/vector and
 * committed to member state only after every step (including the optional
 * write) has succeeded.
 *
 * propName    - property name to tokenize (must be non-NULL).
 * write       - if true, persist the updated token object before committing.
 * tokenOut    - receives the token; stays MojTokenSet::InvalidToken when the
 *               token space is exhausted (note: that case still returns
 *               MojErrNone — callers must check tokenOut).
 * vecOut      - receives the committed token vector (only set on allocation).
 * tokenObjOut - receives the committed token object (only set on allocation).
 * Caller must hold m_lock (asserted below). */
MojErr MojDbKindState::addPropImpl(const MojChar* propName, bool write, MojUInt8& tokenOut, TokenVec& vecOut, MojObject& tokenObjOut)
{
    MojAssert(propName);
    MojAssertMutexLocked(m_lock);
    tokenOut = MojTokenSet::InvalidToken;
    // check if we've already added the prop
    MojUInt32 token;
    bool found = false;
    MojErr err = m_tokensObj.get(propName, token, found);
    MojErrCheck(err);
    if (found) {
        MojAssert(token <= MojUInt8Max);
        tokenOut = (MojUInt8) token;
        return MojErrNone;
    }
    // update the db and our in-memory state (only while tokens remain)
    if (m_nextToken < MojUInt8Max) {
        // update copies of obj and vec so a failure leaves members untouched
        MojObject obj(m_tokensObj);
        TokenVec tokenVec(m_tokenVec);
        MojString prop;
        err = prop.assign(propName);
        MojErrCheck(err);
        err = tokenVec.push(prop);
        MojErrCheck(err);
        err = obj.put(propName, m_nextToken);
        MojErrCheck(err);
        // write object before committing, so persistence failure aborts
        if (write) {
            err = writeTokens(obj);
            MojErrCheck(err);
        }
        // update state and return values; m_nextToken advances only here
        m_tokensObj = obj;
        m_tokenVec.swap(tokenVec);
        tokenOut = m_nextToken++;
        vecOut = m_tokenVec;
        tokenObjOut = m_tokensObj;
    }
    return MojErrNone;
}
void LZ::myEncode(char n , char l , char s ) { cout << "encoding..."<<endl; N = n; L = l; S = s; //default values: N = 11, L = 4, S = 3 if (ifile.is_open()) { cout << "file is opened..." << endl; W = pow(2, N); F = pow(2, L)-1; char* buffer = new char[F]; window = ""; lookahead = ""; //while there is input to process //(i.e. there is unread data or unprocessed data in the lookahead) while (!ifile.eof()|| lookahead!="") { //if there is unread data if (!ifile.eof()) {//read enough chars(at most F) to make lookahead contain F chars ifile.read(buffer, F - lookahead.length()); //only append the number of chars that were read in the previous step to the //lookahead(otherwise the rest of the data will be junk data) if you dont use //gcount, at the end of the file you will try to append more chars than you have lookahead.append(buffer, ifile.gcount()); } //make sure the size of window is correct if (window.length() + lookahead.length() > W) { //resize window so that len(window) + len(lookahead)<=W int window_start= window.length() - (W - lookahead.length()); int window_end = W - lookahead.length(); window = window.substr(window_start,window_end); } findMatch(); } ifile.close(); } //now we need to encode the completed tokens to a file and output the reuslt //cleanTokens(); writeTokens(); }
/* clean and scan input into lex table and token list.
 *
 * Cleans `inputPath` into `cleanInputPath`, then simulates a DFA over the
 * cleaned stream, appending tokens (identifiers/reserved words, numbers,
 * and punctuation/operators) to the caller-supplied `tokens` list, and
 * finally writes the lexeme table and token list files.  Exits the process
 * with EXIT_FAILURE on any scanner error.  Returns 0 on success.
 *
 * Fixes:
 *  - `ch`/`cha` are now `int`, not `char`: getc() returns int, and storing
 *    it in char makes EOF detection implementation-defined and passing
 *    negative values to isalpha()/isdigit() undefined behavior.
 *  - EOF is compared with the EOF macro instead of the magic -1.
 *  - the "5 digits" error message contained a stray newline that split the
 *    string literal across lines; it is rejoined to match the other messages.
 *  - the failed ":=" branch now pushes back the unread look-ahead char
 *    rather than ':' (the subsequent invalid-character exit is unchanged).
 */
int scan(const char* inputPath, const char* cleanInputPath, const char* lexTablePath, const char* tokenListPath, token* tokens)
{
    // Open clean input for reading
    FILE* ifp = getCleanInput(inputPath, cleanInputPath);
    // Linked list of tokens
    token* firstToken = tokens;
    tokens->type = -1;
    int countTokens = 0;
    // Keep track of current line number
    int lineNumber = 1;
    // Loop through input as DFA simulation
    while (!feof(ifp)) {
        // Get character from stream (int so EOF is representable)
        int ch = getc(ifp);
        // Copy character into a temp string
        char lexeme[12] = "";
        // Boolean to check if the current ch has been matched
        int matched = 0;
        // Check if ch is part of an Identifier or Reserved Word
        if (isalpha(ch)) {
            int couldBeReserved = 1;
            int letterCount = 0;
            matched = 1;
            // Get the next char while checking if it's alphanumeric
            while ((isalpha(ch) || isdigit(ch)) && !feof(ifp)) {
                // If token contains a digit then it's not a reserved word
                if (isdigit(ch)) {
                    couldBeReserved = 0;
                }
                // Append ch to temp token
                append(lexeme, ch);
                letterCount++;
                // Identifier can't be longer than 11 characters
                if (letterCount > 11) {
                    fprintf(stdout, "[SCANNER-ERROR] Identifiers may not be longer than 11 characters. line %d.\n", lineNumber);
                    exit(EXIT_FAILURE);
                }
                // Get next ch
                ch = getc(ifp);
            }
            // Go back 1 char (first non-alphanumeric)
            ungetc(ch, ifp);
            token_type type;
            if (couldBeReserved) {
                type = getReservedType(lexeme);
            } else {
                type = identsym;
            }
            addToList(&tokens, lexeme, 0, type, &countTokens, lineNumber);
        } else {
            // Not alphabetic, go back
            matched = 0;
            ungetc(ch, ifp);
        }
        // Get next char
        ch = getc(ifp);
        // Check if ch is part of a Value
        if (isdigit(ch)) {
            matched = 1;
            int numCount = 0;
            while (isdigit(ch)) {
                // Append ch to temp token
                append(lexeme, ch);
                numCount++;
                // Number can't be longer than 5 digits
                if (numCount > 5) {
                    fprintf(stdout, "[SCANNER-ERROR] Numbers may not be longer than 5 digits. line %d.\n", lineNumber);
                    exit(EXIT_FAILURE);
                }
                // Get next ch
                ch = getc(ifp);
                // Identifiers can't start with numbers, throw error
                if (isalpha(ch)) {
                    fprintf(stdout, "[SCANNER-ERROR] Variable doesn't start with a letter. line %d.\n", lineNumber);
                    exit(EXIT_FAILURE);
                }
            }
            // Parse int value
            int value = atoi(lexeme);
            // Go back 1 char
            ungetc(ch, ifp);
            addToList(&tokens, lexeme, value, numbersym, &countTokens, lineNumber);
        } else {
            // Not a digit, go back
            matched = 0;
            ungetc(ch, ifp);
        }
        // Get next ch
        ch = getc(ifp);
        // Check for :=
        if (ch == ':') {
            int cha = getc(ifp);
            if (cha == '=') {
                matched = 1;
                addToList(&tokens, ":=", 0, becomesym, &countTokens, lineNumber);
            } else {
                // Not := — push back the unread look-ahead char; the lone
                // ':' (still in ch) hits the invalid-character exit below
                matched = 0;
                ungetc(cha, ifp);
            }
        }
        // Check for =
        if (ch == '=') {
            matched = 1;
            addToList(&tokens, "=", 0, equalsym, &countTokens, lineNumber);
        }
        // Check for > and >=
        if (ch == '>') {
            matched = 1;
            // Check if >=
            ch = getc(ifp);
            if (ch == '=') {
                addToList(&tokens, ">=", 0, geqsym, &countTokens, lineNumber);
            } else {
                ungetc(ch, ifp);
                addToList(&tokens, ">", 0, gtrsym, &countTokens, lineNumber);
            }
        }
        // Check for < and <= and <>
        if (ch == '<') {
            matched = 1;
            // Check if <= or <>
            ch = getc(ifp);
            if (ch == '=') {
                addToList(&tokens, "<=", 0, leqsym, &countTokens, lineNumber);
            } else if (ch == '>') {
                addToList(&tokens, "<>", 0, neqsym, &countTokens, lineNumber);
            } else {
                ungetc(ch, ifp);
                addToList(&tokens, "<", 0, lessym, &countTokens, lineNumber);
            }
        }
        // Check for (
        if (ch == '(') {
            matched = 1;
            addToList(&tokens, "(", 0, lparentsym, &countTokens, lineNumber);
        }
        // Check for )
        if (ch == ')') {
            matched = 1;
            addToList(&tokens, ")", 0, rparentsym, &countTokens, lineNumber);
        }
        // Check for ,
        if (ch == ',') {
            matched = 1;
            addToList(&tokens, ",", 0, commasym, &countTokens, lineNumber);
        }
        // Check for ;
        if (ch == ';') {
            matched = 1;
            addToList(&tokens, ";", 0, semicolonsym, &countTokens, lineNumber);
        }
        // Check for .
        if (ch == '.') {
            matched = 1;
            addToList(&tokens, ".", 0, periodsym, &countTokens, lineNumber);
        }
        // Check for +
        if (ch == '+') {
            matched = 1;
            addToList(&tokens, "+", 0, plussym, &countTokens, lineNumber);
        }
        // Check for -
        if (ch == '-') {
            matched = 1;
            addToList(&tokens, "-", 0, minussym, &countTokens, lineNumber);
        }
        // Check for *
        if (ch == '*') {
            matched = 1;
            addToList(&tokens, "*", 0, multsym, &countTokens, lineNumber);
        }
        // Check for /
        if (ch == '/') {
            matched = 1;
            addToList(&tokens, "/", 0, slashsym, &countTokens, lineNumber);
        }
        // Increment line number on newline
        if (ch == '\n') lineNumber++;
        // Throw error for invalid character (whitespace, tab, EOF allowed)
        if (!matched && ch != ' ' && ch != '\n' && ch != '\r' && ch != EOF && ch != 9) {
            fprintf(stdout, "[SCANNER-ERROR] Invalid character '%c'. line %d.\n", ch, lineNumber);
            exit(EXIT_FAILURE);
        }
    }
    // Close input
    fclose(ifp);
    // Write lexeme table and token list
    FILE* lexTblPtr = openFileScanner(lexTablePath, "w");
    FILE* tokLstPtr = openFileScanner(tokenListPath, "w");
    writeTokens(firstToken, lexTblPtr, tokLstPtr, countTokens);
    // Close output
    fclose(lexTblPtr);
    fclose(tokLstPtr);
    return 0;
}