Example #1
0
/* Driver: echo each line read from stdin, lex it into a token array,
 * print the tokens, and release every resource before the next line.
 * Returns 0 when stdin reaches EOF. */
int main(int argc, char *argv[])
{
   DynArray_T oTokens;
   char *pcLine;

   pcPgmName = argv[0];

   printf("%s \n", pcPgmName);
   printf("------------------------------------\n");

   for (;;)
   {
      pcLine = readLine(stdin);
      if (pcLine == NULL)
         break;

      printf("Line: %s\n", pcLine);
      fflush(stdout);

      oTokens = lexLine(pcLine);
      if (oTokens != NULL)
      {
         writeTokens(oTokens);
         /* free the token objects first, then the array that held them */
         freeTokens(oTokens);
         DynArray_free(oTokens);
      }

      printf("------------------------------------\n");
      free(pcLine);
   }

   return 0;
}
Example #2
0
// Load the persisted property-name -> token map for this kind, rebuild the
// in-memory reverse lookup vector (token index -> property name), then assign
// fresh tokens to any names in `strings` that are not yet in the map,
// persisting the map once at the end if anything was added.
// Preconditions: m_lock must be held by the caller (asserted below).
// Returns MojErrNone on success, or the first error encountered.
MojErr MojDbKindState::initTokens(MojDbReq& req, const StringSet& strings)
{
	// TODO: bug inside this function. (latest strace step)
	MojAssertMutexLocked(m_lock);

	// TODO: filing load tokens. Go inside readObj
	// load tokens: reads the stored token object into m_tokensObj
	// (m_oldTokensItem keeps the previously stored item for later comparison
	// or cleanup — presumably; verify against readObj's contract)
	MojErr err = readObj(TokensKey, m_tokensObj, m_kindEngine->kindDb(), req.txn(), m_oldTokensItem);
	MojErrCheck(err);

	// populate token vec: one slot per entry in the loaded map; slot index is
	// the token value offset by TokenStartMarker
	MojUInt8 maxToken = 0;
	err = m_tokenVec.resize(m_tokensObj.size());
	MojErrCheck(err);
	for (MojObject::ConstIterator i = m_tokensObj.begin(); i != m_tokensObj.end(); ++i) {
		MojString key = i.key();
		MojInt64 value = i.value().intValue();
		MojSize idx = (MojSize) (value - MojObjectWriter::TokenStartMarker);
		// reject tokens below the marker, at/above the 8-bit ceiling, or whose
		// index would fall outside the vector sized from the map itself
		if (value < MojObjectWriter::TokenStartMarker || value >= MojUInt8Max || idx >= m_tokenVec.size()) {
			MojErrThrow(MojErrDbInvalidToken);
		}
		// track the highest token seen so new tokens can continue after it
		if (value > maxToken) {
			maxToken = (MojUInt8) value;
		}
		err = m_tokenVec.setAt(idx, key);
		MojErrCheck(err);
	}
	if (maxToken > 0) {
		m_nextToken = (MojUInt8) (maxToken + 1);
	}

	// add strings: give each not-yet-tokenized name a token via addPropImpl
	// with write=false — the per-call out-params are discarded because the
	// whole updated map is written once below
	bool updated = false;
	for (StringSet::ConstIterator i = strings.begin(); i != strings.end(); ++i) {
		if (!m_tokensObj.contains(*i)) {
			updated = true;
			MojUInt8 token = 0;
			TokenVec tokenVec;
			MojObject tokenObj;
			err = addPropImpl(*i, false, token, tokenVec, tokenObj);
			MojErrCheck(err);
		}
	}
	// single batched write instead of one write per added string
	if (updated) {
		err = writeTokens(m_tokensObj);
		MojErrCheck(err);
	}
	return MojErrNone;
}
Example #3
0
// Assign a token to propName (or return its existing token) and report the
// resulting token map/vector through the out-params.
// - write: when true, the updated token object is persisted before the
//   in-memory state is committed.
// - tokenOut: the assigned token, or MojTokenSet::InvalidToken when the 8-bit
//   token space is exhausted (m_nextToken >= MojUInt8Max) — note that case
//   still returns MojErrNone, so callers must check tokenOut.
// Preconditions: propName non-null, m_lock held (both asserted).
MojErr MojDbKindState::addPropImpl(const MojChar* propName, bool write, MojUInt8& tokenOut, TokenVec& vecOut, MojObject& tokenObjOut)
{
	MojAssert(propName);
	MojAssertMutexLocked(m_lock);

	tokenOut = MojTokenSet::InvalidToken;

	// check if we've already added the prop
	MojUInt32 token;
	bool found = false;
	MojErr err = m_tokensObj.get(propName, token, found);
	MojErrCheck(err);
	if (found) {
		MojAssert(token <= MojUInt8Max);
		tokenOut = (MojUInt8) token;
		return MojErrNone;
	}
	// update the db and our in-memory state
	if (m_nextToken < MojUInt8Max) {
		// update copies of obj and vec — all fallible work happens on the
		// copies so an error leaves m_tokensObj/m_tokenVec untouched
		MojObject obj(m_tokensObj);
		TokenVec tokenVec(m_tokenVec);
		MojString prop;
		err = prop.assign(propName);
		MojErrCheck(err);
		err = tokenVec.push(prop);
		MojErrCheck(err);
		err = obj.put(propName, m_nextToken);
		MojErrCheck(err);
		// write object (only when the caller wants immediate persistence;
		// batch callers pass write=false and persist once themselves)
		if (write) {
			err = writeTokens(obj);
			MojErrCheck(err);
		}
		// update state and return values — commit point: nothing past the
		// write can fail with MojErrCheck, so state stays consistent
		m_tokensObj = obj;
		m_tokenVec.swap(tokenVec);
		tokenOut = m_nextToken++;
		vecOut = m_tokenVec;
		tokenObjOut = m_tokensObj;
	}
	return MojErrNone;
}
Example #4
0
// Encode the already-opened input file (ifile) with the LZ sliding-window
// scheme, then write the accumulated tokens out.
// Parameters set the scheme's bit widths: n = window index bits,
// l = length bits, s = symbol bits (default values: N = 11, L = 4, S = 3).
void LZ::myEncode(char n , char l , char s )
{
	cout << "encoding..."<<endl;
	N = n;
	L = l;
	S = s;
	if (ifile.is_open()) {
		cout << "file is opened..." << endl;
		W = pow(2, N);      // window capacity: 2^N characters
		F = pow(2, L)-1;    // max lookahead length: 2^L - 1 characters
		char* buffer = new char[F];
		window = "";
		lookahead = "";
		// while there is input to process
		// (i.e. there is unread data or unprocessed data in the lookahead)
		while (!ifile.eof()|| lookahead!="") {
			// if there is unread data, top the lookahead back up to F chars
			if (!ifile.eof())
			{
				ifile.read(buffer, F - lookahead.length());
				// append only the chars actually read (gcount); at EOF the
				// tail of buffer is junk and must not be appended
				lookahead.append(buffer, ifile.gcount());
			}
			// shrink window so len(window) + len(lookahead) <= W,
			// keeping only its most recent characters
			if (window.length() + lookahead.length() > W) {
				int window_start= window.length() - (W - lookahead.length());
				int window_end	= W - lookahead.length();
				window = window.substr(window_start,window_end);
			}
			findMatch();
		}
		// BUG FIX: buffer was allocated with new[] but never released,
		// leaking F bytes on every call
		delete[] buffer;
		ifile.close();
	}
	//now we need to encode the completed tokens to a file and output the reuslt
	//cleanTokens();
	writeTokens();

}
Example #5
0
/* clean and scan input into lex table and token list */
int scan(const char* inputPath, const char* cleanInputPath,
        const char* lexTablePath, const char* tokenListPath, token* tokens) {

    // Open clean input for reading
    FILE* ifp = getCleanInput(inputPath, cleanInputPath);

    // Linked list of tokens
    token* firstToken = tokens;
    tokens->type = -1;
    int countTokens = 0;

    // Keep track of current line number
    int lineNumber = 1;

    // Loop through input as DFA simulation
    while(!feof(ifp)) {

        // Get Character from stream
        char ch = getc(ifp);

        // Copy character into a temp string
        char lexeme[12] = "";

        // Boolean to check if the current ch has been matched
        int matched = 0;

        // Check if ch is part of an Identifier or Reserved Word
        if(isalpha(ch)) {
            int couldBeReserved = 1;
            int letterCount = 0;
            matched = 1;

            // Get the next char while checking if it's alphanumeric
            while( (isalpha(ch) || isdigit(ch)) && !feof(ifp)) {

                // If token contains a digit then it's not a reserved word
                if (isdigit(ch)) {
                    couldBeReserved = 0;
                }

                // Append ch to temp token
                append(lexeme, ch);
                letterCount++;

                // Identifier can't be longer than 11 characters
                if (letterCount > 11) {
                    fprintf(stdout, "[SCANNER-ERROR] Identifiers may not be longer than 11 characters. line %d.\n", lineNumber);
                    exit(EXIT_FAILURE);
                }

                // Get next ch
                ch = getc(ifp);
            }

            // Go back 1 char
            ungetc(ch, ifp);

            token_type type;
            if (couldBeReserved) {
                type = getReservedType(lexeme);
            } else {
                type = identsym;
            }

            addToList(&tokens, lexeme, 0, type, &countTokens, lineNumber);
        } else {
            // Not alphabetic, go back
            matched = 0;
            ungetc(ch, ifp);
        }

        // Get next char
        ch = getc(ifp);

        // Check if ch is part of a Value
        if(isdigit(ch)) {

            matched = 1;
            int numCount = 0;

            while(isdigit(ch)) {
                // Append ch to temp token
                append(lexeme, ch);
                numCount++;

                // Number can't be longer than 5 digits
                if (numCount > 5) {
                    fprintf(stdout, "[SCANNER-ERROR] Numbers may not be longer than 5 digits. line %d.\n", lineNumber);
                    exit(EXIT_FAILURE);
                }

                // Get next ch
                ch = getc(ifp);

                // Identifiers can't start with numbers, throw error
                if (isalpha(ch)) {
                    fprintf(stdout, "[SCANNER-ERROR] Variable doesn't start with a letter. line %d.\n", lineNumber);
                    exit(EXIT_FAILURE);
                }
            }

            // Parse int value
            int value = atoi(lexeme);

            // Go back 1 char
            ungetc(ch, ifp);

            addToList(&tokens, lexeme, value, numbersym, &countTokens, lineNumber);
        } else {
            // Not a digit, go back
            matched = 0;
            ungetc(ch, ifp);
        }

        // Get next ch
        ch = getc(ifp);

        // Check for :=
        if (ch == ':') {
            char cha = getc(ifp);
            if (cha == '=') {
                matched = 1;
                addToList(&tokens, ":=", 0, becomesym, &countTokens, lineNumber);
            } else {
                // Not :=, go back
                matched = 0;
                ungetc(ch, ifp);
            }
        }

        // Check for =
        if(ch == '=') {
            matched = 1;
            addToList(&tokens, "=", 0, equalsym, &countTokens, lineNumber);
        }

        // Check for > and >=
        if (ch == '>') {
            matched = 1;

            // Check if >=
            ch = getc(ifp);

            if (ch == '=') {
                addToList(&tokens, ">=", 0, geqsym, &countTokens, lineNumber);
            } else {
                ungetc(ch, ifp);
                addToList(&tokens, ">", 0, gtrsym, &countTokens, lineNumber);
            }
        }

        // Check for < and <=
        if (ch == '<') {
            matched = 1;

            // Check if <= or <>
            ch = getc(ifp);

            if (ch == '=') {
                addToList(&tokens, "<=", 0, leqsym, &countTokens, lineNumber);
            } else if ( ch == '>') {
                addToList(&tokens, "<>", 0, neqsym, &countTokens, lineNumber);
            } else {
                ungetc(ch, ifp);
                addToList(&tokens, "<", 0, lessym, &countTokens, lineNumber);
            }
        }

        // Check for (
        if (ch == '(') {
            matched = 1;
            addToList(&tokens, "(", 0, lparentsym, &countTokens, lineNumber);
        }

        // Check for )
        if (ch == ')') {
            matched = 1;
            addToList(&tokens, ")", 0, rparentsym, &countTokens, lineNumber);
        }

        // Check for ,
        if (ch == ',') {
            matched = 1;
            addToList(&tokens, ",", 0, commasym, &countTokens, lineNumber);
        }

        // Check for ;
        if (ch == ';') {
            matched = 1;
            addToList(&tokens, ";", 0, semicolonsym, &countTokens, lineNumber);
        }

        // Check for .
        if (ch == '.') {
            matched = 1;
            addToList(&tokens, ".", 0, periodsym, &countTokens, lineNumber);
        }

        // Check for +
        if (ch == '+') {
            matched = 1;
            addToList(&tokens, "+", 0, plussym, &countTokens, lineNumber);
        }

        // Check for -
        if (ch == '-') {
            matched = 1;
            addToList(&tokens, "-", 0, minussym, &countTokens, lineNumber);
        }

        // Check for *
        if (ch == '*') {
            matched = 1;
            addToList(&tokens, "*", 0, multsym, &countTokens, lineNumber);
        }

        // Check for /
        if (ch == '/') {
            matched = 1;
            addToList(&tokens, "/", 0, slashsym, &countTokens, lineNumber);
        }

        // Increment line number on newline
        if (ch == '\n')
            lineNumber++;

        // Throw error for invalid character
        if (!matched && ch != ' ' && ch != '\n' && ch != '\r' && ch != -1 && ch != 9) {
            fprintf(stdout, "[SCANNER-ERROR] Invalid character '%c'. line %d.\n", ch, lineNumber);
            exit(EXIT_FAILURE);
        }
    }

    // Close input
	fclose(ifp);

    // Write lexeme table
    FILE* lexTblPtr = openFileScanner(lexTablePath, "w");
    FILE* tokLstPtr = openFileScanner(tokenListPath, "w");
    writeTokens(firstToken, lexTblPtr, tokLstPtr, countTokens);

    // Close output
    fclose(lexTblPtr);
    fclose(tokLstPtr);

	return 0;
}