Beispiel #1
0
/**
 * Extracts a number literal from @p str starting at index @p i.
 *
 * Accepted shape: an optional sign (first character only), digits, and at
 * most one dot. At least one digit is required, so a lone "+", "-" or "."
 * is not treated as a number.
 *
 * @param str   text to scan.
 * @param i     index of the first character to examine.
 * @param numb  receives the characters of the number; left empty on failure.
 * @return true if a number containing at least one digit was read.
 */
bool Tokenizer::getNumber(const QString &str, int i, QString &numb)
{
    // Examples of valid numbers are:
    // 1234
    // +1234
    // -1234
    // -1234.99
    // .99
    // +.99
    // -1234.

    numb.clear();

    // A negative start index can never address a character of the string.
    if (i < 0)
        return false;

    bool first = true;
    bool dotFound = false;
    bool digitFound = false;

    for (; i < str.size(); ++i) {
        if (first && isSign(str, i)) {
            // A sign is only allowed as the very first character.
            numb.append(str[i]);
        } else if (isDigit(str, i)) {
            numb.append(str[i]);
            digitFound = true;
        } else if (!dotFound && isDot(str, i)) {
            // Only a single dot may appear in a number.
            numb.append(str[i]);
            dotFound = true;
        } else {
            break;
        }

        first = false;
    }

    // A sign and/or dot without any digit is not a number.
    if (!digitFound) {
        numb.clear();
        return false;
    }

    return true;
}
Beispiel #2
0
	// Skips leading whitespace, then copies (put) and advances (next) over
	// every following character that is a sign, a dot or a digit. Stops at
	// end of input, at whitespace, or at the first other character.
	void parseNumber() {
		skipWhiteSpace();
		for (;;) {
			if (eof() || isWhiteSpace())
				break;
			if (!(isSign() || isDot() || isDigit()))
				break;
			put();
			next();
		}
	}
Beispiel #3
0
            bool Number::IsValid(char character) {

                switch ((States) _currentState) {
                    case States::Start:
                        return isMinus(character) || isDigit(character);
                    case States::Negation:
                        return isDigit(character);
                    case States::RationalPercent:
                        return isDot(character) || isExponentialSymbol(character);
                    case States::Number:
                        return isDigit(character) || isDot(character) || isExponentialSymbol(character);
                    case States::Decimal:
                        return isDigit(character) || isExponentialSymbol(character);
                    case States::Exponential_1:
                        return isDigit(character) || isPlus(character) || isMinus(character);
                    case States::Exponential_2:
                        return isDigit(character);
                    case States::Closing:
                        return false;
                }
            }
Beispiel #4
0
/**
 * Scans the directory "name" for entries matching "find".
 *
 * An entry counts as found when t == 0 and compareName() matches, or when
 * one of checkDir()/checkReg()/checkLink() accepts it for the requested
 * type "t". Every entry that passes checkDirAndRights() and is not rejected
 * by isDot() is appended to "list" as "<name>/<entry>" so that it can be
 * searched later; the inserted string is heap-allocated and handed over to
 * insert().
 *
 * @param name directory to scan.
 * @param find file name being looked for.
 * @param t    type selector forwarded to the check* helpers (0 = any type).
 * @param list list receiving the paths of sub-directories still to search.
 * @return true if at least one matching entry was seen in this directory.
 */
bool searchForFile(char* name, char *find, int t, struct Node* list) {
	/* Start as NULL so the cleanup below can tell whether there is a
	 * stream to close when openDir() fails. */
	DIR *dir = NULL;

	struct dirent *ent;
	struct stat f_info;
	bool foundFile = false;

	if (openDir(&dir, name, &ent)) {
		do {
			/* NOTE(review): lstat() gets the bare entry name, which only
			 * works if the current working directory is "name" --
			 * presumably openDir() takes care of that; confirm. */
			if ((lstat(ent->d_name, &f_info)) < 0) {
				fprintf(stderr, "lstat error: ");
				perror(ent->d_name);
			} else {
				if (t == 0 && compareName(ent->d_name, find)) {
					foundFile = true;
				} else if (checkDir(t, f_info, ent->d_name, find)) {
					foundFile = true;
				} else if (checkReg(t, f_info, ent->d_name, find)) {
					foundFile = true;
				} else if (checkLink(t, f_info, ent->d_name, find)) {
					foundFile = true;
				}
				if (checkDirAndRights(f_info) && !isDot(ent->d_name)) {
					/* +2: one byte for the '/' separator, one for the
					 * terminating NUL. */
					size_t l = strlen(name) + strlen(ent->d_name);
					char *str = (char *)calloc(l + 2, sizeof(char));

					if (str == NULL) {
						/* Out of memory: report it and skip this
						 * sub-directory instead of writing through NULL. */
						perror("calloc");
					} else {
						strcpy(str, name);
						strcat(str, "/");
						strcat(str, ent->d_name);
						insert(list, str);
					}
				}
			}
		} while ((ent = readdir(dir)));

	}
	/* Only close a stream that actually exists. */
	if (dir != NULL) {
		closedir(dir);
	}

	return foundFile;
}
Beispiel #5
0
/*
 * Builds the feature table out.txt from:
 *   data.txt     - raw messages, one per line, each starting with a label
 *                  word beginning with 'h' (ham) or 's' (spam);
 *   data2.txt    - processed messages: a numeric label followed by
 *                  4-character words separated by single characters;
 *   keywords.txt - keyword dictionary (4 characters, two ints and a double
 *                  per record; only the 4 characters are used here).
 *
 * For each of the LINES messages one row is written: label, word count,
 * message length, "bad spelling" flag, share of digits, share of
 * non-alphanumeric characters, and one 0/1 column per keyword.
 *
 * Returns 0 on success, 1 if a file cannot be opened, the input ends
 * prematurely, or the output cannot be flushed.
 */
int main(void){
  FILE *f = NULL, *g = NULL, *h = NULL, *out = NULL;
  char c = '\0';
  char keys[NUM][4];
  char word[4];
  int keyarr[NUM];
  /* spam starts at 0 so that an unlabelled first line never reads an
   * uninitialized value; an unlabelled later line keeps the previous label. */
  int len = 0, n = 0, spam = 0, a, b, dot, i;
  int msglen, wordnum, numnum, weirdnum, spel;
  double r;
  int status = 1;

  f = fopen("data.txt", "r");
  g = fopen("data2.txt", "r");
  h = fopen("keywords.txt", "r");
  out = fopen("out.txt", "w");
  if (f == NULL || g == NULL || h == NULL || out == NULL) {
    perror("fopen");
    goto cleanup;
  }

  /* load the keyword dictionary; never read more records than keys[] holds */
  while (len < NUM &&
         fscanf(h, "%c%c%c%c %d %d %lg ", keys[len], keys[len]+1, keys[len]+2,
                keys[len]+3, &a, &b, &r) == 7) len++;

  /* write the header line to out.txt */
  fprintf(out,
      "spam broj_rijeci duljina nepismenost udio_brojeva udio_nealfanumerickih ");
  for (i = 0; i < len; i++){
    fprintf(out, "%c%c%c%c ", keys[i][0], keys[i][1], keys[i][2], keys[i][3]);
  }
  fprintf(out, "\n");

  for (n = 0; n < LINES; n++){
    /* first take the character statistics from the raw file */
    if (fscanf(f, "%c", &c) != 1) goto truncated;
    if (c == 'h') {
      /* remember whether the message is ham ... */
      spam = 0;
    }
    else if (c == 's'){
      /* ... or spam */
      spam = 1;
    }
    /* a line starting with neither label is malformed */
    else printf("nesto nevalja: linija %d\n", n);

    /* skip the rest of the label word (spam/ham); the first non-letter
     * that ends it is consumed and not counted */
    while (isAlpha(c)) {
      if (fscanf(f, "%c", &c) != 1) goto truncated;
    }
    dot = 0;
    msglen = 0;
    numnum = 0;
    weirdnum = 0;
    spel = 0;
    wordnum = 0;

    while (1){
      /* end of file ends the message just like a newline does */
      if (fscanf(f, "%c", &c) != 1 || c == '\n') break;
      msglen++; /* number of characters */
      if (isNum(c)) numnum++; /* counts digits */
      if (isWeird(c)) weirdnum++; /* counts non-alphanumeric characters */
      /* bad spelling: a letter or digit directly after a dot */
      if ((isNum(c) || isAlpha(c)) && (dot == 1)) spel = 1;
      if (isDot(c)) dot = 1;
      else dot = 0;
    }

    /* then take the keywords from the processed file */
    if (fscanf(g, "%d", &a) != 1) goto truncated; /* spam/ham label */
    if (a != spam) printf("nisu konzistentni na liniji: %d\n", n);
    /* read the separator; end of file counts as end of line */
    if (fscanf(g, "%c", &c) != 1) c = '\n';
    for (i = 0; i < len; i++){
      keyarr[i] = 0; /* clear the keyword-presence flags */
    }

    while (c != '\n'){
      for (i = 0; i < 4; i++){
        if (fscanf(g, "%c", word+i) != 1) goto truncated;
      }
      wordnum++; /* number of words in the message */
      b = exsist(word, keys, len); /* index of the keyword, if any */
      if (b >= 0) keyarr[b] = 1;
      /* read the separator; end of file counts as end of line */
      if (fscanf(g, "%c", &c) != 1) c = '\n';
    }

    /* write the feature vector to out.txt; an empty message has ratio 0
     * instead of dividing by zero */
    fprintf(out, "%d %d %d %d %lg %lg ", spam, wordnum, msglen, spel,
        msglen > 0 ? (double)numnum/msglen : 0.0,
        msglen > 0 ? (double)weirdnum/msglen : 0.0);
    for (i = 0; i < len; i++){
      fprintf(out, "%d ", keyarr[i]);
    }
    fprintf(out, "\n");
  }

  status = 0;
  goto cleanup;

truncated:
  fprintf(stderr, "unexpected end of input at line %d\n", n);

cleanup:
  if (f != NULL) fclose(f);
  if (g != NULL) fclose(g);
  if (h != NULL) fclose(h);
  if (out != NULL && fclose(out) != 0) {
    /* a failed close means buffered output may not have been written */
    perror("out.txt");
    status = 1;
  }

  return status;
}
Beispiel #6
0
/**
 * Tries to detect a textual progress indicator ("NN%", "NN percent",
 * "Completed: NN", aria2c "[#... (NN%) ...]") on the console line
 * nCursorLine and reports it to mp_RCon.
 *
 * @param nCursorLine index of the console line to inspect.
 * @return -1 if the buffer is not valid for that line; otherwise the number
 *         parsed from the line, or, when nothing was detected, the previous
 *         console progress if its timeout has not expired yet, else -1.
 */
short CRConData::CheckProgressInConsole(UINT nCursorLine)
{
	if (!isValid(true, nWidth*(nCursorLine+1)))
		return -1;

	const wchar_t* pszCurLine = pConChar + (nWidth * nCursorLine);

	// Handle progress output of NeroCMD and other console programs (if the cursor is within the visible area)
	//Update plugin
	//"Downloading Far                                               99%"
	//NeroCMD
	//"012% ########.................................................................."
	//ChkDsk
	//"Completed: 25%"
	//Rar
	// ...       Vista x86\Vista x86.7z         6%
	//aria2c
	//[#1 SIZE:0B/9.1MiB(0%) CN:1 SPD:1.2KiBs ETA:2h1m11s]
	int nIdx = 0;
	bool bAllowDot = false;
	short nProgress = -1;

	const wchar_t *szPercentEng = L" percent";
	const wchar_t *szComplEng  = L"Completed:";
	static wchar_t szPercentRus[16] = {}, szComplRus[16] = {};
	static int nPercentEngLen = lstrlen(szPercentEng), nComplEngLen = lstrlen(szComplEng);
	static int nPercentRusLen, nComplRusLen;

	// One-time initialization of the localized strings
	if (szPercentRus[0] == 0)
	{
		szPercentRus[0] = L' ';
		TODO("Хорошо бы и другие национальные названия обрабатывать, брать из настройки");
		// Copy after the leading space so the suffix mirrors " percent";
		// copying to the start of the buffer would overwrite that space.
		lstrcpy(szPercentRus+1,L"процент");
		lstrcpy(szComplRus,L"Завершено:");

		nPercentRusLen = lstrlen(szPercentRus);
		// Length of the localized prefix itself, not of the English one
		nComplRusLen = lstrlen(szComplRus);
	}

	// First check whether the digits are at the start of the line (leading spaces)?
	if (pszCurLine[nIdx] == L' ' && isDigit(pszCurLine[nIdx+1]))
		nIdx++; // one leading space before the digit
	else if (pszCurLine[nIdx] == L' ' && pszCurLine[nIdx+1] == L' ' && isDigit(pszCurLine[nIdx+2]))
		nIdx += 2; // two leading spaces before the digit
	else if (!isDigit(pszCurLine[nIdx]))
	{
		// The line does NOT start with a digit. Maybe it starts with one of the known prefixes (ChkDsk)?

		if (!wcsncmp(pszCurLine, szComplRus, nComplRusLen))
		{
			nIdx += nComplRusLen;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			bAllowDot = true;
		}
		else if (!wcsncmp(pszCurLine, szComplEng, nComplEngLen))
		{
			nIdx += nComplEngLen;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			bAllowDot = true;
		}
		else if (!wcsncmp(pszCurLine, L"[#", 2))
		{
			// aria2c: locate "%) " and walk back to the character after '('
			const wchar_t* p = wcsstr(pszCurLine, L"%) ");
			// wcsstr returns NULL when the marker is absent; do not compare or dereference it
			while (p && (p > pszCurLine) && (p[-1] != L'('))
				p--;
			if (p && (p > pszCurLine))
				nIdx = static_cast<int>(p - pszCurLine);
		}

		// No known prefix found; check whether a percentage is at the end of the line?
		if (!nIdx)
		{
			//TODO("Does not work with a single digit");
			// Creating archive T:\From_Work\VMWare\VMWare.part006.rar
			// ...       Vista x86\Vista x86.7z         6%
			int i = nWidth - 1;

			// Strip trailing spaces
			while ((i > 3) && (pszCurLine[i] == L' '))
				i--;

			// Now, if we reached '%' and a digit precedes it
			if (i >= 3 && pszCurLine[i] == L'%' && isDigit(pszCurLine[i-1]))
			{
				//i -= 2;
				i--;

				// j walks back to the first digit of the run that ends at i;
				// k (if set) is the first digit of the integer part before a dot
				int j = i, k = -1;
				while (j > 0 && isDigit(pszCurLine[j-1]))
					j--;

				// May be something like "Progress 25.15%"
				if (((i - j) <= 2) && (j >= 2) && isDot(pszCurLine[j-1]))
				{
					k = j - 1;
					while (k > 0 && isDigit(pszCurLine[k-1]))
						k--;
				}

				if (k >= 0)
				{
					if (((j - k) <= 3) // 2 digits + dot
						|| (((j - k) <= 4) && (pszCurLine[k] == L'1'))) // "100.0%"
					{
						nIdx = i = k;
						bAllowDot = true;
					}
				}
				else
				{
					// NOTE(review): j <= i here, so (j - i) is never positive and this
					// condition always holds; the digit count is only enforced by the
					// final parsing step below. Left as is.
					if (((j - i) <= 2) // 2 digits + dot
						|| (((j - i) <= 3) && (pszCurLine[j] == L'1'))) // "100%"
					{
						nIdx = i = j;
					}
				}

				#if 0
				// Two digits before '%'?
				if (isDigit(pszCurLine[i-1]))
					i--;

				// Three digits are allowed only for '100%'
				if (pszCurLine[i-1] == L'1' && !isDigit(pszCurLine[i-2]))
				{
					nIdx = i - 1;
				}
				// final check
				else if (!isDigit(pszCurLine[i-1]))
				{
					nIdx = i;
				}
				#endif

				// Progress may be detected by mistake if it is typed at the prompt.
				// Assume that a line containing '>' is not a progress line.
				while (i>=0)
				{
					if (pszCurLine[i] == L'>')
					{
						nIdx = 0;
						break;
					}

					i--;
				}
			}
		}
	}

	// Change nProgress only if a percentage was found in the cursor line
	if (isDigit(pszCurLine[nIdx]))
	{
		// Three digits followed by '%', a dot (when allowed) or a "percent" word
		if (isDigit(pszCurLine[nIdx+1]) && isDigit(pszCurLine[nIdx+2])
			&& (pszCurLine[nIdx+3]==L'%' || (bAllowDot && isDot(pszCurLine[nIdx+3]))
				|| !wcsncmp(pszCurLine+nIdx+3,szPercentEng,nPercentEngLen)
				|| !wcsncmp(pszCurLine+nIdx+3,szPercentRus,nPercentRusLen)))
		{
			nProgress = 100*(pszCurLine[nIdx] - L'0') + 10*(pszCurLine[nIdx+1] - L'0') + (pszCurLine[nIdx+2] - L'0');
		}
		// Two digits
		else if (isDigit(pszCurLine[nIdx+1])
			&& (pszCurLine[nIdx+2]==L'%' || (bAllowDot && isDot(pszCurLine[nIdx+2]))
				|| !wcsncmp(pszCurLine+nIdx+2,szPercentEng,nPercentEngLen)
				|| !wcsncmp(pszCurLine+nIdx+2,szPercentRus,nPercentRusLen)))
		{
			nProgress = 10*(pszCurLine[nIdx] - L'0') + (pszCurLine[nIdx+1] - L'0');
		}
		// One digit
		else if (pszCurLine[nIdx+1]==L'%' || (bAllowDot && isDot(pszCurLine[nIdx+1]))
			|| !wcsncmp(pszCurLine+nIdx+1,szPercentEng,nPercentEngLen)
			|| !wcsncmp(pszCurLine+nIdx+1,szPercentRus,nPercentRusLen))
		{
			nProgress = (pszCurLine[nIdx] - L'0');
		}
	}

	if (nProgress != -1)
	{
		mp_RCon->setLastConsoleProgress(nProgress, true); // always update it
	}
	else
	{
		DWORD nDelta = GetTickCount() - mp_RCon->m_Progress.LastConProgrTick;
		if (nDelta < CONSOLEPROGRESSTIMEOUT) // If the timeout of the previous value has not expired yet
			nProgress = mp_RCon->m_Progress.ConsoleProgress; // take the previous value
		mp_RCon->setLastConsoleProgress(-1, false); // always update it
	}

	return nProgress;
}
Beispiel #7
0
            bool Number::CoreValidate(char character) {

                if (_lexeme.length() == 0)
                    _lexeme.reserve(20);

                switch ((States) _currentState) {

                    case States::Start: {
                        _lexeme.append(1, character);
                        if (isMinus(character)) _currentState = (int) States::Negation;
                        if (isZero(character)) _currentState = (int) States::RationalPercent;
                        if (isDigitOneToNine(character)) _currentState = (int) States::Number;
                        break;
                    }

                    case States::Negation: {
                        _lexeme.append(1, character);
                        if (isZero(character)) _currentState = (int) States::RationalPercent;
                        if (isDigitOneToNine(character)) _currentState = (int) States::Number;
                        break;
                    }

                    case States::RationalPercent: {
                        _lexeme.append(1, character);
                        if (isDot(character)) _currentState = (int) States::Decimal;
                        if (isExponentialSymbol(character)) _currentState = (int) States::Exponential_1;
                        break;
                    }

                    case States::Number: {
                        _lexeme.append(1, character);
                        if (isDigit(character)) _currentState = (int) States::Number;
                        if (isDot(character)) _currentState = (int) States::Decimal;
                        if (isExponentialSymbol(character)) _currentState = (int) States::Exponential_1;
                        break;
                    }

                    case States::Decimal: {
                        _lexeme.append(1, character);
                        if (isDigit(character)) _currentState = (int) States::Decimal;
                        if (isExponentialSymbol(character)) _currentState = (int) States::Exponential_1;
                        break;
                    }

                    case States::Exponential_1: {
                        _lexeme.append(1, character);
                        _currentState = (int) States::Exponential_2;
                        break;
                    }

                    case States::Exponential_2: {
                        _lexeme.append(1, character);
                        _currentState = (int) States::Exponential_2;
                        break;
                    }

                    case States::Closing:
                        return false;
                }

                return true;
            }
Beispiel #8
0
 // Tells whether a number token may start with the given character:
 // a minus, a digit or a dot.
 bool Number::BeginWithCharacter(char character) {
     if (isMinus(character))
         return true;
     if (isDigit(character))
         return true;
     return isDot(character);
 }
Beispiel #9
0
/**
 * Advances to the next token.
 *
 * The previous token is dropped first. Inside a block comment the rest of
 * the comment is consumed; otherwise whitespace is skipped and, unless the
 * input is exhausted, one character is read and dispatched to the matching
 * consume* routine or turned into a single-character token. If nothing
 * matches, the current token is left empty.
 */
void Tokenizer::shift()
{
    mpCurrent.reset();

    if (!mBlockComment)
    {
        skipWhiteSpaces();
        if (mpInput->eof())
            return;
    }
    else
    {
        skipEOL();
    }

    // Start a fresh token at the current position.
    mpCurrent.reset(new Token);
    mpCurrent->setLine(line());
    mpCurrent->setBeginColumn(column());

    if (mBlockComment)
    {
        consumeCStyleBlockComment();
        return;
    }

    auto ch = mpInput->get();

    if (isLetter(ch) || isUnderscore(ch))
    {
        consumeIdentifier(ch);
        return;
    }

    if (isBiwiseOperatorSymbol(ch))
    {
        mpCurrent->setType(Token::TYPE_BITWISE_OPERATOR);
        absorbed(ch);
        mpCurrent->addChar(ch);
        mpCurrent->setEndColumn(column());
        return;
    }

    // A dot is first offered to consumeDot, then to the number reader.
    if (isDot(ch) && consumeDot(ch))
        return;

    if ((isDecimalDigit(ch) || isDot(ch) || isSign(ch)) && consumeNumber(ch))
        return;

    if (isQuotationMark(ch))
    {
        // When the string could not be consumed, move on to the next token.
        if (!consumeString(ch))
            shift();
        return;
    }

    if (isCStyleInitialCommentChar(ch))
    {
        consumeComment(ch);
        return;
    }

    if (isArrowSymbol(ch) && consumeArrow(ch))
        return;

    if (isBracket(ch))
    {
        mpCurrent->setType(Token::TYPE_BRACKET);
        absorbed(ch);
        mpCurrent->addChar(ch);
        mpCurrent->setEndColumn(column());
        return;
    }

    if (isAngleBracket(ch))
    {
        mpCurrent->setType(Token::TYPE_ANGLE_BRACKET);
        absorbed(ch);
        mpCurrent->addChar(ch);
        mpCurrent->setEndColumn(column());
        return;
    }

    if (isDelimiter(ch))
    {
        mpCurrent->setType(Token::TYPE_DELIMITER);
        absorbed(ch);
        mpCurrent->addChar(ch);
        mpCurrent->setEndColumn(column());
        return;
    }

    // Operators: try the "operator followed by '='" form before the plain one.
    if (isOperator(ch) && consumeEqualOperator(ch))
        return;

    if (isOperator(ch))
    {
        mpCurrent->setType(Token::TYPE_OPERATOR);
        absorbed(ch);
        mpCurrent->addChar(ch);
        mpCurrent->setEndColumn(column());
        return;
    }

    if (isAsterisk(ch))
    {
        mpCurrent->setType(Token::TYPE_ASTERISK);
        absorbed(ch);
        mpCurrent->addChar(ch);
        mpCurrent->setEndColumn(column());
        return;
    }

    // Nothing recognized: leave no current token.
    mpCurrent.reset();
}
Beispiel #10
0
/**
 * Consumes a numeric literal whose first character `ch` has already been
 * read from the input, and stores its characters in the current token.
 *
 * Behavior visible in this function:
 *  - A leading sign is accepted only if the next non-whitespace character
 *    is a decimal digit; otherwise the look-ahead is pushed back and false
 *    is returned (no message is reported and the token is not reset).
 *  - A leading zero (isZero) followed by a hex indicator starts a
 *    hexadecimal literal; a leading zero followed by anything else is read
 *    as a sequence of octal digits.
 *  - Any other start is read as a decimal literal that may contain one dot
 *    and one exponent marker with an optional sign; either one switches the
 *    token type to TYPE_REAL_LITERAL.
 *
 * On a malformed literal an error message is added to the message
 * collector, the current token is reset and false is returned.
 *
 * @param ch first character of the candidate literal.
 * @return true if a literal token was completed, false otherwise.
 */
bool Tokenizer::consumeNumber(std::streambuf::int_type ch)
{
    // Number of characters read ahead after a leading sign.
    int count = 0;
    if (isSign(ch))
    {
        // Look ahead past whitespace to see whether a digit follows the sign.
        bool number = false;
        while (!eof())
        {
            ++count;
            auto nch = mpInput->get();
            if (isWhitespace(nch))
                continue;

            number = isDecimalDigit(nch);
            break;
        }
        // Clear the stream state before putting the look-ahead back.
        if (count)
            mpInput->clear();
        for (int i = 0; i < count; ++i)
            mpInput->unget();

        // The sign does not introduce a number.
        if (!number)
            return false;
    }

    absorbed(ch);
    mpCurrent->addChar(ch);

    // Re-read the look-ahead characters (whitespace included) into the token.
    for (int i = 0; i < count; ++i)
    {
        auto nch = mpInput->get();
        absorbed(nch);
        mpCurrent->addChar(nch);
    }

    bool bDot = false;
    if (isDot(ch))
    {
        // A literal starting with a dot is a real literal.
        bDot = true;
        mpCurrent->setType(Token::TYPE_REAL_LITERAL);
    }
    else
    {
        mpCurrent->setType(Token::TYPE_INTEGER_LITERAL);
        // A leading zero selects the hexadecimal or octal path.
        if (!eof() && isZero(ch))
        {
            ch = mpInput->get();
            if (isHexIndicator(ch))
            {
                // A hex indicator with nothing after it is an error.
                if (eof())
                {
                    mpMessageCollector->addMessage(
                            Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral,
                            mpSourceId, mpCurrent->line(), mpCurrent->beginColumn());
                    mpCurrent.reset();
                    return false;
                }

                mpCurrent->addChar(ch);
                absorbed(ch);

                // Read hexadecimal digits until a character that ends the literal.
                while (!eof())
                {
                    ch = mpInput->get();
                    // Letters outside the hex range, underscores and dots
                    // make the literal invalid.
                    if (isNonHexicalLetter(ch) || isUnderscore(ch) || isDot(ch))
                    {
                        mpMessageCollector->addMessage(Message::SEVERITY_ERROR,
                                Message::t_invalidIntegerLiteral,
                                mpSourceId, mpCurrent->line(), mpCurrent->beginColumn());
                        mpCurrent.reset();
                        return false;
                    }

                    // Any other non-hex character ends the literal; put it back.
                    if (!isHexicalDigit(ch))
                    {
                        mpInput->clear();
                        mpInput->unget();
                        break;
                    }

                    absorbed(ch);
                    mpCurrent->addChar(ch);
                }

                mpCurrent->setEndColumn(column());
                return true;
            }

            // Not hexadecimal: `ch` holds the character after the zero and
            // the rest is read as octal digits.
            for (;;)
            {
                // Letters, dots, non-octal decimal digits and underscores
                // make the literal invalid.
                if (   isLetter(ch)
                    || isDot(ch)
                    || isNonOctalDecimalDigit(ch)
                    || isUnderscore(ch))
                {
                    mpMessageCollector->addMessage(
                            Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral,
                            mpSourceId, mpCurrent->line(), mpCurrent->beginColumn());
                    mpCurrent.reset();
                    return false;
                }

                // Any other non-octal character ends the literal; put it back.
                if (!isOctalDigit(ch))
                {
                    mpInput->clear();
                    mpInput->unget();
                    break;
                }

                absorbed(ch);
                mpCurrent->addChar(ch);

                if (eof())
                {
                    break;
                }
                ch = mpInput->get();
            }

            mpCurrent->setEndColumn(column());
            return true;
        }
    }

    // Decimal path: digits with at most one dot and one exponent.
    bool bExponent = false;
    while (!eof())
    {
        ch = mpInput->get();

        if (isExponent(ch))
        {
            // A second exponent marker is an error.
            if (bExponent)
            {
                mpMessageCollector->addMessage(
                        Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral,
                        mpSourceId, mpCurrent->line(), mpCurrent->beginColumn());
                mpCurrent.reset();
                return false;
            }
            bExponent = true;
            mpCurrent->setType(Token::TYPE_REAL_LITERAL);
            absorbed(ch);
            mpCurrent->addChar(ch);

            // Optional sign right after the exponent marker.
            // NOTE(review): this read is not preceded by an eof() check.
            ch = mpInput->get();
            if (isSign(ch))
            {
                mpCurrent->setType(Token::TYPE_REAL_LITERAL);
                absorbed(ch);
                mpCurrent->addChar(ch);
            }
            else
            {
                // Not a sign: put the character back for the next iteration.
                mpInput->clear();
                mpInput->unget();
            }
            continue;
        }

        // A letter or underscore directly inside the number is an error.
        if (isLetter(ch) || isUnderscore(ch))
        {
            mpMessageCollector->addMessage(
                    Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral,
                    mpSourceId, mpCurrent->line(), mpCurrent->beginColumn());
            mpCurrent.reset();
            return false;
        }

        if (isDot(ch))
        {
            // A dot after an exponent, or a second dot, is an error.
            if (bExponent || bDot)
            {
                mpMessageCollector->addMessage(
                        Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral,
                        mpSourceId, mpCurrent->line(), mpCurrent->beginColumn());
                mpCurrent.reset();
                return false;
            }
            bDot = true;
            mpCurrent->setType(Token::TYPE_REAL_LITERAL);
            absorbed(ch);
            mpCurrent->addChar(ch);
            continue;
        }

        // Any other non-digit ends the literal; put it back.
        if (!isDecimalDigit(ch))
        {
            mpInput->clear();
            mpInput->unget();
            break;
        }

        absorbed(ch);
        mpCurrent->addChar(ch);
    }

    mpCurrent->setEndColumn(column());
    return true;
}
Beispiel #11
0
/* Returns true if c is a letter, a digit or a dot. */
bool isAlNumDot(char c){
	if (isAlpha(c))
		return true;
	if (isNum(c))
		return true;
	return isDot(c);
}