bool Tokenizer::getNumber(const QString &str, int i, QString &numb) { // Examples of valid numbers are: // 1234 // +1234 // -1234 // -1234.99 // .99 // +.99 // -1234. numb.clear(); bool first = true; bool dotFound = false; for (; i < str.size(); ++i) { if (first && isSign(str, i)) { numb.append(str[i]); } else if (isDigit(str, i)) { numb.append(str[i]); } else if (!dotFound && isDot(str, i)) { numb.append(str[i]); dotFound = true; } else { break; } first = false; } return numb.size() > 0; }
// Skips leading whitespace, then copies (put) and advances (next) over every
// consecutive character that is a sign, a dot or a digit. Stops at end of
// input, at whitespace, or at the first character of any other kind.
void parseNumber()
{
    skipWhiteSpace();

    for (;;) {
        if (eof() || isWhiteSpace())
            break;
        if (!isSign() && !isDot() && !isDigit())
            break;

        put();
        next();
    }
}
bool Number::IsValid(char character) { switch ((States) _currentState) { case States::Start: return isMinus(character) || isDigit(character); case States::Negation: return isDigit(character); case States::RationalPercent: return isDot(character) || isExponentialSymbol(character); case States::Number: return isDigit(character) || isDot(character) || isExponentialSymbol(character); case States::Decimal: return isDigit(character) || isExponentialSymbol(character); case States::Exponential_1: return isDigit(character) || isPlus(character) || isMinus(character); case States::Exponential_2: return isDigit(character); case States::Closing: return false; } }
/**
 * Scans the directory "name" for entries matching "find".
 *
 * Each entry is tested with compareName (when t == 0) or with
 * checkDir / checkReg / checkLink; the first test that succeeds marks the
 * search as successful. Per the original author's note, a matching path is
 * added to a list that is printed at the end (presumably inside those
 * helpers; not visible here).
 *
 * Every entry accepted by checkDirAndRights that is not rejected by isDot
 * has its path "name/entry" inserted into `list` so it can be searched
 * later. The string is heap-allocated and handed over to insert().
 *
 * @param name  directory to scan
 * @param find  file name being looked for
 * @param t     file-type selector; 0 means match on name only
 * @param list  list that receives the sub-directories still to be searched
 * @return true when at least one entry matched
 *
 * NOTE(review): lstat() is given the bare entry name, which only resolves
 * correctly if the current working directory is `name` - confirm that
 * openDir (or the caller) takes care of that.
 */
bool searchForFile(char* name, char *find, int t, struct Node* list) {
    DIR *dir = NULL;            /* stays NULL if openDir never sets it */
    struct dirent *ent = NULL;
    struct stat f_info;
    bool foundFile = false;

    if (openDir(&dir, name, &ent)) {
        do {
            if (lstat(ent->d_name, &f_info) < 0) {
                fprintf(stderr, "lstat error: ");
                perror(ent->d_name);
            } else {
                /* Same tests in the same order as before; || short-circuits
                 * exactly like the former if / else-if chain. */
                if ((t == 0 && compareName(ent->d_name, find))
                        || checkDir(t, f_info, ent->d_name, find)
                        || checkReg(t, f_info, ent->d_name, find)
                        || checkLink(t, f_info, ent->d_name, find)) {
                    foundFile = true;
                }

                if (checkDirAndRights(f_info) && !isDot(ent->d_name)) {
                    /* +2: one byte for '/', one for the terminating NUL. */
                    size_t pathLen = strlen(name) + strlen(ent->d_name) + 2;
                    char *str = calloc(pathLen, sizeof(char));
                    if (str == NULL) {
                        /* Out of memory: report it and skip this directory
                         * instead of writing through a NULL pointer. */
                        perror("calloc");
                    } else {
                        snprintf(str, pathLen, "%s/%s", name, ent->d_name);
                        insert(list, str);
                    }
                }
            }
        } while ((ent = readdir(dir)));
    }

    /* Only close a stream that was actually opened. */
    if (dir != NULL) {
        closedir(dir);
    }
    return foundFile;
}
int main(void){ FILE *f, *g, *h, *out; char c; char keys[NUM][4]; char word[4]; int keyarr[NUM]; int len = 0, n, spam, a, b, dot, i; int msglen, wordnum, numnum, weirdnum, spel; double r; f = fopen("data.txt", "r"); g = fopen("data2.txt", "r"); h = fopen("keywords.txt", "r"); out = fopen("out.txt", "w"); // ucitaj rjecnik keyworda while (fscanf(h, "%c%c%c%c %d %d %lg ", keys[len], keys[len]+1, keys[len]+2, keys[len]+3, &a, &b, &r) == 7) len++; // napisi header u out.txt fprintf(out, "spam broj_rijeci duljina nepismenost udio_brojeva udio_nealfanumerickih "); for (i = 0; i < len; i++){ fprintf(out, "%c%c%c%c ", keys[i][0], keys[i][1], keys[i][2], keys[i][3]); } fprintf(out, "\n"); for (n = 0; n < LINES; n++){ // prvo iz neobradenog fajla izvadi broj charactera itd.. fscanf(f, "%c", &c); if (c == 'h') { // zapamti jeli poruka ham ... spam = 0; } else if (c == 's'){ // ... ili spam spam = 1; } else printf("nesto nevalja: linija %d\n", n); // ako na pocetku linije ne pise ni spam ni ham, nesto je krivo while (isAlpha(c)) fscanf(f, "%c", &c); // ucitaj cijelu rijec (spam/ham) do kraja dot = 0; msglen = 0; numnum = 0; weirdnum = 0; spel = 0; wordnum = 0; while (1){ fscanf(f, "%c", &c); if (c == '\n') break; msglen++; // broj znakova if (isNum(c)) numnum++; // broji udio brojeva if (isWeird(c)) weirdnum++; // broji udio nealfanumerickih if ((isNum(c) || isAlpha(c)) && (dot == 1)) spel = 1; // nepismenost if (isDot(c)) dot = 1; else dot = 0; } // zatim iz obradenog izvadi kljucne rijeci fscanf(g, "%d", &a); // ucitaj jeli spam ili ham if (a != spam) printf("nisu konzistentni na liniji: %d\n", n); fscanf(g, "%c", &c); // ucitaj razmak for (i = 0; i < len; i++){ keyarr[i] = 0; // ocisti niz koji cuva postojanje keyworda } while (c != '\n'){ for (i = 0; i < 4; i++){ fscanf(g, "%c", word+i); } wordnum++; // broj rijeci u poruci b = exsist(word, keys, len); // kljucna rijec if (b >= 0) keyarr[b] = 1; fscanf(g, "%c", &c); // ucitaj razmak } // zapisi feature vektor u out.txt 
fprintf(out, "%d %d %d %d %lg %lg ", spam, wordnum, msglen, spel, (double)numnum/msglen, (double)weirdnum/msglen); for (i = 0; i < len; i++){ fprintf(out, "%d ", keyarr[i]); } fprintf(out, "\n"); } return 0; }
// Looks for a percentage on console line nCursorLine and reports it as the
// console progress.
//
// Returns the detected value (built from up to three digits), or, when
// nothing is detected, the previously stored progress if it is younger than
// CONSOLEPROGRESSTIMEOUT, otherwise -1. Also returns -1 when isValid()
// rejects the requested line. The result is always forwarded to
// mp_RCon->setLastConsoleProgress().
//
// Line formats this was written for (from the original author's notes):
//   Update plugin: "Downloading Far 99%"
//   NeroCMD:       "012% ########......................"
//   ChkDsk:        "Completed: 25%"
//   Rar:           "... Vista x86\Vista x86.7z 6%"
//   aria2c:        "[#1 SIZE:0B/9.1MiB(0%) CN:1 SPD:1.2KiBs ETA:2h1m11s]"
short CRConData::CheckProgressInConsole(UINT nCursorLine)
{
	if (!isValid(true, nWidth*(nCursorLine+1)))
		return -1;

	const wchar_t* pszCurLine = pConChar + (nWidth * nCursorLine);

	int nIdx = 0;            // index of the first digit of the percentage, 0 if not located yet
	bool bAllowDot = false;  // true when the integer part may be followed by '.'
	short nProgress = -1;

	const wchar_t *szPercentEng = L" percent";
	const wchar_t *szComplEng = L"Completed:";
	static wchar_t szPercentRus[16] = {}, szComplRus[16] = {};
	static int nPercentEngLen = lstrlen(szPercentEng), nComplEngLen = lstrlen(szComplEng);
	static int nPercentRusLen, nComplRusLen;

	// One-time initialisation of the localized strings.
	if (szPercentRus[0] == 0)
	{
		szPercentRus[0] = L' ';
		TODO("Хорошо бы и другие национальные названия обрабатывать, брать из настройки");
		// Copy after the leading space so the suffix mirrors L" percent";
		// copying to the start of the buffer overwrote the space set above.
		lstrcpy(szPercentRus+1,L"процент");
		lstrcpy(szComplRus,L"Завершено:");
		nPercentRusLen = lstrlen(szPercentRus);
		// Measure the localized prefix itself (was measured from szComplEng,
		// which only worked because both strings have the same length).
		nComplRusLen = lstrlen(szComplRus);
	}

	// First check whether the digits are at the start of the line (leading spaces).
	if (pszCurLine[nIdx] == L' ' && isDigit(pszCurLine[nIdx+1]))
		nIdx++; // one leading space before the digit
	else if (pszCurLine[nIdx] == L' ' && pszCurLine[nIdx+1] == L' ' && isDigit(pszCurLine[nIdx+2]))
		nIdx += 2; // two leading spaces before the digit
	else if (!isDigit(pszCurLine[nIdx]))
	{
		// The line does NOT start with a digit. Does it start with a known prefix (ChkDsk)?
		if (!wcsncmp(pszCurLine, szComplRus, nComplRusLen))
		{
			nIdx += nComplRusLen;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			bAllowDot = true;
		}
		else if (!wcsncmp(pszCurLine, szComplEng, nComplEngLen))
		{
			nIdx += nComplEngLen;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			if (pszCurLine[nIdx] == L' ') nIdx++;

			bAllowDot = true;
		}
		else if (!wcsncmp(pszCurLine, L"[#", 2))
		{
			// aria2c: walk back from "%) " to the character after '('.
			const wchar_t* p = wcsstr(pszCurLine, L"%) ");

			if (p) // do not do pointer comparisons/arithmetic on a failed search
			{
				while ((p > pszCurLine) && (p[-1] != L'('))
					p--;

				if (p > pszCurLine)
					nIdx = static_cast<int>(p - pszCurLine);
			}
		}

		// No known prefix found; maybe the percentage is at the end of the line?
		if (!nIdx)
		{
			// Creating archive T:\From_Work\VMWare\VMWare.part006.rar
			// ...       Vista x86\Vista x86.7z         6%
			int i = nWidth - 1;

			// Strip trailing spaces
			while ((i > 3) && (pszCurLine[i] == L' '))
				i--;

			// Now, if we stopped at '%' and a digit precedes it
			if (i >= 3 && pszCurLine[i] == L'%' && isDigit(pszCurLine[i-1]))
			{
				i--; // i: last digit before '%'

				int j = i, k = -1;

				// j: first digit of the run that ends at i
				while (j > 0 && isDigit(pszCurLine[j-1]))
					j--;

				// Could be something like "Progress 25.15%"
				if (((i - j) <= 2) && (j >= 2) && isDot(pszCurLine[j-1]))
				{
					// k: first digit of the integer part before the dot
					k = j - 1;

					while (k > 0 && isDigit(pszCurLine[k-1]))
						k--;
				}

				if (k >= 0)
				{
					if (((j - k) <= 3) // 2 digits + dot
						|| (((j - k) <= 4) && (pszCurLine[k] == L'1'))) // "100.0%"
					{
						nIdx = i = k;
						bAllowDot = true;
					}
				}
				else
				{
					// NOTE(review): j <= i here, so (j - i) is never positive
					// and this test always passes; (i - j) may have been
					// intended. Left unchanged - the digit-count checks at
					// the end of the function still restrict what is accepted.
					if (((j - i) <= 2) // 2 digits + dot
						|| (((j - i) <= 3) && (pszCurLine[j] == L'1'))) // "100%"
					{
						nIdx = i = j;
					}
				}

				#if 0
				// Two digits before '%'?
				if (isDigit(pszCurLine[i-1]))
					i--;

				// Three digits are allowed only for '100%'
				if (pszCurLine[i-1] == L'1' && !isDigit(pszCurLine[i-2]))
				{
					nIdx = i - 1;
				}
				// final check
				else if (!isDigit(pszCurLine[i-1]))
				{
					nIdx = i;
				}
				#endif

				// Progress may be detected by mistake when typed at a prompt.
				// Assume a line containing '>' (at or before i) is not progress.
				while (i>=0)
				{
					if (pszCurLine[i] == L'>')
					{
						nIdx = 0;
						break;
					}

					i--;
				}
			}
		}
	}

	// Change nProgress only if a percentage was found on the cursor line
	if (isDigit(pszCurLine[nIdx]))
	{
		if (isDigit(pszCurLine[nIdx+1]) && isDigit(pszCurLine[nIdx+2])
			&& (pszCurLine[nIdx+3]==L'%' || (bAllowDot && isDot(pszCurLine[nIdx+3]))
				|| !wcsncmp(pszCurLine+nIdx+3,szPercentEng,nPercentEngLen)
				|| !wcsncmp(pszCurLine+nIdx+3,szPercentRus,nPercentRusLen)))
		{
			// three digits
			nProgress = 100*(pszCurLine[nIdx] - L'0') + 10*(pszCurLine[nIdx+1] - L'0') + (pszCurLine[nIdx+2] - L'0');
		}
		else if (isDigit(pszCurLine[nIdx+1])
			&& (pszCurLine[nIdx+2]==L'%' || (bAllowDot && isDot(pszCurLine[nIdx+2]))
				|| !wcsncmp(pszCurLine+nIdx+2,szPercentEng,nPercentEngLen)
				|| !wcsncmp(pszCurLine+nIdx+2,szPercentRus,nPercentRusLen)))
		{
			// two digits
			nProgress = 10*(pszCurLine[nIdx] - L'0') + (pszCurLine[nIdx+1] - L'0');
		}
		else if (pszCurLine[nIdx+1]==L'%' || (bAllowDot && isDot(pszCurLine[nIdx+1]))
			|| !wcsncmp(pszCurLine+nIdx+1,szPercentEng,nPercentEngLen)
			|| !wcsncmp(pszCurLine+nIdx+1,szPercentRus,nPercentRusLen))
		{
			// one digit
			nProgress = (pszCurLine[nIdx] - L'0');
		}
	}

	if (nProgress != -1)
	{
		mp_RCon->setLastConsoleProgress(nProgress, true); // always updated
	}
	else
	{
		DWORD nDelta = GetTickCount() - mp_RCon->m_Progress.LastConProgrTick;

		if (nDelta < CONSOLEPROGRESSTIMEOUT) // the previous value has not timed out yet
			nProgress = mp_RCon->m_Progress.ConsoleProgress; // reuse the previous value

		mp_RCon->setLastConsoleProgress(-1, false); // always updated
	}

	return nProgress;
}
bool Number::CoreValidate(char character) { if (_lexeme.length() == 0) _lexeme.reserve(20); switch ((States) _currentState) { case States::Start: { _lexeme.append(1, character); if (isMinus(character)) _currentState = (int) States::Negation; if (isZero(character)) _currentState = (int) States::RationalPercent; if (isDigitOneToNine(character)) _currentState = (int) States::Number; break; } case States::Negation: { _lexeme.append(1, character); if (isZero(character)) _currentState = (int) States::RationalPercent; if (isDigitOneToNine(character)) _currentState = (int) States::Number; break; } case States::RationalPercent: { _lexeme.append(1, character); if (isDot(character)) _currentState = (int) States::Decimal; if (isExponentialSymbol(character)) _currentState = (int) States::Exponential_1; break; } case States::Number: { _lexeme.append(1, character); if (isDigit(character)) _currentState = (int) States::Number; if (isDot(character)) _currentState = (int) States::Decimal; if (isExponentialSymbol(character)) _currentState = (int) States::Exponential_1; break; } case States::Decimal: { _lexeme.append(1, character); if (isDigit(character)) _currentState = (int) States::Decimal; if (isExponentialSymbol(character)) _currentState = (int) States::Exponential_1; break; } case States::Exponential_1: { _lexeme.append(1, character); _currentState = (int) States::Exponential_2; break; } case States::Exponential_2: { _lexeme.append(1, character); _currentState = (int) States::Exponential_2; break; } case States::Closing: return false; } return true; }
/// Tells whether `character` may open a number token: a minus sign, a digit
/// or a dot.
///
/// NOTE(review): the Start state in IsValid() accepts only a minus sign or a
/// digit, so a leading dot is claimed here but rejected there - confirm this
/// is intended.
bool Number::BeginWithCharacter(char character)
{
    if (isMinus(character))
        return true;
    if (isDigit(character))
        return true;
    return isDot(character);
}
void Tokenizer::shift() { mpCurrent.reset(); if (mBlockComment) { skipEOL(); } else { skipWhiteSpaces(); if (mpInput->eof()) return; } mpCurrent.reset(new Token); mpCurrent->setLine(line()); mpCurrent->setBeginColumn(column()); if (mBlockComment) { consumeCStyleBlockComment(); return; } auto ch = mpInput->get(); if (isLetter(ch) || isUnderscore(ch)) { consumeIdentifier(ch); } else if (isBiwiseOperatorSymbol(ch)) { mpCurrent->setType(Token::TYPE_BITWISE_OPERATOR); absorbed(ch); mpCurrent->addChar(ch); mpCurrent->setEndColumn(column()); } else if (isDot(ch) && consumeDot(ch)) { // nothing } else if ((isDecimalDigit(ch) || isDot(ch) || isSign(ch)) && consumeNumber(ch)) { // nothing } else if (isQuotationMark(ch)) { if (!consumeString(ch)) shift(); } else if (isCStyleInitialCommentChar(ch)) { consumeComment(ch); } else if (isArrowSymbol(ch) && consumeArrow(ch)) { // nothing } else if (isBracket(ch)) { mpCurrent->setType(Token::TYPE_BRACKET); absorbed(ch); mpCurrent->addChar(ch); mpCurrent->setEndColumn(column()); } else if (isAngleBracket(ch)) { mpCurrent->setType(Token::TYPE_ANGLE_BRACKET); absorbed(ch); mpCurrent->addChar(ch); mpCurrent->setEndColumn(column()); } else if (isDelimiter(ch)) { mpCurrent->setType(Token::TYPE_DELIMITER); absorbed(ch); mpCurrent->addChar(ch); mpCurrent->setEndColumn(column()); } else if (isOperator(ch) && consumeEqualOperator(ch)) { // nothing } else if (isOperator(ch)) { mpCurrent->setType(Token::TYPE_OPERATOR); absorbed(ch); mpCurrent->addChar(ch); mpCurrent->setEndColumn(column()); } else if (isAsterisk(ch)) { mpCurrent->setType(Token::TYPE_ASTERISK); absorbed(ch); mpCurrent->addChar(ch); mpCurrent->setEndColumn(column()); } else { mpCurrent.reset(); } }
bool Tokenizer::consumeNumber(std::streambuf::int_type ch) { int count = 0; if (isSign(ch)) { bool number = false; while (!eof()) { ++count; auto nch = mpInput->get(); if (isWhitespace(nch)) continue; number = isDecimalDigit(nch); break; } if (count) mpInput->clear(); for (int i = 0; i < count; ++i) mpInput->unget(); if (!number) return false; } absorbed(ch); mpCurrent->addChar(ch); for (int i = 0; i < count; ++i) { auto nch = mpInput->get(); absorbed(nch); mpCurrent->addChar(nch); } bool bDot = false; if (isDot(ch)) { bDot = true; mpCurrent->setType(Token::TYPE_REAL_LITERAL); } else { mpCurrent->setType(Token::TYPE_INTEGER_LITERAL); if (!eof() && isZero(ch)) { ch = mpInput->get(); if (isHexIndicator(ch)) { if (eof()) { mpMessageCollector->addMessage( Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral, mpSourceId, mpCurrent->line(), mpCurrent->beginColumn()); mpCurrent.reset(); return false; } mpCurrent->addChar(ch); absorbed(ch); while (!eof()) { ch = mpInput->get(); if (isNonHexicalLetter(ch) || isUnderscore(ch) || isDot(ch)) { mpMessageCollector->addMessage(Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral, mpSourceId, mpCurrent->line(), mpCurrent->beginColumn()); mpCurrent.reset(); return false; } if (!isHexicalDigit(ch)) { mpInput->clear(); mpInput->unget(); break; } absorbed(ch); mpCurrent->addChar(ch); } mpCurrent->setEndColumn(column()); return true; } for (;;) { if ( isLetter(ch) || isDot(ch) || isNonOctalDecimalDigit(ch) || isUnderscore(ch)) { mpMessageCollector->addMessage( Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral, mpSourceId, mpCurrent->line(), mpCurrent->beginColumn()); mpCurrent.reset(); return false; } if (!isOctalDigit(ch)) { mpInput->clear(); mpInput->unget(); break; } absorbed(ch); mpCurrent->addChar(ch); if (eof()) { break; } ch = mpInput->get(); } mpCurrent->setEndColumn(column()); return true; } } bool bExponent = false; while (!eof()) { ch = mpInput->get(); if (isExponent(ch)) { if (bExponent) { 
mpMessageCollector->addMessage( Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral, mpSourceId, mpCurrent->line(), mpCurrent->beginColumn()); mpCurrent.reset(); return false; } bExponent = true; mpCurrent->setType(Token::TYPE_REAL_LITERAL); absorbed(ch); mpCurrent->addChar(ch); ch = mpInput->get(); if (isSign(ch)) { mpCurrent->setType(Token::TYPE_REAL_LITERAL); absorbed(ch); mpCurrent->addChar(ch); } else { mpInput->clear(); mpInput->unget(); } continue; } if (isLetter(ch) || isUnderscore(ch)) { mpMessageCollector->addMessage( Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral, mpSourceId, mpCurrent->line(), mpCurrent->beginColumn()); mpCurrent.reset(); return false; } if (isDot(ch)) { if (bExponent || bDot) { mpMessageCollector->addMessage( Message::SEVERITY_ERROR, Message::t_invalidIntegerLiteral, mpSourceId, mpCurrent->line(), mpCurrent->beginColumn()); mpCurrent.reset(); return false; } bDot = true; mpCurrent->setType(Token::TYPE_REAL_LITERAL); absorbed(ch); mpCurrent->addChar(ch); continue; } if (!isDecimalDigit(ch)) { mpInput->clear(); mpInput->unget(); break; } absorbed(ch); mpCurrent->addChar(ch); } mpCurrent->setEndColumn(column()); return true; }
/* True when c is accepted by isAlpha, isNum or isDot (tested in that order). */
bool isAlNumDot(char c)
{
    if (isAlpha(c))
        return true;
    if (isNum(c))
        return true;
    return isDot(c);
}