/**
 * Reads the file named by input_str line by line (decoded as UTF-8), splits
 * each non-empty line on `delimiters`, and feeds the words, starting at the
 * index reported by getSanadBeginning(), through isValidTransition().
 *
 * Writes "start of hadith" to `out` for every line that has a starting word,
 * and "End of Sanad" when the fifth consecutive word of type OTHER is seen.
 *
 * @param input_str path of the file to scan.
 * @return 1 when the file cannot be opened; 0 otherwise, including when a
 *         line without a starting word ends the scan early.
 */
int hadith(QString input_str) {
    // file parsing:
    // The file is only read. ReadOnly also keeps a missing file from being
    // created, so the "File not found" branch can actually be reached.
    QFile input(input_str);
    if (!input.open(QIODevice::ReadOnly)) {
        out << "File not found\n";
        return 1;
    }
    QTextStream file(&input);
    file.setCodec("utf-8");

    // Number of consecutive OTHER words after which "End of Sanad" is reported.
    const int countOthersMax = 5;

    while (!file.atEnd()) {
        QString line = file.readLine(0);
        if (line.isNull())
            break;
        if (line.isEmpty()) // ignore empty lines if they exist
            continue;

        QStringList wordList = line.split((QRegExp(delimiters)), QString::KeepEmptyParts); // space or enter
        int sanadBeginning = getSanadBeginning(wordList);
        // NOTE(review): a line with no IKHBAR word ends the whole scan, not
        // just this line - confirm that this is intended.
        if (sanadBeginning < 0)
            return 0;

        // i below is an absolute index into wordList, so the upper bound is
        // the full list size; subtracting sanadBeginning from it would skip
        // the last sanadBeginning words of the line.
        const int listSize = wordList.size();
        int countOthers = 0;
        int currentState = 1;
        int nextState = 1; // presumably updated by isValidTransition() - confirm against its declaration
        out << "start of hadith";
        for (int i = sanadBeginning; i < listSize; i++) {
            wordType currentType = getWordType(wordList[i]);
            if (currentType != OTHER) {
                countOthers = 0;
                if (isValidTransition(currentState, currentType, nextState)) {
                    //out << wordList[i]<< " ";
                    currentState = nextState;
                }
            } else {
                countOthers++;
                if (countOthers == countOthersMax)
                    out << "End of Sanad";
            }
        }
    }
    return 0;
}
/**
 * Finds the first word in wordList whose getWordType() result is IKHBAR.
 *
 * @param wordList words to inspect, in order.
 * @return zero-based index of the first such word, or -1 when there is none.
 */
int getSanadBeginning(QStringList wordList) {
    const int total = wordList.size();
    int position = 0;
    while (position < total) {
        if (getWordType(wordList[position]) == IKHBAR)
            return position;
        ++position;
    }
    return -1;
}
bool RegularExpression::matchAnchor(Context* const context, const XMLInt32 ch, const int offset) { switch ((XMLCh) ch) { case chLatin_A: if (offset != context->fStart) return false; break; case chLatin_B: if (context->fLength == 0) break; { int after = getWordType(context->fString, context->fStart, context->fLimit, offset); if (after == WT_IGNORE || after == getPreviousWordType(context->fString, context->fStart, context->fLimit, offset)) break; } return false; case chLatin_b: if (context->fLength == 0) return false; { int after = getWordType(context->fString, context->fStart, context->fLimit, offset); if (after == WT_IGNORE || after == getPreviousWordType(context->fString, context->fStart , context->fLimit, offset)) return false; } break; case chLatin_Z: case chDollarSign: if ( (XMLCh) ch == chDollarSign && isSet(fOptions, MULTIPLE_LINE)) { if (!(offset == context->fLimit || (offset < context->fLimit && RegxUtil::isEOLChar(context->fString[offset])))) return false; } else { if (!(offset == context->fLimit || (offset+1 == context->fLimit && RegxUtil::isEOLChar(context->fString[offset])) || (offset+2 == context->fLimit && context->fString[offset] == chCR && context->fString[offset+1] == chLF))) return false; } break; case chLatin_z: if (offset != context->fLimit) return false; break; case chAt: case chCaret: if ( (XMLCh) ch == chCaret && !isSet(fOptions, MULTIPLE_LINE)) { if (offset != context->fStart) return false; } else { if (!(offset == context->fStart || (offset > context->fStart && RegxUtil::isEOLChar(context->fString[offset-1])))) return false; } break; case chOpenAngle: if (context->fLength == 0 || offset == context->fLimit) return false; if (getWordType(context->fString, context->fStart, context->fLimit, offset) != WT_LETTER || getPreviousWordType(context->fString, context->fStart, context->fLimit, offset) != WT_OTHER) return false; break; case chCloseAngle: if (context->fLength == 0 || offset == context->fStart) return false; if 
(getWordType(context->fString, context->fStart, context->fLimit, offset) != WT_OTHER || getPreviousWordType(context->fString, context->fStart, context->fLimit, offset) != WT_LETTER) return false; break; } return true; }
//-------------------------------------------------------------- ofxEditorSyntax::WordType ofxEditorSyntax::getWordType(const std::string &word) { getWordType(string_to_wstring(word)); }
static int getWords(const DocLines_t& doc, chunk_info& chunk, bool IgnoreSpaces) { unsigned int wordIndex = 0; for (unsigned int line = 0; line < chunk.lineCount; ++line) { wordType type = SPACECHAR; int len = 0; chunk.linePos[line] = wordIndex; int i = 0; unsigned int hash = 0; for (i = 0; doc[line + chunk.lineStart][i] != 0; ++i) { char l = doc[line + chunk.lineStart][i]; wordType newType = getWordType(l); if (newType == type) { ++len; hash = HASH(hash, l); } else { if (len > 0) { if (!IgnoreSpaces || type != SPACECHAR) { Word word; word.length = len; word.line = line; word.pos = i - len; word.type = type; word.hash = hash; chunk.words.push_back(word); ++wordIndex; } } type = newType; len = 1; hash = HASH(0, l); } } if (len > 0) { if (!IgnoreSpaces || type != SPACECHAR) { Word word; word.length = len; word.line = line; word.pos = i - len; word.type = type; word.hash = hash; chunk.words.push_back(word); ++wordIndex; } } chunk.lineEndPos[line] = wordIndex; } return wordIndex; }