void CKeywordTestDlg::OnOK() { CWaitCursor wc; if ( ! UpdateData() ) return; m_sSplitted = MakeKeywords( m_sInput, m_bExp != FALSE ); WordTable oWords, oNegWords; BuildWordTable( m_sSplitted, oWords, oNegWords ); m_pResults.ResetContent(); for ( WordTable::const_iterator i = oWords.begin() ; i != oWords.end() ; ++i ) { CString sWord( (*i).first, (*i).second ); m_pResults.AddString( sWord + _T(" (+)") ); } for ( WordTable::const_iterator i = oNegWords.begin() ; i != oNegWords.end() ; ++i ) { CString sNegWord( (*i).first, (*i).second ); m_pResults.AddString( sNegWord + _T(" (-)") ); } UpdateData( FALSE ); }
//	WordGenerator
//
//	Command-line entry point for the word generator.
//
//	Reads the file named by the "input" attribute of pCmdLine, treats every
//	non-blank line as one sample word and feeds it to a CMarkovWordGenerator.
//
//	If the COUNT_ATTRIB attribute is present and greater than zero, that many
//	unique words are requested from the generator, words that are found in
//	the input-word map are removed, and the rest is printed sorted, one per
//	line. The input-word map is only populated when NOVEL_ATTRIB is set, so
//	the removal step is a no-op otherwise. If COUNT_ATTRIB is absent, the
//	generator itself is printed as XML.
//
//	All errors are reported on stdout and end the function early.

void WordGenerator (CXMLElement *pCmdLine)
	{
	int i;

	//	Load input file

	CString sFilespec = pCmdLine->GetAttribute(CONSTLIT("input"));
	if (sFilespec.IsBlank())
		{
		printf("ERROR: input filename expected.\n");
		return;
		}

	CFileReadBlock InputFile(sFilespec);
	if (InputFile.Open() != NOERROR)
		{
		printf("ERROR: Unable to open file: %s\n", sFilespec.GetASCIIZPointer());
		return;
		}

	//	"Novel" means that we only generate words that are not
	//	in the input file.

	bool bNovelWordsOnly = pCmdLine->GetAttributeBool(NOVEL_ATTRIB);

	//	Build up a word generator

	CMarkovWordGenerator Generator;
	TMap<CString, DWORD> InputWords;

	//	Read each line of the file

	char *pPos = InputFile.GetPointer(0);
	char *pEndPos = pPos + InputFile.GetLength();
	while (pPos < pEndPos)
		{
		//	Skip whitespace and control characters (this also consumes the
		//	line terminator left over from the previous line).

		while (pPos < pEndPos && (strIsWhitespace(pPos) || *pPos < ' '))
			pPos++;

		//	Parse the line: everything up to the next line break or
		//	control character.

		char *pStart = pPos;
		while (pPos < pEndPos && *pPos != '\r' && *pPos != '\n' && *pPos >= ' ')
			pPos++;

		CString sWord(pStart, pPos - pStart);

		//	Add the word to the generator

		if (!sWord.IsBlank())
			{
			//	Trim once and use the same text for both the generator and
			//	the novel-word map. The line may still carry trailing
			//	blanks; storing the untrimmed text as the map key would
			//	keep the filter below from matching that word.

			CString sSample = strTrimWhitespace(sWord);
			Generator.AddSample(sSample);

			//	If we are looking for novel words we need to keep a map
			//	of all words in the input file.

			if (bNovelWordsOnly)
				InputWords.Insert(sSample);
			}
		}

	//	If we have a count, then output a list of random words

	int iCount;
	if (pCmdLine->FindAttributeInteger(COUNT_ATTRIB, &iCount))
		{
		if (iCount > 0)
			{
			TArray<CString> Result;
			Generator.GenerateUnique(iCount, &Result);

			//	Drop every generated word that also appears in the input.
			//	After a deletion the same index must be examined again,
			//	hence the i--.

			for (i = 0; i < Result.GetCount(); i++)
				if (InputWords.Find(Result[i]))
					{
					Result.Delete(i);
					i--;
					}

			Result.Sort();

			for (i = 0; i < Result.GetCount(); i++)
				printf("%s\n", Result[i].GetASCIIZPointer());
			}
		}

	//	Otherwise, output the generator as XML

	else
		{
		CMemoryWriteStream Output;
		if (Output.Create() != NOERROR)
			{
			printf("ERROR: Out of memory.\n");
			return;
			}

		if (Generator.WriteAsXML(&Output) != NOERROR)
			{
			printf("ERROR: Unable to output generator as XML.\n");
			return;
			}

		//	NUL-terminate the buffer so it can be printed as a C string.

		Output.Write("\0", 1);

		//	Print through an explicit "%s": the XML is data, not a format
		//	string, and may legitimately contain '%' characters.

		printf("%s", Output.GetPointer());
		}
	}
// Parses one XML element (and, recursively, its children) out of sCurInput.
//
// Parameters:
//   sCurInput   the complete text being parsed.
//   nLoc        in: index of the first character to look at.
//               out: written only when an element has been closed; it then
//               holds the index of the closing '>' (or of the '\n' / '\r'
//               that directly follows it).
//   nLineCount  incremented for every '\n' that is consumed.
//
// Returns the newly allocated element, or NULL when
//   - the closing tag name does not match the opening tag name (sError is
//     set and the partially built element is deleted), or
//   - the input ends before the element is closed.
//
// The parser is a character-driven state machine; nMode holds the current
// PARSE_MODE_* state. Comments and declarations ("<?...>", "<!...>") are
// skipped, CDATA sections are copied verbatim into the element value.
FXMLElement* FXMLParser::ParseElement(std::string sCurInput, long* nLoc, long* nLineCount)
{
	int nMode = PARSE_MODE_NONE;           //set the initial mode
	std::string sWord("");                 //set the current word read
	char ch;                               //current character read
	std::string sAttributeName;            //attribute name to save
	int nSpecialMode = PARSE_MODE_NONE;    //if parsing comments, used to save old mode
	FXMLElement *fxInput = NULL;
	FXMLElement *fxChild = NULL;
	std::string sElementName;              //element name, to be used for error checking

	//the input is never modified below, so its length is loop-invariant
	const std::string::size_type nLength = sCurInput.length();

	//parse the line a char at a time
	//(the cast keeps the original unsigned comparison: a negative start
	//index simply skips the loop)
	for (long i = (*nLoc); static_cast<std::string::size_type>(i) < nLength; i++)
	{
		ch = sCurInput.at(i);

		if (ch == '<')
		{
			//parse the start of new elements
			if (nMode == PARSE_MODE_NONE)
			{
				//the beginning of an element
				nMode = PARSE_MODE_ELEMENT_PROVISIONAL;

				//create a new input element
				if (fxInput == NULL)
				{
					fxInput = new FXMLElement;
					fxInput->fxParent = NULL;
				}
			}
			else if ((nMode == PARSE_MODE_ELEMENT_DATA) || (nMode == PARSE_MODE_CDATA_END))
			{
				nMode = PARSE_MODE_END_PROVISIONAL;

				//a '<' that is the very last character cannot start anything;
				//stop here instead of letting at(i + 1) throw
				if (static_cast<std::string::size_type>(i + 1) >= nLength)
					break;

				//see if this is the last element or not
				if (sCurInput.at(i + 1) == '/')
				{
					//it is a last element, set mode
					fxInput->Value(sWord);
					nMode = PARSE_MODE_END;
					sWord.erase();
				}
				else
				{
					//if not last element, perform recursive call
					if (fxInput->mChildren == NULL)
						fxInput->mChildren = new std::list<FXMLElement*>;

					//the child call advances i past the child element
					fxChild = ParseElement(sCurInput, &i, nLineCount);
					if (fxChild != NULL)
					{
						fxChild->fxParent = fxInput;

						//V 0.92 Elements now in order thanks to Max Belugin
						fxInput->mChildren->push_back(fxChild);
					}

					//set mode, initialize sWord with existing data
					nMode = PARSE_MODE_ELEMENT_DATA;

					//V0.93 removed - to be fixed later
					//sWord = fxInput->Value();
				}
			}
			else if (nMode == PARSE_MODE_CDATA)
			{
				sWord.append(1, ch);
			}
		}
		else if ((ch == ' ') || (ch == '\n') || (ch == '\r') || (ch == '\t'))
		{
			//if in element data, add it. Else end state
			//also, do not add newlines or spaces to data
			if (nMode == PARSE_MODE_ELEMENT_DATA)
			{
				//leading whitespace of element data is dropped
				if (sWord.length() > 0)
					sWord.append(1, ch);
			}
			else if ((nMode == PARSE_MODE_CDATA) || (nMode == PARSE_MODE_ATTRIBUTE_VALUE))
			{
				//V 0.92 Attribute values with spaces now valid thanks to Max Belugin
				sWord.append(1, ch);
			}
			else if (nMode == PARSE_MODE_ELEMENT_NAME)
			{
				//set the element name
				sElementName = sWord;
				fxInput->Name(sWord);
				sWord.erase();

				//now set to look for attributes
				nMode = PARSE_MODE_ATTRIBUTE_PROVISIONAL;
			}

			//increment line count if \n
			if (ch == '\n')
				(*nLineCount)++;
		}
		else if ((ch == '?') || (ch == '!'))
		{
			//its a comment or declaration. For this version
			//of the parser, ignore them
			if (nMode == PARSE_MODE_ELEMENT_PROVISIONAL)
			{
				//save the old mode
				nSpecialMode = nMode;
				nMode = PARSE_MODE_SPECIAL;
			}
		}
		else if (ch == '[')
		{
			//look out for cdata tags
			if (nMode == PARSE_MODE_SPECIAL)
			{
				if (sCurInput.substr(i + 1, 5).compare("CDATA") == 0)
					nMode = PARSE_MODE_CDATA_PROVISIONAL;
			}
			else if (nMode == PARSE_MODE_CDATA_PROVISIONAL)
			{
				sWord.erase();
				nMode = PARSE_MODE_CDATA;
			}
			else
				sWord.append(1, ch);
		}
		else if (ch == ']')
		{
			//look for end of CDATA
			if (nMode == PARSE_MODE_CDATA)
				nMode = PARSE_MODE_CDATA_END_PROVISIONAL;
			else if (nMode == PARSE_MODE_CDATA_END_PROVISIONAL)
				nMode = PARSE_MODE_CDATA_END;
			else
				sWord.append(1, ch);
		}
		else if (ch == '>')
		{
			if (nMode == PARSE_MODE_ELEMENT_NAME)
			{
				//end of element name: set the element name
				sElementName = sWord;
				fxInput->Name(sWord);
				sWord.erase();

				//now set mode to look for element data
				nMode = PARSE_MODE_ELEMENT_DATA;
			}
			else if (nMode == PARSE_MODE_ATTRIBUTE_PROVISIONAL)
			{
				//attribute parsing over, prepare to parse element data
				sWord.erase();
				nMode = PARSE_MODE_ELEMENT_DATA;
			}
			else if (nMode == PARSE_MODE_END)
			{
				//end of the element
				//only compare IF sWord.length > 0
				if (sWord.length() > 0)
				{
					if (sWord.compare(sElementName) != 0)
					{
						//parse error!!!
						//V0.93 handle parse errors nicely!
						//*nLineCount is a long: format it with %ld into a
						//buffer large enough for any 64-bit value
						char cError[32];
						sprintf(cError, "%ld", (*nLineCount));
						sError = std::string("FXML reports parsing error.  Unmatched tag: Expecting ")
							+ sElementName
							+ std::string(" but encountered ")
							+ sWord
							+ std::string(" at line: ")
							+ cError;

						delete fxInput;
						fxInput = NULL;
					}
				}

				//V0.93
				//do NOT skip ahead one if xml file stuffed together
				//handles <xml>blah</xml><more>data</more> correctly
				sWord.erase();
				if (nLength > static_cast<std::string::size_type>(i + 1))
				{
					if ((sCurInput.at(i + 1) == '\n') || (sCurInput.at(i + 1) == '\r'))
						(*nLoc) = i + 1;
					else
						(*nLoc) = i;
				}
				else
					(*nLoc) = i;

				return fxInput;
			}
			else if (nMode == PARSE_MODE_SPECIAL)
			{
				//end of a special element, reset the mode
				nMode = nSpecialMode;
			}
			else if (nMode == PARSE_MODE_CDATA)
			{
				sWord.append(1, ch);
			}
		}
		else if (ch == '/')
		{
			//handle elements such as <blah ..... />
			if (nMode == PARSE_MODE_ATTRIBUTE_PROVISIONAL)
			{
				nMode = PARSE_MODE_END;
			}
			else if ((nMode != PARSE_MODE_SPECIAL) && (nMode != PARSE_MODE_END))
				sWord.append(1, ch);
		}
		else if (ch == '=')
		{
			//parsing of attributes
			if (nMode == PARSE_MODE_ATTRIBUTE_NAME)
			{
				//set the attribute name
				sAttributeName = sWord;
				sWord.erase();

				//set the mode
				nMode = PARSE_MODE_ATTRIBUTE_VALUE_PROVISIONAL;
			}
			else if (nMode != PARSE_MODE_SPECIAL)
				sWord.append(1, ch);
		}
		else if ((ch == '"') || (ch == '\''))
		{
			//parsing of attributes
			if (nMode == PARSE_MODE_ATTRIBUTE_VALUE_PROVISIONAL)
			{
				nMode = PARSE_MODE_ATTRIBUTE_VALUE;
			}
			else if (nMode == PARSE_MODE_ATTRIBUTE_VALUE)
			{
				//save the pair
				fxInput->SetAttributePair(sAttributeName, sWord);

				//erase
				sWord.erase();
				sAttributeName.erase();

				//set new mode
				nMode = PARSE_MODE_ATTRIBUTE_PROVISIONAL;
			}
			else if (nMode != PARSE_MODE_SPECIAL)
				sWord.append(1, ch);
		}
		else
		{
			//parsing of straight text/words
			if (nMode == PARSE_MODE_ELEMENT_PROVISIONAL)
				nMode = PARSE_MODE_ELEMENT_NAME;
			else if (nMode == PARSE_MODE_ATTRIBUTE_PROVISIONAL)
				nMode = PARSE_MODE_ATTRIBUTE_NAME;

			//ignore comments and special declarations
			if ((nMode != PARSE_MODE_SPECIAL) && (nMode != PARSE_MODE_CDATA_END))
				sWord.append(1, ch);
		}
	}

	//input ran out before the element was closed: the element was never
	//handed to anyone, so release it here instead of leaking it
	delete fxInput;
	return NULL;
}