Пример #1
0
void CKeywordTestDlg::OnOK()
{
	CWaitCursor wc;

	if ( ! UpdateData() )
		return;

	m_sSplitted = MakeKeywords( m_sInput, m_bExp != FALSE );

	WordTable oWords, oNegWords;
	BuildWordTable( m_sSplitted, oWords, oNegWords );

	m_pResults.ResetContent();

	for ( WordTable::const_iterator i = oWords.begin() ; i != oWords.end() ; ++i )
	{
		CString sWord( (*i).first, (*i).second );
		m_pResults.AddString( sWord + _T(" (+)") );
	}

	for ( WordTable::const_iterator i = oNegWords.begin() ; i != oNegWords.end() ; ++i )
	{
		CString sNegWord( (*i).first, (*i).second );
		m_pResults.AddString( sNegWord + _T(" (-)") );
	}

	UpdateData( FALSE );
}
Пример #2
0
//	WordGenerator
//
//	Reads the file named by the "input" attribute of pCmdLine, treating each
//	non-blank line as one sample word, and feeds the samples to a
//	CMarkovWordGenerator.
//
//	If the COUNT_ATTRIB attribute is present and positive, prints a sorted
//	list of generated words, one per line. Words that appear in the input are
//	removed from that list when NOVEL_ATTRIB is set. If the count is present
//	but not positive, nothing is printed. If the count attribute is absent,
//	the generator itself is printed as XML.
//
//	Errors are reported on stdout and the function returns early.

void WordGenerator (CXMLElement *pCmdLine)
{
    int i;

    //	Load input file

    CString sFilespec = pCmdLine->GetAttribute(CONSTLIT("input"));
    if (sFilespec.IsBlank())
    {
        printf("ERROR: input filename expected.\n");
        return;
    }

    CFileReadBlock InputFile(sFilespec);
    if (InputFile.Open() != NOERROR)
    {
        printf("ERROR: Unable to open file: %s\n", sFilespec.GetASCIIZPointer());
        return;
    }

    //	"Novel" means that we only generate words that are not
    //	in the input file.

    bool bNovelWordsOnly = pCmdLine->GetAttributeBool(NOVEL_ATTRIB);

    //	Build up a word generator

    CMarkovWordGenerator Generator;
    TMap<CString, DWORD> InputWords;

    //	Read each line of the file

    char *pPos = InputFile.GetPointer(0);
    char *pEndPos = pPos + InputFile.GetLength();
    while (pPos < pEndPos)
    {
        //	Skip whitespace and control characters (this also consumes the
        //	line terminators left over from the previous line).
        //	NOTE(review): where plain char is signed, bytes >= 0x80 also
        //	compare below ' ' and are skipped here -- confirm this is intended.

        while (pPos < pEndPos && (strIsWhitespace(pPos) || *pPos < ' '))
            pPos++;

        //	Parse the line: everything up to the next CR, LF or control char

        char *pStart = pPos;
        while (pPos < pEndPos && *pPos != '\r' && *pPos != '\n' && *pPos >= ' ')
            pPos++;

        CString sWord(pStart, pPos - pStart);

        //	Add the word to the generator

        if (!sWord.IsBlank())
        {
            //	Trim once and use the same text for both the sample and the
            //	lookup key. Otherwise a line with trailing spaces would be
            //	stored under a key that no generated word can ever match.

            CString sSample = strTrimWhitespace(sWord);
            Generator.AddSample(sSample);

            //	If we are looking for novel words we need to keep a map
            //	of all words in the input file.

            if (bNovelWordsOnly)
                InputWords.Insert(sSample);
        }
    }

    //	If we have a count, then output a list of random words

    int iCount;
    if (pCmdLine->FindAttributeInteger(COUNT_ATTRIB, &iCount))
    {
        if (iCount > 0)
        {
            TArray<CString> Result;
            Generator.GenerateUnique(iCount, &Result);

            //	Drop any generated word that is in the input map. The map is
            //	only populated when novel words were requested. After a
            //	delete, step the index back so the element that moved into
            //	this slot is also examined.

            for (i = 0; i < Result.GetCount(); i++)
                if (InputWords.Find(Result[i]))
                {
                    Result.Delete(i);
                    i--;
                }

            Result.Sort();

            for (i = 0; i < Result.GetCount(); i++)
                printf("%s\n", Result[i].GetASCIIZPointer());
        }
    }

    //	Otherwise, output the generator as XML

    else
    {
        CMemoryWriteStream Output;
        if (Output.Create() != NOERROR)
        {
            printf("ERROR: Out of memory.\n");
            return;
        }

        if (Generator.WriteAsXML(&Output) != NOERROR)
        {
            printf("ERROR: Unable to output generator as XML.\n");
            return;
        }

        //	NUL-terminate the buffer so it can be printed as a C string

        Output.Write("\0", 1);

        //	Never pass generated data as the format string: a '%' in the XML
        //	would be interpreted as a conversion specifier.

        printf("%s", Output.GetPointer());
    }
}
Пример #3
0
//Parses one XML element out of sCurInput, starting at index *nLoc, using a
//character-driven state machine. Child elements are parsed by recursive calls.
//
//  sCurInput   the text being parsed
//              NOTE(review): taken by value, so each recursive call copies the
//              whole input; changing that requires touching the declaration.
//  nLoc        in: index at which to start scanning
//              out: on return from a closing '>', the index of that '>' or of
//              the CR/LF immediately following it; untouched otherwise
//  nLineCount  incremented once for every '\n' scanned
//
//Returns the newly allocated element when its closing '>' is reached.
//Returns NULL (and sets sError) when the closing tag name does not match the
//opening name, and NULL (without touching sError) when the input ends before
//the element is closed. In both NULL cases the partially built element is
//deleted here.
FXMLElement* FXMLParser::ParseElement(std::string sCurInput, long* nLoc, long* nLineCount)
{
	int nMode = PARSE_MODE_NONE; //set the initial mode
	std::string sWord("");	//set the current word read
	char ch;	//current character read
	std::string sAttributeName; //attribute name to save
	int nSpecialMode = PARSE_MODE_NONE; //if parsing comments, used to save old mode
	FXMLElement *fxInput = NULL;	//element being built by this call
	FXMLElement *fxChild = NULL;	//most recently parsed child element
	std::string sElementName; //element name, to be used for error checking

	//parse the line a char at a time
	for (long i = (*nLoc); i < sCurInput.length(); i++)
	{
		ch = sCurInput.at(i);
		if (ch == '<')
		{
			//parse the start of new elements
			if (nMode == PARSE_MODE_NONE)
			{
				//the beginning of an element
				nMode = PARSE_MODE_ELEMENT_PROVISIONAL;
				//create a new input element
				if (fxInput == NULL)
				{
					fxInput = new FXMLElement;
					fxInput->fxParent = NULL;
				}
			}
			else if ((nMode == PARSE_MODE_ELEMENT_DATA) || (nMode == PARSE_MODE_CDATA_END))
			{
				nMode = PARSE_MODE_END_PROVISIONAL;

				//see if this is the last element or not.  Check the bounds
				//first: a '<' as the very last character must not make at()
				//throw; it is handled as truncated input instead.
				const bool bHasNext =
					(static_cast<std::string::size_type>(i) + 1) < sCurInput.length();
				if (bHasNext && (sCurInput.at(i + 1) == '/'))
				{
					//it is a last element, set mode
					fxInput->Value(sWord);
					nMode = PARSE_MODE_END;
					sWord.erase();
				}
				else
				{
					//if not last element, perform recursive call
					if (fxInput->mChildren == NULL)
						fxInput->mChildren = new std::list<FXMLElement*>;
					//the child starts at this '<' and advances i past itself
					fxChild = ParseElement(sCurInput, &i, nLineCount);
					if (fxChild != NULL)
					{
						fxChild->fxParent = fxInput;
						//V 0.92 Elements now in order thanks to Max Belugin
						fxInput->mChildren->push_back(fxChild);
					}

					//set mode, initialize sWord with existing data
					nMode = PARSE_MODE_ELEMENT_DATA;

					//V0.93 removed - to be fixed later
					//sWord = fxInput->Value();
				}
			}
			else if (nMode == PARSE_MODE_CDATA)
			{
				sWord.append(1, ch);
			}
		}
		else if ((ch == ' ') || (ch == '\n') || (ch == '\r') || (ch == '\t'))
		{
			//if in element data, add it.  Else end state

			//also, do not add newlines or spaces to data
			if (nMode == PARSE_MODE_ELEMENT_DATA)
			{
				//leading whitespace is dropped, interior whitespace is kept
				if (sWord.length() > 0) sWord.append(1,ch);
			}
			else if ((nMode == PARSE_MODE_CDATA) || (nMode == PARSE_MODE_ATTRIBUTE_VALUE))
				//V 0.92 Attribute values with spaces now valid thanks to Max Belugin
				sWord.append(1, ch);
			else if (nMode == PARSE_MODE_ELEMENT_NAME)
			{
				//set the element name
				sElementName = sWord;
				fxInput->Name(sWord);
				sWord.erase();
				//now set to look for attributes
				nMode = PARSE_MODE_ATTRIBUTE_PROVISIONAL;
			}

			//increment line count if \n
			if (ch == '\n') (*nLineCount)++;
		}
		else if ((ch == '?') || (ch == '!'))
		{
			//its a comment or declaration.  For this version
			//of the parser, ignore them
			if (nMode == PARSE_MODE_ELEMENT_PROVISIONAL)
			{
				//save the old mode
				nSpecialMode = nMode;
				nMode = PARSE_MODE_SPECIAL;
			}
		}
		else if (ch == '[')
		{
			//look out for cdata tags
			if (nMode == PARSE_MODE_SPECIAL)
			{
				if (sCurInput.substr(i+1, 5).compare("CDATA") == 0)
					nMode = PARSE_MODE_CDATA_PROVISIONAL;
			}
			else if (nMode == PARSE_MODE_CDATA_PROVISIONAL)
			{
				//second '[' of "<![CDATA[": the payload starts here
				sWord.erase();
				nMode = PARSE_MODE_CDATA;
			}
			else
				sWord.append(1, ch);
		}
		else if (ch == ']')
		{
			//look for end of CDATA
			if (nMode == PARSE_MODE_CDATA)
				nMode = PARSE_MODE_CDATA_END_PROVISIONAL;
			else if (nMode == PARSE_MODE_CDATA_END_PROVISIONAL)
				nMode = PARSE_MODE_CDATA_END;
			else
				sWord.append(1, ch);
		}
		else if (ch == '>')
		{
			//end of element name
			if (nMode == PARSE_MODE_ELEMENT_NAME)
			{
				//set the element name
				sElementName = sWord;
				fxInput->Name(sWord);
				sWord.erase();
				//now set more to look for element data
				nMode = PARSE_MODE_ELEMENT_DATA;
			}
			else if (nMode == PARSE_MODE_ATTRIBUTE_PROVISIONAL)
			{
				//attribute parsing over, prepare to parse element data
				sWord.erase();
				nMode = PARSE_MODE_ELEMENT_DATA;
			}
			else if (nMode == PARSE_MODE_END)
			{
				//end of the element
				//only compare IF sWord.length > 0
				if (sWord.length() > 0)
				{
					if (sWord.compare(sElementName) != 0)
					{
						//parse error!!!
						//V0.93 handle parse errors nicely!
						//the line count is a long: format it with %ld into a
						//buffer large enough for any long value
						char cError[32];
						sprintf(cError, "%ld", (*nLineCount));
						sError = (std::string)"FXML reports parsing error.  Unmatched tag: Expecting " +
							sElementName + (std::string)" but encountered " + sWord + (std::string)" at line: " + cError;
						delete fxInput;
						fxInput = NULL;
					}
				}
				//V0.93
				//do NOT skip ahead one if xml file stuffed together
				//handles <xml>blah</xml><more>data</more> correctly
				//parse element data
				sWord.erase();
				if (sCurInput.length() > (i + 1))
				{
					if ((sCurInput.at(i + 1) =='\n') ||
						(sCurInput.at(i + 1) =='\r'))
					{
						(*nLoc) = i + 1;
					}
					else
						(*nLoc) = i;
				}
				else
					(*nLoc) = i;

				return fxInput;
			}
			else if (nMode == PARSE_MODE_SPECIAL)
			{
				//end of a special element, reset the mode
				nMode = nSpecialMode;
			}
			else if (nMode == PARSE_MODE_CDATA)
			{
				sWord.append(1, ch);
			}
		}
		else if (ch == '/')
		{
			//handle elements such as <blah ..... />
			if (nMode == PARSE_MODE_ATTRIBUTE_PROVISIONAL)
			{
				nMode = PARSE_MODE_END;
			}
			else if ((nMode != PARSE_MODE_SPECIAL) && (nMode != PARSE_MODE_END))
				sWord.append(1, ch);
		}
		else if (ch == '=')
		{
			//parsing of attributes
			if (nMode == PARSE_MODE_ATTRIBUTE_NAME)
			{
				//set the attribute name
				sAttributeName = sWord;
				sWord.erase();
				//set the mode
				nMode = PARSE_MODE_ATTRIBUTE_VALUE_PROVISIONAL;
			}
			else if (nMode != PARSE_MODE_SPECIAL)
				sWord.append(1, ch);
		}
		else if ((ch == '"') || (ch == '\''))
		{
			//parsing of attributes
			if (nMode == PARSE_MODE_ATTRIBUTE_VALUE_PROVISIONAL)
			{
				nMode = PARSE_MODE_ATTRIBUTE_VALUE;
			}
			else if (nMode == PARSE_MODE_ATTRIBUTE_VALUE)
			{
				//save the pair
				fxInput->SetAttributePair(sAttributeName, sWord);
				//erase
				sWord.erase();
				sAttributeName.erase();

				//set new mode
				nMode = PARSE_MODE_ATTRIBUTE_PROVISIONAL;
			}
			else if (nMode != PARSE_MODE_SPECIAL)
				sWord.append(1, ch);
		}
		else
		{
			//parsing of straight text/words
			if (nMode == PARSE_MODE_ELEMENT_PROVISIONAL)
				nMode = PARSE_MODE_ELEMENT_NAME;
			else if (nMode == PARSE_MODE_ATTRIBUTE_PROVISIONAL)
				nMode = PARSE_MODE_ATTRIBUTE_NAME;

			//ignore comments and special declarations
			if ((nMode != PARSE_MODE_SPECIAL) && (nMode != PARSE_MODE_CDATA_END))
				sWord.append(1, ch);
		}
	}

	//input ran out before the element was closed: nothing is returned to the
	//caller, so release the partially built element instead of leaking it
	delete fxInput;
	return NULL;
}