Exemple #1
0
// Inserts a new entry at position `column` of this row, attaches a freshly
// allocated WTableCell to it, and then rewrites the column_ member of every
// cell from that position to the end of the row so it equals the cell's index.
void WTableRow::insertColumn(int column)
{
  // Open the slot first; the cell pointer is filled in right afterwards.
  cells_.insert(cells_.begin() + column, TableData());
  cells_[column].cell = new WTableCell(this, column);

  // Renumber starting at the insertion point (the new cell included).
  unsigned index = column;
  while (index < cells_.size()) {
    cells_[index].cell->column_ = index;
    ++index;
  }
}
Exemple #2
0
// Appends entries to this row until it holds numCells of them. Each appended
// entry gets a newly allocated WTableCell constructed with its column index.
// Does nothing when the row already has numCells or more entries.
void WTableRow::expand(int numCells)
{
  int nextColumn = cells_.size();

  while (nextColumn < numCells) {
    // Append the entry first, then attach the cell to the entry just added.
    cells_.push_back(TableData());
    cells_.back().cell = new WTableCell(this, nextColumn);
    ++nextColumn;
  }
}
// Uses an existing collection of signatures to re-analyze the words of this
// mini-lexicon into stem + affix.
//
// Sigs: the signatures to work from; when NULL, m_pSignatures is used.
//
// Outline, as implemented below:
//   1. Copy every word for which MayBeParsed() is true into TempWords.
//   2. Sort Sigs and visit each signature whose Size() and GetNumberOfStems()
//      reach the configured thresholds.
//   3. Take the first non-NULL affix of the signature and collect, from
//      TempWords, every candidate stem obtained by stripping that affix
//      (from the right when analyzing suffixes, from the left otherwise).
//   4. For every other affix of the signature, drop the candidates whose
//      stem+affix combination is not found (NULL affix: the bare stem must
//      be found in m_pWords; otherwise the combined word must be found in
//      TempWords).
//   5. For each surviving stem and each non-NULL affix, look the word up in
//      m_pWords, re-cut it at the stem/affix boundary and pass it to
//      m_pLexicon->UpdateWord().
//   6. Call TakeSplitWords_ProduceStemsAndSigs() and append an entry to the
//      history returned by GetDLHistory().
// Progress is reported on the document's status bars, and a table is written
// to the log file whenever LogFileOn() is true.
void CMiniLexicon::TakeSignaturesFindStems( CSignatureCollection* Sigs )
{
	CStem*      pWord;
	const int   SizeThreshold = m_pLexicon->GetIntParameter( "TakeSignaturesFindStems\\SizeThreshold", 2 ); // 2;
	const int   StemCountThreshold = m_pLexicon->GetIntParameter( "TakeSignaturesFindStems\\StemCountThreshold", 2 ); // 2;// 8;
	const int   MinimumStemLength = m_pLexicon->GetIntParameter( "Main\\MinimumStemLength", 10 );

	CParse            PWord;
	CStringSurrogate  ssAffix,
	                  ssWord,
	                  ssStem;
	CSignature*       pSig;

	CWordCollection   TempWords (this);
	// NOTE(review): WordsSplit is never used below; it is kept because the
	// effects of its constructor cannot be verified from this function alone.
	CWordCollection   WordsSplit (this);
	int               AffixLength;
	QString           msg;
	int               m;

	if( Sigs == NULL ) Sigs = m_pSignatures;

	GetDocument()->setStatusBar1 ( "Mini-Lexicon " + QString("%1").arg( m_Index+1 ) + ": Take signatures to find stems" );
	GetDocument()->BeginCountDown();

	if( LogFileOn() )
	{
		*GetLogFile() <<LargeTitle( "Phase: Take Signatures, Find Stems");
	}

	// Affixes located at the start of the stem/word mean we are working with
	// prefixes; in every other case we treat the affixes as suffixes.
	bool analyzingSuffixes = TRUE;
	if( GetAffixLocation() == STEM_INITIAL || GetAffixLocation() == WORD_INITIAL ) analyzingSuffixes = FALSE;

	// Only words that may be parsed take part in the analysis.
	for (int i = 0; i < (int)m_pWords->GetCount(); i++)
	{
		if ( !m_pWords->GetAt(i)->MayBeParsed() )
		{
			continue;
		}
		TempWords << m_pWords->GetAt(i);
	}

//	We loop through the good signatures, and then run through
//	the words to see if they could belong to the good signatures.
//	We have to be careful, because a word might have belonged
//	to a different signature, and still have the marks of those
//	suffixes in its factorization.

	if (LogFileOn() )
	{
		*GetLogFile() << StartTable <<
			StartTableRow <<
				MakeTableHeader(" --") <<
				MakeTableHeader("Signature") <<
			EndTableRow;
	}

	// Go through signatures:
	Sigs->Sort(SIGS);

	for (int j = 0; j < (int)Sigs->GetCount(); j++) //Loop A
	{
		pSig = Sigs->GetAtSort(j);

		if (!pSig) continue;

		if (LogFileOn() )
		{
			*GetLogFile() <<
				StartTableRow <<
					TableData(j) <<
					TableData(pSig->Display('.'))  <<
				EndTableRow;
		}

		GetDocument()->CountDownOnStatusBar ( j, Sigs->GetCount(), 5 );
		GetDocument()->setStatusBar2 ( pSig->Display() );

		// Skip signatures that are too small or supported by too few stems.
		if ( pSig->Size() < SizeThreshold ) continue;
		if ( pSig->GetNumberOfStems() < StemCountThreshold ) continue;

		// Choose the first suffix in pSig that isn't NULL:
		int k = 1;
		if( pSig->GetPiece(1).IsNULL() ) k = 2;

		Q3ValueList<CStem> TempStems2;  // replaces TempStems1 -- fix memory leak

		ssAffix = pSig->GetPiece(k);
		AffixLength = ssAffix.GetLength();

		// Collect candidate stems: every word that carries affix k, minus that affix.
		for ( m = 0; m < (int)TempWords.GetCount(); m++)
		{//Loop B
			pWord = TempWords[m];
			ssWord = pWord->GetKey();

			if( analyzingSuffixes )
			{ //Loop C
				if ( ssWord.Right(AffixLength) == ssAffix.Display() )
				{
					// The word is nothing but the affix: no stem would remain.
					if ( (int)ssWord.GetLength() == AffixLength ) { continue; }
					ssStem = ssWord;
					ssStem = ssStem.Left(ssWord.GetLength() - AffixLength );

					Q_ASSERT ( ssStem.GetLength() != 0);

					if ( (int) ssStem.GetLength() < MinimumStemLength ) continue;

					if ( LogFileOn() ) { *GetLogFile() <<
							StartTableRow <<
								TableData(QString("")) <<
								TableData(ssStem) <<
							EndTableRow;
					}

					// put into Temp Stems all those stems from words which might be analyzed as ending in ssAffix.
					TempStems2.append(ssStem);
				}
			}//Loop C
			else
			{//Loop C
				if ( ssWord.Left(AffixLength) == ssAffix.Display() )
				{
					// The word is nothing but the affix: no stem would remain.
					if ( (int)ssWord.GetLength() == AffixLength ) { continue; }
					ssStem = ssWord;
					ssStem = ssStem.Right( ssWord.GetLength() - AffixLength );

					Q_ASSERT ( ssStem.GetLength() != 0);

					if ( (int) ssStem.GetLength() < MinimumStemLength ) continue;

					if ( LogFileOn() ) { *GetLogFile() <<
							StartTableRow <<
								TableData(QString("")) <<
								TableData(ssStem) <<
							EndTableRow;
					}

					// put into Temp Stems all those stems from words which might be analyzed as beginning with ssAffix.
					TempStems2.append(ssStem);
				}
			} // Loop C
		} // Loop B

		// Check the candidates against every other affix of the signature and
		// throw out the ones that do not occur with all of them.
		for ( m = 1; m <= pSig->Size(); m++)
		{ // Loop B
			if ( (int) m == k ) continue; // we've already done it --

			ssAffix = pSig->GetPiece(m);

			if (ssAffix.IsNULL()) {
				// NULL affix: the stem is kept only if the lookup of the bare
				// stem in m_pWords succeeds.
				// erase() hands back the iterator following the removed element,
				// so we advance manually only when nothing was erased. This
				// avoids incrementing the end iterator after erasing the last
				// element of the list.
				Q3ValueList<CStem>::iterator it = TempStems2.begin();
				while (it != TempStems2.end())
				{
					if ( !(*m_pWords ^= *it) )
					{
						if ( LogFileOn() ) *GetLogFile() <<
							StartTableRow <<
								TableData(QString("")) <<
								TableData(*it) <<
							EndTableRow;
						it = TempStems2.erase(it);
					}
					else ++it;
				}
			} else {
				Q3ValueList<CStem>::Iterator it = TempStems2.begin();
				while (it != TempStems2.end())
				{
					CStem& stem = *it;
					if (LogFileOn() ) { *GetLogFile() << endl << stem.GetKey().Display(); }

					// Build the word this stem would form with the current affix.
					if( analyzingSuffixes ) PWord = stem.GetKey() + ssAffix;
					else PWord = ssAffix + stem.GetKey();

					if ( LogFileOn() ) { *GetLogFile() <<
							StartTableRow <<
								TableData(QString("")) <<
								TableData(PWord ) <<
							EndTableRow; }

					if ( ! (TempWords ^= PWord) )
					{
						// this is too stringent: we probably want to keep some of these subsignatures.
//						if (LogFileOn() ) { *GetLogFile() <<  "  Missing: " << stem.GetKey().Display(); }
						if ( LogFileOn() ) { *GetLogFile() <<
							StartTableRow <<
								TableData("Missing") <<
								TableData(stem.GetKey()) <<
							EndTableRow; }
						it = TempStems2.erase(it);
					}
					else ++it;
				}
			}
		}

		// Now start building up pSig again....

		Q3ValueList<CStem>::Iterator it;
		for (it = TempStems2.begin(); it != TempStems2.end(); ++it)
		{			//Loop B
			CStem& stem = *it;
			ssStem    = stem;

			for (int n = 1; n <= pSig->Size(); n++)
			{
				ssAffix = pSig->GetPiece(n);
				if (ssAffix.Display() == "NULL" ) continue;

				if( analyzingSuffixes ) PWord = ssStem + ssAffix;
				else PWord = ssAffix + ssStem;

				pWord = *m_pWords ^= PWord;
				Q_ASSERT (pWord);
				// Q_ASSERT is the only check in the original code; skip the word
				// rather than dereference a null pointer should the lookup fail
				// in a build where the assertion does not stop execution.
				if ( !pWord ) continue;

				if( analyzingSuffixes )
				{
					pWord->ClearRootSuffixSplit();
					pWord->CutRightBeforeHere ( ssStem.GetLength() );
					pWord->SetStemLoc(1);
					pWord->SetSuffixLoc(2);
					m_pLexicon->UpdateWord( pWord );
				}
				else
				{
					pWord->ClearPrefixStemSplit();
					pWord->CutRightBeforeHere ( ssAffix.GetLength() );
					pWord->SetStemLoc(2);
					pWord->SetPrefixLoc(1);
					m_pLexicon->UpdateWord( pWord );
				}

				// Record the origin of the analysis only if none is recorded yet.
				if (pWord->GetConfidence().length() == 0)
				{
					msg = "3: From sig find stem";
					pWord->AppendToConfidence( msg ) ;
				}
				if (LogFileOn() ) { *GetLogFile() << endl << "Reanalyzed word: "<< pWord->Display(); }
				if ( LogFileOn() ) { *GetLogFile() <<
							StartTableRow <<
								TableData("Reanalyzed word:") <<
								TableData(pWord) <<
							EndTableRow; }

			} // cycle through this signature
		}// cycle through this set of Stems //Loop B
	}// end of pSig loop Loop A

	if (LogFileOn() ) { *GetLogFile() << EndTable; }

	GetDocument()->setStatusBar2( "" );

	QString mini_name( "Mini-Lexicon %1" );
	msg = "From sigs find stems";
	CStringSurrogate ssRemark	= msg;

	TakeSplitWords_ProduceStemsAndSigs( ssRemark );

	TempWords.Empty();
	GetDocument()				->setStatusBar1( "" );
	mini_name					= mini_name.arg( GetIndex() + 1 );
	QString remark				= "From sigs: find stems";
	GetDLHistory()				->append( mini_name, remark, this );


	////////////////////////////////////////////////////////////
	//
	//  end of function
	//
	////////////////////////////////////////////////////////////

}
//--------------------------------------------------------------------------------------------------
/// Reads one table from streamData and returns it as a TableData.
///
/// Processing steps:
///  - Lines are consumed while they are skippable or are handled by keywordParser(), which
///    fills in origin, date format and start date. Only origin and start date are passed on
///    to the returned table.
///  - The first remaining line is split into the quantity names; their count is the column count.
///  - Following lines are collected as header rows until two consecutive lines both pass
///    isValidTableData() for the column count, or the stream stops being good. The stream is
///    then repositioned to the recorded start of the first of those two lines.
///  - Among the header rows with exactly one word per column, the first row containing a time
///    unit becomes the unit row, and the first row made up only of valid doubles is treated as
///    the scale factor row. Both are excluded from the header text.
///  - One Column is created per non-empty column header.
///
/// An empty TableData is returned when the stream ends before a quantity line is found, when no
/// quantities are detected, when the number of units differs from the number of quantities,
/// when buildColumnHeaderText() reports errors, or when a column header has no matching unit.
/// If errorText is non-null, a description of the problem is appended to it (nothing is
/// appended for the end-of-stream case).
//--------------------------------------------------------------------------------------------------
TableData RifEclipseUserDataParserTools::tableDataFromText(std::stringstream& streamData, std::vector<std::string>* errorText)
{
    TableData emptyTable;

    std::string origin = "";
    std::string dateFormat = "";
    std::string startDate = "";

    std::string firstLine;
    std::getline(streamData, firstLine);

    // Consume skippable lines and keyword lines preceding the table itself
    while (isLineSkippable(firstLine) || keywordParser(firstLine, origin, dateFormat, startDate))
    {
        if (!streamData.good())
        {
            // End of file
            return emptyTable;
        }

        std::getline(streamData, firstLine);
    }

    std::vector<std::string> quantityNames = splitLineAndRemoveComments(firstLine);
    size_t columnCount = quantityNames.size();

    if (columnCount == 0)
    {
        if (errorText) errorText->push_back("No quantities detected in table");

        return emptyTable;
    }

    std::vector< std::vector< std::string > > allHeaderRows;

    {
        // Slide a two-line window over the stream. Stream positions are recorded so the stream
        // can be rewound to the start of the first line of the window when the loop ends.
        std::stringstream::pos_type posAtStartOfFirstLine = streamData.tellg();

        std::string secondLine;
        std::getline(streamData, firstLine);

        std::stringstream::pos_type posAtStartOfSecondLine = streamData.tellg();
        std::getline(streamData, secondLine);

        bool header = true;
        while (header)
        {
            if (isValidTableData(columnCount, firstLine) &&
                isValidTableData(columnCount, secondLine))
            {
                // Two consecutive data lines: the header has ended
                header = false;
                break;
            }
            else
            {
                std::vector<std::string> words = splitLineAndRemoveComments(firstLine);
                if (words.size() > 0)
                {
                    allHeaderRows.push_back(words);
                }
            }

            posAtStartOfFirstLine = posAtStartOfSecondLine;
            firstLine = secondLine;

            posAtStartOfSecondLine = streamData.tellg();
            std::getline(streamData, secondLine);

            if (!streamData.good())
            {
                header = false;
            }
        }

        streamData.seekg(posAtStartOfFirstLine);
    }

    std::vector<std::string> unitNames;
    std::vector<double> scaleFactors;
    std::vector< std::vector< std::string > > headerRows;

    // Separate the unit row and the scale factor row from the remaining header rows
    for (const auto& rowWords : allHeaderRows)
    {
        bool excludeFromHeader = false;

        if (rowWords.size() == columnCount)
        {
            if (unitNames.size() == 0)
            {
                for (const std::string& word : rowWords)
                {
                    if (hasTimeUnit(word))
                    {
                        unitNames = rowWords;
                        excludeFromHeader = true;

                        // One time unit is enough to classify the row
                        break;
                    }
                }
            }

            if (scaleFactors.size() == 0)
            {
                std::vector<double> values;

                if (hasOnlyValidDoubleValues(rowWords, &values))
                {
                    scaleFactors = values;
                    excludeFromHeader = true;
                }
            }
        }

        if (!excludeFromHeader)
        {
            headerRows.push_back(rowWords);
        }
    }

    if (columnCount != unitNames.size())
    {
        if (errorText) errorText->push_back("Number of quantities is different from number of units");

        return emptyTable;
    }


    std::vector<Column> columnInfos;

    // Create string vectors for each column
    {
        std::vector<std::string> parserErrors;
        std::vector<std::vector<std::string>> tableHeaderText = RifEclipseUserDataKeywordTools::buildColumnHeaderText(quantityNames, headerRows, &parserErrors);
        if (parserErrors.size() > 0)
        {
            if (errorText) errorText->insert(errorText->end(), parserErrors.begin(), parserErrors.end());

            return emptyTable;
        }


        // For each column header, create rif adress and date time
        for (size_t i = 0; i < tableHeaderText.size(); i++)
        {
            // Bind by reference; the header text is only read here
            const auto& columnText = tableHeaderText[i];
            if (columnText.size() == 0)
            {
                if (errorText) errorText->push_back("Detected column with no content");
                continue;
            }

            // unitNames holds exactly columnCount entries at this point. Nothing visible here
            // guarantees that the header text has the same number of columns, so guard the index.
            if (i >= unitNames.size())
            {
                if (errorText) errorText->push_back("Number of column headers is different from number of units");

                return emptyTable;
            }

            std::string quantity = columnText[0];
            std::string unit = unitNames[i];

            // Everything after the quantity name is passed on as the column header
            std::vector<std::string> columnHeader;

            if (columnText.size() > 1) columnHeader.insert(columnHeader.begin(), columnText.begin() + 1, columnText.end());

            RifEclipseSummaryAddress adr = RifEclipseUserDataKeywordTools::makeAndFillAddress(quantity, columnHeader);

            Column ci = Column::createColumnInfoFromRsmData(quantity, unit, adr);

            columnInfos.push_back(ci);
        }
    }

    return TableData(origin, startDate, columnInfos);
}
Exemple #5
0
void CSvsFile::DeleteData(size_t row,size_t col)
{
	CheckRowCol(row, col);
	m_vecTR[row][col] = TableData();
}