void CClause::BuildVectorOfWords(CFormatCaller& FormatCaller, CSVI pVar) const
{
    const CMorphVariant& synVar = *pVar;
    FormatCaller.sent.clear();
    int iWordNum = m_iFirstWord;

    for (int i = 0; i < synVar.m_SynUnits.size(); i++)
    {
        const CSynUnit& SynVarUnit = synVar.m_SynUnits[i];
        CSynPlmLine plmWord;

        if (SynVarUnit.m_Type == EWord)
        {
            plmWord.m_UnitType = EWord;
            assert(SynVarUnit.m_iHomonymNum < GetWords()[iWordNum].GetHomonymsCount());
            GetWords()[iWordNum].InitializePlmLine(plmWord, SynVarUnit.m_iHomonymNum);
            plmWord.SetMorphByAncodePattern(SynVarUnit);
            plmWord.m_FoundPrepDependCases = SynVarUnit.m_FoundPrepDependCases;
            iWordNum++;
        }
        else
        {
            plmWord.m_UnitType = EClause;
            plmWord.m_Clause = SynVarUnit.m_SentPeriod;

            int ii = m_pSent->FindClauseIndexByPeriod(SynVarUnit.m_SentPeriod);
            assert(ii != -1);
            const CClause& clause = m_pSent->GetClause(ii);

            if (clause.IsRelative())
            {
                plmWord.SetFlag(fl_relative_clause);
                const CSynHomonym& HomWhose = GetWords()[clause.m_RelativeWord.m_WordNo].GetSynHomonym(clause.m_RelativeWord.m_HomonymNo);
                plmWord.SetMorphByAncodePattern(HomWhose);
            }
            else
            {
                if (SynVarUnit.m_iClauseTypeNum != -1)
                {
                    const CAncodePattern* P = clause.GetRootAncodePattern(SynVarUnit.m_iClauseTypeNum);
                    if (P)
                        plmWord.SetMorphByAncodePattern(*P);
                }
            }

            if (SynVarUnit.m_iClauseTypeNum != -1)
                plmWord.m_ClauseType = clause.m_vectorTypes[SynVarUnit.m_iClauseTypeNum].m_Type;

            iWordNum += SynVarUnit.m_SentPeriod.size();
        }

        FormatCaller.AddWord(plmWord);
    }

    /*
    for (i = 0; i < FormatCaller.sent.size(); i++)
    {
        if (FormatCaller.sent[i].m_UnitType == EClause)
            continue;
        assert(i < synVar.m_SynUnits.size());
        FormatCaller.SetGrammems(i, synVar.m_SynUnits[i].m_iGrammems, synVar.m_SynUnits[i].m_GramCodes.c_str());
    }
    */
}
void CClause::DeleteHomonym(int iW, int iH)
{
    CSynWord& word = GetWords()[iW];
    assert(m_pSent->m_Words[iW].m_Homonyms.size() > 1);

    rml_TRACE("Delete homonym \"%s\" from Word \"%s\"(Only %i homonyms left)(InOborot=%s)\n",
        m_pSent->m_Words[iW].m_Homonyms[iH].m_strLemma.c_str(),
        m_pSent->m_Words[iW].m_strWord.c_str(),
        m_pSent->m_Words[iW].m_Homonyms.size(),
        m_pSent->m_Words[iW].m_Homonyms[iH].m_bInOb ? "true" : "false");

    int SynVarWordNo = UnitNoByWordNo(iW);

    // destroy the morphological variants and groups that use this homonym
    SVI pSynVar = m_SynVariants.begin();
    while (pSynVar != m_SynVariants.end())
    {
        const CMorphVariant& synVar = *pSynVar;
        if (synVar.m_SynUnits[SynVarWordNo].m_iHomonymNum == iH)
            pSynVar = EraseMorphVariant(pSynVar);
        else
            pSynVar++;
    }

    // delete the homonym from the word itself
    word.EraseHomonym(iH);

    // a clause root could point to this homonym; such roots must be deleted
    for (int TypeNo = 0; TypeNo < m_vectorTypes.size(); TypeNo++)
        if (m_vectorTypes[TypeNo].m_Root.m_WordNo == iW)
        {
            if (m_vectorTypes[TypeNo].m_Root.m_HomonymNo == iH)
            {
                DeleteClauseType(TypeNo);
                TypeNo--;
            }
            else
                m_vectorTypes[TypeNo].m_Root.UpdateWhenDeleteHomonym(iW, iH);
        }

    m_RelativeWord.UpdateWhenDeleteHomonym(iW, iH);
    if (m_RelativeWord.m_HomonymNo == -1)
        m_RelativeWord.m_WordNo = -1;

    // decrement the homonym indices in all morphological variants
    // and recompute CSynVariant::m_ClauseTypeNo, since the vector
    // of clause types has been changed
    for (pSynVar = m_SynVariants.begin(); pSynVar != m_SynVariants.end(); pSynVar++)
    {
        CMorphVariant& synVar = *pSynVar;
        assert(synVar.m_SynUnits[SynVarWordNo].m_iHomonymNum != iH);
        if (synVar.m_SynUnits[SynVarWordNo].m_iHomonymNum > iH)
            synVar.m_SynUnits[SynVarWordNo].m_iHomonymNum--;
        m_pSent->ChooseClauseType(m_vectorTypes, synVar);
    }

    word.UpdateConjInfo();
}
void TNGramBs::GetNGramIdV(const TStr& HtmlStr, TIntV& NGramIdV, TIntPrV& NGramBEChXPrV) const {
  // create MxNGramLen queues
  TVec<TIntQ> WIdQV(MxNGramLen);
  TVec<TIntPrQ> BEChXPrQV(MxNGramLen);
  for (int NGramLen=1; NGramLen<MxNGramLen; NGramLen++){
    WIdQV[NGramLen].Gen(100*NGramLen, NGramLen+1);
    BEChXPrQV[NGramLen].Gen(100*NGramLen, NGramLen+1);
  }
  bool AllWIdQClrP=true;
  // extract words from text-string
  PSIn HtmlSIn=TStrIn::New(HtmlStr, false);
  THtmlLx HtmlLx(HtmlSIn);
  while (HtmlLx.Sym!=hsyEof){
    if ((HtmlLx.Sym==hsyStr)||(HtmlLx.Sym==hsyNum)){
      // get word-string & word-id
      TStr WordStr=HtmlLx.UcChA;
      int WId;
      int SymBChX=HtmlLx.SymBChX;
      int SymEChX=HtmlLx.SymEChX;
      if ((SwSet.Empty())||(!SwSet->IsIn(WordStr))){
        if (!Stemmer.Empty()){WordStr=Stemmer->GetStem(WordStr);}
        if (IsWord(WordStr, WId)){
          if (!IsSkipWord(WId)){
            NGramIdV.Add(0+WId); // add single word
            NGramBEChXPrV.Add(TIntPr(SymBChX, SymEChX)); // add positions
            for (int NGramLen=1; NGramLen<MxNGramLen; NGramLen++){
              TIntQ& WIdQ=WIdQV[NGramLen];
              TIntPrQ& BEChXPrQ=BEChXPrQV[NGramLen];
              WIdQ.Push(WId);
              BEChXPrQ.Push(TIntPr(SymBChX, SymEChX));
              AllWIdQClrP=false;
              // if queue full
              if (WIdQ.Len()==NGramLen+1){
                // create sequence
                TIntV WIdV; WIdQ.GetSubValVec(0, WIdQ.Len()-1, WIdV);
                TIntPrV BEChXPrV; BEChXPrQ.GetSubValVec(0, BEChXPrQ.Len()-1, BEChXPrV);
                // add ngram-id or reset queues
                int WIdVP;
                if (WIdVToFqH.IsKey(WIdV, WIdVP)){
                  // if sequence is frequent
                  int NGramId=GetWords()+WIdVP; // get sequence ngram-id
                  NGramIdV.Add(NGramId); // add sequence ngram-id
                  NGramBEChXPrV.Add(TIntPr(BEChXPrV[0].Val1, BEChXPrV.Last().Val2)); // add positions
                }
              }
            }
          }
        } else {
          // break queue sequences if an infrequent word occurs
          if (!AllWIdQClrP){
            for (int NGramLen=1; NGramLen<MxNGramLen; NGramLen++){
              TIntQ& WIdQ=WIdQV[NGramLen];
              TIntPrQ& BEChXPrQ=BEChXPrQV[NGramLen];
              if (!WIdQ.Empty()){WIdQ.Clr(); BEChXPrQ.Clr();}
            }
            AllWIdQClrP=true;
          }
        }
      }
    }
    // get next symbol
    HtmlLx.GetSym();
  }
}
void CClause::AssignVariantWeight(CMorphVariant& synVariant)
{
    try
    {
        int iWeight = 0;

        // adding weight by each maximal group
        int i = synVariant.m_vectorGroups.GetGroups().size() - 1;
        bool bSubjIsInsideGroup = false;
        bool bPredicateIsInsideGroup = false;

        while (i >= 0)
        {
            const CGroup& group = synVariant.m_vectorGroups.GetGroups()[i];
            CPeriod PeriodInSentenceCoords = synVariant.GetSentenceCoordinates(group);

            // check whether the first or the last word in the group is indeclinable
            bool bStartsOrEndsWithIndeclinable = false;
            {
                const CSynUnit& UnitStart = synVariant.m_SynUnits[group.m_iFirstWord];
                if (   (UnitStart.m_Type == EWord)
                    && (GetOpt()->m_IndeclinableMask & GetWords()[PeriodInSentenceCoords.m_iFirstWord].m_Homonyms[UnitStart.m_iHomonymNum].m_iGrammems))
                    bStartsOrEndsWithIndeclinable = true;

                const CSynUnit& UnitEnd = synVariant.m_SynUnits[group.m_iLastWord];
                if (   (UnitEnd.m_Type == EWord)
                    && (GetOpt()->m_IndeclinableMask & GetWords()[PeriodInSentenceCoords.m_iLastWord].m_Homonyms[UnitEnd.m_iHomonymNum].m_iGrammems))
                    bStartsOrEndsWithIndeclinable = true;
            }

            if (   GetOpt()->IsGroupWithoutWeight(group.m_GroupType, group.m_Cause)
                || ((group.size() == 2) && bStartsOrEndsWithIndeclinable))
            {
                i--; // go to the subgroup or to the next group
                continue;
            }

            // adding weight (size of the group in words); note that the group can contain a clause
            iWeight += PeriodInSentenceCoords.size();

            // if the first or the last word in the group is indeclinable, decrease the weight
            if (bStartsOrEndsWithIndeclinable)
                iWeight--;

            // a subject must exist before its period can be checked against the group
            if (   !synVariant.m_Subjects.empty()
                && CPeriod(synVariant.GetFirstSubject()).is_part_of(group))
                bSubjIsInsideGroup = true;

            if (CPeriod(synVariant.m_iPredk).is_part_of(group))
                bPredicateIsInsideGroup = true;

            // go to the subclauses and add weight by each subclause
            for (long k = group.m_iFirstWord; k <= group.m_iLastWord; k++)
                if (synVariant.m_SynUnits[k].m_Type == EClause)
                {
                    CPeriod p = synVariant.m_SynUnits[k].m_SentPeriod;
                    CClause* clause = m_pSent->FindClauseByPeriod(p);
                    assert(clause != NULL);
                    vector<CMorphVariant*> clause_vars = clause->GetSynVariantIndexesByTypeNum(synVariant.m_SynUnits[k].m_iClauseTypeNum);
                    for (long j = 0; j < clause_vars.size(); j++)
                        if (clause_vars[j]->m_iWeight != -1)
                            clause_vars[j]->m_iWeight += iWeight;
                }

            // go to the next group
            for (i--; i >= 0 && synVariant.m_vectorGroups.GetGroups()[i].is_part_of(group); i--);
        }

        if (!bSubjIsInsideGroup)
            if (synVariant.m_bGoodSubject)
                iWeight += 1;

        if (!bPredicateIsInsideGroup)
            if (synVariant.m_bGoodSubject)
                iWeight += 1;

        synVariant.m_iWeight = iWeight;
    }
    catch (...)
    {
        return;
    }
}
static void ReadOBJ(const char *filename) {
    FILE *fp = fopen(filename, "r");
    if (fp == NULL)
        sreFatalError("Could not open file %s.", filename);
    for (;;) {
        char *str = GetWords(fp);
        if (str == NULL)
            // End of file.
            break;
        RemoveComments();
        if (nu_words == 0)
            continue;
        int command = - 1;
        if (strcmp(words[0], "v") == 0)
            command = 0;
        else if (strcmp(words[0], "vn") == 0)
            command = 1;
        else if (strcmp(words[0], "vt") == 0)
            command = 2;
        else if (strcmp(words[0], "f") == 0)
            command = 3;
        if (command < 0)
            // First word not recognized.
            continue;
        if (command <= 2) {
            // Get up to four coordinates.
            float coord[4];
            int n = GetCoordinates(1, coord);
            if (command == 0)
                AddVertexAttribute(SRE_ATTRIBUTE_POSITION, coord, n);
            else if (command == 1)
                AddVertexAttribute(SRE_ATTRIBUTE_NORMAL, coord, n);
            else
                AddVertexAttribute(SRE_ATTRIBUTE_TEXCOORDS, coord, n);
        }
        else {
            // Face definition.
            BeginFace(4, SRE_POSITION_MASK | SRE_NORMAL_MASK | SRE_TEXCOORDS_MASK);
            for (int word_index = 1; word_index < nu_words; word_index++) {
                int vertex_index[3];
                DecodeOBJFaceIndices(words[word_index], vertex_index);
                for (int k = 0; k < 3; k++) {
                    // Special value INT_MAX means not used; AddFace expects - 1
                    // for unused attributes.
                    if (vertex_index[k] == INT_MAX)
                        vertex_index[k] = - 1;
                    else {
                        if (vertex_index[k] > 0)
                            // Regular index; counting starts at 1 in OBJ files.
                            vertex_index[k]--;
                        else if (vertex_index[k] < 0)
                            // Negative number is a relative index.
                            vertex_index[k] += nu_attribute_vertices[OBJ_attributes[k]];
                        else
                            ModelFileReadError("Vertex index of 0 not allowed in OBJ file");
                    }
                }
                AddFaceVertex(OBJ_attributes, vertex_index);
            }
            EndFace();
        }
    }
    fclose(fp);
}
int main(int aArgn, char** aArgs) {
#define MAX_STRING 8192
  char buffer[MAX_STRING+1];
  int length;

  /*EXETYPE WINDOWS\n\ */
  printf("\
LIBRARY %s\n\
EXPORTS\n",aArgs[1]);

  while(1) {
    if(fgets(buffer,MAX_STRING,stdin)==NULL) return EXIT_FAILURE;

    /* On some systems (NT), editors put \r\n in place of \n when saving
       files; we therefore also look for \r\n. */
    length = strlen(buffer);
    if( (length>=2) && (buffer[length-2]=='\r') && (buffer[length-1]=='\n') ) {
      buffer[length-2] = '\0';
      length--;
      length--;
    } else if( (length>=1) && (buffer[length-1]=='\n') ) {
      buffer[length-1] = '\0';
      length--;
    }

    if(strstr(buffer,"SECT")==NULL) continue;
    if(strstr(buffer,"External")==NULL) continue;

    if(strstr(buffer,"??_")!=NULL) {
      if(strstr(buffer,"operator/=")!=NULL) {
        /* Keep operator /= */
        /*} else if(strstr(buffer,"operator[]")!=NULL) {*/
      } else {
        continue;
      }
    }

    {
      char** words;
      int wordn;
      words = GetWords(buffer," ",&wordn);

      if(wordn>=5) {
        if((wordn>=7)&&(strcmp(words[4],"External")==0)) {
          int iword = 6;
          int offset = 0;
#if defined(_MSC_VER) && _MSC_VER <= 1500
          /* On my XP with Visual Studio 9.0, I need the below for C functions
             starting with an '_'. For example to build dlls for ourex/Python
             and ourex/zlib. */
          if(words[iword][0]=='_') offset = 1;
#endif
          // NOTE : with Visual, for global data in a shared lib, for example :
          //   int test_g = 0;
          // a user program must use it with :
          //   __declspec(dllimport) int test_g;
          // (extern int test_g; does not work).
          if( (strstr(buffer,": static")!=NULL) ||
              (strstr(buffer,"(class")!=NULL) ) {
            /* static data members are not DATA */
            /* extern objects are not DATA */
            printf(" %s\n",words[iword]+offset);
          } else {
            /* DATA */
            printf(" %s\tDATA\n",words[iword]+offset);
          }
        } else if((wordn>=8)&&(strcmp(words[4],"()")==0)) {
          /* code */
          int iword = 7;
          int offset = 0;
#if defined(_MSC_VER) && _MSC_VER <= 1500
          /* On my XP with Visual Studio 9.0, I need the below for C functions
             starting with an '_'. For example to build dlls for ourex/Python
             and ourex/zlib. */
          if(words[iword][0]=='_') offset = 1;
#endif
          printf(" %s\n",words[iword]+offset);
        }
      }

      {
        int count;
        for(count=0;count<wordn;count++) if(words[count]) free(words[count]);
        if(words) free(words);
      }
    }
    /*printf("%s\n",buffer);*/
  }

  aArgn = 0;
  return EXIT_SUCCESS;
}