void TRSet::SaveXml(const TStr& FNm){ TFOut FOut(FNm); FILE* fOut=FOut.GetFileId(); fprintf(fOut, "<RSet>\n"); // fixed fields if (!GetUrlStr().Empty()){ fprintf(fOut, " <Url>%s</Url>\n", TXmlLx::GetXmlStrFromPlainStr(GetUrlStr()).CStr());} if (!GetNextUrlStr().Empty()){ fprintf(fOut, " <NextUrl>%s</NextUrl>\n", TXmlLx::GetXmlStrFromPlainStr(GetNextUrlStr()).CStr());} if (!GetQueryStr().Empty()){ fprintf(fOut, " <Query>%s</Query>\n", TXmlLx::GetXmlStrFromPlainStr(GetQueryStr()).CStr());} if (GetAllHits()!=-1){ fprintf(fOut, " <AllHits>%d</AllHits>\n", GetAllHits());} // hits fprintf(fOut, " <Hits Size=\"%d\">\n", GetHits()); for (int HitN=0; HitN<GetHits(); HitN++){ TStr HitUrlStr; TStr HitTitleStr; TStr HitSrcNm; TStr HitCtxStr; GetHit(HitN, HitUrlStr, HitTitleStr, HitSrcNm, HitCtxStr); fprintf(fOut, " <Hit Num=\"%d\">\n", 1+HitN); fprintf(fOut, " <Url>%s</Url>\n", TXmlLx::GetXmlStrFromPlainStr(HitUrlStr).CStr()); fprintf(fOut, " <Title>%s</Title>\n", TXmlLx::GetXmlStrFromPlainStr(HitTitleStr).CStr()); if (!HitSrcNm.Empty()){ fprintf(fOut, " <Source>%s</Source>\n", TXmlLx::GetXmlStrFromPlainStr(HitSrcNm).CStr());} fprintf(fOut, " <Snippet>%s</Snippet>\n", TXmlLx::GetXmlStrFromPlainStr(HitCtxStr).CStr()); fprintf(fOut, " </Hit>\n"); } fprintf(fOut, " </Hits>\n"); fprintf(fOut, "</RSet>"); }
// Builds a bag-of-words document base from the hits of this result set.
// Each hit contributes one document whose text is "<title>. <context>" and
// whose document name is the hit title.
//   SwSetTypeNm   - name handed to TSwSet::GetSwSet to obtain the stop-word set
//   StemmerTypeNm - name handed to TStemmer::GetStemmer to obtain the stemmer
//   MxNGramLen, MnNGramFq - forwarded to TNGramBs::GetNGramBsFromHtmlStrV
// Progress is reported on stdout.
PBowDocBs TRSet::GetBowDocBs(
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq) const {
  // stop-words and stemmer
  PSwSet StopWordSet=TSwSet::GetSwSet(SwSetTypeNm);
  PStemmer WordStemmer=TStemmer::GetStemmer(StemmerTypeNm);

  // collect one text per hit; these texts feed the n-gram base and are
  // reused below as the document bodies
  TStrV DocHtmlStrV(GetHits(), 0);
  for (int HitN=0; HitN<GetHits(); HitN++){
    TStr TitleStr=GetHitTitleStr(HitN);
    TStr CtxStr=GetHitCtxStr(HitN);
    DocHtmlStrV.Add(TitleStr+". "+CtxStr);
  }
  PNGramBs NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
   DocHtmlStrV, MxNGramLen, MnNGramFq, StopWordSet, WordStemmer);

  // create document-base
  printf("Create Bag-Of-Words Base ... ");
  PBowDocBs DocBs=TBowDocBs::New();
  DocBs->PutNGramBs(NGramBs);
  for (int HitN=0; HitN<GetHits(); HitN++){
    DocBs->AddHtmlDoc(GetHitTitleStr(HitN), TStrV(), DocHtmlStrV[HitN], true);
  }
  DocBs->AssertOk();
  printf("Done.\n");

  // return bag-of-words
  return DocBs;
}
// Splits the hits whose intensity exceeds MaxI*Threshold into two counts.
// A counted hit is "dependent" when it has the same ion series and charge as
// the previously counted hit and its number is exactly one greater; every
// other counted hit is "independent".
//   Independent, Dependent - output counters, reset to 0 on entry
//   Threshold, MaxI        - a hit is counted only if intensity > MaxI*Threshold
void CMSHit::CountHitsByType(int& Independent, int& Dependent, double Threshold, int MaxI) const
{
    Independent = 0;
    Dependent = 0;

    // properties of the most recently counted hit; -1 means "none yet"
    int PrevIon(-1), PrevCharge(-1), PrevNumber(-1);

    for (int iHit = 0; iHit < GetHits(); ++iHit) {
        // hits at or below the intensity cut-off are ignored entirely and
        // do not update the "previous hit" state
        if (!(GetHitInfo(iHit).GetIntensity() > MaxI*Threshold)) {
            continue;
        }

        const bool FollowsPrevious =
            GetHitInfo(iHit).GetIonSeries() == PrevIon &&
            GetHitInfo(iHit).GetCharge() == PrevCharge &&
            PrevNumber + 1 == GetHitInfo(iHit).GetNumber();

        if (FollowsPrevious) {
            ++Dependent;
        } else {
            ++Independent;
        }

        PrevIon = GetHitInfo(iHit).GetIonSeries();
        PrevCharge = GetHitInfo(iHit).GetCharge();
        PrevNumber = GetHitInfo(iHit).GetNumber();
    }
}
// return number of hits above threshold int CMSHit::CountHits(double Threshold, int MaxI) { int i, retval(0); for(i = 0; i < GetHits(); i++) if(SetHitInfo(i).GetIntensity() > MaxI*Threshold) retval++; return retval; }
// Formats the current hit count into fHitsLabel and shows it in fHitsView.
// The update happens only while the window lock is held; if Lock() fails
// the label is left untouched.
void
PoorManWindow::UpdateHitsLabel()
{
	if (!Lock())
		return;

	// "%lu" requires an unsigned long argument. The return type of
	// GetHits() is not visible here, so convert explicitly to keep the
	// variadic argument in step with the format string on every ABI.
	// NOTE(review): sprintf is unbounded; if fHitsLabel is a fixed-size
	// array, consider snprintf(fHitsLabel, sizeof(fHitsLabel), ...).
	sprintf(fHitsLabel, B_TRANSLATE("Hits: %lu"),
		static_cast<unsigned long>(GetHits()));
	fHitsView->SetText(fHitsLabel);

	Unlock();
}
void TGgSchRSet::Merge(const PGgSchRSet& RSet){ if (RSet.Empty()){return;} // create hash table of existing urls TStrH TitleStrH(GetHits()); for (int HitN=0; HitN<GetHits(); HitN++){ TitleStrH.AddKey(GetHit(HitN)->TitleStr); } // merge hits for (int HitN=0; HitN<RSet->GetHits(); HitN++){ PGgSchRef Ref=RSet->GetHit(HitN); if (!TitleStrH.IsKey(Ref->TitleStr)){ AddHit(Ref); TitleStrH.AddKey(Ref->TitleStr); } } // reset fixed fields PutNextUrlStr(""); PutAllHits(-1); }
void TRSet::Merge(const PRSet& RSet){ if (RSet.Empty()){return;} // create hash table of existing urls TStrH UrlStrH(GetHits()); for (int HitN=0; HitN<GetHits(); HitN++){ UrlStrH.AddKey(GetHitUrlStr(HitN)); } // merge hits for (int HitN=0; HitN<RSet->GetHits(); HitN++){ TStr HitUrlStr; TStr HitTitleStr; TStr HitSrcNm; TStr HitCtxStr; RSet->GetHit(HitN, HitUrlStr, HitTitleStr, HitSrcNm, HitCtxStr); if (!UrlStrH.IsKey(HitUrlStr)){ AddHit(HitUrlStr, HitTitleStr, HitSrcNm, HitCtxStr); UrlStrH.AddKey(HitUrlStr); } } // reset fixed fields PutNextUrlStr(""); PutAllHits(-1); }
// Returns the index of the first hit whose URL equals UrlStr, or -1 when no
// hit matches.
//   UrlStr - URL to look for
//   LcP    - when true, both sides are lower-cased (GetLc) before comparing;
//            when false, the URLs are compared as they are
int TRSet::GetHitN(const TStr& UrlStr, const bool& LcP) const {
  const int Hits=GetHits();
  // The comparison mode does not change during the scan, so decide once and
  // build the lower-cased query only on the path that actually uses it.
  if (LcP){
    TStr LcUrlStr=UrlStr.GetLc();
    for (int HitN=0; HitN<Hits; HitN++){
      if (GetHitUrlStr(HitN).GetLc()==LcUrlStr){return HitN;}
    }
  } else {
    for (int HitN=0; HitN<Hits; HitN++){
      if (GetHitUrlStr(HitN)==UrlStr){return HitN;}
    }
  }
  return -1;
}
void TGgSchRSet::SaveXml(const TStr& FNm){ TFOut FOut(FNm); FILE* fOut=FOut.GetFileId(); fprintf(fOut, "<RSet>\n"); // fixed fields if (!GetUrlStr().Empty()){ fprintf(fOut, " <Url>%s</Url>\n", TXmlLx::GetXmlStrFromPlainStr(GetUrlStr()).CStr());} if (!GetNextUrlStr().Empty()){ fprintf(fOut, " <NextUrl>%s</NextUrl>\n", TXmlLx::GetXmlStrFromPlainStr(GetNextUrlStr()).CStr());} if (!GetQueryStr().Empty()){ fprintf(fOut, " <Query>%s</Query>\n", TXmlLx::GetXmlStrFromPlainStr(GetQueryStr()).CStr());} if (GetAllHits()!=-1){ fprintf(fOut, " <AllHits>%d</AllHits>\n", GetAllHits());} // hits fprintf(fOut, " <Hits Size=\"%d\">\n", GetHits()); for (int HitN=0; HitN<GetHits(); HitN++){ PGgSchRef Ref=GetHit(HitN); Ref->SaveXml(fOut, 1+HitN); } fprintf(fOut, " </Hits>\n"); fprintf(fOut, "</RSet>"); }
// for poisson test // return number of hits above threshold scaled by m/z positions int CMSHit::CountHits(double Threshold, int MaxI, int High) { int i ; float retval(0); for(i = 0; i < GetHits(); i++) if(SetHitInfo(i).GetIntensity() > MaxI*Threshold) { if (SetHitInfo(i).GetMZ() > High/2) retval += 0.5 + 2.0*(High - SetHitInfo(i).GetMZ())/(float)High; else retval += 1.5 - 2.0*SetHitInfo(i).GetMZ()/(float)High; } return (int)(retval+0.5); }
// Builds a bag-of-words document base from the titles of the hits.
// Each hit contributes one document whose text is its title and whose
// document name is the hit index rendered as a string. The stop-word set is
// obtained with swstEn523, the stemmer with stmtPorter, and the n-gram base
// is built with both numeric parameters set to 3.
// Progress is reported on stdout.
PBowDocBs TGgSchRSet::GetBowDocBs() const {
  // stop-words and stemmer
  PSwSet StopWordSet=TSwSet::GetSwSet(swstEn523);
  PStemmer WordStemmer=TStemmer::New(stmtPorter, true);

  // collect one text per hit; these texts feed the n-gram base and are
  // reused below as the document bodies
  TStrV TitleHtmlStrV(GetHits(), 0);
  for (int HitN=0; HitN<GetHits(); HitN++){
    TitleHtmlStrV.Add(GetHit(HitN)->TitleStr);
  }
  PNGramBs NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
   TitleHtmlStrV, 3, 3, StopWordSet, WordStemmer);

  // create document-base
  printf("Create Bag-Of-Words Base ... ");
  PBowDocBs DocBs=TBowDocBs::New();
  DocBs->PutNGramBs(NGramBs);
  for (int HitN=0; HitN<GetHits(); HitN++){
    DocBs->AddHtmlDoc(TInt::GetStr(HitN), TStrV(), TitleHtmlStrV[HitN], true);
  }
  DocBs->AssertOk();
  printf("Done.\n");

  // return bag-of-words
  return DocBs;
}
int URLData::TopTen() { //add to a list, all the website visited and their hits for( auto it = dataMap.begin(); it != dataMap.end(); ++it ) { addTopTen(it->first,(it->second)->GetHits()); } //sort this list according to their hits number (less first) listTopTen.sort(); //displays the last 10 elements of this list auto itend = listTopTen.end(); for (int i = 0; i < TOP_DISP; i++) { itend--; //displays each ten first website with this syntax : "Website : X" (X, number of hits) std::cout << itend->GetName() << " : " << itend->GetHits() << std::endl; } return 0; }