Ejemplo n.º 1
0
void TRSet::SaveXml(const TStr& FNm){
  TFOut FOut(FNm); FILE* fOut=FOut.GetFileId();
  fprintf(fOut, "<RSet>\n");
  // fixed fields
  if (!GetUrlStr().Empty()){
    fprintf(fOut, "  <Url>%s</Url>\n", TXmlLx::GetXmlStrFromPlainStr(GetUrlStr()).CStr());}
  if (!GetNextUrlStr().Empty()){
    fprintf(fOut, "  <NextUrl>%s</NextUrl>\n", TXmlLx::GetXmlStrFromPlainStr(GetNextUrlStr()).CStr());}
  if (!GetQueryStr().Empty()){
    fprintf(fOut, "  <Query>%s</Query>\n", TXmlLx::GetXmlStrFromPlainStr(GetQueryStr()).CStr());}
  if (GetAllHits()!=-1){
    fprintf(fOut, "  <AllHits>%d</AllHits>\n", GetAllHits());}
  // hits
  fprintf(fOut, "  <Hits Size=\"%d\">\n", GetHits());
  for (int HitN=0; HitN<GetHits(); HitN++){
    TStr HitUrlStr; TStr HitTitleStr; TStr HitSrcNm; TStr HitCtxStr;
    GetHit(HitN, HitUrlStr, HitTitleStr, HitSrcNm, HitCtxStr);
    fprintf(fOut, "    <Hit Num=\"%d\">\n", 1+HitN);
    fprintf(fOut, "      <Url>%s</Url>\n", TXmlLx::GetXmlStrFromPlainStr(HitUrlStr).CStr());
    fprintf(fOut, "      <Title>%s</Title>\n", TXmlLx::GetXmlStrFromPlainStr(HitTitleStr).CStr());
    if (!HitSrcNm.Empty()){
      fprintf(fOut, "      <Source>%s</Source>\n", TXmlLx::GetXmlStrFromPlainStr(HitSrcNm).CStr());}
    fprintf(fOut, "      <Snippet>%s</Snippet>\n", TXmlLx::GetXmlStrFromPlainStr(HitCtxStr).CStr());
    fprintf(fOut, "    </Hit>\n");
  }
  fprintf(fOut, "  </Hits>\n");
  fprintf(fOut, "</RSet>");
}
Ejemplo n.º 2
0
// Builds a bag-of-words document base from the hits of this result-set.
// SwSetTypeNm and StemmerTypeNm select the stop-word set and the stemmer;
// MxNGramLen and MnNGramFq are handed to the n-gram base construction.
// Each hit yields one document whose text is "<title>. <context>" and whose
// name is the hit title.
PBowDocBs TRSet::GetBowDocBs(
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq) const {
  // stop-words and stemmer
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // one text per hit: title and context joined by ". "
  const int Hits=GetHits();
  TStrV DocStrV(Hits, 0);
  for (int DocN=0; DocN<Hits; DocN++){
    DocStrV.Add(GetHitTitleStr(DocN)+". "+GetHitCtxStr(DocN));
  }
  // n-grams over all hit texts
  PNGramBs NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
   DocStrV, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  // fill the document-base, one document per hit
  printf("Create Bag-Of-Words Base ... ");
  PBowDocBs DocBs=TBowDocBs::New();
  DocBs->PutNGramBs(NGramBs);
  for (int DocN=0; DocN<Hits; DocN++){
    DocBs->AddHtmlDoc(GetHitTitleStr(DocN), TStrV(), DocStrV[DocN], true);
  }
  DocBs->AssertOk();
  printf("Done.\n");
  return DocBs;
}
Ejemplo n.º 3
0
// Splits the hits whose intensity is strictly greater than MaxI*Threshold
// into two counters.
// A hit is counted as Dependent when it has the same ion series and charge
// as the previous qualifying hit and its number is exactly one greater;
// every other qualifying hit is counted as Independent.
// Both output counters are reset to 0 before counting.
void 
CMSHit::CountHitsByType(int& Independent,
                        int& Dependent, 
                        double Threshold, 
                        int MaxI) const
{
    const double MinIntensity = MaxI*Threshold;
    // attributes of the previous qualifying hit; -1 until one was seen
    int PrevIon = -1;
    int PrevCharge = -1;
    int PrevNumber = -1;

    Independent = 0;
    Dependent = 0;
    for (int HitIdx = 0; HitIdx < GetHits(); HitIdx++) {
        // hits at or below the threshold are ignored and do not update
        // the "previous hit" state
        if (!(GetHitInfo(HitIdx).GetIntensity() > MinIntensity))
            continue;

        const int Ion = GetHitInfo(HitIdx).GetIonSeries();
        const int Charge = GetHitInfo(HitIdx).GetCharge();
        const int Number = GetHitInfo(HitIdx).GetNumber();

        const bool FollowsPrev =
            Ion == PrevIon && Charge == PrevCharge && PrevNumber + 1 == Number;
        if (FollowsPrev)
            Dependent++;
        else
            Independent++;

        PrevIon = Ion;
        PrevCharge = Charge;
        PrevNumber = Number;
    }
}
Ejemplo n.º 4
0
// Returns the number of hits whose intensity is strictly greater than
// MaxI*Threshold.
// NOTE(review): the hit is read through SetHitInfo(); confirm whether a
// read-only accessor would be preferable here.
int CMSHit::CountHits(double Threshold, int MaxI)
{
    const double MinIntensity = MaxI*Threshold;
    int Count = 0;

    for (int HitIdx = 0; HitIdx < GetHits(); HitIdx++) {
        if (SetHitInfo(HitIdx).GetIntensity() > MinIntensity)
            Count++;
    }
    return Count;
}
Ejemplo n.º 5
0
// Refreshes the hits label: formats the current GetHits() value into
// fHitsLabel and shows that text in fHitsView.
// Nothing happens when Lock() fails; otherwise Unlock() is called once the
// label text has been set.
void 
PoorManWindow::UpdateHitsLabel()
{
	if (!Lock())
		return;

	// NOTE(review): sprintf does not bound the write into fHitsLabel and
	// the format string is translated — confirm the buffer is large
	// enough for every translation.
	sprintf(fHitsLabel, B_TRANSLATE("Hits: %lu"), GetHits());
	fHitsView->SetText(fHitsLabel);

	Unlock();
}
Ejemplo n.º 6
0
void TGgSchRSet::Merge(const PGgSchRSet& RSet){
  if (RSet.Empty()){return;}
  // create hash table of existing urls
  TStrH TitleStrH(GetHits());
  for (int HitN=0; HitN<GetHits(); HitN++){
    TitleStrH.AddKey(GetHit(HitN)->TitleStr);
  }
  // merge hits
  for (int HitN=0; HitN<RSet->GetHits(); HitN++){
    PGgSchRef Ref=RSet->GetHit(HitN);
    if (!TitleStrH.IsKey(Ref->TitleStr)){
      AddHit(Ref);
      TitleStrH.AddKey(Ref->TitleStr);
    }
  }
  // reset fixed fields
  PutNextUrlStr("");
  PutAllHits(-1);
}
Ejemplo n.º 7
0
void TRSet::Merge(const PRSet& RSet){
  if (RSet.Empty()){return;}
  // create hash table of existing urls
  TStrH UrlStrH(GetHits());
  for (int HitN=0; HitN<GetHits(); HitN++){
    UrlStrH.AddKey(GetHitUrlStr(HitN));
  }
  // merge hits
  for (int HitN=0; HitN<RSet->GetHits(); HitN++){
    TStr HitUrlStr; TStr HitTitleStr; TStr HitSrcNm; TStr HitCtxStr;
    RSet->GetHit(HitN, HitUrlStr, HitTitleStr, HitSrcNm, HitCtxStr);
    if (!UrlStrH.IsKey(HitUrlStr)){
      AddHit(HitUrlStr, HitTitleStr, HitSrcNm, HitCtxStr);
      UrlStrH.AddKey(HitUrlStr);
    }
  }
  // reset fixed fields
  PutNextUrlStr("");
  PutAllHits(-1);
}
Ejemplo n.º 8
0
int TRSet::GetHitN(const TStr& UrlStr, const bool& LcP) const {
  int Hits=GetHits();
  TStr LcUrlStr=UrlStr.GetLc();
  for (int HitN=0; HitN<Hits; HitN++){
    if (LcP){
      if (GetHitUrlStr(HitN).GetLc()==LcUrlStr){return HitN;}
    } else {
      if (GetHitUrlStr(HitN)==UrlStr){return HitN;}
    }
  }
  return -1;
}
Ejemplo n.º 9
0
void TGgSchRSet::SaveXml(const TStr& FNm){
  TFOut FOut(FNm); FILE* fOut=FOut.GetFileId();
  fprintf(fOut, "<RSet>\n");
  // fixed fields
  if (!GetUrlStr().Empty()){
    fprintf(fOut, "  <Url>%s</Url>\n", TXmlLx::GetXmlStrFromPlainStr(GetUrlStr()).CStr());}
  if (!GetNextUrlStr().Empty()){
    fprintf(fOut, "  <NextUrl>%s</NextUrl>\n", TXmlLx::GetXmlStrFromPlainStr(GetNextUrlStr()).CStr());}
  if (!GetQueryStr().Empty()){
    fprintf(fOut, "  <Query>%s</Query>\n", TXmlLx::GetXmlStrFromPlainStr(GetQueryStr()).CStr());}
  if (GetAllHits()!=-1){
    fprintf(fOut, "  <AllHits>%d</AllHits>\n", GetAllHits());}
  // hits
  fprintf(fOut, "  <Hits Size=\"%d\">\n", GetHits());
  for (int HitN=0; HitN<GetHits(); HitN++){
    PGgSchRef Ref=GetHit(HitN);
    Ref->SaveXml(fOut, 1+HitN);
  }
  fprintf(fOut, "  </Hits>\n");
  fprintf(fOut, "</RSet>");
}
Ejemplo n.º 10
0
  // for poisson test
// Returns a position-weighted count of the hits whose intensity is strictly
// greater than MaxI*Threshold.
// A hit with m/z above High/2 (integer division) adds
// 0.5 + 2*(High - mz)/High; any other qualifying hit adds 1.5 - 2*mz/High.
// The float total is converted to int after adding 0.5.
// NOTE(review): High == 0 divides by zero below — presumably callers always
// pass a positive High; confirm.
int CMSHit::CountHits(double Threshold, int MaxI, int High)
{
    const double MinIntensity = MaxI*Threshold;
    float WeightedCount(0);

    for (int HitIdx = 0; HitIdx < GetHits(); HitIdx++) {
        // skip hits at or below the intensity threshold
        if (!(SetHitInfo(HitIdx).GetIntensity() > MinIntensity))
            continue;

        if (SetHitInfo(HitIdx).GetMZ() > High/2)
            WeightedCount += 0.5 + 2.0*(High - SetHitInfo(HitIdx).GetMZ())/(float)High;
        else
            WeightedCount += 1.5 - 2.0*SetHitInfo(HitIdx).GetMZ()/(float)High;
    }
    return (int)(WeightedCount+0.5);
}
Ejemplo n.º 11
0
// Builds a bag-of-words document base from the titles of the hits.
// Uses the swstEn523 stop-word set and a stmtPorter stemmer; the n-gram base
// is requested with the arguments 3 and 3.
// Each hit yields one document whose text is the hit title and whose name is
// the hit index rendered as a string.
PBowDocBs TGgSchRSet::GetBowDocBs() const {
  // stop-words and stemmer
  PSwSet SwSet=TSwSet::GetSwSet(swstEn523);
  PStemmer Stemmer=TStemmer::New(stmtPorter, true);
  // one text per hit: the title
  const int Hits=GetHits();
  TStrV TitleStrV(Hits, 0);
  for (int DocN=0; DocN<Hits; DocN++){
    TitleStrV.Add(GetHit(DocN)->TitleStr);
  }
  // n-grams over all titles
  PNGramBs NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
   TitleStrV, 3, 3, SwSet, Stemmer);
  // fill the document-base, one document per hit
  printf("Create Bag-Of-Words Base ... ");
  PBowDocBs DocBs=TBowDocBs::New();
  DocBs->PutNGramBs(NGramBs);
  for (int DocN=0; DocN<Hits; DocN++){
    DocBs->AddHtmlDoc(TInt::GetStr(DocN), TStrV(), TitleStrV[DocN], true);
  }
  DocBs->AssertOk();
  printf("Done.\n");
  return DocBs;
}
Ejemplo n.º 12
0
// Prints the most-visited websites to std::cout, one per line, in the form
// "<name> : <hits>", starting from the last element of the sorted list.
// At most TOP_DISP entries are printed; when the list holds fewer entries,
// only those are printed (the previous code stepped an iterator before
// begin() in that case, which is undefined behaviour).
// Always returns 0.
// NOTE(review): every call passes all of dataMap to addTopTen again —
// confirm addTopTen/listTopTen cope with repeated calls.
int URLData::TopTen()
{
    // register every visited website together with its hit count
    for (auto& entry : dataMap)
    {
        addTopTen(entry.first, (entry.second)->GetHits());
    }
    // sort the list by hit count (fewest first)
    listTopTen.sort();

    // walk backwards from the end, stopping at TOP_DISP entries or at the
    // beginning of the list, whichever comes first
    auto itend = listTopTen.end();
    for (int i = 0; i < TOP_DISP && itend != listTopTen.begin(); i++)
    {
        --itend;
        std::cout << itend->GetName() << " : " << itend->GetHits() << std::endl;
    }

    return 0;
}