PBowDocBs TNmObjBs::GetBowDocBs(const int& MnNmObjFq) const {
  printf("Generating Bag-Of-Words...\n");
  // create bag-of-words
  PBowDocBs BowDocBs=TBowDocBs::New();
  // traverse documents
  for (int DocId=0; DocId<GetDocs(); DocId++){
    if (DocId%100==0){printf("%d\r", DocId);}
    TStr DocNm=GetDocNm(DocId);
    TStr DateStr=GetDocDateStr(DocId);
    TStrV WordStrV;
    int DocNmObjs=GetDocNmObjs(DocId);
    for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){
      int NmObjId; int TermFq; GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq);
      if ((MnNmObjFq==-1)||(GetNmObjDocs(NmObjId)>=MnNmObjFq)){
        TStr NmObjStr=GetNmObjStr(NmObjId);
        for (int TermOccN=0; TermOccN<TermFq; TermOccN++){
          WordStrV.Add(NmObjStr);
        }
      }
    }
    if (!WordStrV.Empty()){
      int DId=BowDocBs->AddDoc(DocNm, TStrV(), WordStrV);
      BowDocBs->PutDateStr(DId, DateStr);
   }
  }
  // return bag-of-words
  BowDocBs->AssertOk();
  printf("\nDone.\n");
  return BowDocBs;
}
Example #2
0
//////////////////////////////////////
// URL-Redirect-Function
TSASFunRedirect::TSASFunRedirect(const TStr& FunNm,
		const TStr& SettingFNm): TSAppSrvFun(FunNm, saotUndef) { 

	printf("Loading redirects %s\n", FunNm.CStr());
	TFIn FIn(SettingFNm); TStr LnStr, OrgFunNm;
	while (FIn.GetNextLn(LnStr)) {
		TStrV PartV;  LnStr.SplitOnAllCh('\t', PartV, false);
		if (PartV.Empty()) { continue; }
		if (PartV[0].Empty()) {
			// parameters
			EAssert(PartV.Len() >= 3);
			TStr FldNm = PartV[1];
			TStr FldVal = PartV[2];
			if (FldVal.StartsWith("$")) {
				MapH.GetDat(OrgFunNm).FldNmMapH.AddDat(FldVal.Right(1), FldNm);
			} else {
				MapH.GetDat(OrgFunNm).FldNmValPrV.Add(TStrKd(FldNm, FldVal));
			}
		} else {
			// new function
			EAssert(PartV.Len() >= 2);
			OrgFunNm = PartV[0];
			MapH.AddDat(OrgFunNm).FunNm = PartV[1];
			printf("  %s - %s\n", PartV[0].CStr(), PartV[1].CStr());
		}
	}
	printf("Done\n");
}
Example #3
0
void TGgSchRef::GetAuthNmVPubStr(
 const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){
  // split input string into two parts
  TStr AuthNmVStr; TStr PubStr;
  AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr);
  // author-names string
  AuthNmVStr.SplitOnAllCh(',', AuthNmV, true);
  for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){
    AuthNmV[AuthN].ToTrunc();
  }
  if ((!AuthNmV.Empty())&&
   ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){
    AuthNmV.DelLast();
  }
  // publication-name & publication-year string
  TStr OriginStr; TStr LinkStr;
  PubStr.SplitOnStr(OriginStr, " - ", LinkStr);
  OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr);
  PubNm.ToTrunc(); PubYearStr.ToTrunc();
  if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubYearStr.GetSubStr(0, 3);
  } else
  if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubNm.GetSubStr(0, 3); PubNm="";
  } else {
    PubYearStr="";
  }
}
Example #4
0
TStrV TEnv::GetIfArgPrefixStrV(
    const TStr& PrefixStr, TStrV& DfValV, const TStr& DNm) const {
    TStrV ArgValV;
    if (Env.GetArgs()<=MnArgs) {
        // 'usage' argument message
        if (!SilentP) {
            printf("   %s%s (default:", PrefixStr.CStr(), DNm.CStr());
            for (int DfValN=0; DfValN<DfValV.Len(); DfValN++) {
                if (DfValN>0) {
                    printf(", ");
                }
                printf("'%s'", DfValV[DfValN].CStr());
            }
            printf(")\n");
        }
        return ArgValV;
    } else {
        // argument & value message
        TStr ArgValVChA;
        for (int ArgN=0; ArgN<GetArgs(); ArgN++) {
            // get argument string
            TStr ArgStr=GetArg(ArgN);
            if (ArgStr.StartsWith(PrefixStr)) {
                // extract & add argument value
                ArgStr.DelStr(PrefixStr);
                ArgValV.Add(ArgStr);
                // add to message string
                if (ArgValV.Len()>1) {
                    ArgValVChA+=", ";
                }
                ArgValVChA+=ArgValV.Last();
            }
        }
        if (ArgValV.Empty()) {
            ArgValV=DfValV;
        }
        // output argument values
        TChA MsgChA;
        MsgChA+="  "+DNm;
        MsgChA+=" (";
        MsgChA+=PrefixStr;
        MsgChA+=")=";
        for (int ArgValN=0; ArgValN<ArgValV.Len(); ArgValN++) {
            if (ArgValN>0) {
                MsgChA+=", ";
            }
            MsgChA+="'";
            MsgChA+=ArgValV[ArgValN];
            MsgChA+="'";
        }
        if (!SilentP) {
            TNotify::OnStatus(Notify, MsgChA);
        }
        return ArgValV;
    }
}
Example #5
0
File: util.cpp Project: pikma/Snap
// <last_name>_<first name innitial>
TStr TStrUtil::GetStdName(TStr AuthorName) {
    TStr StdName;
    AuthorName.ToLc();
    AuthorName.ChangeChAll('\n', ' ');
    AuthorName.ChangeChAll('.', ' ');
    // if there is a number in the name, remove it and everything after it
    int i, pos = 0;
    while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) {
        pos++;
    }
    if (pos < AuthorName.Len()) {
        AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc();
    }
    if (AuthorName.Empty()) {
        return TStr::GetNullStr();
    }

    // replace everything after '('
    int b = AuthorName.SearchCh('(');
    if (b != -1) {
        AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc();
    }
    // skip if contains ')'
    if (AuthorName .SearchCh(')')!=-1) {
        return TStr::GetNullStr();
    }
    // skip if it is not a name
    if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1
            || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) {
        return TStr::GetNullStr();
    }
    // remove all non-letters (latex tags, ...)
    TChA NewName;
    for (i = 0; i < AuthorName.Len(); i++) {
        const char Ch = AuthorName[i];
        if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') {
            NewName += Ch;
        }
    }
    StdName = NewName;
    StdName.ToTrunc();
    TStrV AuthNmV;
    StdName.SplitOnWs(AuthNmV);
    // too short -- not a name
    if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast();
    if (AuthNmV.Len() < 2) return TStr::GetNullStr();

    const TStr LastNm = AuthNmV.Last();
    if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr();

    IAssert(isalpha(AuthNmV[0][0]));
    return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]);
}
Example #6
0
// parse:
//   10:16, 16 Sep 2004
//   10:20, 2004 Sep 16
//   2005-07-07 20:30:35
//   23:24:07, 2005-07-10
//   9 July 2005 14:38
//   21:16, July 9, 2005
//   06:02, 10 July 2005
bool TStrUtil::GetTmFromStr(const char* TmStr, TSecTm& Tm) {
  static TStrV MonthV1, MonthV2;
  if (MonthV1.Empty()) {
    TStr("january|february|march|april|may|june|july|august|september|october|november|december").SplitOnAllCh('|', MonthV1);
    TStr("jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec").SplitOnAllCh('|', MonthV2);
  }
  TChA Tmp(TmStr);
  Tmp.ToLc();
  TVec<char *> WrdV;
  const char* End = Tmp.CStr()+Tmp.Len();
  int Col = -1, Cols=0;
  for (char *b = Tmp.CStr(); b <End; ) {
    WrdV.Add(b);
    while (*b && ! (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; }
    if (*b==':') { if(Col==-1) { Col=WrdV.Len(); } Cols++;  }
    *b=0; b++;
    while (*b && (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; }
  }
  if (Cols == 2) {
    if (Col+1 >= WrdV.Len()) { return false; }
    WrdV.Del(Col+1);
  }
  if (Col<1) { return false; }
  const int Hr = atoi(WrdV[Col-1]);
  const int Min = atoi(WrdV[Col]);
  WrdV.Del(Col);  WrdV.Del(Col-1);
  if (WrdV.Len() != 3) { return false; }
  int y=0,m=1,d=2, Mon=-1;
  if (TCh::IsAlpha(WrdV[0][0])) {
    y=2; m=0; d=1;
  } else if (TCh::IsAlpha(WrdV[1][0])) {
    y=2; m=1; d=0;
  } else if (TCh::IsAlpha(WrdV[2][0])) {
    y=0; m=2; d=1;
  } else {
    y=0; m=1; d=2;
    Mon = atoi(WrdV[m]);
  }
  int Day = atoi(WrdV[d]);
  if (Mon <= 0) { Mon = MonthV1.SearchForw(WrdV[m])+1; }
  if (Mon <= 0) { Mon = MonthV2.SearchForw(WrdV[m])+1; }
  if (Mon == 0) { return false; }
  int Year = atoi(WrdV[y]);
  if (Day > Year) { ::Swap(Day, Year); }
  //printf("%d-%02d-%02d  %02d:%02d\n", Year, Mon, Day, Hr, Min);
  Tm = TSecTm(Year, Mon, Day, Hr, Min, 0);
  return true;
}
Example #7
0
TStrV TEnv::GetIfArgPrefixStrV(const TStr& PrefixStr,
                               TStrV& DfValV,
                               const TStr& DNm) const {
    TStrV ArgValV;
    if (Env.GetArgs() <= MnArgs) {
        // 'usage' argument message
        if (!SilentP) {
            printf("   %s%s (default:", PrefixStr.CStr(),
                   DNm.CStr());
            for (int DfValN = 0; DfValN < DfValV.Len();
                 DfValN++) {
                if (DfValN > 0) printf(", ");
                printf("%s", DfValV[DfValN].CStr());
            }
            printf(")\n");
        }
        return ArgValV;
    } else {
        // argument & value message
        TStrV Items;
        for (int ArgN = 0; ArgN < GetArgs(); ArgN++) {
            // get argument string
            TStr ArgStr = GetArg(ArgN);
            if (ArgStr.GetSubStr(0, PrefixStr.Len() - 1) ==
                PrefixStr) {
                TStr ArgVals = ArgStr.GetSubStr(
                    PrefixStr.Len(), ArgStr.Len());
                ArgVals.SplitOnAllCh(',', Items);
                for (int i = 0; i < Items.Len(); i++)
                    ArgValV.Add(Items[i]);
            }
        }
        if (ArgValV.Empty()) ArgValV = DfValV;
        // output argument values
        TChA MsgChA;
        MsgChA += DNm;
        MsgChA += " (";
        MsgChA += PrefixStr;
        MsgChA += ")=";
        for (int ArgValN = 0; ArgValN < ArgValV.Len();
             ArgValN++) {
            if (ArgValN > 0) MsgChA += ", ";
            MsgChA += ArgValV[ArgValN];
        }
        if (!SilentP) TNotify::OnStatus(Notify, MsgChA);
        return ArgValV;
    }
}
Example #8
0
PJsonVal TGraphCascade::GetPosterior(const TStrV& NodeNmV, const TFltV& QuantileV) const {
    PJsonVal Result = TJsonVal::NewObj();
    TIntV NodeIdV;
    if (NodeNmV.Empty()) {
        // go over all zero timestamps for which samples exist
        TIntV FullNodeIdV; Graph.GetNIdV(FullNodeIdV);
        int Nodes = Graph.GetNodes();
        for (int NodeN = 0; NodeN < Nodes; NodeN++) {
            int NodeId = FullNodeIdV[NodeN];
            if (Timestamps.IsKey(NodeId) && Sample.IsKey(NodeId) && !Sample.GetDat(NodeId).Empty() && Timestamps.GetDat(NodeId) == 0) {
                NodeIdV.Add(NodeId);
            }
        }
    } else {
        int Nodes = NodeNmV.Len();
        for (int NodeN = 0; NodeN < Nodes; NodeN++) {
            if (!NodeNmIdH.IsKey(NodeNmV[NodeN])) { continue; }
            int NodeId = NodeNmIdH.GetDat(NodeNmV[NodeN]);
            if (Timestamps.IsKey(NodeId) && Sample.IsKey(NodeId) && !Sample.GetDat(NodeId).Empty() && Timestamps.GetDat(NodeId) == 0) {
                NodeIdV.Add(NodeId);
            }
        }
    }
    EAssertR(QuantileV.Len() > 0, "TGraphCascade::GetPosterior quantiles should not be empty!");
    for (int QuantileN = 0; QuantileN < QuantileV.Len(); QuantileN++) {
        EAssertR((QuantileV[QuantileN] >= 0.0) && (QuantileV[QuantileN] <= 1.0), "TGraphCascade::GetPosterior quantiles should be between 0.0 and 1.0");
    }

    int Nodes = NodeIdV.Len();
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        int NodeId = NodeIdV[NodeN];
        TStr NodeNm = NodeIdNmH.GetDat(NodeId);
        int Quantiles = QuantileV.Len();
        TUInt64V SampleV = Sample.GetDat(NodeId);
        SampleV.Sort(true);
        int SampleSize = SampleV.Len();
        PJsonVal QuantilesArr = TJsonVal::NewArr();
        for (int QuantileN = 0; QuantileN < Quantiles; QuantileN++) {
            int Idx = (int)floor(QuantileV[QuantileN] * SampleSize);
            Idx = MIN(Idx, SampleSize - 1);
            uint64 UnixTimestamp = TTm::GetUnixMSecsFromWinMSecs(SampleV[Idx]);
            QuantilesArr->AddToArr((double)UnixTimestamp);
        }
        Result->AddToObj(NodeNm, QuantilesArr);
    }
    return Result;
}
void TNmObjBs::LoadCustSwSet(const PSIn& SIn){
  if (SIn.Empty()){return;}
  TILx Lx(SIn, TFSet(iloCmtAlw, iloRetEoln, iloExcept));
  // traverse lines
  Lx.GetSym(syLn, syEof);
  while (Lx.Sym!=syEof){
    // get stop-phrase string
    TStr WordStrVStr=Lx.Str;
    // split phrase to words
    TStrV WordStrV; WordStrVStr.SplitOnWs(WordStrV);
    if (!WordStrV.Empty()){
      // define phrase as stop-word
      WordStrVToNmObjAttrSetH.AddDat(WordStrV).Incl(noaIgnore);
    }
    // get next symbol
    Lx.GetSym(syLn, syEof);
  }
}
void TNmObjBs::ExtrCandWordStrV(
 const TStr& HtmlStr, TStrV& CandWordStrV, const bool& DumpP){
  // prepare named-object vector
  CandWordStrV.Clr();
  // prepare html parsing
  PSIn HtmlSIn=TStrIn::New(HtmlStr);
  PHtmlDoc HtmlDoc=THtmlDoc::New(HtmlSIn, hdtAll, false);
  PHtmlTok Tok;
  THtmlLxSym Sym; TStr Str; TStr NrStr;
  CandWordStrV.Add(PeriodTagStr);
  bool InTitle=false; bool InScript=false; int LastNmObjTokN=-1;
  // traverse html tokens
  if (DumpP){printf("Tokens: ");}
  for (int TokN=0; TokN<HtmlDoc->GetToks(); TokN++){
    PHtmlTok Tok=HtmlDoc->GetTok(TokN);
    HtmlDoc->GetTok(TokN, Sym, Str);
    switch (Sym){
      case hsyUndef: break;
      case hsyStr:
      case hsyNum:
        if (InTitle){break;}
        if (InScript){break;}
        NrStr=GetNrWordStr(Str);
        if (DumpP){
          if (Str==NrStr){printf("%s ", Str.CStr());}
          else {printf("%s(%s) ", Str.CStr(), NrStr.CStr());}
        }
        if (IsFirstCapWordStr(NrStr)||IsNmObjAttr(NrStr, noaAsCapitalized)){
          if ((LastNmObjTokN!=-1)&&(LastNmObjTokN<TokN-1)){
            if (CandWordStrV.Last()!=PeriodTagStr){
              CandWordStrV.Add(BreakTagStr);
            }
          }
          CandWordStrV.Add(NrStr); LastNmObjTokN=TokN;
        }
        break;
      case hsySSym:
        if (InTitle){break;}
        if (InScript){break;}
        if (DumpP){
          printf("%s ", Str.CStr());}
        if (
         (Str==".")||(Str=="!")||(Str=="?")||
         (Str=="\"")||(Str=="-")||(Str=="/")||
         (Str==":")||(Str==";")){
          if (CandWordStrV.Last()!=PeriodTagStr){
            CandWordStrV.Add(PeriodTagStr);
          }
        }
        break;
      case hsyBTag:
      case hsyETag:
        if (Str=="<TITLE>"){
          InTitle=(Sym==hsyBTag);
        } else
        if (Str=="<SCRIPT>"){
          InScript=(Sym==hsyBTag);
        } else
        if (Str=="<P>"){
          if ((!CandWordStrV.Empty())&&(CandWordStrV.Last()!=ParagraphTagStr)){
            CandWordStrV.Add(ParagraphTagStr);
            CandWordStrV.Add(PeriodTagStr);
          }
        } else
        if ((Str=="<TD>")||(Str=="<BR>")){
          CandWordStrV.Add(PeriodTagStr);
        }
        break;
      case hsyEof: break;
      default: break;
    }
  }
  CandWordStrV.Add(EofTagStr);
  if (DumpP){printf("\n");}
  if (DumpP){
    printf("Candidates: ");
    for (int CandWordStrN=0; CandWordStrN<CandWordStrV.Len(); CandWordStrN++){
      printf("%s ", CandWordStrV[CandWordStrN].CStr());}
    printf("\n");
  }
}
/////////////////////////////////////////////////
// NIST-score
double TEvalScoreNist::Eval(const PTransCorpus& TransCorpus, const TIntV& _SentIdV) {
    // check if the corpus has translations
    IAssert(TransCorpus->IsTrans());

    // ngram counts (cliped and full)
    TIntH ClipCountNGramH, CountNGramH;
    // ngram info score
    TIntFltH NGramInfoH;
    // candidate and effective reference length
    double FullTransLen = 0.0, FullRefLen = 0.0;

    // iterate over sentences
    TIntV SentIdV = _SentIdV;
    if (SentIdV.Empty()) { TransCorpus->GetSentIdV(SentIdV); }
    const int Sents = SentIdV.Len();
    for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        // tokenize translation
        TIntV TransWIdV; Parse(TransCorpus->GetTransStr(SentId), TransWIdV);
        TIntH TransNGramH; GetNGramH(TransWIdV, MxNGramLen, TransNGramH);
        TIntH FreeTransNGramH = TransNGramH; // number of non-matched ngrams
        // counters for getting the closest length of reference sentences
        const int TransLen = TransWIdV.Len(); int RefLenSum = 0;
        // go over reference translations and count ngram matches
        TStrV RefTransStrV = TransCorpus->GetRefTransStrV(SentId);
        // we assume that there is at least one reference translation
        IAssert(!RefTransStrV.Empty());
        for (int RefN = 0; RefN < RefTransStrV.Len(); RefN++) {
            // parse reference translation sentence
            TIntV RefWIdV; Parse(RefTransStrV[RefN], RefWIdV);
            TIntH RefNGramH; GetNGramH(RefWIdV, MxNGramLen, RefNGramH);
            // check for matches
            int TransNGramKeyId = TransNGramH.FFirstKeyId();
            while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
                const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
                const int FreeTransNGrams = FreeTransNGramH(NGramId);
                if (RefNGramH.IsKey(NGramId) && (FreeTransNGrams>0)) {
                    // ngram match and still some free ngrams left to clip
                    const int RefNGrams = RefNGramH(NGramId);
                    FreeTransNGramH(NGramId) = TInt::GetMx(0, FreeTransNGrams - RefNGrams);
                }
            }
            // check the length difference
            const int RefLen = RefWIdV.Len();
            RefLenSum += RefLen;
        }
        // count ngrams
        int TransNGramKeyId = TransNGramH.FFirstKeyId();
        while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
            // get ngram
            const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId != -1);
            // check if two hash tables are aligned (should be...)
            const int FreeNGramId = FreeTransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId == FreeNGramId);
            // get ngram count and clip-count
            const int Count = TransNGramH[TransNGramKeyId];
            const int ClipCount = Count - FreeTransNGramH[TransNGramKeyId];
            // add ngram to the coprus ngram counts
            CountNGramH.AddDat(NGramId) += Count;
            ClipCountNGramH.AddDat(NGramId) += ClipCount;
        }
        // count length
        FullTransLen += double(TransLen);
        FullRefLen += double(RefLenSum) / double(RefTransStrV.Len());
    }

    // calculate ngram info scores
    int CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        // get the n-gram
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        TIntV NGram = GetNGram(NGramId);
        // prepare counts
        if (NGram.Len() == 1) {
            // n-gram is a word
            const int WordCount = CountNGramH[CountKeyId];
            const double NGramInfoScore = TMath::Log2(FullTransLen / double(WordCount));
            NGramInfoH.AddDat(NGramId, NGramInfoScore);
        } else {
            // more then one word in the n-gram
            // get a n-gram with removed last element
            TIntV N1Gram = NGram; N1Gram.DelLast();
            const int N1GramId = NGramH.GetKeyId(N1Gram);
            // get the counts
            const int NGramCount = CountNGramH(NGramId);
            const int N1GramCount = CountNGramH(N1GramId);
            // get the score
            const double NGramInfoScore = TMath::Log2(double(N1GramCount) / double(NGramCount));
            NGramInfoH.AddDat(NGramId, NGramInfoScore);
        }
    }

    // calcualte ngram precisions
    TFltV ClipCountV(MxNGramLen); ClipCountV.PutAll(0);
    int ClipCountKeyId = ClipCountNGramH.FFirstKeyId();
    while (ClipCountNGramH.FNextKeyId(ClipCountKeyId)) {
        const int NGramId = ClipCountNGramH.GetKey(ClipCountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        const double NGramInfo = NGramInfoH(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        const int ClipCountNGram = ClipCountNGramH[ClipCountKeyId];
        ClipCountV[NGramLen-1] += double(ClipCountNGram) * NGramInfo;
    }
    TIntV CountV(MxNGramLen); CountV.PutAll(0);
    CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        CountV[NGramLen-1] += CountNGramH[CountKeyId];
    }
    TFltV PrecV(MxNGramLen, 0);
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        const double ClipCount = ClipCountV[NGramLen];
        const int Count = CountV[NGramLen];
        const double Prec = (Count > 0) ? ClipCount / double(Count) : 0.0;
        PrecV.Add(Prec);
    }

    // calcualte brevity penalty
    const double LenFrac = double(FullTransLen)/double(FullRefLen);
    double BP = 0.0;
    if (LenFrac >= 1.0) { BP = 1.0; }
    else if (LenFrac <= 0.0) { BP = 0.0; }
    else {
        // calculate beta
        const double LenFracX = 1.5, BPX = 0.5;
        const double Beta = log(BPX) / TMath::Sqr(log(LenFracX));
        // calculate BP score
        BP = exp(Beta * TMath::Sqr(log(LenFrac)));
    }

    // calculate full NIST score
    double NistScore = 0.0; 
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        NistScore += PrecV[NGramLen];
    }    
    NistScore *= BP;
    printf("NIST Score: %.5f\n", NistScore);
    
    // done!
    return NistScore;
}