Example no. 1
0
void TStrFeatureSpace::ToStr(const TIntFltKdV& FeatureIds, TChA& ChA, int k, char Sep) const {
	// k-pass selection: on each pass pick the highest-weighted feature not taken yet
	TIntSet TakenIndexes(k);
	int Len = TMath::Mn(FeatureIds.Len(), k);
	for (int i = 0; i < Len; i++) {
		double MxVal = TFlt::Mn;
		int MxIndex = 0;
		for (int j = 0; j < FeatureIds.Len(); j++) {
			const TIntFltKd& Feature = FeatureIds[j];
			if (Feature.Dat > MxVal && !TakenIndexes.IsKey(Feature.Key)) {
				MxVal = Feature.Dat;
				MxIndex = Feature.Key;
			}
		}
		TakenIndexes.AddKey(MxIndex);
		// append "name:weight", separating entries with Sep
		ChA += ISpace.KeyFromOfs(Space[MxIndex]);
		ChA += ':';
		ChA += TFlt::GetStr(MxVal, "%2.6f");
		if (i < Len - 1) {
			ChA += Sep;
		}
	}
}
Example no. 2
0
int TGnuPlot::AddPlot(const TIntFltKdV& XYValV, const TGpSeriesTy& SeriesTy, const TStr& Label, const TStr& Style) {
  // convert integer keys to floating-point keys and delegate to the TFltKdV overload
  TFltKdV XYFltValV(XYValV.Len(), 0);
  for (int i = 0; i < XYValV.Len(); i++) {
    XYFltValV.Add(TFltKd(TFlt(XYValV[i].Key), TFlt(XYValV[i].Dat)));
  }
  return AddPlot(XYFltValV, SeriesTy, Label, Style);
}
Example no. 3
0
void TGUtil::Normalize(TIntFltKdV& PdfV) {
  // rescale the values so that they sum to 1; do nothing if the sum is not positive
  double Sum = 0.0;
  for (int i = 0; i < PdfV.Len(); i++) {
    Sum += PdfV[i].Dat; }
  if (Sum <= 0.0) { return; }
  for (int i = 0; i < PdfV.Len(); i++) {
    PdfV[i].Dat /= Sum; }
}
Example no. 4
0
PJsonVal TNearestNeighbor::Explain(const TIntFltKdV& Vec) const {
    // if not initialized, return null (JSON)
    if (!IsInit()) { return TJsonVal::NewNull(); }
    // find nearest neighbor
    double NearDist = TFlt::Mx; int NearColN = -1;
    for (int ColN = 0; ColN < Mat.Len(); ColN++) {
        const double Dist = TLinAlg::Norm2(Vec) - 2 * TLinAlg::DotProduct(Vec, Mat[ColN]) + TLinAlg::Norm2(Mat[ColN]);
        if (Dist < NearDist) { NearDist = Dist; NearColN = ColN; }
    }
    const TIntFltKdV& NearVec = Mat[NearColN];
    // generate JSON explanation
    PJsonVal ResVal = TJsonVal::NewObj();
    // id of the nearest element
    ResVal->AddToObj("nearestDat", DatV[NearColN]);
    ResVal->AddToObj("distance", NearDist);
    // element-wise difference
    PJsonVal DiffVal = TJsonVal::NewArr();
    int NearEltN = 0, EltN = 0;
    while (NearEltN < NearVec.Len() || EltN < Vec.Len()) {
        // get the feature ID
        const int VecFtrId = EltN < Vec.Len() ? Vec[EltN].Key.Val : TInt::Mx;
        const int NearFtrId = NearEltN < NearVec.Len() ? NearVec[NearEltN].Key.Val : TInt::Mx;
        const int FtrId = NearFtrId < VecFtrId ? NearFtrId : VecFtrId;
        // get values
        const double VecVal = FtrId < VecFtrId ? 0.0 : Vec[EltN].Dat.Val;
        const double NearVal = FtrId < NearFtrId ? 0.0 : NearVec[NearEltN].Dat.Val;
        // get diff
        const double Diff = TMath::Sqr(NearVal - VecVal) / NearDist;
        // add to json result
        PJsonVal FtrVal = TJsonVal::NewObj();
        //avoid unnecessary fields in the explanation
        if (Diff > 1e-8) {
            FtrVal->AddToObj("id", FtrId);
            FtrVal->AddToObj("val", VecVal);
            FtrVal->AddToObj("nearVal", NearVal);
            FtrVal->AddToObj("contribution", Diff);
            DiffVal->AddToArr(FtrVal);
        }
        // move to the next feature
        if (VecFtrId <= NearFtrId) {
            EltN++;
        }
        if (NearFtrId <= VecFtrId) {
            NearEltN++;
        }
    }
    ResVal->AddToObj("features", DiffVal);
    // first and last record in the buffer
    ResVal->AddToObj("oldestDat", DatV[NextCol]);
    int CurCol = NextCol > 0 ? NextCol - 1 : WindowSize - 1;
    ResVal->AddToObj("newestDat", DatV[CurCol]);
    return ResVal;
}
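A hedged aside on the distance computation above (and in the variant in Example no. 17): the expression TLinAlg::Norm2(Vec) - 2 * TLinAlg::DotProduct(Vec, Mat[ColN]) + TLinAlg::Norm2(Mat[ColN]) is the standard expansion of the squared Euclidean distance,

    ||v - c||^2 = ||v||^2 - 2 v . c + ||c||^2,

so the nearest column is selected by squared distance without materializing the difference vector. The per-feature terms Sqr(NearVal - VecVal) / NearDist then sum to 1 over all features (when NearDist is nonzero), which is why each is reported as a "contribution".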
Example no. 5
0
// interpolate effective diameter
double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile) {
  const double EffPairs = Percentile * DistNbrsCdfV.Last().Dat;
  int ValN;
  for (ValN = 0; ValN < DistNbrsCdfV.Len(); ValN++) {
    if (DistNbrsCdfV[ValN].Dat() > EffPairs) {  break; }
  }
  if (ValN >= DistNbrsCdfV.Len()) return DistNbrsCdfV.Last().Key;
  if (ValN == 0) return 1;
  // interpolate
  const double DeltaNbrs = DistNbrsCdfV[ValN].Dat - DistNbrsCdfV[ValN-1].Dat;
  if (DeltaNbrs == 0) return DistNbrsCdfV[ValN].Key;
  return DistNbrsCdfV[ValN-1].Key + (EffPairs - DistNbrsCdfV[ValN-1].Dat)/DeltaNbrs;
}
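A hedged restatement of the interpolation above, writing x_i = DistNbrsCdfV[i].Key, c_i = DistNbrsCdfV[i].Dat, P = Percentile, and taking i as the first index with c_i > P * c_last:

    effDiam = x_{i-1} + (P * c_last - c_{i-1}) / (c_i - c_{i-1})

The code adds the interpolated fraction directly to x_{i-1}, which implicitly assumes that consecutive keys differ by one hop.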
Example no. 6
0
void TMultinomial::AddFtr(const TStrV& StrV, const TFltV& FltV, TIntFltKdV& SpV) const {
    // make sure we either do not have explicit values, or their dimension matches with string keys
    EAssertR(FltV.Empty() || (StrV.Len() == FltV.Len()), "TMultinomial::AddFtr:: String and double values not aligned");
    // generate internal feature vector
    SpV.Gen(StrV.Len(), 0);
    for (int StrN = 0; StrN < StrV.Len(); StrN++) {
        const int FtrId = FtrGen.GetFtr(StrV[StrN]);
        // only use features we've seen during updates
        if (FtrId != -1) {
            const double Val = FltV.Empty() ? 1.0 : FltV[StrN].Val;
            if (Val > 1e-16) { SpV.Add(TIntFltKd(FtrId, Val)); }
        }
    }
    SpV.Sort();
    // merge elements with the same id
    int GoodSpN = 0;
    for (int SpN = 1; SpN < SpV.Len(); SpN++) {
        if (SpV[GoodSpN].Key == SpV[SpN].Key) {
            // repetition of previous id, sum counts
            SpV[GoodSpN].Dat += SpV[SpN].Dat;
        } else {
            // increase the pointer to the next good position
            GoodSpN++;
            // and move the new value down to the good position
            SpV[GoodSpN] = SpV[SpN];
        }
    }
    // truncate the vector
    SpV.Trunc(GoodSpN + 1);
    // replace values with 1 if needed
    if (IsBinary()) { for (TIntFltKd& Sp : SpV) { Sp.Dat = 1.0; } }
    // final normalization, if needed
    if (IsNormalize()) { TLinAlg::Normalize(SpV); }    
}
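The sort-then-merge step above is a compact way to collapse duplicate feature ids after sorting. Below is a minimal standalone sketch of the same idea on standard containers; MergeDuplicateIds is a hypothetical helper name and is not part of the library.

// Minimal standalone sketch (hypothetical, not part of the library):
// sort a sparse vector of (id, value) pairs and sum values with equal ids.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

static void MergeDuplicateIds(std::vector<std::pair<int, double> >& SpV) {
    std::sort(SpV.begin(), SpV.end());
    std::size_t Good = 0;
    for (std::size_t SpN = 1; SpN < SpV.size(); SpN++) {
        if (SpV[Good].first == SpV[SpN].first) {
            SpV[Good].second += SpV[SpN].second;  // same id: sum the values
        } else {
            SpV[++Good] = SpV[SpN];               // new id: move it to the next good slot
        }
    }
    if (!SpV.empty()) { SpV.resize(Good + 1); }
}

int main() {
    std::vector<std::pair<int, double> > SpV = { {5, 1.0}, {2, 1.0}, {2, 3.0} };
    MergeDuplicateIds(SpV);
    for (const auto& Kv : SpV) { printf("%d:%g\n", Kv.first, Kv.second); }  // prints 2:4 then 5:1
    return 0;
}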
Example no. 7
0
void TSparseNumeric::AddFtr(const TIntFltKdV& InSpV, TIntFltKdV& SpV, int& Offset) const {
    // copy the input sparse vector, shifting ids by the current offset and transforming the values
    for (int SpN = 0; SpN < InSpV.Len(); SpN++) {
        const int Id = InSpV[SpN].Key;
        double Val = FtrGen.GetFtr(InSpV[SpN].Dat);
        SpV.Add(TIntFltKd(Offset + Id, Val));
    }
    Offset += GetVals();
}
Example no. 8
0
double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV) {
  // weighted mean path length: lengths (Key) weighted by their counts (Dat)
  double Paths=0, SumLen=0;
  for (int i = 0; i < DistNbrsPdfV.Len(); i++) {
    SumLen += DistNbrsPdfV[i].Key * DistNbrsPdfV[i].Dat;
    Paths += DistNbrsPdfV[i].Dat;
  }
  return SumLen/Paths;
}
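A hedged note: with len_i = DistNbrsPdfV[i].Key and cnt_i = DistNbrsPdfV[i].Dat, the function returns the weighted mean path length

    avgDiam = (sum_i len_i * cnt_i) / (sum_i cnt_i)

which is undefined if all counts are zero, so callers are presumably expected to pass a non-degenerate distribution.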
Example no. 9
0
void TGnuPlot::SaveTs(const TIntFltKdV& KdV, const TStr& FNm, const TStr& HeadLn) {
  FILE *F = fopen(FNm.CStr(), "wt");
  EAssert(F);
  if (! HeadLn.Empty()) fprintf(F, "# %s\n", HeadLn.CStr());
  for (int i = 0; i < KdV.Len(); i++)
    fprintf(F, "%d\t%g\n", KdV[i].Key(), KdV[i].Dat());
  fclose(F);
}
Example no. 10
0
///////////////////////////////////////////////////////////////////////
// Logistic-Regression-Model
double TLogRegMd::GetCfy(const TIntFltKdV& AttrV) {
    int len = AttrV.Len();
    // the last element of bb is the intercept
    double res = bb.Last();
    for (int i = 0; i < len; i++) {
        // skip feature ids that fall outside the weight vector
        if (AttrV[i].Key < bb.Len())
            res += AttrV[i].Dat * bb[AttrV[i].Key];
    }
    // logistic (sigmoid) link
    double mu = 1/(1 + exp(-res));
    return mu;
}
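A hedged note on the value returned above: with b denoting the weight vector bb (its last entry acting as the intercept) and x the sparse attribute vector, GetCfy computes the logistic function

    mu = 1 / (1 + exp(-(b_last + sum_k x_k * b_k)))

where the sum runs over the attribute keys k that fall inside bb.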
Example no. 11
0
void TBagOfWords::AddFtr(const TStrV& TokenStrV, TFltV& FullV, int& Offset) const {
    // create sparse vector
    TIntFltKdV ValSpV; AddFtr(TokenStrV, ValSpV);
    // add to the full feature vector and increase offset count
    for (int ValSpN = 0; ValSpN < ValSpV.Len(); ValSpN++) {
        const TIntFltKd& ValSp = ValSpV[ValSpN];
        FullV[Offset + ValSp.Key] = ValSp.Dat;
    }
    // increase the offset by the dimension
    Offset += GetDim();    
}
Example no. 12
0
void TMultinomial::AddFtr(const TStrV& StrV, const TFltV& FltV, TFltV& FullV, int& Offset) const {
    // generate feature 
    TIntFltKdV ValSpV; AddFtr(StrV, FltV, ValSpV);
    // add to the full feature vector and increase offset count
    for (int ValSpN = 0; ValSpN < ValSpV.Len(); ValSpN++) {
        const TIntFltKd& ValSp = ValSpV[ValSpN];
        FullV[Offset + ValSp.Key] = ValSp.Dat;
    }
    // increase the offset by the dimension
    Offset += GetDim();
}
Example no. 13
0
TStr TStrUtil::GetStr(const TIntFltKdV& IntFltKdV, const TStr& FieldDelimiterStr,
    const TStr& DelimiterStr, const TStr& FmtStr) {
  TChA ResChA;
  for (int EltN = 0; EltN < IntFltKdV.Len(); EltN++) {
    if (!ResChA.Empty()) { ResChA += DelimiterStr; }
    ResChA += IntFltKdV[EltN].Key.GetStr();
    ResChA += FieldDelimiterStr;
    ResChA += TFlt::GetStr(IntFltKdV[EltN].Dat, FmtStr);
  }
  return ResChA;
}
Example no. 14
0
void TFtrGenBs::AddBowDoc(const PBowDocBs& BowDocBs,
		const TStr& DocNm, const TStrV& FtrValV) const {

    TIntFltKdV FtrSpV; GenFtrV(FtrValV, FtrSpV);
    // convert the key-value sparse vector into a vector of (word-id, weight) pairs
    const int WIds = FtrSpV.Len(); TIntFltPrV WIdWgtPrV(WIds, 0);
    for (int WIdN = 0; WIdN < WIds; WIdN++) {
        WIdWgtPrV.Add(TIntFltPr(FtrSpV[WIdN].Key, FtrSpV[WIdN].Dat));
    }
    // add the feature vector to the training set
    BowDocBs->AddDoc(DocNm, TStrV(), WIdWgtPrV);
}
Example no. 15
0
double TBowLinAlg::DotProduct(const TIntFltKdV& x, PBowSpV y) {
    TBowWIdWgtKd* vec2 = y->BegI();
    int len1 = x.Len(), len2 = y->Len();
    double res = 0.0; int j1 = 0, j2 = 0;

    // merge-style scan: both sparse vectors are sorted by key
    while (j1 < len1 && j2 < len2) {
        if (x[j1].Key < vec2[j2].Key) { j1++; } 
        else if (x[j1].Key > vec2[j2].Key) { j2++; } 
        else { res += x[j1].Dat * vec2[j2].Dat; j1++; j2++; }
    }

    return res;
}
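The two-pointer merge above relies only on both sparse vectors being sorted by key. Below is a minimal standalone sketch of the same idea on standard containers; SparseDot is a hypothetical helper name and is not part of the library.

// Minimal standalone sketch (hypothetical, not part of the library):
// dot product of two sparse vectors stored as key-sorted (id, value) pairs.
#include <cstdio>
#include <utility>
#include <vector>

static double SparseDot(const std::vector<std::pair<int, double> >& x,
                        const std::vector<std::pair<int, double> >& y) {
    double res = 0.0;
    std::size_t j1 = 0, j2 = 0;
    while (j1 < x.size() && j2 < y.size()) {
        if (x[j1].first < y[j2].first) { j1++; }                  // key only in x
        else if (x[j1].first > y[j2].first) { j2++; }             // key only in y
        else { res += x[j1].second * y[j2].second; j1++; j2++; }  // matching keys
    }
    return res;
}

int main() {
    std::vector<std::pair<int, double> > a = { {0, 1.0}, {3, 2.0}, {7, 0.5} };
    std::vector<std::pair<int, double> > b = { {3, 4.0}, {7, 2.0}, {9, 1.0} };
    printf("%g\n", SparseDot(a, b));  // 2*4 + 0.5*2 = 9
    return 0;
}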
Example no. 16
0
void TBagOfWords::AddFtr(const TStr& Val, TIntFltKdV& SpV, int& Offset) const {
    // tokenize
    TStrV TokenStrV(Val.Len() / 5, 0); GetFtr(Val, TokenStrV);
    // create sparse vector
    TIntFltKdV ValSpV; AddFtr(TokenStrV, ValSpV);
    // add to the full feature vector and increase offset count
    for (int ValSpN = 0; ValSpN < ValSpV.Len(); ValSpN++) {
        const TIntFltKd& ValSp = ValSpV[ValSpN];
        SpV.Add(TIntFltKd(Offset + ValSp.Key, ValSp.Dat));
    }    
    // increase the offset by the dimension
    Offset += GetDim();
}
Example no. 17
0
PJsonVal TNearestNeighbor::Explain(const TIntFltKdV& Vec) const {
	// if not initialized, return null (JSON)
	if (!IsInit()) { return TJsonVal::NewNull(); }
	// find nearest neighbor
	double NearDist = TFlt::Mx;
	int NearColN = -1;
	for (int ColN = 0; ColN < Mat.Len(); ColN++) {		
		const double Dist = TLinAlg::Norm2(Vec) - 2 * TLinAlg::DotProduct(Vec, Mat[ColN]) + TLinAlg::Norm2(Mat[ColN]);
		if (Dist < NearDist) { NearDist = Dist; NearColN = ColN; }
	}
    const TIntFltKdV& NearVec = Mat[NearColN];
	// generate JSON explanation
	PJsonVal ResVal = TJsonVal::NewObj();
    // id of the nearest element
	ResVal->AddToObj("nearestID", IDVec[NearColN]);
	ResVal->AddToObj("distance", NearDist);
    // element-wise difference
    PJsonVal DiffVal = TJsonVal::NewArr();
    int NearEltN = 0, EltN = 0;
    while (NearEltN < NearVec.Len() && EltN < Vec.Len()) {
        // get values
        const int FtrId =      (NearVec[NearEltN].Key < Vec[EltN].Key) ? NearVec[NearEltN].Key     : Vec[EltN].Key;
        const double Val =     (NearVec[NearEltN].Key >= Vec[EltN].Key) ? Vec[EltN].Dat.Val : 0.0;
        const double NearVal = (NearVec[NearEltN].Key <= Vec[EltN].Key) ? NearVec[NearEltN].Dat.Val : 0.0;
        const double Diff    = TMath::Sqr(NearVal - Val) / NearDist;
        // add to json result
        PJsonVal FtrVal = TJsonVal::NewObj();
        FtrVal->AddToObj("id", FtrId);
        FtrVal->AddToObj("val", Val);
        FtrVal->AddToObj("nearVal", NearVal);
        FtrVal->AddToObj("contribution", Diff);
        DiffVal->AddToArr(FtrVal);
        // move to the next feature
        if (NearVec[NearEltN].Key > Vec[EltN].Key) {
            EltN++;
        } else if (NearVec[NearEltN].Key < Vec[EltN].Key) {
            NearEltN++;
        } else {
            NearEltN++; EltN++;
        }
    }
    ResVal->AddToObj("features", DiffVal);
	return ResVal;
}
Example no. 18
0
TStr TAlignPairBs::MapQuery(const TAlignPairMap& Map, const TStr& QueryStr,
        const int& QueryLangId, const int& TargetLangId, const int& TransQueryMtpy,
        const double& MxWgtPrc) {

    // get alignment corpus
    PAlignPair AlignPair = GetAlignPair(QueryLangId, TargetLangId);
    AlignPair->Def();
    // get languages
    const TStr& QueryLang = LangH.GetKey(QueryLangId);
    const TStr& TargetLang = LangH.GetKey(TargetLangId);
    // get sparse vector from the query
    TIntFltKdV InSpV; AlignPair->GetSpV(QueryStr, QueryLang, InSpV);
    // get sparse matrices with aligned columns
    const TMatrix& QueryMatrix = AlignPair->GetMatrix(QueryLang);
    const TMatrix& TargetMatrix = AlignPair->GetMatrix(TargetLang);
    // map the query
    TIntFltKdV OutSpV; Map(InSpV, QueryMatrix, TargetMatrix, OutSpV);
    // convert the mapped query vector back to a string
    return AlignPair->GetSpVStr(OutSpV, TargetLang, 
        InSpV.Len() * TransQueryMtpy, MxWgtPrc);
}
Example no. 19
0
void TSparseNumeric::Update(const TIntFltKdV& SpV) {
    // track the largest feature id seen so far and update the numeric feature generator
    for (int SpN = 0; SpN < SpV.Len(); SpN++) {
        MxId = TInt::GetMx(SpV[SpN].Key, MxId);
        FtrGen.Update(SpV[SpN].Dat);
    }
}
Example no. 20
0
void TGUtil::GetPdf(const TIntFltKdV& CdfV, TIntFltKdV& PdfV) {
  PdfV = CdfV;
  // PDF is the first difference of the CDF
  for (int i = PdfV.Len()-1; i > 0; i--) {
    PdfV[i].Dat = PdfV[i].Dat - PdfV[i-1].Dat; }
}
Example no. 21
0
void TGUtil::GetCCdf(const TIntFltKdV& PdfV, TIntFltKdV& CCdfV) {
  CCdfV = PdfV;
  // complementary CDF: suffix sums of the PDF
  for (int i = CCdfV.Len()-2; i >= 0; i--) {
    CCdfV[i].Dat = CCdfV[i+1].Dat + CCdfV[i].Dat; }
}
Example no. 22
0
void TGUtil::GetCdf(const TIntFltKdV& PdfV, TIntFltKdV& CdfV) {
  CdfV = PdfV;
  // CDF: prefix sums of the PDF
  for (int i = 1; i < CdfV.Len(); i++) {
    CdfV[i].Dat = CdfV[i-1].Dat + CdfV[i].Dat; }
}