// set Language and Country for movies that do not have the value set // for every movie find the mojority language/country in 1-hop neighborhood and set it void TImdbNet::SetLangCntryByMajority() { // set language while (true) { TIntPrV NIdToVal; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().GetLang() != 0) { continue; } int Nbhs=0; TIntH LangCntH; for (int e = 0; e < NI.GetOutDeg(); e++) { LangCntH.AddDat(NI.GetOutNDat(e).GetLang()) += 1; Nbhs++; } for (int e = 0; e < NI.GetInDeg(); e++) { LangCntH.AddDat(NI.GetInNDat(e).GetLang()) += 1; Nbhs++; } if (LangCntH.IsKey(0)) { Nbhs-=LangCntH.GetDat(0); LangCntH.GetDat(0)=0; } LangCntH.SortByDat(false); if (LangCntH.GetKey(0) == 0) { continue; } if (LangCntH[0]*2 >= Nbhs) { NIdToVal.Add(TIntPr(NI.GetId(), LangCntH.GetKey(0))); } } if (NIdToVal.Empty()) { break; } // done for (int i = 0; i < NIdToVal.Len(); i++) { GetNDat(NIdToVal[i].Val1).Lang = NIdToVal[i].Val2; } printf(" language set: %d\n", NIdToVal.Len()); } int cnt=0; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().GetLang()==0) cnt++; } printf(" NO language: %d\n\n", cnt); // set country while (true) { TIntPrV NIdToVal; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().GetCntry() != 0) { continue; } int Nbhs=0; TIntH CntryCntH; for (int e = 0; e < NI.GetOutDeg(); e++) { CntryCntH.AddDat(NI.GetOutNDat(e).GetCntry()) += 1; Nbhs++; } for (int e = 0; e < NI.GetInDeg(); e++) { CntryCntH.AddDat(NI.GetInNDat(e).GetCntry()) += 1; Nbhs++; } if (CntryCntH.IsKey(0)) { Nbhs-=CntryCntH.GetDat(0); CntryCntH.GetDat(0)=0; } CntryCntH.SortByDat(false); if (CntryCntH.GetKey(0) == 0) { continue; } if (CntryCntH[0]*2 >= Nbhs) { NIdToVal.Add(TIntPr(NI.GetId(), CntryCntH.GetKey(0))); } } if (NIdToVal.Empty()) { break; } // done for (int i = 0; i < NIdToVal.Len(); i++) { GetNDat(NIdToVal[i].Val1).Cntry = NIdToVal[i].Val2; } printf(" country set: %d\n", NIdToVal.Len()); } cnt=0; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().GetCntry()==0) cnt++; } printf(" NO country: %d\n\n", cnt); }
// actors collaboration graph PUNGraph TImdbNet::GetActorGraph() const { TIntPrSet EdgeSet; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().GetTy() == mtyActor) { const int NId1 = NI.GetId(); for (int e = 0; e < NI.GetOutDeg(); e++) { if (NI.GetOutNDat(e).GetTy() != mtyActor) { TNodeI NI2 = GetNI(NI.GetOutNId(e)); for (int e2 = 0; e2 < NI2.GetInDeg(); e2++) { if (NI2.GetInNDat(e2).GetTy() == mtyActor) { const int NId2 = NI2.GetInNId(e2); EdgeSet.AddKey(TIntPr(TMath::Mn(NId1, NId2), TMath::Mx(NId1, NId2))); } } } } } } PUNGraph G = TUNGraph::New(); for (int i = 0; i < EdgeSet.Len(); i++) { const int NId1 = EdgeSet[i].Val1; const int NId2 = EdgeSet[i].Val2; if (! G->IsNode(NId1)) { G->AddNode(NId1); } if (! G->IsNode(NId2)) { G->AddNode(NId2); } G->AddEdge(NId1, NId2); } return G; }
// set actor's language and country void TImdbNet::SetActorCntryLangByMajority() { // set language TIntPrV NIdToVal; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (! NI().IsActor()) { continue; } IAssert(NI().GetLang() == 0); // no language set IAssert(NI.GetInDeg() == 0); // actors point to movies int Nbhs=0; TIntH LangCntH; for (int e = 0; e < NI.GetOutDeg(); e++) { LangCntH.AddDat(NI.GetOutNDat(e).GetLang()) += 1; Nbhs++; } if (LangCntH.IsKey(0)) { Nbhs-=LangCntH.GetDat(0); LangCntH.GetDat(0)=0; } LangCntH.SortByDat(false); if (LangCntH.GetKey(0) == 0) { continue; } if (LangCntH[0]*2 >= Nbhs) { NIdToVal.Add(TIntPr(NI.GetId(), LangCntH.GetKey(0))); } } for (int i = 0; i < NIdToVal.Len(); i++) { GetNDat(NIdToVal[i].Val1).Lang = NIdToVal[i].Val2; } printf(" language set: %d\n", NIdToVal.Len()); int cnt=0; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().IsActor() && NI().GetLang()==0) cnt++; } printf(" Actors NO language: %d\n\n", cnt); // set country NIdToVal.Clr(true); for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (! NI().IsActor()) { continue; } IAssert(NI().GetCntry() == 0); // no country set IAssert(NI.GetInDeg() == 0); // actors point to movies int Nbhs=0; TIntH CntryCntH; for (int e = 0; e < NI.GetOutDeg(); e++) { CntryCntH.AddDat(NI.GetOutNDat(e).GetCntry()) += 1; Nbhs++; } if (CntryCntH.IsKey(0)) { Nbhs-=CntryCntH.GetDat(0); CntryCntH.GetDat(0)=0; } CntryCntH.SortByDat(false); if (CntryCntH.GetKey(0) == 0) { continue; } if (CntryCntH[0]*2 >= Nbhs) { NIdToVal.Add(TIntPr(NI.GetId(), CntryCntH.GetKey(0))); } } for (int i = 0; i < NIdToVal.Len(); i++) { GetNDat(NIdToVal[i].Val1).Cntry = NIdToVal[i].Val2; } printf(" country set: %d\n", NIdToVal.Len()); cnt=0; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().IsActor() && NI().GetCntry()==0) cnt++; } printf(" Actors NO country: %d\n\n", cnt); }