TEST_F(BodyCentredNonRotatingDynamicFrameTest, GeometricAcceleration) {
  int const kSteps = 10;
  RelativeDegreesOfFreedom<ICRFJ2000Equator> const initial_big_to_small =
      small_initial_state_ - big_initial_state_;
  Length const big_to_small = initial_big_to_small.displacement().Norm();
  // Magnitude of the acceleration exerted by the small body on the big body,
  // i.e., the acceleration of this frame's origin.
  Acceleration const small_on_big =
      small_gravitational_parameter_ / (big_to_small * big_to_small);
  // Sample positions along the axis between the two bodies.
  for (Length y = big_to_small / kSteps;
       y < big_to_small;
       y += big_to_small / kSteps) {
    Position<Big> const position(Big::origin +
                                     Displacement<Big>({0 * Kilo(Metre),
                                                        y,
                                                        0 * Kilo(Metre)}));
    Acceleration const big_on_position =
        -big_gravitational_parameter_ / (y * y);
    Acceleration const small_on_position =
        small_gravitational_parameter_ /
            ((big_to_small - y) * (big_to_small - y));
    Vector<Acceleration, Big> const expected_acceleration(
                  {0 * SIUnit<Acceleration>(),
                   small_on_position + big_on_position - small_on_big,
                   0 * SIUnit<Acceleration>()});
    EXPECT_THAT(AbsoluteError(
                    big_frame_->GeometricAcceleration(
                        t0_,
                        DegreesOfFreedom<Big>(position, Velocity<Big>())),
                    expected_acceleration),
                Lt(1E-10 * SIUnit<Acceleration>()));
  }
}
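The expected value here is just two point-mass gravities minus the acceleration of the frame's origin. A standalone sketch of the same arithmetic in plain doubles (the gravitational parameters and separation below are made-up values, not the fixture's):

#include <cstdio>

int main() {
  double const mu_big = 4.0e14;    // m^3/s^2, made-up value
  double const mu_small = 5.0e12;  // m^3/s^2, made-up value
  double const d = 4.0e8;          // separation in metres, made-up value
  // Acceleration of the frame's origin (the big body) towards the small body.
  double const small_on_big = mu_small / (d * d);
  for (int i = 1; i < 10; ++i) {
    double const y = d * i / 10.0;
    double const expected = -mu_big / (y * y)               // big body pulls back
                          + mu_small / ((d - y) * (d - y))  // small body pulls forward
                          - small_on_big;                   // frame origin's own acceleration
    std::printf("y = %.2e m: expected geometric acceleration %+.3e m/s^2\n",
                y, expected);
  }
  return 0;
}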
Example #2
TFfGGen::TStopReason TUndirFFire::AddNodes(const int& GraphNodes, const bool& FloodStop) {
	printf("\n***Undirected GEO ForestFire: graph(%d,%d) add %d nodes, burn prob %.3f\n",
		Graph->GetNodes(), Graph->GetEdges(), GraphNodes, BurnProb);
	TExeTm ExeTm;
	int Burned1 = 0, Burned2 = 0, Burned3 = 0; // last 3 fire sizes
	TIntPrV NodesEdgesV;
	// create initial set of nodes
	if (Graph.Empty()) { Graph = PUNGraph::New(); }
	if (Graph->GetNodes() == 0) { Graph->AddNode(); }
	int NEdges = Graph->GetEdges();
	// forest fire
	for (int NNodes = Graph->GetNodes() + 1; NNodes <= GraphNodes; NNodes++) {
		const int NewNId = Graph->AddNode(-1);
		IAssert(NewNId == Graph->GetNodes() - 1); // node ids have to be 0...N
		const int StartNId = Rnd.GetUniDevInt(NewNId);
		const int NBurned = BurnGeoFire(StartNId);
		// add edges to burned nodes
		for (int e = 0; e < NBurned; e++) {
			Graph->AddEdge(NewNId, GetBurnedNId(e));
		}
		NEdges += NBurned;
		Burned1 = Burned2;  Burned2 = Burned3;  Burned3 = NBurned;
		if (NNodes % Kilo(1) == 0) {
			printf("(%d, %d)    burned: [%d,%d,%d]  [%s]\n", NNodes, NEdges, Burned1, Burned2, Burned3, ExeTm.GetStr());
			NodesEdgesV.Add(TIntPr(NNodes, NEdges));
		}
		if (FloodStop && NEdges > 1000 && NEdges / double(NNodes) > 100.0) { // flood: edge-to-node ratio exceeds 100
			printf("!!! FLOOD. G(%6d, %6d)\n", NNodes, NEdges);  return TFfGGen::srFlood;
		}
	}
	printf("\n");
	IAssert(Graph->GetEdges() == NEdges);
	return TFfGGen::srOk;
}
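A minimal driver sketch for this generator. The burn-probability constructor and the GetGraph() accessor are assumptions about TUndirFFire's interface; they do not appear in the snippet above.

#include "Snap.h"

int main() {
  TUndirFFire Fire(/*BurnProb=*/0.3);  // assumed constructor taking the burn probability
  if (Fire.AddNodes(10000, /*FloodStop=*/true) == TFfGGen::srOk) {
    TSnap::SaveEdgeList(Fire.GetGraph(), "ff-undir.txt");  // GetGraph() is an assumed accessor
  }
  return 0;
}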
Example #3
TFfGGen::TStopReason TFfGGen::AddNodes(const int& GraphNodes, const bool& FloodStop) {
	printf("\n***ForestFire:  %s  Nodes:%d  StartNodes:%d  Take2AmbProb:%g\n", BurnExpFire ? "ExpFire" : "GeoFire", GraphNodes, StartNodes(), Take2AmbProb());
	printf("                FwdBurnP:%g  BckBurnP:%g  ProbDecay:%g  Orphan:%g\n", FwdBurnProb(), BckBurnProb(), ProbDecay(), OrphanProb());
	TExeTm ExeTm;
	int Burned1 = 0, Burned2 = 0, Burned3 = 0; // last 3 fire sizes
	// create initial set of nodes
	if (Graph.Empty()) { Graph = PNGraph::New(); }
	if (Graph->GetNodes() == 0) {
		for (int n = 0; n < StartNodes; n++) { Graph->AddNode(); }
	}
	int NEdges = Graph->GetEdges();
	// forest fire
	TRnd Rnd(0);
	TForestFire ForestFire(Graph, FwdBurnProb, BckBurnProb, ProbDecay, 0);
	// add nodes
	for (int NNodes = Graph->GetNodes() + 1; NNodes <= GraphNodes; NNodes++) {
		const int NewNId = Graph->AddNode(-1);
		IAssert(NewNId == Graph->GetNodes() - 1); // node ids have to be 0...N
		// not an Orphan (burn fire)
		if (OrphanProb == 0.0 || Rnd.GetUniDev() > OrphanProb) {
			// infect ambassadors
			if (Take2AmbProb == 0.0 || Rnd.GetUniDev() > Take2AmbProb || NewNId < 2) {
				ForestFire.Infect(Rnd.GetUniDevInt(NewNId)); // take 1 ambassador
			}
			else {
				const int AmbassadorNId1 = Rnd.GetUniDevInt(NewNId);
				int AmbassadorNId2 = Rnd.GetUniDevInt(NewNId);
				while (AmbassadorNId1 == AmbassadorNId2) {
					AmbassadorNId2 = Rnd.GetUniDevInt(NewNId);
				}
				ForestFire.Infect(TIntV::GetV(AmbassadorNId1, AmbassadorNId2)); // take 2 ambassadors
			}
			// burn fire
			if (BurnExpFire) { ForestFire.BurnExpFire(); }
			else { ForestFire.BurnGeoFire(); }
			// add edges to burned nodes
			for (int e = 0; e < ForestFire.GetBurned(); e++) {
				Graph->AddEdge(NewNId, ForestFire.GetBurnedNId(e));
				NEdges++;
			}
			Burned1 = Burned2;  Burned2 = Burned3;  Burned3 = ForestFire.GetBurned();
		}
		else {
			// Orphan (zero out-links)
			Burned1 = Burned2;  Burned2 = Burned3;  Burned3 = 0;
		}
		if (NNodes % Kilo(1) == 0) {
			printf("(%d, %d)  burned: [%d,%d,%d]  [%s]\n", NNodes, NEdges, Burned1, Burned2, Burned3, ExeTm.GetStr());
		}
		if (FloodStop && NEdges > GraphNodes && (NEdges / double(NNodes) > 1000.0)) { // flood: edge-to-node ratio exceeds 1000
			printf(". FLOOD. G(%6d, %6d)\n", NNodes, NEdges);  return srFlood;
		}
		if (NNodes % 1000 == 0 && TimeLimitSec > 0 && ExeTm.GetSecs() > TimeLimitSec) {
			printf(". TIME LIMIT. G(%d, %d)\n", Graph->GetNodes(), Graph->GetEdges());
			return srTimeLimit;
		}
	}
	IAssert(Graph->GetEdges() == NEdges);
	return srOk;
}
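For quick use, the bookkeeping above can be skipped: TSnap wraps this generator in a one-call helper. A minimal sketch (the burn probabilities are illustrative values, not defaults):

#include "Snap.h"

int main() {
  // Forward/backward burn probabilities; 0.35/0.32 are illustrative.
  PNGraph G = TSnap::GenForestFire(10000, 0.35, 0.32);
  printf("nodes: %d  edges: %d\n", G->GetNodes(), G->GetEdges());
  return 0;
}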
Example #4
File: gstat.cpp Project: Accio/snap
void TGStat::TakeSpectral(const PNGraph& Graph, TFSet StatFSet, int _TakeSngVals) {
  if (_TakeSngVals == -1) { _TakeSngVals = TakeSngVals; }
  // singular values, vectors
  if (StatFSet.In(gsdSngVal)) {
    const int SngVals = TMath::Mn(_TakeSngVals, Graph->GetNodes()/2);
    TFltV SngValV1;
    TSnap::GetSngVals(Graph, SngVals, SngValV1);
    SngValV1.Sort(false);
    TFltPrV& SngValV = DistrStatH.AddDat(gsdSngVal);
    SngValV.Gen(SngValV1.Len(), 0);
    for (int i = 0; i < SngValV1.Len(); i++) {
      SngValV.Add(TFltPr(i+1, SngValV1[i]));
    }
  }
  if (StatFSet.In(gsdSngVec)) {
    TFltV LeftV, RightV;
    TSnap::GetSngVec(Graph, LeftV, RightV);
    LeftV.Sort(false);
    TFltPrV& SngVec = DistrStatH.AddDat(gsdSngVec);
    SngVec.Gen(LeftV.Len(), 0);
    for (int i = 0; i < TMath::Mn(Kilo(10), LeftV.Len()/2); i++) {
      if (LeftV[i] > 0) { SngVec.Add(TFltPr(i+1, LeftV[i])); }
    }
  }
}
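The same primitives work outside TGStat too; a minimal sketch computing the leading singular values of a random directed graph:

#include "Snap.h"

int main() {
  PNGraph G = TSnap::GenRndGnm<PNGraph>(1000, 5000);  // random directed G(n, m)
  TFltV SngValV;
  TSnap::GetSngVals(G, 10, SngValV);  // 10 leading singular values of the adjacency matrix
  SngValV.Sort(false);                // descending, as TakeSpectral sorts them
  for (int i = 0; i < SngValV.Len(); i++) {
    printf("sigma_%d = %f\n", i + 1, SngValV[i].Val);
  }
  return 0;
}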
Example #5
void BM_BarycentricRotatingDynamicFrame(
    benchmark::State& state) {  // NOLINT(runtime/references)
  Time const Δt = 5 * Minute;
  int const steps = state.range_x();

  SolarSystem<ICRFJ2000Equator> solar_system;
  solar_system.Initialize(
      SOLUTION_DIR / "astronomy" / "gravity_model.proto.txt",
      SOLUTION_DIR / "astronomy" /
          "initial_state_jd_2433282_500000000.proto.txt");
  auto const ephemeris = solar_system.MakeEphemeris(
      McLachlanAtela1992Order5Optimal<Position<ICRFJ2000Equator>>(),
      45 * Minute,
      5 * Milli(Metre));
  ephemeris->Prolong(solar_system.epoch() + steps * Δt);

  not_null<MassiveBody const*> const earth =
      solar_system.massive_body(*ephemeris, "Earth");
  not_null<MassiveBody const*> const venus =
      solar_system.massive_body(*ephemeris, "Venus");

  MasslessBody probe;
  Position<ICRFJ2000Equator> probe_initial_position =
      ICRFJ2000Equator::origin + Displacement<ICRFJ2000Equator>(
                                     {0.5 * AstronomicalUnit,
                                      -1 * AstronomicalUnit,
                                      0 * AstronomicalUnit});
  Velocity<ICRFJ2000Equator> probe_velocity =
      Velocity<ICRFJ2000Equator>({0 * SIUnit<Speed>(),
                                  100 * Kilo(Metre) / Second,
                                  0 * SIUnit<Speed>()});
  DiscreteTrajectory<ICRFJ2000Equator> probe_trajectory;
  FillLinearTrajectory<ICRFJ2000Equator, DiscreteTrajectory>(
      probe_initial_position,
      probe_velocity,
      solar_system.epoch(),
      Δt,
      steps,
      &probe_trajectory);

  BarycentricRotatingDynamicFrame<ICRFJ2000Equator, Rendering>
      dynamic_frame(ephemeris.get(), earth, venus);
  while (state.KeepRunning()) {
    auto v = ApplyDynamicFrame(&probe,
                               &dynamic_frame,
                               probe_trajectory.Begin(),
                               probe_trajectory.End());
  }
}
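For the benchmark to run it still has to be registered; with the legacy range_x() API used above, registration presumably looks like this (the step counts are illustrative):

BENCHMARK(BM_BarycentricRotatingDynamicFrame)->Arg(1000)->Arg(10000);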
Example #6
/// load bipartite community affiliation graph from text file (each row contains the member node IDs for each community)
void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) {
    CmtyVV.Gen(Kilo(100), 0);
    TSsParser Ss(InFNm, ssfWhiteSep);
    while (Ss.Next()) {
        if(Ss.GetFlds() > 0) {
            TIntV CmtyV;
            for (int i = 0; i < Ss.GetFlds(); i++) {
                if (Ss.IsInt(i)) {
                    CmtyV.Add(Ss.GetInt(i));
                }
            }
            CmtyVV.Add(CmtyV);
        }
    }
    CmtyVV.Pack();
    printf("community loading completed (%d communities)\n", CmtyVV.Len());
}
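A small consumer sketch for the loader: read the communities and print a size histogram. It uses only container operations already visible above; "cmty.txt" is a placeholder path.

#include "Snap.h"

int main() {
  TVec<TIntV> CmtyVV;
  TAGMUtil::LoadCmtyVV("cmty.txt", CmtyVV);
  TIntH SizeCntH;  // community size -> number of communities
  for (int c = 0; c < CmtyVV.Len(); c++) {
    SizeCntH.AddDat(CmtyVV[c].Len()) += 1;
  }
  SizeCntH.SortByKey();
  for (int i = 0; i < SizeCntH.Len(); i++) {
    printf("size %d: %d communities\n", SizeCntH.GetKey(i).Val, SizeCntH[i].Val);
  }
  return 0;
}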
Example #7
// Eve communication network
PWgtNet TWgtNet::LoadEveCommNet(const TStr& FNm) {
  PWgtNet Net = TWgtNet::New();
  TStrSet AuthorSet;
  TChA Ln;
  TVec<char*> WrdV;
  TFIn FIn(FNm);
  for (int c=0; FIn.GetNextLn(Ln); c++) {
    TStrUtil::SplitOnCh(Ln, WrdV, ';');
    const int n1 = AuthorSet.AddKey(WrdV[0]);
    const int n2 = AuthorSet.AddKey(WrdV[1]);
    if (! Net->IsNode(n1)) { Net->AddNode(n1, WrdV[0]); }
    if (! Net->IsNode(n2)) { Net->AddNode(n2, WrdV[1]); }
    if (Net->IsEdge(n1, n2)) { Net->GetEDat(n1, n2) += 1; }
    else { Net->AddEdge(n1, n2, 1); }
    if (c % Kilo(10) == 0) { printf("\r%dk", c/1000); }
  }
  printf("\n");
  TGBase::PrintInfo(Net);
  printf("  Edge weight: %f\n", Net->GetEdgeWgt());
  return Net;
}
Example #8
// Gradient descent for p_c while fixing community affiliation graph (CAG).
int TAGMFit::MLEGradAscentGivenCAG(const double& Thres, const int& MaxIter, const TStr PlotNm) {
  int Edges = G->GetEdges();
  TExeTm ExeTm;
  TFltV GradV(LambdaV.Len());
  int iter = 0;
  TIntFltPrV IterLV, IterGradNormV;
  double GradCutOff = 1000;
  for (iter = 0; iter < MaxIter; iter++) {
    GradLogLForLambda(GradV);  // if the gradient points outside the feasible box, cut it off below
    for (int i = 0; i < LambdaV.Len(); i++) {
      if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
      if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
      if (LambdaV[i] <= MinLambda && GradV[i] < 0) { GradV[i] = 0.0; }
      if (LambdaV[i] >= MaxLambda && GradV[i] > 0) { GradV[i] = 0.0; }
    }
    double Alpha = 0.15, Beta = 0.2;
    if (Edges > Kilo(100)) { Alpha = 0.00015; Beta = 0.3;}
    double LearnRate = GetStepSizeByLineSearchForLambda(GradV, GradV, Alpha, Beta);
    if (TLinAlg::Norm(GradV) < Thres) { break; }
    for (int i = 0; i < LambdaV.Len(); i++) {
      double Change = LearnRate * GradV[i];
      LambdaV[i] += Change;
      if(LambdaV[i] < MinLambda) { LambdaV[i] = MinLambda;}
      if(LambdaV[i] > MaxLambda) { LambdaV[i] = MaxLambda;}
    }
    if (! PlotNm.Empty()) {
      double L = Likelihood();
      IterLV.Add(TIntFltPr(iter, L));
      IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
    }
  }
  if (! PlotNm.Empty()) {
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
    TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
    printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
  }
  return iter;
}
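The loop above is projected gradient ascent: gradients are cut off at ±GradCutOff, components pinned at a bound are frozen, and iterates are projected back into [MinLambda, MaxLambda]. The same pattern in isolation, as a plain C++17 sketch with illustrative values:

#include <algorithm>
#include <vector>

// One projected-ascent step on x within [lo, hi], with gradient cut-off.
void ProjectedAscentStep(std::vector<double>& x, std::vector<double> grad,
                         double rate, double lo, double hi, double cutoff) {
  for (std::size_t i = 0; i < x.size(); ++i) {
    double g = std::clamp(grad[i], -cutoff, cutoff);            // cut off runaway gradients
    if ((x[i] <= lo && g < 0) || (x[i] >= hi && g > 0)) g = 0;  // frozen at the boundary
    x[i] = std::clamp(x[i] + rate * g, lo, hi);                 // step, then project
  }
}

int main() {
  std::vector<double> lambda = {0.5, 2.0};
  std::vector<double> grad = {5000.0, -0.3};  // first component exceeds the cut-off
  ProjectedAscentStep(lambda, grad, 0.001, 0.0, 10.0, 1000.0);
  return 0;
}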
Example #9
void TTrawling::CountSupport() {
  for (int c = 0; c < CandItemH.Len(); c++) {
    CandItemH[c] = GetSupport(CandItemH.GetKey(c));
    if (c % Kilo(100) == 0) { printf("."); }
  }
}
Example #10
TEST_F(ManœuvreTest, Apollo8SIVB) {
  // Data from NASA's Saturn V Launch Vehicle, Flight Evaluation Report AS-503,
  // Apollo 8 Mission (1969),
  // http://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/19690015314.pdf.
  // We use the reconstructed or actual values.

  // Table 2-2. Significant Event Times Summary.
  Instant const range_zero;
  Instant const s_ivb_1st_90_percent_thrust = range_zero +    530.53 * Second;
  Instant const s_ivb_1st_eco               = range_zero +    684.98 * Second;
  // Initiate S-IVB Restart Sequence and Start of Time Base 6 (T6).
  Instant const t6                          = range_zero +   9659.54 * Second;
  Instant const s_ivb_2nd_90_percent_thrust = range_zero + 10'240.02 * Second;
  Instant const s_ivb_2nd_eco               = range_zero + 10'555.51 * Second;

  // From Table 7-2. S-IVB Steady State Performance - First Burn.
  Force thrust_1st                  = 901'557 * Newton;
  Speed specific_impulse_1st        = 4'204.1 * Newton * Second / Kilogram;
  Variation<Mass> lox_flowrate_1st  = 178.16 * Kilogram / Second;
  Variation<Mass> fuel_flowrate_1st = 36.30 * Kilogram / Second;

  // From Table 7-7. S-IVB Steady State Performance - Second Burn.
  Force thrust_2nd                  = 897'548 * Newton;
  Speed specific_impulse_2nd        = 4'199.2 * Newton * Second / Kilogram;
  Variation<Mass> lox_flowrate_2nd  = 177.70 * Kilogram / Second;
  Variation<Mass> fuel_flowrate_2nd = 36.01 * Kilogram / Second;

  // Table 21-5. Total Vehicle Mass, S-IVB First Burn Phase, Kilograms.
  Mass total_vehicle_at_s_ivb_1st_90_percent_thrust = 161'143 * Kilogram;
  Mass total_vehicle_at_s_ivb_1st_eco               = 128'095 * Kilogram;

  // Table 21-7. Total Vehicle Mass, S-IVB Second Burn Phase, Kilograms.
  Mass total_vehicle_at_s_ivb_2nd_90_percent_thrust = 126'780 * Kilogram;
  Mass total_vehicle_at_s_ivb_2nd_eco               =  59'285 * Kilogram;

  // An arbitrary direction, we're not testing this.
  Vector<double, World> e_y({0, 1, 0});

  Manœuvre<World> first_burn(thrust_1st,
                             total_vehicle_at_s_ivb_1st_90_percent_thrust,
                             specific_impulse_1st,
                             e_y);
  EXPECT_THAT(RelativeError(lox_flowrate_1st + fuel_flowrate_1st,
                            first_burn.mass_flow()),
              Lt(1E-4));

  first_burn.set_duration(s_ivb_1st_eco - s_ivb_1st_90_percent_thrust);
  EXPECT_THAT(
      RelativeError(total_vehicle_at_s_ivb_1st_eco, first_burn.final_mass()),
      Lt(1E-3));

  first_burn.set_initial_time(s_ivb_1st_90_percent_thrust);
  EXPECT_EQ(s_ivb_1st_eco, first_burn.final_time());

  // Accelerations from Figure 4-4. Ascent Trajectory Acceleration Comparison.
  // Final acceleration from Table 4-2. Comparison of Significant Trajectory
  // Events.
  EXPECT_THAT(
      first_burn.acceleration()(first_burn.initial_time()).Norm(),
      AllOf(Gt(5 * Metre / Pow<2>(Second)), Lt(6.25 * Metre / Pow<2>(Second))));
  EXPECT_THAT(first_burn.acceleration()(range_zero + 600 * Second).Norm(),
              AllOf(Gt(6.15 * Metre / Pow<2>(Second)),
                    Lt(6.35 * Metre / Pow<2>(Second))));
  EXPECT_THAT(first_burn.acceleration()(first_burn.final_time()).Norm(),
              AllOf(Gt(7.03 * Metre / Pow<2>(Second)),
                    Lt(7.05 * Metre / Pow<2>(Second))));

  Manœuvre<World> second_burn(thrust_2nd,
                              total_vehicle_at_s_ivb_2nd_90_percent_thrust,
                              specific_impulse_2nd,
                              e_y);
  EXPECT_THAT(RelativeError(lox_flowrate_2nd + fuel_flowrate_2nd,
                            second_burn.mass_flow()),
              Lt(2E-4));

  second_burn.set_duration(s_ivb_2nd_eco - s_ivb_2nd_90_percent_thrust);
  EXPECT_THAT(
      RelativeError(total_vehicle_at_s_ivb_2nd_eco, second_burn.final_mass()),
      Lt(2E-3));

  second_burn.set_initial_time(s_ivb_2nd_90_percent_thrust);
  EXPECT_EQ(s_ivb_2nd_eco, second_burn.final_time());

  // Accelerations from Figure 4-9. Injection Phase Acceleration Comparison.
  // Final acceleration from Table 4-2. Comparison of Significant Trajectory
  // Events.
  EXPECT_THAT(second_burn.acceleration()(second_burn.initial_time()).Norm(),
              AllOf(Gt(7 * Metre / Pow<2>(Second)),
                    Lt(7.5 * Metre / Pow<2>(Second))));
  EXPECT_THAT(second_burn.acceleration()(t6 + 650 * Second).Norm(),
              AllOf(Gt(8 * Metre / Pow<2>(Second)),
                    Lt(8.02 * Metre / Pow<2>(Second))));
  EXPECT_THAT(second_burn.acceleration()(t6 + 700 * Second).Norm(),
              AllOf(Gt(8.8 * Metre / Pow<2>(Second)),
                    Lt(9 * Metre / Pow<2>(Second))));
  EXPECT_THAT(second_burn.acceleration()(t6 + 750 * Second).Norm(),
              AllOf(Gt(9.9 * Metre / Pow<2>(Second)),
                    Lt(10 * Metre / Pow<2>(Second))));
  EXPECT_THAT(second_burn.acceleration()(t6 + 850 * Second).Norm(),
              AllOf(Gt(12.97 * Metre / Pow<2>(Second)),
                    Lt(13 * Metre / Pow<2>(Second))));
  EXPECT_THAT(second_burn.acceleration()(second_burn.final_time()).Norm(),
              AllOf(Gt(15.12 * Metre / Pow<2>(Second)),
                    Lt(15.17 * Metre / Pow<2>(Second))));

  EXPECT_THAT(second_burn.Δv(),
              AllOf(Gt(3 * Kilo(Metre) / Second),
                    Lt(3.25 * Kilo(Metre) / Second)));

  // From the Apollo 8 flight journal.
  EXPECT_THAT(AbsoluteError(10'519.6 * Foot / Second, second_burn.Δv()),
              Lt(20 * Metre / Second));
}
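The Δv expectations are consistent with the Tsiolkovsky equation applied to the second-burn table values; a quick standalone arithmetic check in plain doubles:

#include <cmath>
#include <cstdio>

int main() {
  double const v_e = 4199.2;   // effective exhaust velocity in m/s (Table 7-7)
  double const m0 = 126780.0;  // mass at second-burn 90% thrust, kg (Table 21-7)
  double const mf = 59285.0;   // mass at second-burn ECO, kg (Table 21-7)
  double const dv = v_e * std::log(m0 / mf);  // ~3192 m/s, inside (3, 3.25) km/s
  // The flight journal's 10'519.6 ft/s is ~3206 m/s, within 20 m/s of this.
  std::printf("second-burn delta-v = %.0f m/s\n", dv);
  return 0;
}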
Example #11
// Parse files downloaded from IMDB. Actors point to movies.
// Files: actors.list.gz, languages.list.gz, countries.list.gz
PImdbNet TImdbNet::LoadFromImdb(const TStr& DataDir) {
  PImdbNet Net = TImdbNet::New();
  Net->Reserve((int) Mega(2.5), -1);
  // ACTORS
  { TSsParser Ss(DataDir+"\\actors.list.gz", ssfTabSep);
  while (Ss.Next() && strcmp(Ss[0],"THE ACTORS LIST")!=0) { }
  Ss.Next();  Ss.Next();  Ss.Next();  
  int ActorId = -1, NAct=0;
  for (int i = 0; Ss.Next(); i++) {
    //printf("%s\n", Ss.DumpStr());
    int mPos = 0;
    if (Ss.Len() > 1) { // actor
      ActorId = Net->AddStr(Ss[0]);
      if (Net->IsNode(ActorId)) {
        printf("  actor '%s'(%d) is already a node %s\n", Ss[0], 
          ActorId, TImdbNet::GetMovieTyStr((TMovieTy) Net->GetNDat(ActorId).Type.Val).CStr());
        continue;
      } else { Net->AddNode(ActorId); }
      TImdbNode& Node = Net->GetNDat(ActorId);
      Node.Name = ActorId;  NAct++;
      Node.Type = mtyActor;
      mPos = 1;  while (strlen(Ss[mPos])==0) { mPos++; }
    } 
    // movie (delete everything last [)
    //  also parse the position <>, but is a property of an edge (actor, movie) pair
    char *DelFrom;
    char *C1 = strrchr(Ss[mPos], '<');
    char *C2 = strrchr(Ss[mPos], '[');
    if (C1==NULL) { DelFrom=C2; }
    else if (C2==NULL) { DelFrom=C1; }
    else { DelFrom = TMath::Mn(C1, C2); }
    if (DelFrom != NULL) {
      char *mov = DelFrom;  while (*mov) { *mov=0; mov++; }
      mov = (char *) DelFrom-1;  while (TCh::IsWs(*mov)) { *mov=0; mov--; }
    }
    const int MovNId = Net->AddStr(Ss[mPos]);
    if (! Net->IsNode(MovNId)) {
      Net->AddNode(MovNId);
      TImdbNode& Node = Net->GetNDat(MovNId);
      Node.Type = mtyMovie;  Node.Year = GetYearFromTitle(Ss[mPos]);
    }
    if (Net->IsEdge(ActorId, MovNId)) {
      printf("  already an edge %d %d\n", ActorId, MovNId); }
    else { Net->AddEdge(ActorId, MovNId); }
    if ((i+1) % Kilo(10) == 0) { 
      printf("\r   %d", (i+1)/1000); 
      if ((i+1) % Kilo(100) == 0) { 
        printf(" nodes: %d, edges: %d, actors: %d", 
          Net->GetNodes(), Net->GetEdges(), NAct); }
    }
  } 
  printf("\n=== nodes: %d, edges: %d, actors: %d", Net->GetNodes(), Net->GetEdges(), NAct); }
  // MOVIE LANGUAGE
  { TSsParser Ss(DataDir+"\\language.list.gz", ssfTabSep);
  while (Ss.Next() && strcmp(Ss[0],"LANGUAGE LIST")!=0) { }
  Ss.Next();
  int LangCnt=0, i;
  for (i = 0; Ss.Next(); i++) {
    char *Mov = Ss[0];
    char *Lang = Ss[Ss.Len()-1];
    if (Net->IsStr(Mov)) {
      const int NId = Net->GetStrId(Mov);
      Net->GetNDat(NId).Lang = Net->AddStr(Lang);
      LangCnt++;
    } //else { printf("movie not found: '%s'\n", Mov); }
    if ((i+1) % Kilo(10) == 0) { 
      printf("\r   %d found %d ", (i+1), LangCnt); }
  } 
  printf("\n  LANG: total movies: %d,  found %d\n", (i+1), LangCnt); }
  // MOVIE COUNTRY
  { TSsParser Ss(DataDir+"\\countries.list.gz", ssfTabSep);
  while (Ss.Next() && strcmp(Ss[0],"COUNTRIES LIST")!=0) { }
  Ss.Next();
  int LangCnt=0, i;
  for (i = 0; Ss.Next(); i++) {
    char *Mov = Ss[0];
    char *Cntry = Ss[Ss.Len()-1];
    if (Net->IsStr(Mov)) {
      const int NId = Net->GetStrId(Mov);
      Net->GetNDat(NId).Cntry = Net->AddStr(Cntry);
      LangCnt++;
    } //else { printf("country not found: '%s'\n", Mov); }
    if ((i+1) % Kilo(10) == 0) { 
      printf("\n   %d found %d ", (i+1), LangCnt); }
  } 
  printf("\r  CNTRY: total movies: %d,  found %d\n", (i+1), LangCnt);  }
  return Net;
}
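A hypothetical driver for the parser, assuming actors.list.gz, language.list.gz and countries.list.gz sit under the given directory (note the loader joins paths with Windows-style separators) and that the TImdbNet header is available; only functions already used above appear here.

#include "Snap.h"

int main() {
  PImdbNet Net = TImdbNet::LoadFromImdb("imdb");  // "imdb" is a placeholder directory
  printf("final network: %d nodes, %d edges\n", Net->GetNodes(), Net->GetEdges());
  return 0;
}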
Example #12
void BigMain(int argc, char* argv[]) {
  TExeTm ExeTm;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs("QuotesApp");
  const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
  if (Env.IsEndOfRun()) {
    printf("To do:\n");
    printf("    MkDataset         : Make memes dataset (extract quotes and save txt)\n");
    printf("    ExtractSubset     : Extract a subset of memes containing particular words\n");
    printf("    MemesToQtBs       : Load memes dataset and create quote base\n");
    printf("    MkClustNet        : Build cluster network from the quote base\n");
    return;
  }	
#pragma region mkdataset
  // extract quotes and links and make them into a single file
  if (ToDo == "mkdataset") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
    const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
    const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
    const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
    //// parse directly from Spinn3r
    TStr Spinn3rFNm;
    THashSet<TMd5Sig> SeenUrlSet;
    if (UrlOnlyOnce && ! UrlFNm.Empty()) {  // keep track of already seen urls (so that there are no duplicate urls)
      TFIn FIn(UrlFNm);  SeenUrlSet.Load(FIn);
    }
    FILE *F = fopen(OutFNm.CStr(), "wt");
    TFIn FIn(InFNm);
    int Items=0;
    for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
      TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
      printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
      fflush(stdout);
      for (int item = 0; QE.Next(); item++) {
        const TMd5Sig PostMd5(QE.PostUrlStr);
        if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
        if (UrlOnlyOnce) {
          if (SeenUrlSet.IsKey(PostMd5)) { continue; }
          SeenUrlSet.AddKey(PostMd5);
        }
        fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
        //if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
        fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
        for (int q = 0; q < QE.QuoteV.Len(); q++) {
          if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
            fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
        }
        for (int l = 0; l < QE.LinkV.Len(); l++) {
          fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
        fprintf(F, "\n");
        if (item>0 && item % Kilo(100) == 0) {
          QE.DumpStat();  QE.ExeTm.Tick(); }
        Items++;
      }
      printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
      fflush(stdout);
    }
    printf("all done. Saving %d post urls\n", SeenUrlSet.Len());  fflush(stdout);
    if (! SeenUrlSet.Empty()) {
      TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
      SeenUrlSet.Save(FOut);
    }
    fclose(F);
  }
#pragma endregion mkdataset

#pragma region extractsubset
  // save posts with memes containing particular words
  else if (ToDo == "extractsubset") {
    const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
    const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
    const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");

    TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
    printf("Loading %s\n", WordsFNm.CStr());
    { TFIn FIn(WordsFNm);
    for (TStr Ln; FIn.GetNextLn(Ln); ) {
      printf("  %s\n", Ln.GetLc().CStr());
      CatchMemeV.Add(Ln.GetLc()); }
    }
    printf("%d strings loaded\n", CatchMemeV.Len());
    TFOut FOut(OutFNm);
    TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
    for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
      bool DoSave = false;
      for (int m = 0; m < Memes.MemeV.Len(); m++) {
        for (int i = 0; i < CatchMemeV.Len(); i++) {
          if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
            DoSave=true; break; }
        }
        if (DoSave) { break; }
      }
      if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
      if (posts % Mega(1) == 0) {
        printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
        FOut.Flush();
      }
    }
  }
#pragma endregion extractsubset

#pragma region memestoqtbs
  // load memes dataset (MkDataset) and create quote base
  else if (ToDo == "memestoqtbs") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201007_201107.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "File with news media urls");
    const TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-l:", 4, "Min quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");
    const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20100714", "Min time of quotes, format = YYYYMMDD");
    const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20110728", "Max time of quotes, format = YYYYMMDD");
    TSecTm MinTm(atoi(MinTmStr.GetSubStr(0, 3).CStr()), atoi(MinTmStr.GetSubStr(4, 5).CStr()), atoi(MinTmStr.GetSubStr(6, 7).CStr()));
    TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0, 3).CStr()), atoi(MaxTmStr.GetSubStr(4, 5).CStr()), atoi(MaxTmStr.GetSubStr(6, 7).CStr()));

    PQuoteBs QtBs = TQuoteBs::New();
    int HashTableSize = 100;  // 100 for each quarter; for one year of data, use 400
    int UrlSetSize = 4 * HashTableSize;
    QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
  }
#pragma endregion memestoqtbs

#pragma region mkclustnet
  // make cluster network
  else if (ToDo == "mkclustnet") {
    TStr InQtBsNm = Env.GetIfArgPrefixStr("-i:", "", "Input quote base file name");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output network/updated QtBs filename");
    TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
    bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
    bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
    double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
    double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
    double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
    double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
    const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-mf:", 5, "Min meme frequency");
    const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");

    // Load the quote base
    PQuoteBs QtBs;
    if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
    else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }

    // Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

    // Dump the clusters
    bool SkipUrl = true, FlashDisp = true;
    QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
  }
#pragma endregion mkclustnet

#pragma region memeclust
  else if (ToDo.SearchStr(TStr("memeclust")) >= 0) {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201101.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "File with news media urls");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
    const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");

    const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");
    TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
    bool IsQtBsReady = Env.GetIfArgPrefixBool("-qtbsready:", false, "Indicate whether quote base is ready and can be loaded readily");
    bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
    bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
    double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
    double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
    double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
    double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");

    const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20010101", "Min time of quotes, format = YYYYMMDD");
    const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20200101", "Max time of quotes, format = YYYYMMDD");
    TSecTm MinTm(atoi(MinTmStr.GetSubStr(0, 3).CStr()), atoi(MinTmStr.GetSubStr(4, 5).CStr()), atoi(MinTmStr.GetSubStr(6, 7).CStr()));
    TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0, 3).CStr()), atoi(MaxTmStr.GetSubStr(4, 5).CStr()), atoi(MaxTmStr.GetSubStr(6, 7).CStr()));

    // Construct the quote base from Zarya data
    PQuoteBs QtBs = TQuoteBs::New();

    if (!IsQtBsReady) {
      int HashTableSize = 100;  // 100 for each quarter; for one year of data, use 400
      if (ToDo == "memeclustzarya") {
        int UrlSetSize = 4 * HashTableSize;
        QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
      } else if (ToDo == "memeclustqtonly") {
        QtBs->ConstructQtBsQtOnly(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
      } else if (ToDo == "memeclustqttime") {
        QtBs->ConstructQtBsQtTime(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
      } else {
        printf("Please specify one of the three options for -do: memeclustzarya, memeclustqtonly, memeclustqttime!\n");
        return;
      }
    } else {
      TStr InQtBsNm = TStr::Fmt("%s-w%dmfq%d.QtBs", Pref.CStr(), MinWrdLen, MinMemeFq);
      if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
      else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }
    }

    // Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

    // Dump the clusters
    bool SkipUrl = true, FlashDisp = true;
    QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
  }
#pragma endregion memeclust
}
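BigMain is evidently driven from a conventional entry point; a sketch (the surrounding project may wire it differently):

// Sketch of the entry point that drives BigMain; the flag spellings follow the
// GetIfArgPrefix* declarations above, and -do: values are lower-cased via GetLc().
int main(int argc, char* argv[]) {
  // e.g.: QuotesApp -do:mkdataset -i:files.txt -o:Spinn3r-dataset.txt
  BigMain(argc, argv);
  return 0;
}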