void PoaGraphImpl::tracebackAndThread(std::string sequence,
                                      const AlignmentColumnMap& alignmentColumnForVertex,
                                      AlignMode alignMode, std::vector<Vertex>* outputPath)
{
    const int I = sequence.length();

    // perform traceback from (I,$), threading the new sequence into
    // the graph as we go.
    int i = I;
    const AlignmentColumn* curCol;
    VD v = null_vertex, forkVertex = null_vertex;
    VD u = exitVertex_;
    VD startSpanVertex;
    VD endSpanVertex = alignmentColumnForVertex.at(exitVertex_)->PreviousVertex[I];

    if (outputPath) {
        outputPath->resize(I);
        std::fill(outputPath->begin(), outputPath->end(), (size_t)-1);
    }

#define READPOS (i - 1)
#define VERTEX_ON_PATH(readPos, v)                 \
    if (outputPath) {                              \
        (*outputPath)[(readPos)] = externalize(v); \
    }

    while (!(u == enterVertex_ && i == 0)) {
        // u -> v
        // u: current vertex
        // v: vertex last visited in traceback (could be == u)
        // forkVertex: the vertex that will be the target of a new edge
        Vertex uExt = this->externalize(u);  // DEBUGGING
        Vertex vExt = this->externalize(v);  // DEBUGGING

        curCol = alignmentColumnForVertex.at(u);
        assert(curCol != NULL);
        PoaNode& curNodeInfo = vertexInfoMap_[u];
        VD prevVertex = curCol->PreviousVertex[i];
        MoveType reachingMove = curCol->ReachingMove[i];

        if (reachingMove == StartMove) {
            assert(v != null_vertex);
            if (forkVertex == null_vertex) {
                forkVertex = v;
            }
            // In local mode thread read bases, adjusting i (should stop at 0)
            while (i > 0) {
                assert(alignMode == AlignMode::LOCAL);
                VD newForkVertex = addVertex(sequence[READPOS]);
                add_edge(newForkVertex, forkVertex, g_);
                VERTEX_ON_PATH(READPOS, newForkVertex);
                forkVertex = newForkVertex;
                i--;
            }
        } else if (reachingMove == EndMove) {
            assert(forkVertex == null_vertex && u == exitVertex_ && v == null_vertex);
            forkVertex = exitVertex_;
            if (alignMode == AlignMode::LOCAL) {
                // Find the row # we are coming from, walk
                // back to there, threading read bases onto
                // the graph via forkVertex, adjusting i.
                const AlignmentColumn* prevCol = alignmentColumnForVertex.at(prevVertex);
                int prevRow = ArgMax(prevCol->Score);

                while (i > static_cast<int>(prevRow)) {
                    VD newForkVertex = addVertex(sequence[READPOS]);
                    add_edge(newForkVertex, forkVertex, g_);
                    VERTEX_ON_PATH(READPOS, newForkVertex);
                    forkVertex = newForkVertex;
                    i--;
                }
            }
        } else if (reachingMove == MatchMove) {
            VERTEX_ON_PATH(READPOS, u);
            // if there is an extant forkVertex, join it
            if (forkVertex != null_vertex) {
                add_edge(u, forkVertex, g_);
                forkVertex = null_vertex;
            }
            // add to existing node
            curNodeInfo.Reads++;
            i--;
        } else if (reachingMove == DeleteMove) {
            if (forkVertex == null_vertex) {
                forkVertex = v;
            }
        } else if (reachingMove == ExtraMove || reachingMove == MismatchMove) {
            // begin a new arc with this read base
            VD newForkVertex = addVertex(sequence[READPOS]);
            if (forkVertex == null_vertex) {
                forkVertex = v;
            }
            add_edge(newForkVertex, forkVertex, g_);
            VERTEX_ON_PATH(READPOS, newForkVertex);
            forkVertex = newForkVertex;
            i--;
        } else {
            throw std::runtime_error("unreachable");
        }

        v = u;
        u = prevVertex;
    }
    startSpanVertex = v;

    // if there is an extant forkVertex, join it to enterVertex
    if (forkVertex != null_vertex) {
        add_edge(enterVertex_, forkVertex, g_);
        startSpanVertex = forkVertex;
        forkVertex = null_vertex;
    }

    if (startSpanVertex != exitVertex_) {
        tagSpan(startSpanVertex, endSpanVertex);
    }

    // all filled in?
    assert(outputPath == NULL ||
           std::find(outputPath->begin(), outputPath->end(), ((size_t)-1)) == outputPath->end());

#undef READPOS
#undef VERTEX_ON_PATH
}
int main(int argc, char* argv[]) {
  if (argc != 2) {
    std::cerr << "Usage : ./main.out <edge_file>" << std::endl;
    exit(1);
  }

  Network network;
  std::ifstream fin(argv[1]);
  std::cerr << "Loading input file" << std::endl;
  network.LoadFile(fin);
  bool is_weighted = network.IsWeighted();
  if (!is_weighted) {
    std::cerr << "All the link weights are 1. Analyzing the network as a non-weighted network." << std::endl;
  }

  std::pair<double, double> fc;
  if (is_weighted) {
    std::cerr << "Conducting percolation analysis" << std::endl;
    std::ofstream lrp("link_removal_percolation.dat");
    lrp << "#fraction weak_link_removal_lcc susceptibility strong_link_removal_lcc susceptibility" << std::endl;
    fc = network.AnalyzeLinkRemovalPercolationVariableAccuracy(0.02, 0.005, lrp);
    lrp.flush();
  }

  std::cerr << "Calculating local clustering coefficients" << std::endl;
  network.CalculateLocalCCs();

  if (is_weighted) {
    std::cerr << "Calculating overlaps" << std::endl;
    network.CalculateOverlaps();
  }

  std::cerr << "Calculating degree distribution" << std::endl;
  std::ofstream dd("degree_distribution.dat");
  const auto degree_distribution = network.DegreeDistribution();
  for (const auto& f : degree_distribution) {
    dd << f.first << ' ' << f.second << std::endl;
  }
  dd.flush();

  if (is_weighted) {
    std::cerr << "Calculating link weight distribution" << std::endl;
    // double edge_weight_bin_size = 1.0;
    std::ofstream ewd("edge_weight_distribution.dat");
    for (const auto& f : network.EdgeWeightDistributionLogBin()) {
      ewd << f.first << ' ' << f.second << std::endl;
    }
    ewd.flush();
  }

  std::map<double, size_t> strength_distribution;
  if (is_weighted) {
    std::cerr << "Calculating node strength distribution" << std::endl;
    double avg_s = network.AverageEdgeWeight() * network.AverageDegree();
    double strength_bin_size = avg_s * 0.01;
    std::ofstream sd("strength_distribution.dat");
    strength_distribution = network.StrengthDistribution(strength_bin_size);
    for (const auto& f : strength_distribution) {
      sd << f.first << ' ' << f.second << std::endl;
    }
    sd.flush();
  }

  std::cerr << "Calculating c(k)" << std::endl;
  std::ofstream cc_d("cc_degree_correlation.dat");
  for (const auto& f : network.CC_DegreeCorrelation()) {
    cc_d << f.first << ' ' << f.second << std::endl;
  }
  cc_d.flush();

  if (is_weighted) {
    std::cerr << "Calculating s(k)" << std::endl;
    std::ofstream sdc("strength_degree_correlation.dat");
    for (const auto& f : network.StrengthDegreeCorrelation()) {
      sdc << f.first << ' ' << f.second << std::endl;
    }
    sdc.flush();
  }

  std::cerr << "Calculating k_nn(k)" << std::endl;
  std::ofstream ndc("neighbor_degree_correlation.dat");
  for (const auto& f : network.NeighborDegreeCorrelation()) {
    ndc << f.first << ' ' << f.second << std::endl;
  }
  ndc.flush();

  if (is_weighted) {
    std::cerr << "Calculating O(w)" << std::endl;
    std::ofstream owc("overlap_weight_correlation.dat");
    for (const auto& f : network.OverlapWeightCorrelationLogBin()) {
      owc << f.first << ' ' << f.second << std::endl;
    }
    owc.flush();
  }

  std::cerr << "Calculating scalar values" << std::endl;
  std::ofstream fout("_output.json");
  fout << "{" << std::endl;
  fout << " \"NumNodes\": " << network.NumNodes() << ',' << std::endl;
  fout << " \"NumEdges\": " << network.NumEdges() << ',' << std::endl;
  fout << " \"AverageDegree\": " << network.AverageDegree() << ',' << std::endl;
  fout << " \"Assortativity\": " << network.PCC_k_knn() << ',' << std::endl;
  fout << " \"ArgMax_Pk\": " << ArgMax(degree_distribution) << ',' << std::endl;
  fout << " \"ClusteringCoefficient\": " << network.ClusteringCoefficient() << ',' << std::endl;
  fout << " \"PCC_C_k\": " << network.PCC_C_k();
  if (is_weighted) {
    fout << ',' << std::endl;
    double ave_w = network.AverageEdgeWeight();
    fout << " \"AverageEdgeWeight\": " << ave_w << ',' << std::endl;
    double ave_k = network.AverageDegree();
    fout << " \"AverageStrength\": " << ave_w * ave_k << ',' << std::endl;
    double argmax_ps = ArgMax(strength_distribution);
    fout << " \"ArgMax_Ps\": " << argmax_ps << ',' << std::endl;
    fout << " \"Normalized_ArgMax_Ps\": " << argmax_ps / (ave_w * ave_k) << ',' << std::endl;
    fout << " \"PCC_s_k\": " << network.PCC_s_k() << ',' << std::endl;
    fout << " \"AverageOverlap\": " << network.AverageOverlap() << ',' << std::endl;
    fout << " \"PCC_O_w\": " << network.PCC_O_w() << ',' << std::endl;
    fout << " \"Fc_Ascending\": " << fc.first << ',' << std::endl;
    fout << " \"Fc_Descending\": " << fc.second << ',' << std::endl;
    fout << " \"Delta_Fc\": " << fc.second - fc.first << std::endl;
  }
  fout << "}" << std::endl;

  return 0;
}
// Convenience overload: apply f to every element of `in`, then take the
// argmax of the mapped values. The template header is restored here so the
// declaration compiles as written.
template <class TIn, class TOut>
size_t ArgMax(const std::vector<TIn>& in, const std::function<TOut(TIn)> f) {
  return ArgMax(Map<TIn, TOut>(in, f));
}
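The overload above delegates to other `ArgMax` overloads that are not shown in this excerpt: `main()` calls `ArgMax` directly on `std::map` containers (`degree_distribution`, `strength_distribution`), and the POA traceback calls it on a score vector (`ArgMax(prevCol->Score)`). The following is a minimal, hypothetical sketch of what such overloads might look like, assuming the vector form returns the index of the largest element and the map form returns the key of the largest value; it is not the repository's actual implementation.

```cpp
#include <cstddef>
#include <iterator>
#include <map>
#include <vector>

// Hypothetical sketch: index of the largest element of a non-empty vector.
template <class T>
size_t ArgMax(const std::vector<T>& in) {
  size_t best = 0;
  for (size_t i = 1; i < in.size(); ++i) {
    if (in[i] > in[best]) best = i;
  }
  return best;
}

// Hypothetical sketch: key whose mapped value is largest in a non-empty map,
// e.g. the degree k at which P(k) peaks (used for "ArgMax_Pk").
template <class K, class V>
K ArgMax(const std::map<K, V>& in) {
  auto best = in.begin();
  for (auto it = std::next(in.begin()); it != in.end(); ++it) {
    if (it->second > best->second) best = it;
  }
  return best->first;
}
```

With overloads of this shape, `ArgMax(degree_distribution)` yields the most probable degree and `ArgMax(strength_distribution)` the modal strength bin, matching how the results are written into `_output.json` above.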