int main() { // push waypoints to a routes' map waypoints.push_back(waypoint(-4.0, -3.0)); // 0 waypoints.push_back(waypoint(-3.0, -1.0)); // 1 waypoints.push_back(waypoint(1.0, -4.0)); // 2 waypoints.push_back(waypoint(3.0, -2.0)); // 3 waypoints.push_back(waypoint(-1.0, 1.0)); // 4 waypoints.push_back(waypoint(4.0, -3.0)); // 5 waypoints.push_back(waypoint(2.0, 1.0)); // 6 waypoints.push_back(waypoint(-2.0, 2.0)); // 7 waypoints.push_back(waypoint(5.0, -1.0)); // 8 waypoints.push_back(waypoint(6.0, 2.0)); // 9 waypoints.push_back(waypoint(-1.0, 4.0)); // 10 waypoints.push_back(waypoint(4.0, 4.0)); // 11 waypoints.push_back(waypoint(8.0, 3.0)); // 12 waypoints.push_back(waypoint(1.0, 6.0)); // 13 waypoints.push_back(waypoint(6.0, 5.0)); // 14 waypoints.push_back(waypoint(2.0, 7.0)); // 15 waypoints.push_back(waypoint(7.0, 7.0)); // 16 waypoints.push_back(waypoint(4.0, 7.0)); // 17 waypoints.push_back(waypoint(6.0, 9.0)); // 18 waypoints.push_back(waypoint(8.0, 9.0)); // 19 obstacles.push_back(obstacle(0.0,5.0,1.0)); // 0 obstacles.push_back(obstacle(3.0,3.0,2.0)); // 1 obstacles.push_back(obstacle(3.5,0.0,1.0)); // 2 obstacles.push_back(obstacle(8.0,7.0,2.5)); // 3 // define exponential traverser type and create an exemplar typedef ExpTr<node_value,cost_value,expand,less<cost_value> > ExpTr_t; ExpTr_t j; cout << "in exp traverser forward search ...\n"; j = ExpTr_t(0); // set a maximal lag length j.getexpfunc().setMaxLagLen(5.0); // or 6.0 // add two defined handlers to enable calculation of an expansions number j.set_handler_on_expand_root(OnExpandRoot); j.set_handler_on_select_cursor(OnSelectCursor); // launch A* algorithm (combination of a predefined heuristics with a best - first // search that is provided by exponential traverser is A*!) ForwardSearch(j, goal); ShowPath(j); // show founded path cout << "number of expansions = " << num_expansions << '\n'; cout << "...out \n\n"; cout << "Search is completed.\n"; return 0; }
void ForwardSearch( int u, int inDummy, int time, int *visitTime, int *counter, int *visitDummy, ContigGraph &contigGraph ) { if ( visitTime[u] == 2 * time ) return ; visitTime[u] = 2 * time ; counter[u] = 1 ; visitDummy[u] = inDummy ; struct _pair *buffer = new struct _pair[ MAX_NEIGHBOR ] ; int ncnt ; int i ; ncnt = contigGraph.GetNeighbors( u, 1 - inDummy, buffer, MAX_NEIGHBOR ) ; for ( i = 0 ; i < ncnt ; ++i ) { /*if ( time == 639 ) { printf( "forwardsearch: (%d %d)=>(%d %d)\n", u, 1 - inDummy, buffer[i].a, buffer[i].b ) ; }*/ ForwardSearch( buffer[i].a, buffer[i].b, time, visitTime, counter, visitDummy, contigGraph ) ; } delete[] buffer ; }
int main() { // grid map // vertical edges C[0][1] = 11; C[1][2] = 10; C[2][3] = 8; C[3][4] = 7; C[4][5] = 9; C[5][6] = 10; C[10][11] = 9; C[11][12] = 8; C[12][13] = 7; C[13][14] = 6; C[14][15] = 8; C[15][16] = 12; C[20][21] = 10; C[21][22] = 10; C[22][23] = 8; C[23][24] = 8; C[24][25] = 9; C[25][26] = 13; C[30][31] = 12; C[31][32] = 11; C[32][33] = 9; C[33][34] = 7; C[34][35] = 10; C[35][36] = 13; C[40][41] = 13; C[41][42] = 10; C[42][43] = 9; C[43][44] = 8; C[44][45] = 11; C[45][46] = 14; C[50][51] = 14; C[51][52] = 11; C[52][53] = 10; C[53][54] = 10; C[54][55] = 12; C[55][56] = 15; C[60][61] = 14; C[61][62] = 12; C[62][63] = 11; C[63][64] = 11; C[64][65] = 13; C[65][66] = 14; C[70][71] = 12; C[71][72] = 10; C[72][73] = 8; C[73][74] = 10; C[74][75] = 12; C[75][76] = 13; C[80][81] = 10; C[81][82] = 9; C[82][83] = 7; C[83][84] = 9; C[84][85] = 12; C[85][86] = 14; // horizontal edges C[0][10] = 12; C[10][20] = 11; C[20][30] = 10; C[30][40] = 9; C[40][50] = 13; C[50][60] = 14; C[60][70] = 17; C[70][80] = 20; C[1][11] = 11; C[11][21] = 9; C[21][31] = 8; C[31][41] = 7; C[41][51] = 9; C[51][61] = 13; C[61][71] = 14; C[71][81] = 19; C[2][12] = 10; C[12][22] = 8; C[22][32] = 8; C[32][42] = 8; C[42][52] = 10; C[52][62] = 12; C[62][72] = 13; C[72][82] = 15; C[3][13] = 12; C[13][23] = 13; C[23][33] = 12; C[33][43] = 10; C[43][53] = 11; C[53][63] = 13; C[63][73] = 15; C[73][83] = 20; C[4][14] = 14; C[14][24] = 13; C[24][34] = 12; C[34][44] = 10; C[44][54] = 9; C[54][64] = 8; C[64][74] = 11; C[74][84] = 15; C[5][15] = 17; C[15][25] = 14; C[25][35] = 13; C[35][45] = 10; C[45][55] = 11; C[55][65] = 13; C[65][75] = 14; C[75][85] = 17; C[6][16] = 20; C[16][26] = 18; C[26][36] = 16; C[36][46] = 15; C[46][56] = 14; C[56][66] = 12; C[66][76] = 15; C[76][86] = 17; // diagonal edges C[5][16] = 28; C[4][15] = 23; C[15][26] = 20; C[3][14] = 18; C[14][25] = 21; C[25][36] = 27; C[2][13] = 19; C[13][24] = 20; C[24][35] = 21; C[35][46] = 25; C[1][12] = 18; C[12][23] = 17; C[23][34] = 21; C[34][45] = 19; C[45][56] = 27; C[0][11] = 20; C[11][22] = 18; C[22][33] = 18; C[33][44] = 19; C[44][55] = 22; C[55][66] = 26; C[10][21] = 20; C[21][32] = 19; C[32][43] = 18; C[43][54] = 20; C[54][65] = 22; C[65][76] = 28; C[20][31] = 20; C[31][42] = 18; C[42][53] = 19; C[53][64] = 21; C[64][75] = 22; C[75][86] = 29; C[30][41] = 20; C[41][52] = 19; C[52][63] = 22; C[63][74] = 23; C[74][85] = 25; C[40][51] = 23; C[51][62] = 24; C[62][73] = 22; C[73][84] = 24; C[50][61] = 24; C[61][72] = 23; C[72][83] = 24; C[60][71] = 26; C[71][82] = 26; C[70][81] = 29; typedef LinTr<int,int,expand,greater<int> > LinTr_t; // linear ttraverser typedef ExpTr<int,int,expand,less<int> > ExpTr_t; // exponential traverser typedef ExpTr<int,int,bidir_expand,less<int> > RevExpTr_t; // second traverser for bi-directional search LinTr_t i; ExpTr_t j; RevExpTr_t k; // LINEAR TRAVERSER IN WORK /* cout << "in lin traverser forward search ...\n"; cout << "eps = " << eps << '\n'; i = LinTr_t(start); i.set_handler_on_expand_root(OnExpandRoot); i.set_handler_on_select_cursor(OnSelectCursor); ForwardSearch(i, goal); ShowPath(i); cout << "number of expansions = " << num_expansions << '\n'; cout << "...out \n\n"; /**/ /* cout << "in lin traverser search bounded by " << thresh << "...\n"; BoundedSearch(i = LinTr_t(start), goal, thresh); ShowPath(i); cout << "...out \n\n"; cout << "in lin traverser search bounded by " << next_thresh << "... continue previous one ... \n"; BoundedSearch(i, goal, next_thresh); ShowPath(i); cout << "...out \n\n"; cout << "in lin traverser search bounded by " << next_thresh2 << "... continue previous one ...\n"; BoundedSearch(i, goal, next_thresh2); ShowPath(i); cout << "...out \n\n"; */ /* cout << "in lin traverser search bounded by " << less_than_opt_value << "...\n"; BoundedSearch(i = LinTr_t(start), goal, less_than_opt_value); ShowPath(i); cout << "...out \n\n"; */ /* cout << "in lin traverser optimal search bounded by " << thresh << " from initial state ...\n"; i = LinTr_t(start); BoundedOptSearch(i, goal, thresh); ShowPath(i); cout << "...out\n\n"; */ /* cout << "in lin traverser optimal search bounded by " << opt_value << " from initial state ...\n"; BoundedOptSearch(i = LinTr_t(start), goal, opt_value); ShowPath(i); cout << "...out\n\n"; */ /**/ cout << "in lin traverser branch and bound ...\n"; i = LinTr_t(start); int eps = i.getexpfunc().GetEps(); cout << "eps = " << eps << '\n'; i.set_handler_on_expand_root(OnExpandRoot); i.set_handler_on_select_cursor(OnSelectCursor); BranchAndBound(i, goal); ShowPath(i); cout << "number of expansions = " << num_expansions << '\n'; cout << "...out \n\n"; cout << "in lin traverser branch and bound ...\n"; i = LinTr_t(start); i.getexpfunc().SetEps(10); expand &e = i.getexpfunc(); e.SetEps(12); eps = i.getexpfunc().GetEps(); cout << "eps = " << eps << "\n"; i.set_handler_on_expand_root(OnExpandRoot); i.set_handler_on_select_cursor(OnSelectCursor); BranchAndBound(i, goal); ShowPath(i); cout << "number of expansions = " << num_expansions << '\n'; cout << "...out\n\n"; cout << "in lin traverser search bounded by " << 140 << "...\n"; cout << "eps = " << eps << '\n'; i = LinTr_t(start); i.set_handler_on_expand_root(OnExpandRoot); i.set_handler_on_select_cursor(OnSelectCursor); BoundedSearch(i, goal, 140); ShowPath(i); cout << "number of expansions = " << num_expansions << '\n'; cout << "...out\n\n"; /**/ // EXPONENTIAL TRAVERSER IN WORK /**/ cout << "in exp traverser forward search ...\n"; j = ExpTr_t(start); j.getexpfunc().SetEps(0); eps = j.getexpfunc().GetEps(); cout << "eps = " << eps << '\n'; j.set_handler_on_expand_root(OnExpandRoot); j.set_handler_on_select_cursor(OnSelectCursor); ForwardSearch(j, goal); ShowPath(j); cout << "number of expansions = " << num_expansions << '\n'; cout << "...out \n\n"; /**/ cout << "in exp traverser search bounded by " << thresh << "...\n"; BoundedSearch(j = ExpTr_t(start), goal, thresh); ShowPath(j); cout << "...out \n\n"; /**/ /* cout << "in exp traverser search bounded by " << next_thresh << "... continue previous one...\n"; BoundedSearch(j, goal, next_thresh); ShowPath(j); cout << "...out \n\n"; /**/ /* cout << "in exp traverser optimal search from initial state bounded by " << thresh << "...\n"; BoundedOptSearch(j = ExpTr_t(start), goal, thresh); ShowPath(j); cout << "...out \n\n"; /**/ /* cout << "in exp traverser branch and bound ...\n"; BranchAndBound(j = ExpTr_t(start), goal); ShowPath(j); cout << "...out \n\n"; */ /**/ cout << "in exp traverser iterative deepening ...\n"; j = ExpTr_t(start); j.getexpfunc().SetEps(10); eps = j.getexpfunc().GetEps(); cout << "eps = " << eps << '\n'; j.set_handler_on_expand_root(OnExpandRoot); j.set_handler_on_select_cursor(OnSelectCursor); IterativeDeepening(j, goal); ShowPath(j); cout << "number of expansions = " << num_expansions << '\n'; cout << "...out \n\n"; /**/ /* cout << "in exp traverser bi-directional search ...\n"; BidirectionalSearch( j = ExpTr_t(start), k = RevExpTr_t(final) ); ShowStickedPaths(j,k); cout << "...out \n\n"; */ cout << "All searches completed.\n"; return 0; }
int main( int argc, char *argv[] ) { Alignments alignments ; Genome genome ; std::vector<int> rascafFileId ; char line[2048] ; char prefix[512] = "rascaf_scaffold" ; int rawAssemblyInd = 1 ; FILE *rascafFile ; bool contigLevel = false ; int i ; FILE *outputFile ; FILE *infoFile ; breakN = 1 ; if ( argc < 2 ) { fprintf( stderr, "%s", usage ) ; exit( 1 ) ; } for ( i = 1 ; i < argc ; ++i ) { if ( !strcmp( "-o", argv[i] ) ) { strcpy( prefix, argv[i + 1 ] ) ; ++i ; } else if ( !strcmp( "-ms", argv[i] ) ) { minSupport = atoi( argv[i + 1] ) ; ++i ; } else if ( !strcmp( "-ignoreGap", argv[i] ) ) { ignoreGap = true ; } else if ( !strcmp( "-r", argv[i] ) ) { rascafFileId.push_back( i + 1 ) ; ++i ; } else { fprintf( stderr, "Unknown option: %s\n", argv[i] ) ; exit( EXIT_FAILURE ) ; } } if ( rascafFileId.size() == 0 ) { fprintf( stderr, "Must use -r to specify rascaf output file.\n" ) ; exit( EXIT_FAILURE ) ; } MAX_NEIGHBOR = 1 + rascafFileId.size() ; // Get the bam file. rascafFile = fopen( argv[ rascafFileId[0] ], "r" ) ; while ( fgets( line, sizeof( line ), rascafFile ) != NULL ) { if ( strstr( line, "command line:" ) ) { char *p ; char buffer[512] ; p = strstr( line, "-breakN" ) ; if ( p != NULL ) { p += 7 ; while ( *p == ' ' ) ++p ; for ( i = 0 ; *p && *p != ' ' ; ++p, ++i ) buffer[i] = *p ; buffer[i] = '\0' ; breakN = atoi( buffer ) ; } p = strstr( line, "-b" ) ; if ( p == NULL ) { fprintf( stderr, "Could not find the bam file specified by -b in Rascaf.\n" ) ; exit( 1 ) ; } p += 2 ; while ( *p == ' ' ) ++p ; for ( i = 0 ; *p && *p != ' ' ; ++p, ++i ) buffer[i] = *p ; buffer[i] = '\0' ; alignments.Open( buffer ) ; p = strstr( line, "-f") ; if ( p == NULL ) { fprintf( stderr, "Could not find the raw assembly file specified by -f in Rascaf.\n" ) ; exit( 1 ) ; } p += 2 ; while ( *p == ' ' ) ++p ; for ( i = 0 ; *p && *p != ' ' && *p != '\n' ; ++p, ++i ) buffer[i] = *p ; buffer[i] = '\0' ; fprintf( stderr, "Found raw assembly file: %s\n", buffer ) ; genome.Open( alignments, buffer ) ; break ; } } fclose( rascafFile ) ; // Parse the input. for ( unsigned int fid = 0 ; fid < rascafFileId.size() ; ++fid ) { rascafFile = fopen( argv[ rascafFileId[fid] ], "r" ) ; bool start = false ; int tag ; while ( fgets( line, sizeof( line ), rascafFile ) != NULL ) { if ( strstr( line, "command line:" ) ) { start = true ; if ( strstr( line, "-f" ) ) { contigLevel = true ; } continue ; } if ( !start ) continue ; if ( !strcmp( line, "WARNINGS:\n" ) ) break ; std::vector<struct _part> nparts ; if ( line[0] >= '0' && line[0] <= '9' ) { AddConnection( line, alignments, nparts ) ; connects.push_back( nparts ) ; tag = 0 ; } else if ( line[0] == '\t' || line[0] == ' ' ) { // Break the nparts if the support is too low. int num = 0 ; for ( i = 0 ; line[i] < '0' || line[i] > '9' ; ++i ) ; for ( ; line[i] >= '0' && line[i] <= '9' ; ++i ) num = num * 10 + line[i] - '0' ; ++tag ; if ( num < minSupport ) { nparts = connects.back() ; connects.pop_back() ; int size = nparts.size() ; std::vector<struct _part> newNParts ; for ( i = 0 ; i < tag ; ++i ) newNParts.push_back( nparts[i] ) ; if ( newNParts.size() > 1 ) connects.push_back( newNParts ) ; newNParts.clear() ; for ( ; i < size ; ++i ) newNParts.push_back( nparts[i] ) ; if ( newNParts.size() > 1 ) connects.push_back( newNParts ) ; tag = 0 ; } } } fclose( rascafFile ) ; } if ( contigLevel == false ) { genome.SetIsOpen( contigLevel ) ; } // Build the graph int contigCnt = genome.GetContigCount() ; int edgeCnt = 0 ; int csize = connects.size() ; for ( i = 0 ; i < csize ; ++i ) edgeCnt += connects[i].size() ; ContigGraph contigGraph( contigCnt, contigCnt + edgeCnt ) ; for ( i = 0 ; i < contigCnt - 1 ; ++i ) { if ( genome.GetChrIdFromContigId( i ) == genome.GetChrIdFromContigId( i + 1 ) ) { contigGraph.AddEdge( i, 1, i + 1, 0 ) ; } } for ( i = 0 ; i < csize ; ++i ) { std::vector<struct _part> &parts = connects[i] ; int size = parts.size() ; for ( int j = 0 ; j < size - 1 ; ++j ) { struct _part &a = parts[j] ; struct _part &b = parts[j + 1] ; // Two dummy nodes for each contig. Left is 0, right is 1 int dummyU = 0 ; int dummyV = 0 ; if ( a.strand == '+' ) dummyU = 1 ; if ( b.strand == '-' ) dummyV = 1 ; contigGraph.AddEdge( a.contigId, dummyU, b.contigId, dummyV, true ) ; } } // Check the cycles in the contig graph. This may introduces when combining different rascaf outputs. int *visitTime = new int[contigCnt] ; struct _pair *neighbors = new struct _pair[ MAX_NEIGHBOR ] ; bool *isInCycle = new bool[contigCnt] ; std::vector<int> cycleNodes ; memset( visitTime, -1, sizeof( int ) * contigCnt ) ; memset( isInCycle, false, sizeof( bool ) * contigCnt ) ; for ( i = 0 ; i < contigCnt ; ++i ) { if ( isInCycle[i] ) continue ; if ( contigGraph.IsInCycle( i, cycleNodes, visitTime ) ) { int cnt = cycleNodes.size() ; //printf( "===\n") ; for ( int j = 0 ; j < cnt ; ++j ) { //printf( "In cycle %d\n", cycleNodes[j] ) ; isInCycle[ cycleNodes[j] ] = true ; } } } //exit( 1 ) ; // Remove the connected edges involving the nodes in the cycle for ( i = 0 ; i < contigCnt ; ++i ) { if ( isInCycle[i] ) { for ( int dummy = 0 ; dummy <= 1 ; ++dummy ) { int ncnt = contigGraph.GetNeighbors( i, dummy, neighbors, MAX_NEIGHBOR ) ; for ( int j = 0 ; j < ncnt ; ++j ) { if ( neighbors[j].a == i + 2 * dummy - 1 && neighbors[j].b != dummy && genome.GetChrIdFromContigId( i ) == genome.GetChrIdFromContigId( neighbors[j].a ) ) continue ; // the connection created by the raw assembly else contigGraph.RemoveEdge( i, dummy, neighbors[j].a, neighbors[j].b ) ; } } } } //delete[] isInCycle ; //printf( "hi: %d %d\n", __LINE__, contigCnt ) ; //printf( "%d %d\n", contigGraph.GetNeighbors( 0, 0, neighbors, MAX_NEIGHBOR ), contigGraph.GetNeighbors( 0, 1, neighbors, MAX_NEIGHBOR ) ) ; // Sort the scaffolds from fasta file, so that longer scaffold come first int scafCnt = genome.GetChrCount() ; struct _pair *scafInfo = new struct _pair[scafCnt] ; memset( scafInfo, -1, sizeof( struct _pair) * scafCnt ) ; for ( i = 0 ; i < contigCnt ; ++i ) { int chrId = genome.GetChrIdFromContigId( i ) ; if ( scafInfo[chrId].a == -1 ) { scafInfo[ chrId ].a = i ; scafInfo[ chrId ].b = genome.GetChrLength( chrId ) ; } } qsort( scafInfo, scafCnt, sizeof( struct _pair ), CompScaffold ) ; // Merge the branches and build the scaffold ContigGraph scaffold( contigCnt, 2 * contigCnt ) ; // Use a method similar to topological sort bool *used = new bool[contigCnt] ; int *degree = new int[2 *contigCnt] ; int *danglingVisitTime = new int[contigCnt] ; int *counter = new int[contigCnt] ; int *visitDummy = new int[ contigCnt ] ; int *buffer = new int[contigCnt] ; int *buffer2 = new int[contigCnt] ; bool *isInQueue = new bool[ contigCnt ] ; int *chosen = new int[contigCnt] ; int chosenCnt ; memset( isInCycle, false, sizeof( bool ) * contigCnt ) ; memset( visitTime, -1, sizeof( int ) * contigCnt ) ; memset( visitDummy, -1, sizeof( int ) * contigCnt ) ; memset( counter, -1, sizeof( int ) * contigCnt ) ; // Use those memory to remove triangular cycles for ( i = 0 ; i < scafCnt ; ++i ) { int from, to ; if ( scafInfo[i].a == -1 ) continue ; genome.GetChrContigRange( genome.GetChrIdFromContigId( scafInfo[i].a ), from, to ) ; ForwardSearch( from, 0, i, visitTime, counter, visitDummy, contigGraph ) ; chosenCnt = 0 ; BackwardSearchForTriangularCycle( to, 1, i, visitTime, counter, visitDummy, contigGraph, chosen, chosenCnt ) ; for ( int j = 0 ; j < chosenCnt ; ++j ) { //printf( "%d\n", chosen[j] ) ; isInCycle[ chosen[j] ] = true ; } } for ( i = 0 ; i < contigCnt ; ++i ) { if ( isInCycle[i] ) { for ( int dummy = 0 ; dummy <= 1 ; ++dummy ) { int ncnt = contigGraph.GetNeighbors( i, dummy, neighbors, MAX_NEIGHBOR ) ; for ( int j = 0 ; j < ncnt ; ++j ) { if ( neighbors[j].a == i + 2 * dummy - 1 && neighbors[j].b != dummy && genome.GetChrIdFromContigId( i ) == genome.GetChrIdFromContigId( neighbors[j].a ) ) continue ; // the connection created by the raw assembly else contigGraph.RemoveEdge( i, dummy, neighbors[j].a, neighbors[j].b ) ; } } } } memset( used, false, sizeof( bool ) * contigCnt ) ; memset( visitTime, -1, sizeof( int ) * contigCnt ) ; memset( visitDummy, -1, sizeof( int ) * contigCnt ) ; memset( danglingVisitTime, -1, sizeof( int ) * contigCnt ) ; memset( counter, -1, sizeof( int ) * contigCnt ) ; memset( isInQueue, false, sizeof( bool ) * contigCnt ) ; ContigGraph newGraph( contigCnt, edgeCnt ) ; // Compute the gap size int *gapSize = new int[contigCnt] ; for ( i = 0 ; i < contigCnt - 1 ; ++i ) { if ( genome.GetChrIdFromContigId( i ) == genome.GetChrIdFromContigId( i + 1 ) ) { struct _contig c1 = genome.GetContigInfo( i ) ; struct _contig c2 = genome.GetContigInfo( i + 1 ) ; gapSize[i] = c2.start - c1.end - 1 ; } else gapSize[i] = -1 ; } // Start search int ncnt ; struct _pair *queue = new struct _pair[ contigCnt ] ; int head = 0, tail ; int danglingTime = 0 ; // Pre-allocate the subgraph. ContigGraph subgraph( contigCnt, 3 * contigCnt ) ; for ( i = 0 ; i < scafCnt ; ++i ) { //if ( used[144281] == true ) // printf( "changed %d %d\n", i, scafInfo[i - 1].a ) ; if ( scafInfo[i].a == -1 ) continue ; int from, to ; genome.GetChrContigRange( genome.GetChrIdFromContigId( scafInfo[i].a ), from, to ) ; //printf( "%d: %d %d %d\n", i, scafInfo[i].b, from, to ) ; ForwardSearch( from, 0, i, visitTime, counter, visitDummy, contigGraph ) ; chosenCnt = 0 ; BackwardSearch( to, 1, i, visitTime, counter, contigGraph, chosen, chosenCnt ) ; /*printf( "%s %d (%d %d) %d\n", alignments.GetChromName( genome.GetChrIdFromContigId( scafInfo[i].a ) ), i, from, to, chosenCnt ) ; if ( chosenCnt > 1 ) { printf( "=== " ) ; for ( int j = 0 ; j < chosenCnt ; ++j ) printf( "%d ", chosen[j] ) ; printf( "\n" ) ; }*/ for ( int j = 0 ; j < chosenCnt ; ++j ) { ncnt = contigGraph.GetNeighbors( chosen[j], 0, neighbors, MAX_NEIGHBOR ) ; //printf( "%d %d %d: %d %d %d\n", j, chosen[j], ncnt, neighbors[0].a, visitTime[ neighbors[0].a ], // counter[neighbors[0].a ] ) ; for ( int k = 0 ; k < ncnt ; ++k ) { //if ( i == 639 ) // printf( "Neighbor from 0 %d: %d %d\n", k, neighbors[k].a, neighbors[k].b ) ; if ( visitTime[ neighbors[k].a ] == 2 * i + 1 && counter[neighbors[k].a ] == 2 ) { subgraph.AddEdge( chosen[j], 0, neighbors[k].a, neighbors[k].b, true ) ; //printf( "subgraph: (%d %d)=>(%d %d)\n", chosen[j], 0, neighbors[k].a, neighbors[k].b ) ; } } ncnt = contigGraph.GetNeighbors( chosen[j], 1, neighbors, MAX_NEIGHBOR ) ; for ( int k = 0 ; k < ncnt ; ++k ) { //if ( i == 639 ) // printf( "Neighbor from 1 %d: %d %d\n", k, neighbors[k].a, neighbors[k].b ) ; if ( visitTime[ neighbors[k].a ] == 2 * i + 1 && counter[neighbors[k].a ] == 2 ) { subgraph.AddEdge( chosen[j], 1, neighbors[k].a, neighbors[k].b, true ) ; //printf( "subgraph: (%d %d)=>(%d %d)\n", chosen[j], 1, neighbors[k].a, neighbors[k].b ) ; } } } // Initialize the degree counter for ( int j = 0 ; j < chosenCnt ; ++j ) { for ( int l = 0 ; l < 2 ; ++l ) { /*if ( i == 6145 ) { std::vector<struct _pair> neighbors ; ncnt = subgraph.GetNeighbors( chosen[j], l, neighbors ) ; printf( "%d ncnt=%d\n", l, ncnt ) ; }*/ ncnt = subgraph.GetNeighbors( chosen[j], l, neighbors, MAX_NEIGHBOR ) ; degree[ 2 * chosen[j] + l ] = ncnt ; } } // "topological" sort head = 0 ; isInQueue[from] = true ; queue[0].a = from ; queue[0].b = 0 ; tail = 1 ; int prevTag = -1 ; int *prevAdd = buffer ; // reuse counter to save some memory. int *nextAdd = buffer2 ; int firstAdd = -1 ; while ( head < tail ) { int tailTag = tail ; for ( int j = head ; j < tailTag ; ++j ) { nextAdd[j] = -1 ; if ( !used[ queue[j].a ] ) { used[ queue[j].a ] = true ; if ( prevTag != -1 ) { scaffold.AddEdge( queue[ prevTag].a, 1 - queue[prevTag].b, queue[j].a, queue[j].b ) ; nextAdd[ prevTag ] = j ; /*if ( i == 639 ) printf( "(%lld %lld)=>(%lld %lld)\n", queue[ prevTag].a, 1 - queue[prevTag].b, queue[j].a, queue[j].b ) ;*/ } else firstAdd = j ; prevTag = j ; } prevAdd[j] = prevTag ; // the most recent(<=) queue id when added to scaffold. ncnt = subgraph.GetNeighbors( queue[j].a, 1 - queue[j].b, neighbors, MAX_NEIGHBOR ) ; for ( int k = 0 ; k < ncnt ; ++k ) { --degree[ 2 * neighbors[k].a + neighbors[k].b ] ; if ( degree[ 2 * neighbors[k].a + neighbors[k].b ] == 0 && !isInQueue[neighbors[k].a] ) { isInQueue[ neighbors[k].a ] = true ; queue[ tail ] = neighbors[k] ; // Interesting assignment, I think. ++tail ; /*if ( i == 639 ) printf( "pushed in queue: %d\n", neighbors[k].a ) ;*/ // Put the consecutive contigs together. struct _pair testNeighbors[ MAX_NEIGHBOR ] ; struct _pair tag ; tag = neighbors[k] ; while ( 1 ) { if ( contigGraph.GetNeighbors( tag.a, 1 - tag.b, testNeighbors, MAX_NEIGHBOR ) != 1 ) break ; int n = subgraph.GetNeighbors( tag.a, 1 - tag.b, testNeighbors, MAX_NEIGHBOR ) ; if ( n != 1 ) break ; //printf( "%d %d\n", n, testNeighbors[0].a ) ; struct _pair backNeighbors[ MAX_NEIGHBOR ] ; if ( contigGraph.GetNeighbors( testNeighbors[0].a, testNeighbors[0].b, backNeighbors, MAX_NEIGHBOR ) != 1 ) break ; n = subgraph.GetNeighbors( testNeighbors[0].a, testNeighbors[0].b, backNeighbors, MAX_NEIGHBOR ) ; if ( n != 1 ) break ; isInQueue[ testNeighbors[0].a ] = true ; queue[tail] = testNeighbors[0] ; ++tail ; /*if ( i == 639 ) printf( "pushed in queue: %d\n", testNeighbors[0].a ) ;*/ tag = testNeighbors[0] ; } } } } head = tailTag ; } // Remove the effect on the subgraph. /*if ( tail != chosenCnt ) { printf( "WARNING: not matched\n" ) ; exit( 1 ) ; }*/ for ( int j = 0 ; j < tail ; ++j ) { visitDummy[ queue[j].a ] = -1 ; counter[ queue[j].a ] = -1 ; subgraph.RemoveAdjacentEdges( queue[j].a ) ; isInQueue[ queue[j].a ] = false ; } subgraph.ResetEdgeUsed() ; // no point is picked if ( prevTag == -1 ) { continue ; } // Update the gap size prevTag = -1 ; for ( int j = 0 ; j < tail - 1 ; ++j ) { if ( genome.GetChrIdFromContigId( queue[j].a ) == genome.GetChrIdFromContigId( from ) ) prevTag = queue[j].a ; else if ( prevTag != -1 ) { struct _contig c = genome.GetContigInfo( queue[j].a ) ; gapSize[prevTag] -= ( c.end - c.start + 1) ; } } // Add the dangling contigs. Use the fact that the queue holding the contigs in the same order as in the scaffold. // 5'->3' dangling int *chosenDummy = degree ; for ( int j = tail - 1 ; j >= 0 ; --j ) { //if ( j < tail - 1 ) // continue ; chosenCnt = 0 ; //if ( queue[j].a == 0 ) // printf( "Dummy: %d %d %d\n", j, queue[j].b, 1 - queue[j].b ) ; SearchDangling( queue[j].a, queue[j].b, used, danglingTime, danglingVisitTime, contigGraph, false, chosen, chosenDummy, chosenCnt, genome ) ; ++danglingTime ; int prevTag = prevAdd[j] ; /*if ( queue[j].a == 0 ) { struct _pair neighbors[5] ; int ncnt = contigGraph.GetNeighbors( queue[j].a, 1 - queue[j].b, neighbors, 5 ) ; printf( "%d %d %d %d: %d %d\n", queue[j].b, chosenCnt, prevTag, ncnt, neighbors[0].a, used[ neighbors[0].a ] ) ; }*/ if ( prevTag == -1 ) break ; // Trim the dangling list int k = chosenCnt - 1 ; if ( j > 0 && j < tail - 1 ) { for ( k = chosenCnt - 1 ; k >= 1 ; --k ) if ( genome.GetChrIdFromContigId( chosen[k] ) != genome.GetChrIdFromContigId( chosen[k - 1] ) ) break ; } // Test the gap size int len = 0 ; for ( int l = 0 ; l <= k ; ++l ) { struct _contig c = genome.GetContigInfo( chosen[k] ) ; len += c.end - c.start + 1 ; } if ( j < tail - 1 ) { int l ; for ( l = j ; l >= 0 ; --l ) if ( genome.GetChrIdFromContigId( queue[l].a ) == genome.GetChrIdFromContigId( from ) ) break ; if ( !ignoreGap && len >= gapSize[ queue[l].a ] + 100 ) continue ; else gapSize[ queue[l].a ] -= len ; } for ( ; k >= 0 ; --k ) { used[ chosen[k] ] = true ; //printf( "Dangling 1: %d=>%d\n", queue[prevTag].a, chosen[k] ) ; scaffold.InsertNode( queue[ prevTag ].a, 1 - queue[ prevTag ].b, chosen[k], chosenDummy[k] ) ; } } // 3'->5' dangling for ( int j = 0 ; j < tail ; ++j ) { //if ( j > 0 ) // continue ; chosenCnt = 0 ; SearchDangling( queue[j].a, 1 - queue[j].b, used, danglingTime, danglingVisitTime, contigGraph, false, chosen, chosenDummy, chosenCnt, genome ) ; ++danglingTime ; int prevTag = prevAdd[j] ; int nextTag ; if ( prevTag == -1 || j <= firstAdd ) nextTag = firstAdd ; else if ( j == prevTag ) nextTag = j ; else nextTag = nextAdd[ prevTag ] ; if ( nextTag == -1 ) break ; /*if ( queue[j].a == 37549 ) { struct _pair neighbors[5] ; int ncnt = contigGraph.GetNeighbors( queue[j].a, queue[j].b, neighbors, 5 ) ; fprintf( stderr, "%d %d %d: %d %d %d: %d %d %d\n", j, queue[j].a, queue[j].b, chosenCnt, nextTag, ncnt, chosen[0], chosenDummy[0], used[ chosen[0] ] ) ; }*/ // trim the danling list int k = chosenCnt - 1 ; if ( j < tail - 1 && j > 0 ) { for ( k = chosenCnt - 1 ; k >= 1 ; --k ) if ( genome.GetChrIdFromContigId( chosen[k] ) != genome.GetChrIdFromContigId( chosen[k - 1] ) ) break ; } // Test the gap size int len = 0 ; for ( int l = 0 ; l <= k ; ++l ) { struct _contig c = genome.GetContigInfo( chosen[k] ) ; len += c.end - c.start + 1 ; } if ( j > 0 ) { int l ; for ( l = j - 1 ; l >= 0 ; --l ) // Notice the j-1 here, because we want the gap strictly before current contig if ( genome.GetChrIdFromContigId( queue[l].a ) == genome.GetChrIdFromContigId( from ) ) break ; if ( !ignoreGap && len >= gapSize[ queue[l].a ] + 100 ) continue ; else gapSize[ queue[l].a ] -= len ; } for ( ; k >= 0 ; --k ) { used[ chosen[k] ] = true ; scaffold.InsertNode( queue[nextTag].a, queue[nextTag].b, chosen[k], chosenDummy[k] ) ; //printf( "Dangling 2: %d<=%d\n", queue[nextTag].a, chosen[k] ) ; //if ( chosen[k] == 10246 ) // printf( "hi %d %d %d %d\n", j, queue[j].a, k, chosen[k] ) ; } } } //return 0 ; // Output the scaffold int id = 0 ; char infoFileName[512] ; char outputFileName[512] ; sprintf( infoFileName, "%s.info", prefix ) ; sprintf( outputFileName, "%s.fa", prefix ) ; outputFile = fopen( outputFileName, "w" ) ; infoFile = fopen( infoFileName, "w") ; memset( used, false, sizeof( bool ) * contigCnt ) ; for ( i = 0 ; i < contigCnt ; ++i ) { //printf( "%d (%s)\n", i, alignments.GetChromName( genome.GetChrIdFromContigId( i ) ) ) ; fflush( stdout ) ; /*if ( i == 10246 ) { std::vector<struct _pair> neighbors ; scaffold.GetNeighbors( i, 0, neighbors ) ; printf( "%u\n", neighbors.size() ) ; }*/ if ( used[i] ) continue ; int ncnt1 = scaffold.GetNeighbors( i, 0, neighbors, MAX_NEIGHBOR ) ; int ncnt2 = scaffold.GetNeighbors( i, 1, neighbors, MAX_NEIGHBOR ) ; if ( ncnt1 == 0 || ncnt2 == 0 ) // The end of a scaffold { fprintf( outputFile, ">scaffold_%d\n", id) ; fprintf( infoFile, ">scaffold_%d", id ) ; ++id ; int p = i ; int dummyP = 1 ; if ( ncnt1 == 0 ) dummyP = 0 ; used[i] = true ; genome.PrintContig( outputFile, i, dummyP ) ; fprintf( infoFile, " (%s %d %c)", alignments.GetChromName( genome.GetChrIdFromContigId( p ) ), p, dummyP == 0 ? '+' : '-' ) ; while ( 1 ) { ncnt = scaffold.GetNeighbors( p, 1 - dummyP, neighbors, MAX_NEIGHBOR ) ; if ( ncnt == 0 ) break ; // ncnt must be 1 int insertN = 17 ; if ( genome.GetChrIdFromContigId( p ) == genome.GetChrIdFromContigId( neighbors[0].a ) ) { struct _contig cp, cna ; cp = genome.GetContigInfo( p ) ; cna = genome.GetContigInfo( neighbors[0].a ) ; if ( p < neighbors[0].a ) insertN = cna.start - cp.end - 1 ; else if ( p > neighbors[0].a ) insertN = cp.start - cna.end - 1 ; } p = neighbors[0].a ; dummyP = neighbors[0].b ; for ( int j = 0 ; j < insertN ; ++j ) fprintf( outputFile, "N" ) ; used[p] = true ; genome.PrintContig( outputFile, p, dummyP ) ; fprintf( infoFile, " (%s %d %c)", alignments.GetChromName( genome.GetChrIdFromContigId( p ) ), p, dummyP == 0 ? '+' : '-' ) ; } fprintf( outputFile, "\n" ) ; fprintf( infoFile, "\n" ) ; } } for ( i = 0 ; i < contigCnt ; ++i ) if ( !used[i] ) { fprintf( stderr, "Unreported contig %d.\n", i ) ; } fclose( outputFile ) ; fclose( infoFile ) ; delete[] buffer ; delete[] buffer2 ; delete[] chosen ; delete[] queue ; delete[] counter ; delete[] visitTime ; delete[] used ; delete[] scafInfo ; delete[] isInQueue ; delete[] gapSize ; //fclose( rascafFile ) ; return 0 ; }