Exemple #1
0
/**
 * Creates the pane's context menu together with its two actions.
 *
 * "Detach" is wired to the detach() slot and "Export..." to the
 * exportGraph() slot; both actions are then appended to the menu in
 * that order, with no separator between them.
 */
void ProRataGraphPane::buildMenu()
{
	// The menu and both actions are parented to this pane.
	qmContextMenu = new QMenu( this );
	qaDetach = new QAction( "Detach", this );
	qaExport = new QAction( "Export...", this );

	// Hook each action up to the slot that implements it.
	connect( qaDetach, SIGNAL( triggered() ), this, SLOT( detach() ) );
	connect( qaExport, SIGNAL( triggered() ), this, SLOT( exportGraph() ) );

	// Populate the menu: detach first, export second.
	qmContextMenu->addAction( qaDetach );
	qmContextMenu->addAction( qaExport );
}
/*
 * findAndLogAllPossibleLegs has the same search algorithm as findAllPossibleLegs. However, the requirement
 * that all the legs be logged requires a more sophisticated means of parallelization: every thread records
 * what it finds in its own NodeVecVec, and the per-thread vectors are kept in one NULL-terminated array so
 * that they can be handed on together when all the threads are done.
 *
 * graph   - graph to search. A NULL graph makes the function return 0 immediately.
 * options - search options. A NULL pointer makes the function return 0 immediately. multiThreaded is
 *           overwritten with whether the parallel team has more than one thread; writeOutputFile,
 *           outputFile, buildType and doStatistics are read after the search.
 *
 * Returns the sum, over all threads, of the contentSize of each thread's result vector. Returns 0 when
 * an argument is NULL or the array of per-thread results cannot be allocated.
 */
int findAndLogAllPossibleLegs(Graph *graph, SearchOptions *options)
{
    int i, j, k;
    int found = 0;
    int searches = 0;
    double tick, tock;
    int hours, min;
    double sec;
    int maxThreads = 1;
    NodeVecVec **lastingResults = NULL;
    Graph *optimizedGraph = NULL;
    char timeStr[50];

    /* A little bit of error checking - options is dereferenced by every thread below. */
    if ( !graph || !options )
        return 0;

    tick = currentTime();
    fprintf(stderr, "Immediately before parallel\n" );

    /* Allocate the per-thread slots before entering the parallel region, where a failure can still be
     * handled by returning. omp_get_max_threads() is an upper bound on the size of the team created
     * below; the extra slot holds the NULL terminator. */
    lastingResults = (NodeVecVec **) malloc( ((size_t)omp_get_max_threads() + 1) * sizeof(NodeVecVec*) );
    if ( !lastingResults )
    {
        fprintf(stderr, "Unable to allocate the per-thread result table\n" );
        return 0;
    }

    #pragma omp parallel private(i,j,k) shared(graph,maxThreads,lastingResults) reduction(+:found) reduction(+:searches)
    {
        int myThread = omp_get_thread_num();
        NodeVecVec *myResults = NULL;

        #pragma omp single
        {
            maxThreads = omp_get_num_threads();
            options->multiThreaded = maxThreads > 1;
#ifdef DEBUG
            printf( "%d total threads, this one is %d\n", maxThreads, myThread );
#endif
            lastingResults[maxThreads] = NULL; /* Null terminated to avoid having to keep track of bitsNeeded */
        }
        /* The implicit barrier closing the single guarantees the terminator is in place before any slot is used. */

        #pragma omp critical
        {
            lastingResults[myThread] = NodeVecVec_new(64); /* Arbitrary - multiple of 8, eventually cache line aligned... */
            myResults = lastingResults[myThread];
        }

        #pragma omp single
        {
            printf ("Immediately before nested for's\n");
        }

        /* Every (i, j) pair of system-call map entries is one search; the pairs are split across threads. */
        #pragma omp for collapse(2)
        for ( i = 0; i < graph->systemCallMap->contentSize; ++i )
        {
            for ( j = 0; j < graph->systemCallMap->contentSize; ++j )
            {
                ++searches;
                /* Start a path search from every node listed under entry i. */
                for (k = 0; k < graph->systemCallMap->vector[i]->nodes->contentSize; ++k)
                {
                    char *fullSignature[3] = { NULL, NULL, NULL };
                    int fullIntSignature[3] = { 0, 0, -1 };
                    fullSignature[0] = graph->systemCallMap->vector[i]->label;
                    fullSignature[1] = graph->systemCallMap->vector[j]->label;
                    fullIntSignature[0] = i;
                    fullIntSignature[1] = j;
                    NodePtrVec *result = NodePtrVec_new(16);
                    Bitfield *visited = Bitfield_new(graph->totalNodes);
#ifdef DEBUG
                    printf( "Searching for %s(%d) ~~~> %s\n", fullSignature[0],
                            graph->systemCallMap->vector[i]->nodes->vector[k]->id,
                            fullSignature[1]);
#endif
                    /* The signature arrays are passed starting at index 1, i.e. at entry j's label and index. */
                    findAndRecordAllPaths( graph->systemCallMap->vector[i]->nodes->vector[k], &fullSignature[1],
                            &fullIntSignature[1], result, visited, myResults, options );
                    Bitfield_delete(visited);
                    if ( result )
                        NodePtrVec_delete( result );
                } // end of for (k)
            } // end of for (j)
        } // end of for (i)

        /* Each thread contributes its own count; the reduction clause adds them up. */
        found = myResults->contentSize;
    }

    /* Break the elapsed seconds down into hours, minutes and seconds. */
    tock = currentTime();
    sec = tock-tick;
    hours = (int)sec/3600;
    sec = fmod( sec, 3600.0 );
    min = (int)sec/60;
    sec = fmod( sec, 60.0 );
    printf ( "\n\n%d found for %d searches. Overall Time: %d:%d:%2.3f\n",
            found, searches, hours, min, sec );
    /* Bounded write; %06.3f zero-pads the seconds to SS.mmm so they line up with the %02d fields. */
    snprintf ( timeStr, sizeof(timeStr), "%02d:%02d:%06.3f", hours, min, sec );
    YAMLWriteInt("Signatures Found", found);
    YAMLWriteString("Search Time", timeStr);

#ifdef DEBUG
    printf ("max threads still:%d\n", maxThreads);
    for ( i = 0; i < maxThreads; ++i )
    {
        printf ( "printing out thread %d result - %d long\n", i, lastingResults[i]->contentSize );
        for ( j = 0; j < lastingResults[i]->contentSize; ++j )
        {
            printf("\t");
            printStack( lastingResults[i]->vector[j] );
            printf("\n");
        }
    }
#endif

    /* At some point, we will want to use the FullPath argument and pass it along to
     * buildGraphFromPaths. Until then, however, we're only going to build the most
     * minimal graph possible.
     */
    if ( options->writeOutputFile && options->outputFile )
    {
        optimizedGraph = buildGraphFromPaths(lastingResults, options->buildType);
        exportGraph(optimizedGraph, options->outputFile);
    }

    /* Statistics are only printed for single-threaded runs. */
    if ( options->doStatistics && !options->multiThreaded )
        printStats();

    return( found );
}
Exemple #3
0
/* Reports an option value that could not be parsed, prints the usage text and exits with status 1. */
static void exitOnInvalidOptionValue(const char *option, const char *value)
{
	printf("Invalid value for option %s: %s\n", option, value);
	printUsage();
	exit(1);
}

/*
 * Entry point of velvetg.
 *
 * argv[1] is the working directory; every following argument is an
 * "-option value" pair, except "--help", which takes no value and prints
 * the usage text.
 *
 * The graph is loaded from the first of these files that can be opened in
 * the directory: "Graph" ("Graph2" when read tracking is on), "PreGraph",
 * "Roadmaps". It is then run through the coverage filters and, when an
 * expected coverage is available, the read-coherence and read-pairing
 * steps, and the results are written back into the directory
 * (contigs.fa, stats.txt, LastGraph and, on request, velvet_asm.afg and the
 * unused reads).
 *
 * Returns 0 on success or after "--help"; 1 when called without arguments
 * or with an unknown option; -1 for a negative -accel_bits value. Other
 * argument errors terminate through exit(1).
 */
int main(int argc, char **argv)
{
	ReadSet *sequences = NULL;
	RoadMapArray *rdmaps;
	PreGraph *preGraph;
	Graph *graph;
	char *directory, *graphFilename, *preGraphFilename, *seqFilename,
	    *roadmapFilename;
	double coverageCutoff = -1;
	double maxCoverageCutoff = -1;
	double expectedCoverage = -1;
	int longMultCutoff = -1;
	Coordinate minContigLength = -1;
	Coordinate minContigKmerLength;
	boolean *dubious = NULL;
	Coordinate insertLength[CATEGORIES];
	Coordinate insertLengthLong = -1;
	Coordinate std_dev[CATEGORIES];
	Coordinate std_dev_long = -1;
	short int accelerationBits = 24;
	boolean readTracking = false;
	boolean exportAssembly = false;
	boolean unusedReads = false;
	boolean estimateCoverage = false;
	boolean estimateCutoff = false;
	FILE *file;
	int arg_index, arg_int = 0;
	double arg_double = 0;
	char *arg;
	Coordinate *sequenceLengths = NULL;
	Category cat;
	boolean scaffolding = true;
	int pebbleRounds = 1;
	long long longlong_var = 0;
	short int short_var = 0;

	setProgramName("velvetg");

	// -1 marks "not provided" for every category
	for (cat = 0; cat < CATEGORIES; cat++) {
		insertLength[cat] = -1;
		std_dev[cat] = -1;
	}

	// Error message (argc < 2 also covers an empty argv)
	if (argc < 2) {
		puts("velvetg - de Bruijn graph construction, error removal and repeat resolution");
		printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
		       RELEASE_NUMBER, UPDATE_NUMBER);
		puts("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])");
		puts("This is free software; see the source for copying conditions.  There is NO");
		puts("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
		puts("Compilation settings:");
		printf("CATEGORIES = %i\n", CATEGORIES);
		printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
		puts("");
		printUsage();
		return 1;
	}

	if (strcmp(argv[1], "--help") == 0) {
		printUsage();
		return 0;
	}

	// Memory allocation
	// The 100 spare bytes leave room for the file names appended to the directory below
	directory = argv[1];
	graphFilename = mallocOrExit(strlen(directory) + 100, char);
	preGraphFilename =
	    mallocOrExit(strlen(directory) + 100, char);
	roadmapFilename = mallocOrExit(strlen(directory) + 100, char);
	seqFilename = mallocOrExit(strlen(directory) + 100, char);

	// Argument parsing
	// Each pass consumes an option name and leaves arg_index on its value;
	// the loop increment then steps past the value.
	for (arg_index = 2; arg_index < argc; arg_index++) {
		arg = argv[arg_index++];

		// --help takes no value, so it has to be recognised before the
		// check that every option is followed by one.
		if (strcmp(arg, "--help") == 0) {
			printUsage();
			return 0;
		}

		if (arg_index >= argc) {
			puts("Unusual number of arguments!");
			printUsage();
			exit(1);
		}

		if (strcmp(arg, "-cov_cutoff") == 0) {
			if (strcmp(argv[arg_index], "auto") == 0) {
				estimateCutoff = true;
			} else if (sscanf(argv[arg_index], "%lf", &coverageCutoff) != 1) {
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			}
		} else if (strcmp(arg, "-exp_cov") == 0) {
			if (strcmp(argv[arg_index], "auto") == 0) {
				estimateCoverage = true;
				readTracking = true;
			} else {
				if (sscanf(argv[arg_index], "%lf", &expectedCoverage) != 1)
					exitOnInvalidOptionValue(arg, argv[arg_index]);
				if (expectedCoverage > 0)
					readTracking = true;
			}
		} else if (strcmp(arg, "-ins_length") == 0) {
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			insertLength[0] = (Coordinate) longlong_var;
			if (insertLength[0] < 0) {
				printf("Invalid insert length: %lli\n",
				       (long long) insertLength[0]);
				exit(1);
			}
		} else if (strcmp(arg, "-ins_length_sd") == 0) {
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			std_dev[0] = (Coordinate) longlong_var;
			if (std_dev[0] < 0) {
				printf("Invalid std deviation: %lli\n",
				       (long long) std_dev[0]);
				exit(1);
			}
		} else if (strcmp(arg, "-ins_length_long") == 0) {
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			insertLengthLong = (Coordinate) longlong_var;
		} else if (strcmp(arg, "-ins_length_long_sd") == 0) {
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			std_dev_long = (Coordinate) longlong_var;
		} else if (strncmp(arg, "-ins_length", 11) == 0
			   && strchr(arg, 'd') == NULL) {
			// -ins_length<N>: insert length of category N (1-based).
			// The number is range-checked before it is narrowed to Category.
			if (sscanf(arg, "-ins_length%hi", &short_var) != 1
			    || short_var < 1 || short_var > CATEGORIES) {
				printf("Unknown option: %s\n", arg);
				exit(1);
			}
			cat = (Category) short_var;
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			insertLength[cat - 1] = (Coordinate) longlong_var;
			if (insertLength[cat - 1] < 0) {
				printf("Invalid insert length: %lli\n",
				       (long long) insertLength[cat - 1]);
				exit(1);
			}
		} else if (strncmp(arg, "-ins_length", 11) == 0) {
			// -ins_length<N>_sd: standard deviation of category N (1-based)
			if (sscanf(arg, "-ins_length%hi_sd", &short_var) != 1
			    || short_var < 1 || short_var > CATEGORIES) {
				printf("Unknown option: %s\n", arg);
				exit(1);
			}
			cat = (Category) short_var;
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			std_dev[cat - 1] = (Coordinate) longlong_var;
			if (std_dev[cat - 1] < 0) {
				printf("Invalid std deviation: %lli\n",
				       (long long) std_dev[cat - 1]);
				exit(1);
			}
		} else if (strcmp(arg, "-read_trkg") == 0) {
			readTracking =
			    (strcmp(argv[arg_index], "yes") == 0);
		} else if (strcmp(arg, "-scaffolding") == 0) {
			scaffolding =
			    (strcmp(argv[arg_index], "yes") == 0);
		} else if (strcmp(arg, "-amos_file") == 0) {
			exportAssembly =
			    (strcmp(argv[arg_index], "yes") == 0);
		} else if (strcmp(arg, "-min_contig_lgth") == 0) {
			if (sscanf(argv[arg_index], "%lli", &longlong_var) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			minContigLength = (Coordinate) longlong_var;
		} else if (strcmp(arg, "-accel_bits") == 0) {
			if (sscanf(argv[arg_index], "%hi", &accelerationBits) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			if (accelerationBits < 0) {
				printf
				    ("Illegal acceleration parameter: %s\n",
				     argv[arg_index]);
				printUsage();
				return -1;
			}
		} else if (strcmp(arg, "-max_branch_length") == 0) {
			if (sscanf(argv[arg_index], "%i", &arg_int) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			setMaxReadLength(arg_int);
			setLocalMaxReadLength(arg_int);
		} else if (strcmp(arg, "-max_divergence") == 0) {
			if (sscanf(argv[arg_index], "%lf", &arg_double) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			setMaxDivergence(arg_double);
			setLocalMaxDivergence(arg_double);
		} else if (strcmp(arg, "-max_gap_count") == 0) {
			if (sscanf(argv[arg_index], "%i", &arg_int) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			setMaxGaps(arg_int);
			setLocalMaxGaps(arg_int);
		} else if (strcmp(arg, "-min_pair_count") == 0) {
			if (sscanf(argv[arg_index], "%i", &arg_int) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			setUnreliableConnectionCutoff(arg_int);
		} else if (strcmp(arg, "-max_coverage") == 0) {
			if (sscanf(argv[arg_index], "%lf", &maxCoverageCutoff) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
		} else if (strcmp(arg, "-long_mult_cutoff") == 0) {
			if (sscanf(argv[arg_index], "%i", &longMultCutoff) != 1)
				exitOnInvalidOptionValue(arg, argv[arg_index]);
			setMultiplicityCutoff(longMultCutoff);
		} else if (strcmp(arg, "-unused_reads") == 0) {
			unusedReads =
			    (strcmp(argv[arg_index], "yes") == 0);
			if (unusedReads)
				readTracking = true;
		} else {
			printf("Unknown option: %s;\n", arg);
			printUsage();
			return 1;
		}
	}

	// Bookkeeping
	logInstructions(argc, argv, directory);

	strcpy(seqFilename, directory);
	strcat(seqFilename, "/Sequences");

	strcpy(roadmapFilename, directory);
	strcat(roadmapFilename, "/Roadmaps");

	strcpy(preGraphFilename, directory);
	strcat(preGraphFilename, "/PreGraph");

	// Graphs built with read tracking are kept in a separate file
	if (!readTracking) {
		strcpy(graphFilename, directory);
		strcat(graphFilename, "/Graph");
	} else {
		strcpy(graphFilename, directory);
		strcat(graphFilename, "/Graph2");
	}

	// Graph uploading or creation
	// fopen is only used to probe which file exists; the most processed one wins
	if ((file = fopen(graphFilename, "r")) != NULL) {
		fclose(file);
		graph = importGraph(graphFilename);
	} else if ((file = fopen(preGraphFilename, "r")) != NULL) {
		fclose(file);
		sequences = importReadSet(seqFilename);
		convertSequences(sequences);
		graph =
		    importPreGraph(preGraphFilename, sequences,
				   readTracking, accelerationBits);
		sequenceLengths =
		    getSequenceLengths(sequences, getWordLength(graph));
		correctGraph(graph, sequenceLengths);
		exportGraph(graphFilename, graph, sequences->tSequences);
	} else if ((file = fopen(roadmapFilename, "r")) != NULL) {
		fclose(file);
		// Build and save the PreGraph first, then continue as in the PreGraph case
		rdmaps = importRoadMapArray(roadmapFilename);
		preGraph = newPreGraph_pg(rdmaps, seqFilename);
		clipTips_pg(preGraph);
		exportPreGraph_pg(preGraphFilename, preGraph);
		destroyPreGraph_pg(preGraph);

		sequences = importReadSet(seqFilename);
		convertSequences(sequences);
		graph =
		    importPreGraph(preGraphFilename, sequences,
				   readTracking, accelerationBits);
		sequenceLengths =
		    getSequenceLengths(sequences, getWordLength(graph));
		correctGraph(graph, sequenceLengths);
		exportGraph(graphFilename, graph, sequences->tSequences);
	} else {
		puts("No Roadmap file to build upon! Please run velveth (see manual)");
		exit(1);
	}

	// Set insert lengths and their standard deviations
	// A missing standard deviation defaults to a tenth of the insert length
	for (cat = 0; cat < CATEGORIES; cat++) {
		if (insertLength[cat] > -1 && std_dev[cat] < 0)
			std_dev[cat] = insertLength[cat] / 10;
		setInsertLengths(graph, cat,
				 insertLength[cat], std_dev[cat]);
	}

	if (insertLengthLong > -1 && std_dev_long < 0)
		std_dev_long = insertLengthLong / 10;
	setInsertLengths(graph, CATEGORIES,
			 insertLengthLong, std_dev_long);

	// Coverage cutoff
	// An estimated expected coverage also supplies a cutoff (half of it) when none was given
	if (expectedCoverage < 0 && estimateCoverage == true) {
		expectedCoverage = estimated_cov(graph);
		if (coverageCutoff < 0) {
			coverageCutoff = expectedCoverage / 2;
			estimateCutoff = true;
		}
	} else {
		estimateCoverage = false;
		if (coverageCutoff < 0 && estimateCutoff)
			coverageCutoff = estimated_cov(graph) / 2;
		else
			estimateCutoff = false;
	}

	if (coverageCutoff < 0) {
		puts("WARNING: NO COVERAGE CUTOFF PROVIDED");
		puts("Velvet will probably leave behind many detectable errors");
		puts("See manual for instructions on how to set the coverage cutoff parameter");
	}

	dubious =
	    removeLowCoverageNodesAndDenounceDubiousReads(graph,
							  coverageCutoff);
	removeHighCoverageNodes(graph, maxCoverageCutoff);
	clipTipsHard(graph);

	if (expectedCoverage > 0) {
		// The reads were not loaded if the graph came straight from the Graph file
		if (sequences == NULL) {
			sequences = importReadSet(seqFilename);
			convertSequences(sequences);
		}

		// Mixed length sequencing
		readCoherentGraph(graph, isUniqueSolexa, expectedCoverage,
				  sequences);

		// Paired ends module
		// One extra round is run for every category for which pairUpReads reports success
		createReadPairingArray(sequences);
		for (cat = 0; cat < CATEGORIES; cat++)
			if(pairUpReads(sequences, 2 * cat + 1))
				pebbleRounds++;

		if (pairUpReads(sequences, 2 * CATEGORIES + 1))
			pebbleRounds++;

		detachDubiousReads(sequences, dubious);
		activateGapMarkers(graph);
		for ( ;pebbleRounds > 0; pebbleRounds--)
			exploitShortReadPairs(graph, sequences, dubious, scaffolding);
	} else {
		puts("WARNING: NO EXPECTED COVERAGE PROVIDED");
		puts("Velvet will be unable to resolve any repeats");
		puts("See manual for instructions on how to set the expected coverage parameter");
	}

	free(dubious);

	concatenateGraph(graph);

	// Convert the minimum contig length into a k-mer count, never going below the word length
	if (minContigLength < 2 * getWordLength(graph))
		minContigKmerLength = getWordLength(graph);
	else
		minContigKmerLength = minContigLength - getWordLength(graph) + 1;

	// graphFilename is reused as a scratch buffer for every output path from here on
	strcpy(graphFilename, directory);
	strcat(graphFilename, "/contigs.fa");
	exportLongNodeSequences(graphFilename, graph, minContigKmerLength);

	strcpy(graphFilename, directory);
	strcat(graphFilename, "/stats.txt");
	displayGeneralStatistics(graph, graphFilename);

	if (sequences == NULL) {
		sequences = importReadSet(seqFilename);
		convertSequences(sequences);
	}

	strcpy(graphFilename, directory);
	strcat(graphFilename, "/LastGraph");
	exportGraph(graphFilename, graph, sequences->tSequences);

	if (exportAssembly) {
		strcpy(graphFilename, directory);
		strcat(graphFilename, "/velvet_asm.afg");
		exportAMOSContigs(graphFilename, graph, minContigKmerLength, sequences);
	}

	if (unusedReads)
		exportUnusedReads(graph, sequences, minContigKmerLength, directory);

	if (estimateCoverage)
		printf("Estimated Coverage = %f\n", expectedCoverage);
	if (estimateCutoff)
		printf("Estimated Coverage cutoff = %f\n", coverageCutoff);

	logFinalStats(graph, minContigKmerLength, directory);

	destroyGraph(graph);
	free(graphFilename);
	free(preGraphFilename);
	free(seqFilename);
	free(roadmapFilename);
	destroyReadSet(sequences);
	return 0;
}