Example #1
0
/*********************************************************************
** METHOD  : FileSystemCommon::getBaseName
** PURPOSE : Return the last component of a path with its suffix removed.
** INPUT   : path          - path whose last component is wanted
**           cutToFirstDot - true : drop everything from the FIRST dot of the
**                                  last component (all suffixes are removed)
**                           false: drop everything from the LAST dot only
**                                  (a single suffix is removed)
** OUTPUT  :
** RETURN  : The last path component, as computed by basename(), without the
**           selected suffix. A component that holds no dot is returned as is.
** REMARKS : Warning! this method isn't exactly basename() as you'd expect in C++.
**           It returns the base name but cuts everything after the last dot
**           (or after the first dot when cutToFirstDot is true).
**           A component that starts with a dot (e.g. ".bashrc") can therefore
**           come back as an empty string.
*********************************************************************/
IFileSystem::Path FileSystemCommon::getBaseName (const Path& path, bool cutToFirstDot)
{
    /** We duplicate the provided path: posix basename() may alter its argument.
     *  The copy is owned by a std::string, so there is no allocation result to
     *  check, nothing to release by hand, and nothing leaks if building the
     *  result below throws. */
    std::string writable_path (path.c_str());

    /** We build the basename; it still may have a suffix.
     *  basename() may return a pointer into writable_path or into its own static
     *  storage, so the result is copied right away. */
    std::string reads_name (basename (&writable_path[0]));

    /** We look for the beginning of the suffix. Removing suffixes one by one
     *  until no dot is left is the same as cutting at the first dot. */
    const std::string::size_type suffix_start =
        cutToFirstDot ? reads_name.find ('.') : reads_name.rfind ('.');

    /** Without any dot, the basename is the file name itself. */
    if (suffix_start != std::string::npos)
    {
        reads_name.erase (suffix_start);
    }

    /** We return the base name, without suffix. */
    return reads_name;
}
Example #2
0
/**
 * Returns the value that follows the option found at argv[index].
 *
 * Prints an error on stderr and exits with status 1 when the option is the
 * last command-line argument, i.e. when there is no value to return.
 */
static const char* require_option_value (int argc, char *argv[], int index)
{
    if (index + 1 >= argc)
    {
        fprintf (stderr, "option %s requires a value\n", argv[index]);
        exit (1);
    }
    return argv[index + 1];
}

/**
 * Command-line entry point of the k-mer counter.
 *
 * Usage: <program> input_file kmer_size [-t min_abundance] [-m max_memory]
 *        [-d max_disk_space] [-o out_prefix] [-histo] [-rev] [-v | -vv]
 *
 * Steps:
 *   1. prints the usage text and returns 0 when fewer than two positional
 *      arguments are given;
 *   2. opens the reads bank on argv[1] and loads the k values through
 *      loadKmers(argv[2]); the first loaded value becomes sizeKmer;
 *   3. fills the settings declared outside this function (sizeKmer, kmerMask,
 *      nks, max_memory, max_disk_space, output_histo, prefix) from defaults
 *      and from the command-line options;
 *   4. runs sorting_count() and returns 0.
 *
 * Exits with status 1 when argv[2] starts with '-', when the k-mer size
 * exceeds what kmer_type can hold, or when a value-taking option is the last
 * argument.
 */
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        fprintf (stderr,"%s: [d]isk [s]treaming of [k]-mers (constant-memory k-mer counting)\n",argv[0]);
        fprintf (stderr,"usage:\n");
        fprintf (stderr," %s input_file kmer_size [-t min_abundance] [-m max_memory] [-d max_disk_space] [-o out_prefix] [-histo]\n",argv[0]);
        fprintf (stderr,"details:\n [-t min_abundance] filters out k-mers seen ( < min_abundance ) times, default: 1 (all kmers are returned)\n [-m max_memory] is in MB, default: min(total system memory / 2, 5 GB) \n [-d max_disk_space] is in MB, default: min(available disk space / 2, reads file size)\n [-o out_prefix] saves results in [out_prefix].solid_kmers. default out_prefix = basename(input_file)\n [-histo] outputs histogram of kmers abundance\n [-rev] outputs only one of forward or reverse complement k-mers\n Input file can be fasta, fastq, gzipped or not, or a file containing a list of file names.\n");
#ifdef SVN_REV
        fprintf(stderr,"Running dsk version %s\n",STR(SVN_REV));
#endif
        return 0;
    }

    // reads file
    Bank *Reads = new Bank(argv[1]);

    if (argv[2][0] == '-')
    {
        printf("please specify a k value\n");
        exit(1);
    }

    /* Changes by Raunaq
     * Code addition for taking in multiple values of k. The file containing
     * them should have the values of k sorted in decreasing order.
     */
    // NOTE(review): loadKmers() is defined elsewhere; whether it can return NULL
    // or an empty list is not visible here, and the result is used unchecked.
    int *Kmerlist = loadKmers(argv[2]);

    // kmer size: the first loaded value (the largest one if the list is sorted
    // in decreasing order as requested above)
    // NOTE(review): a value below 1 is not rejected here - confirm loadKmers()
    // validates its input.
    sizeKmer = Kmerlist[0];
    if (sizeKmer > (int)(sizeof(kmer_type)*4))
    {
        // the cast makes the argument match the %lu conversion on every platform
        printf("Max kmer size on this compiled version is %lu\n",(unsigned long)(sizeof(kmer_type)*4));
        exit(1);
    }

    // Mask keeping the 2*sizeKmer low bits (2 bits per nucleotide).
    // At the largest accepted k the shift count would equal the bit width of
    // kmer_type, which is undefined behaviour; the all-ones mask is built
    // without any shift in that case (assumes kmer_type wraps around like an
    // unsigned integer).
    if (sizeKmer == (int)(sizeof(kmer_type)*4))
        kmerMask = ((kmer_type)0) - 1;
    else
        kmerMask = (((kmer_type)1) << (sizeKmer*2)) - 1;

    // default solidity
    nks = 1;

    // default max memory, in MB
    max_memory = 5*1024;

    // total RAM in MB, used below to cap max_memory
#ifndef OSX
    // NOTE(review): the return value of sysinfo() is not checked.
    struct sysinfo info;
    sysinfo(&info);
    int total_ram = (int)(((double)info.totalram*(double)info.mem_unit)/1024/1024);
    printf("Total RAM: %d MB\n",total_ram);
#else
    int total_ram = 128*1024;
#endif

    // default prefix is the reads file basename, without its last suffix
    // NOTE(review): prefix is declared elsewhere; its capacity is not visible
    // here, so the strcpy() calls into it are unchecked - confirm the size.
    char *reads_path = strdup(argv[1]);
    std::string reads_name(basename(reads_path)); // posix basename() may alter reads_path
    free(reads_path);
    const std::string::size_type last_dot = reads_name.rfind('.');
    if (last_dot != std::string::npos)
        reads_name.erase(last_dot);
    strcpy(prefix,reads_name.c_str());

    int verbose = 0;
    bool reverse = false;
    max_disk_space = 0;
    output_histo = false;

    // Parse the options; they override the defaults set above.
    // Every argument is inspected, including the values of other options,
    // exactly as before; only the missing-value case is new.
    for (int n_a = 3; n_a < argc; n_a++)
    {
        const char *option = argv[n_a];

        if (strcmp(option,"-t")==0)
            nks = atoi(require_option_value(argc, argv, n_a));
        else if (strcmp(option,"-o")==0)
            strcpy(prefix,require_option_value(argc, argv, n_a));
        else if (strcmp(option,"-m")==0)
            max_memory = atoi(require_option_value(argc, argv, n_a));
        else if (strcmp(option,"-d")==0)
            max_disk_space = atoi(require_option_value(argc, argv, n_a));
        else if (strcmp(option,"-v")==0)
            verbose = 1;
        else if (strcmp(option,"-vv")==0)
            verbose = 2;
        else if (strcmp(option,"-histo")==0)
            output_histo = true;
        else if (strcmp(option,"-rev")==0)
            reverse = true;
    }

    if (max_memory > total_ram)
    {
        printf("Maximum memory (%d MB), exceeds total RAM (%d MB). Setting maximum memory to %d MB.\n",max_memory,total_ram,total_ram/2);
        max_memory = total_ram/2;
    }

    STARTWALL(0);

    sorting_count(Reads,prefix,max_memory,max_disk_space,true,verbose,reverse);

    STOPWALL(0,"Total");

    delete Reads;

    return 0;
}