/*********************************************************************
** METHOD  : FileSystemCommon::getBaseName
** PURPOSE : Return the last component of a path with its suffix removed.
** INPUT   : path          - the path to process
**           cutToFirstDot - true : cut the file name at its FIRST dot,
**                                  i.e. remove every suffix ("a.fastq.gz" -> "a")
**                           false: cut the file name at its LAST dot,
**                                  i.e. remove one suffix  ("a.fastq.gz" -> "a.fastq")
** OUTPUT  : none
** RETURN  : the file name without suffix; the file name itself when it
**           contains no dot.
** REMARKS : Warning! this method isn't exactly basename() as you'd expect in C++.
**           It returns the base name but cuts the suffix as described above.
**           A name starting with a dot (e.g. ".hidden") yields an empty
**           string when the cut position is that leading dot.
*********************************************************************/
IFileSystem::Path FileSystemCommon::getBaseName (const Path& path, bool cutToFirstDot)
{
    /** POSIX basename() may alter its argument, so we work on a private copy.
     *  The copy is owned by a std::string: it is released automatically, even
     *  if building the result below throws. */
    std::string buffer (path.c_str());

    /** We build the basename; it still may have a suffix. The result of
     *  basename() is copied immediately, while 'buffer' is still alive. */
    std::string name (basename (&buffer[0]));

    /** We look for the beginning of the suffix. Cutting repeatedly at the last
     *  dot until no dot remains is the same as cutting once at the first dot. */
    const std::string::size_type dot = cutToFirstDot ? name.find ('.') : name.rfind ('.');

    /** No dot in the file name: the basename is the file name itself. */
    if (dot != std::string::npos)
    {
        name.erase (dot);
    }

    /** We return the base name, without suffix. */
    return name;
}
int main(int argc, char *argv[]) { if(argc < 3) { fprintf (stderr,"%s: [d]isk [s]treaming of [k]-mers (constant-memory k-mer counting)\n",argv[0]); fprintf (stderr,"usage:\n"); fprintf (stderr," %s input_file kmer_size [-t min_abundance] [-m max_memory] [-d max_disk_space] [-o out_prefix] [-histo]\n",argv[0]); fprintf (stderr,"details:\n [-t min_abundance] filters out k-mers seen ( < min_abundance ) times, default: 1 (all kmers are returned)\n [-m max_memory] is in MB, default: min(total system memory / 2, 5 GB) \n [-d max_disk_space] is in MB, default: min(available disk space / 2, reads file size)\n [-o out_prefix] saves results in [out_prefix].solid_kmers. default out_prefix = basename(input_file)\n [-histo] outputs histogram of kmers abundance\n [-rev] outputs only one of forward or reverse complement k-mers\n Input file can be fasta, fastq, gzipped or not, or a file containing a list of file names.\n"); #ifdef SVN_REV fprintf(stderr,"Running dsk version %s\n",STR(SVN_REV)); #endif return 0; } // reads file Bank *Reads = new Bank(argv[1]); if (argv[2][0] == '-') { printf("please specify a k value\n"); exit(1); } /* Changes by Raunaq * Code addition for taking in multiple values of k. 
The file containing should have values of k sorted in decreasing order * */ int *Kmerlist = loadKmers(argv[2]); // kmer size //fprintf(stderr,"Smallest kmer is %d \n",smallestKmer); sizeKmer = Kmerlist[0]; if (sizeKmer>(int)(sizeof(kmer_type)*4)) { printf("Max kmer size on this compiled version is %lu\n",sizeof(kmer_type)*4); exit(1); } kmerMask=(((kmer_type)1)<<(sizeKmer*2))-1; // default solidity nks = 1; // default max memory max_memory = 5*1024; #ifndef OSX struct sysinfo info; sysinfo(&info); int total_ram = (int)(((double)info.totalram*(double)info.mem_unit)/1024/1024); printf("Total RAM: %d MB\n",total_ram); #else int total_ram = 128*1024; #endif // default prefix is the reads file basename char *reads_path=strdup(argv[1]); string reads_name(basename(reads_path)); // posix basename() may alter reads_path free(reads_path); int lastindex = reads_name.find_last_of("."); strcpy(prefix,reads_name.substr(0, lastindex).c_str()); for (int n_a = 3; n_a < argc ; n_a++) { if (strcmp(argv[n_a],"-t")==0) nks = atoi(argv[n_a+1]); if (strcmp(argv[n_a],"-o")==0) strcpy(prefix,argv[n_a+1]); } int verbose = 0; bool reverse = false; max_disk_space = 0; output_histo =false; // parse the remaining arguments: these will override the default max memory / max disk for (int n_a = 3; n_a < argc ; n_a++) { if (strcmp(argv[n_a],"-m")==0) max_memory = atoi(argv[n_a+1]); if (strcmp(argv[n_a],"-d")==0) max_disk_space = atoi(argv[n_a+1]); if (strcmp(argv[n_a],"-v")==0) verbose = 1; if (strcmp(argv[n_a],"-vv")==0) verbose = 2; if (strcmp(argv[n_a],"-histo")==0) output_histo =true; if (strcmp(argv[n_a],"-rev")==0) reverse = true; } if (max_memory > total_ram) { printf("Maximum memory (%d MB), exceeds total RAM (%d MB). Setting maximum memory to %d MB.\n",max_memory,total_ram,total_ram/2); max_memory = total_ram/2; } STARTWALL(0); sorting_count(Reads,prefix,max_memory,max_disk_space,true,verbose,reverse); STOPWALL(0,"Total"); delete Reads; return 0; }