bool Genome::getSizeFromFile(const char *fileName, unsigned *nBases, unsigned *nContigs) { GenericFile *file; unsigned localNBases, localnContigs; if (!openFileAndGetSizes(fileName,&file,nBases ? nBases : &localNBases, nContigs ? nContigs : &localnContigs)) { return false; } file->close(); delete file; return true; }
//
// Load a genome, or a window of one, from a genome file.
//
//  fileName          - path handed to openFileAndGetSizes().
//  chromosomePadding - passed through to the Genome constructor.
//  i_minOffset       - offset of the first base to load; must be less than the number of
//                      bases recorded in the file.
//  length            - number of bases to load.  0 means "everything from i_minOffset to
//                      the end"; a larger-than-available value is clamped.
//
// After the sizes, the file is expected to hold one line per contig of the form
// "<beginning offset> <name>\n", followed by the bases.
//
// Returns the newly allocated genome, or NULL on failure.  Every failure path that
// returns NULL also closes and deletes the file.  A failed seek calls soft_exit(1).
//
const Genome *
Genome::loadFromFile(const char *fileName, unsigned chromosomePadding, unsigned i_minOffset, unsigned length)
{
    GenericFile *loadFile;
    unsigned nBases, nContigs;

    if (!openFileAndGetSizes(fileName, &loadFile, &nBases, &nContigs)) {
        //
        // It already printed an error.  Just fail.
        //
        return NULL;
    }

    //
    // Reject a bad starting offset before it is used: nBases - i_minOffset is unsigned
    // arithmetic and would wrap around to a huge length.
    //
    if (i_minOffset >= nBases) {
        WriteErrorMessage("Genome::loadFromFile: specified minOffset %u >= nBases %u\n", i_minOffset, nBases);
        loadFile->close();
        delete loadFile;
        return NULL;
    }

    const unsigned basesAvailable = nBases - i_minOffset;
    if (0 == length) {
        length = basesAvailable;
    } else {
        //
        // Don't let length go beyond nBases.
        //
        length = __min(length, basesAvailable);
    }

    Genome *genome = new Genome(nBases, length, chromosomePadding);

    genome->nBases = nBases;
    genome->nContigs = genome->maxContigs = nContigs;
    genome->contigs = new Contig[nContigs];
    genome->minOffset = i_minOffset;
    genome->maxOffset = i_minOffset + length;

    static const unsigned contigNameBufferSize = 512;
    char contigNameBuffer[contigNameBufferSize];

    for (unsigned i = 0; i < nContigs; i++) {
        if (NULL == loadFile->gets(contigNameBuffer, contigNameBufferSize)) {
            WriteErrorMessage("Unable to read contig description\n");
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }

        //
        // Split "<offset> <name>" at the first space.  strchr stops at the terminating
        // NUL (gets() is presumably fgets-like and must NUL-terminate the buffer), so a
        // line with no space is reported instead of being scanned past its end.
        //
        char *separator = strchr(contigNameBuffer, ' ');
        if (NULL == separator) {
            WriteErrorMessage("Genome::loadFromFile: malformed contig description in genome file '%s'\n", fileName);
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }
        *separator = '\0';

        //
        // Offsets are unsigned and may exceed INT_MAX, which atoi cannot represent.
        //
        genome->contigs[i].beginningOffset = (unsigned)strtoul(contigNameBuffer, NULL, 10);

        //
        // The name is everything after the space, minus the trailing newline if there
        // is one.  (An over-long or final unterminated line has none; don't eat a real
        // character or underflow on an empty name in that case.)
        //
        const char *nameStart = separator + 1;
        size_t contigSize = strlen(nameStart);
        if (contigSize > 0 && '\n' == nameStart[contigSize - 1]) {
            contigSize--;
        }

        char *curName = new char[contigSize + 1];
        memcpy(curName, nameStart, contigSize);
        curName[contigSize] = '\0';

        genome->contigs[i].name = curName;
        genome->contigs[i].nameLength = (unsigned)contigSize;
    }

    //
    // Move from the start of the bases to the first one the caller asked for.
    //
    if (0 != loadFile->advance(i_minOffset)) {
        WriteErrorMessage("Genome::loadFromFile: _fseek64bit failed\n");
        soft_exit(1);
    }

    const size_t retval = loadFile->read(genome->bases, length);

    //
    // The file is no longer needed whether or not the read was complete.
    //
    loadFile->close();
    delete loadFile;

    if (length != retval) {
        WriteErrorMessage("Genome::loadFromFile: fread of bases failed; wanted %u, got %llu\n", length, (unsigned long long)retval);
        delete genome;
        return NULL;
    }

    genome->fillInContigLengths();
    genome->sortContigsByName();

    return genome;
}
//
// Load a genome, or a window of one, from a genome file, optionally by mapping the file
// rather than reading it.
//
//  fileName          - path handed to openFileAndGetSizes().
//  chromosomePadding - passed through to the Genome constructor.
//  minLocation       - location of the first base to load; must be less than the number
//                      of bases recorded in the file (otherwise soft_exit(-1) is called).
//  length            - number of bases to load.  0 means "everything from minLocation to
//                      the end"; a larger-than-available value is clamped.
//  map               - passed to openFileAndGetSizes(); when true the file is treated as
//                      a GenericFile_map and the bases are obtained with mapAndAdvance(),
//                      and the genome keeps the mapped file in genome->mappedFile.
//
// After the sizes, the file is expected to hold one line per contig of the form
// "<beginning location> <name>\n", followed by the bases.
//
// Returns the newly allocated genome, or NULL on failure.  Every failure path that
// returns NULL also closes and deletes the file.  An unparsable contig start or a
// failed seek calls soft_exit(1).
//
const Genome *
Genome::loadFromFile(const char *fileName, unsigned chromosomePadding, GenomeLocation minLocation, GenomeDistance length, bool map)
{
    GenericFile *loadFile;
    GenomeDistance nBases;
    unsigned nContigs;

    if (!openFileAndGetSizes(fileName, &loadFile, &nBases, &nContigs, map)) {
        //
        // It already printed an error.  Just fail.
        //
        return NULL;
    }

    //
    // Validate the starting location before it is used to compute the length and size
    // the genome; otherwise the allocation below is made with a nonsensical length.
    //
    if (GenomeLocationAsInt64(minLocation) >= nBases) {
        WriteErrorMessage("Genome::loadFromFile: specified minOffset %lld >= nBases %lld\n",
            (long long)GenomeLocationAsInt64(minLocation), (long long)nBases);
        soft_exit(-1);
    }

    GenomeLocation maxLocation(nBases);
    if (0 == length) {
        length = maxLocation - minLocation;
    } else {
        //
        // Don't let length go beyond nBases.
        //
        length = __min(length, maxLocation - minLocation);
        maxLocation = minLocation + length;
    }

    Genome *genome = new Genome(nBases, length, chromosomePadding);

    genome->nBases = nBases;
    genome->nContigs = genome->maxContigs = nContigs;
    genome->contigs = new Contig[nContigs];
    genome->minLocation = minLocation;
    genome->maxLocation = maxLocation;

    static const unsigned contigNameBufferSize = 512;
    char contigNameBuffer[contigNameBufferSize];

    for (unsigned i = 0; i < nContigs; i++) {
        if (NULL == loadFile->gets(contigNameBuffer, contigNameBufferSize)) {
            WriteErrorMessage("Unable to read contig description\n");
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }

        //
        // Split "<location> <name>" at the first space.  strchr stops at the terminating
        // NUL (gets() is presumably fgets-like and must NUL-terminate the buffer), so a
        // line with no space is reported instead of being scanned past its end.
        //
        char *separator = strchr(contigNameBuffer, ' ');
        if (NULL == separator) {
            WriteErrorMessage("Genome::loadFromFile: malformed contig description in genome file '%s'\n", fileName);
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }
        *separator = '\0';

        _int64 contigStart;
        if (1 != sscanf(contigNameBuffer, "%lld", &contigStart)) {
            WriteErrorMessage("Unable to parse contig start in genome file '%s', '%s'\n", fileName, contigNameBuffer);
            soft_exit(1);
        }
        genome->contigs[i].beginningLocation = GenomeLocation(contigStart);

        //
        // The name is everything after the space, minus the trailing newline if there
        // is one.  (An over-long or final unterminated line has none; don't eat a real
        // character or underflow on an empty name in that case.)
        //
        const char *nameStart = separator + 1;
        size_t contigSize = strlen(nameStart);
        if (contigSize > 0 && '\n' == nameStart[contigSize - 1]) {
            contigSize--;
        }

        char *curName = new char[contigSize + 1];
        memcpy(curName, nameStart, contigSize);
        curName[contigSize] = '\0';

        genome->contigs[i].name = curName;
        genome->contigs[i].nameLength = (unsigned)contigSize;
    }

    //
    // Move from the start of the bases to the first one the caller asked for.
    //
    if (0 != loadFile->advance(GenomeLocationAsInt64(minLocation))) {
        WriteErrorMessage("Genome::loadFromFile: _fseek64bit failed\n");
        soft_exit(1);
    }

    size_t readSize;
    if (map) {
        GenericFile_map *mappedFile = (GenericFile_map *)loadFile;
        char *mappedBases = (char *)mappedFile->mapAndAdvance(length, &readSize);
        if (length == readSize) {
            //
            // Only hand the mapping to the genome once its size has been verified, so
            // the failure path below releases the file exactly once rather than
            // deleting a pointer the genome is also holding.
            //
            genome->bases = mappedBases;
            genome->mappedFile = mappedFile;
            mappedFile->prefetch();
            loadFile = NULL;    // now reachable through genome->mappedFile
        }
    } else {
        readSize = loadFile->read(genome->bases, length);
        loadFile->close();
        delete loadFile;
        loadFile = NULL;
    }

    if (length != readSize) {
        WriteErrorMessage("Genome::loadFromFile: fread of bases failed; wanted %lld, got %llu\n",
            (long long)length, (unsigned long long)readSize);
        if (NULL != loadFile) {
            //
            // Only the mapped path gets here with the file still open.
            //
            loadFile->close();
            delete loadFile;
        }
        delete genome;
        return NULL;
    }

    genome->fillInContigLengths();
    genome->sortContigsByName();

    return genome;
}