/// Reads exactly `bytes` bytes from the file, starting at its current offset.
///
/// If `file` is negative, openForRead() is called first. `eofbit` is set when
/// ::read reports end of file.
///
/// @param bytes number of bytes requested.
/// @return a String that adopts the buffer holding the bytes read.
/// @throws FileError if ::read fails, or if end of file is reached before
///         `bytes` bytes have been read.
String File::read(long unsigned bytes) const
{
    if(file < 0) openForRead();

    // The buffer is allocated one byte larger than the requested payload.
    CArray<char> buffer(bytes + 1);

    while(buffer.len < bytes){
        const ssize_t readIn = ::read(file, buffer.ptr + buffer.len, bytes - buffer.len);

        // Test the return value BEFORE touching buffer.len: adding -1 to the
        // length would corrupt both the buffer state and the byte count
        // reported in the error message. errno is only meaningful on failure.
        if(readIn < 0)
            throw FileError(errno, "Error after reading % bytes from file %.", buffer.len, fileName);

        if(readIn == 0){
            eofbit = true;
            break;
        }
        buffer.len += readIn;
    }

    if(buffer.len != bytes)
        throw FileError("Found end of file after reading %/% bytes from file %.", buffer.len, bytes, fileName);

    // Hand the buffer over to the String instead of copying it.
    String str;
    str.adopt(buffer.ptr, buffer.len, buffer.size);
    return str;
}
void BedFile::openForRead(const char* bfile, const char* reffile, int nbuf) { String s = String(bfile); String bedFile = s + ".bed"; String bimFile = s + ".bim"; String famFile = s + ".fam"; openForRead(bedFile.c_str(), bimFile.c_str(), famFile.c_str(), reffile, nbuf); }
/// Reads from the file's current offset until end of file.
///
/// If `file` is negative, openForRead() is called first. The buffer starts at
/// 4096 bytes and grows by 4096 each time it fills up; one byte is always kept
/// in reserve for a terminating 0. `eofbit` is set when ::read reports end of
/// file.
///
/// @return a String that adopts the buffer holding everything read.
/// @throws FileError if ::read fails.
String File::readAll() const
{
    if(file < 0) openForRead();

    CArray<char> buffer(4096);
    // Usable payload space; the last byte is reserved for the terminator.
    long unsigned capacity = buffer.size - 1;

    while(buffer.len < capacity){
        const ssize_t readIn = ::read(file, buffer.ptr + buffer.len, capacity - buffer.len);

        // Test the return value BEFORE updating the length: with readIn == -1
        // the length would be corrupted and the terminator below would be
        // written outside the buffer. errno is only meaningful on failure.
        if(readIn < 0)
            throw FileError(errno, "Error after reading % bytes from file %.", buffer.len, fileName);

        buffer.len += readIn;
        buffer.ptr[buffer.len] = 0;

        if(readIn == 0){
            eofbit = true;
            break;
        }

        // Buffer full: enlarge it by another 4096 bytes (plus the terminator).
        if(buffer.len == capacity){
            capacity += 4096;
            buffer.resize(capacity + 1);
        }
    }

    // Hand the buffer over to the String instead of copying it.
    String str;
    str.adopt(buffer.ptr, buffer.len, buffer.size);
    return str;
}
// Constructor, init variables and open the specified file based on the // specified mode (READ/WRITE). Default is READ.. GlfFile::GlfFile(const char* filename, OpenType mode) : myFilePtr(NULL), myEndMarker() { resetFile(); bool openStatus = true; if(mode == READ) { // open the file for read. openStatus = openForRead(filename); } else { // open the file for write. openStatus = openForWrite(filename); } if(!openStatus) { // Failed to open the file - print error and abort. fprintf(stderr, "%s\n", getStatusMessage()); std::cerr << "FAILURE - EXITING!!!" << std::endl; exit(-1); } }
/// Seeks to absolute offset `pos` and reads `bytes` bytes from there.
///
/// If `file` is negative, openForRead() is called first.
///
/// @param pos   offset from the start of the file (SEEK_SET).
/// @param bytes number of bytes to read; forwarded to read().
/// @return whatever read(bytes) returns.
/// @throws FileError if the seek fails (read() may throw as well).
String File::readFromPos(long unsigned pos, long unsigned bytes) const
{
    if(file < 0)
    {
        openForRead();
    }

    if(lseek(file, pos, SEEK_SET) >= 0)
    {
        return read(bytes);
    }

    throw FileError(errno, "Could not seek in file %.", fileName);
}
// Constructor that opens the specified file for read. GlfFileReader::GlfFileReader(const char* filename) { if(!openForRead(filename)) { // Failed to open for reading - print error and abort. fprintf(stderr, "%s\n", getStatusMessage()); std::cerr << "FAILURE - EXITING!!!" << std::endl; exit(-1); } }
// Open a glf file for reading with the specified filename and read the // header into the specified header. bool GlfFile::openForRead(const char * filename, GlfHeader& header) { if(!openForRead(filename)) { return(false); } // Read the header if(!readHeader(header)) { return(false); } return(true); }
bool CsvFile :: readRow() { if (!file.isOpen()) openForRead(); while (1) { if (file.atEnd()) { currentRow.clear(); return false; } QString str = file.readLine().trimmed(); currentRow = str.split(';'); if (!str.isEmpty()) return true; } }
uint64_t* hashKeyFromFile(const uint8_t* fname, const SkeinSize_t state_size) { int64_t fd = openForRead(fname); if(fd < 0) { return NULL; } uint64_t bytes_to_hash = getFileSize(fname); if(bytes_to_hash == 0) { return NULL; } struct SkeinCtx skein_state; uint64_t* hash_chunk = NULL; uint64_t* key = calloc(state_size/64 , sizeof(uint64_t)); skeinCtxPrepare(&skein_state, state_size); //Set up the context //Init Skein and tell it how big the digest will be skeinInit(&skein_state, state_size); //Iterate through the file and run its contents through Skein while (bytes_to_hash > 0) { uint64_t chunk_size = 0; if (bytes_to_hash < HASH_BUFFER_SIZE) { chunk_size = bytes_to_hash; hash_chunk = (uint64_t*)readBytes(bytes_to_hash, fd); } else if (bytes_to_hash >= HASH_BUFFER_SIZE) { chunk_size = HASH_BUFFER_SIZE; hash_chunk = (uint64_t*)readBytes(HASH_BUFFER_SIZE, fd); } if (hash_chunk == NULL) { free(key); return NULL; } skeinUpdate(&skein_state, (uint8_t*)hash_chunk, chunk_size); free(hash_chunk); bytes_to_hash -= chunk_size; //decriment the counter } skeinFinal(&skein_state, (uint8_t*)key); //get the digest and return it return key; }
/// Returns the size of the file, as reported by seeking to its end.
///
/// If `file` is negative, openForRead() is called first. The current offset
/// is saved before the seek and restored afterwards.
///
/// @return the offset of the end of the file.
/// @throws FileError if any of the three lseek calls fails.
long unsigned File::size() const
{
    if(file < 0) openForRead();

    // Keep the offset in off_t, the type lseek returns; a plain `long` can
    // truncate it on platforms where off_t is wider than long.
    const off_t lastPos = lseek(file, 0, SEEK_CUR);
    if(lastPos < 0)
        throw FileError(errno, "Could not use lseek to get position in file %.", fileName);

    const off_t size = lseek(file, 0, SEEK_END);
    if(size < 0)
        throw FileError(errno, "Could not seek to end of file %.", fileName);

    // Put the offset back where the caller left it.
    if(-1 == lseek(file, lastPos, SEEK_SET))
        throw FileError(errno, "Could not seek in file %.", fileName);

    return size;
}
// Runs one complete query pass described by AAs:
//   1. opens the query file, the genome file and the index file, and wires the
//      pointers into the genome/index data into AAs;
//   2. opens the output file(s) and writes the output header;
//   3. reads the first query to size the per-thread query state;
//   4. spawns up to AAs->numThreads - 1 worker threads running processQueries,
//      then runs processQueries on the calling thread as well;
//   5. joins the workers, closes the files and disposes the base sequences.
//
// AAs is both input (file names, options) and output (BSs, basePtr, maxROff,
// wordLen, maxHits, startingOffs, ROAPtr, outFile, qsFile are set here).
void processQueryFile(AlignmentArgs_t * AAs)
{
	/////
	// Open files, and initialize structures
	// The compressed genome and index structures are read only and shared amongst threads via AAs.
	/////

	// Open the query file.
	openQueryFile(AAs);

	// Open the genome file.
	char *gfilePtr;
	FSIZE gfileSize;
	FDES gfp = openForRead(AAs->gfileName, &gfileSize, &gfilePtr, TRUE);

	// Read in the .nib2 file header.
	BaseSequences_t * BSs = loadBaseSequences(gfilePtr);
	normalizeBaseSequences(BSs);
	if (AAs->verbose) fprintf(stderr, "Read in %d reference sequences from %s.\n", BSs->curCount, AAs->gfileName);
	AAs->BSs = BSs;
	AAs->basePtr = BSs->basePtr;
	AAs->maxROff = baseSequencesMaxROff(BSs);

	// Open the output alignment file.
	// We have no idea how much output we will generate.
	// It will be all written sequentially.
	// So, no fancy memory mapped IO for this output.
	// Just do formatted output.
	AAs->outFile = openForPrint(AAs->ofileName);
#ifdef QUERYSTATS
	if (AAs->queryStats)
	{
		AAs->qsFile = openForPrint(AAs->qsfileName);
	}
#endif

	// Some of the file formats have headers.
	outputFileHeader(AAs);

	// Open the index file.
	// This is harder as it requires putting all pointers to various pieces back together.
	char *xfilePtr;
	FSIZE xfileSize;
	FDES xfp = openForRead(AAs->xfileName, &xfileSize, &xfilePtr, TRUE);

	// Start by getting the version, wordLen and maxHits from the file header.
	UINT *xuintPtr = (UINT *)xfilePtr;
	int version = xuintPtr[0];
	if (version != CURRENT_INDEX_FILE_VERSION)
		fatalError("Index file version is out of date.\nPlease remake index file and try again.");
	AAs->wordLen = xuintPtr[1];
	int fileMaxHits = xuintPtr[2];
	if (fileMaxHits < AAs->maxHits)
	{
		fprintf(stderr, "WARNING: Index file made with maxHits of %d, while %d specified for this query run.\n"
				"Mimimum of two (%d) will be used.\n", fileMaxHits, AAs->maxHits, fileMaxHits);
		AAs->maxHits = fileMaxHits;
	}

	// Now we can recalculate the hash table size.
	UINT HTsize = iexp(4, AAs->wordLen);

	// Now we will walk through the various structures, assigning pointers of right type to right spots.
	FSIZE sizeofUINT = sizeof(UINT);
	FSIZE inputSize;
	// Space for four numbers in header.
	inputSize = 4*sizeofUINT;
	FSIZE startingOffBase = inputSize;
	// Space for reference starting Offsets array.
	inputSize += sizeofUINT * (HTsize + 1);
	FSIZE referenceBase = inputSize;

	// We've calculated all the offsets, now get the pointers.
	AAs->startingOffs = (ROFF *)(xfilePtr + startingOffBase);
	AAs->ROAPtr = (ROFF *)(xfilePtr + referenceBase);

#ifdef DEBUG
	fprintf(stderr, "Number of Hash Tables entries is %u, and ROA has %u entries.\n", HTsize, (UINT)((xfileSize-inputSize)/sizeof(ROFF)));
	fprintf(stderr, "Hash Table sizes are as follows: offs=%zd, refs=%zd.\n", sizeof(UINT)*(HTsize + 1), xfileSize-inputSize);
	// Print the pointers that were just stored in AAs; %p requires void *.
	fprintf(stderr, "The index pointers are: xfilePtr=%p, sOffsPtr=%p, and refPtr=%p\n",
			(void *)xfilePtr, (void *)AAs->startingOffs, (void *)AAs->ROAPtr);
#endif

	// Read in first query to get an estimate of the size of the queries in the file.
	QueryState_t * QS = makeQueryState(AAs);
	readNextQuery(AAs, QS);
	calcNewMaxQueryLength(QS);
	QOFF maxQueryLength = QS->maxQueryLength;
	initializeQueries(QS);

	// Create extra threads.
	// We will use the main thread as well, so only need to spawn one less than requested.
	int numthreads = AAs->numThreads - 1;
	pthread_t * threads = NULL;
	if (numthreads > 0)
	{
		// Keep the thread count to something the machine can handle.
		int nprocs = get_nprocs();
		if (numthreads + 1 > nprocs)
		{
			fprintf(stderr, "Warning. Requested number of threads (%d) is greater than number of processors."
					" %d threads will be used.\n", AAs->numThreads, nprocs);
			numthreads = nprocs - 1;
		}

		// Do the storage allocation for threads.
		// We allocate the thread local storage here as well to avoid malloc contention while spawning threads.
		// The clamp above may have left nothing to spawn, so only allocate when needed.
		if (numthreads > 0)
		{
			threads = (pthread_t *)malloc(numthreads * sizeof(pthread_t));
			if (threads == NULL)
			{
				// Without handles we cannot spawn or join workers; carry on single threaded.
				fprintf(stderr, "Warning. Could not allocate thread handles. 1 thread will be used.\n");
				numthreads = 0;
			}
		}

		for (int i=0; i<numthreads; i++)
		{
			QueryState_t * threadQS = makeQueryState(AAs);
			// Set in an initial value for maxQueryLength based on first query read.
			threadQS->maxQueryLength = maxQueryLength;
			initializeQueries(threadQS);
			int err = pthread_create(&threads[i], NULL, &processQueries, threadQS);
			// If we fail to create the thread for any reason, bail from trying to make more.
			if (err != 0)
			{
				fprintf(stderr, "Warning. Thread creation resulted in error %d. %d threads will be used.\n", err, i+1);
				// Only the first i workers exist; those are the ones to join later.
				numthreads = i;
				// Deallocate the unused thread local structures.
				finalizeQueries(threadQS);
				disposeQueryState(threadQS);
				break;
			}
		}
	}

	// Now also do work in this thread.
	// This won't return until the query file is exhausted.
	processQueries(QS);

	// Wait for all the rest of the threads (if any) to finish.
	for (int i=0; i<numthreads; i++)
	{
		pthread_join(threads[i], NULL);
	}
	// free(NULL) is a no-op, so no guard is needed.
	free(threads);

	// Free resources.
	// First close files.
	closeQueryFile(AAs);
	closeForRead(gfp, gfileSize, gfilePtr);
	closeForRead(xfp, xfileSize, xfilePtr);
	closeForPrint(AAs->outFile);
#ifdef QUERYSTATS
	if (AAs->queryStats)
	{
		closeForPrint(AAs->qsFile);
	}
#endif

	// Free allocated structures.
	disposeBaseSequences(BSs);
}