Пример #1
0
/**
 * Reads exactly `bytes` bytes from the file, starting at its current offset.
 *
 * The file is opened for reading first if no descriptor is open yet.
 * Short reads are retried until the requested amount has been collected.
 * The data is NUL-terminated in the spare byte allocated past the end.
 *
 * @param bytes number of bytes that must be read.
 * @return a String built by handing the filled buffer to String::adopt().
 * @throws FileError if ::read() reports an error, or if end of file is
 *         reached before `bytes` bytes were read (eofbit is set in that case).
 */
String File::read(long unsigned bytes) const
{
	if(file < 0)
		openForRead();

	// One byte more than requested so there is room for the terminator.
	CArray<char> buffer(bytes + 1);

	while(buffer.len < bytes){
		ssize_t readIn = ::read(file, buffer.ptr + buffer.len, bytes - buffer.len);
		if(readIn < 0){
			// Inspect the return value before touching buffer.len: adding -1
			// to the length would corrupt the count reported in the error.
			if(errno == EINTR)
				continue; // interrupted before any data arrived; just retry
			throw FileError(errno, "Error after reading % bytes from file %.",
					buffer.len, fileName);
		}
		if(readIn == 0){
			eofbit = true;
			break;
		}
		buffer.len += readIn;
	}
	buffer.ptr[buffer.len] = 0;

	if(buffer.len != bytes)
		throw FileError("Found end of file after reading %/% bytes from file %.",
				buffer.len, bytes, fileName);

	String str;
	str.adopt(buffer.ptr, buffer.len, buffer.size);
	return str;
}
Пример #2
0
void BedFile::openForRead(const char* bfile, const char* reffile, int nbuf) {
  String s = String(bfile);
  String bedFile = s + ".bed";
  String bimFile = s + ".bim";
  String famFile = s + ".fam";
  openForRead(bedFile.c_str(), bimFile.c_str(), famFile.c_str(), reffile, nbuf);
}
Пример #3
0
/**
 * Reads from the file's current offset until end of file.
 *
 * The file is opened for reading first if no descriptor is open yet.
 * The buffer starts at 4096 bytes and grows by another 4096 each time it
 * fills up; one byte is always kept in reserve for the NUL terminator.
 *
 * @return a String built by handing the filled buffer to String::adopt().
 * @throws FileError if ::read() reports an error.
 */
String File::readAll() const
{
	if(file < 0)
		openForRead();

	const long unsigned chunkSize = 4096;

	CArray<char> buffer(chunkSize);
	// Usable space excludes the byte reserved for the terminator.
	long unsigned capacity = buffer.size - 1;

	while(buffer.len < capacity){
		ssize_t readIn = ::read(file, buffer.ptr + buffer.len, capacity - buffer.len);
		if(readIn < 0){
			// Check the return value before using it: adding -1 to the length
			// and then writing the terminator would index outside the buffer.
			if(errno == EINTR)
				continue; // interrupted before any data arrived; just retry
			throw FileError(errno, "Error after reading % bytes from file %.",
					buffer.len, fileName);
		}
		buffer.len += readIn;
		buffer.ptr[buffer.len] = 0;
		if(readIn == 0){
			eofbit = true;
			break;
		}
		if(buffer.len == capacity){
			// Buffer is full: make room for another chunk plus the terminator.
			capacity += chunkSize;
			buffer.resize(capacity + 1);
		}
	}

	String str;
	str.adopt(buffer.ptr, buffer.len, buffer.size);
	return str;
}
Пример #4
0
// Constructor: resets the file state, then opens `filename` for reading when
// `mode` is READ and for writing otherwise.  If the open fails, the status
// message is printed to stderr and the process exits with -1.
GlfFile::GlfFile(const char* filename, OpenType mode)
    : myFilePtr(NULL),
      myEndMarker()
{
    resetFile();

    // Pick the open routine that matches the requested mode.
    const bool opened = (mode == READ) ? openForRead(filename)
                                       : openForWrite(filename);
    if(opened)
    {
        return;
    }

    // Failed to open the file - print error and abort.
    fprintf(stderr, "%s\n", getStatusMessage());
    std::cerr << "FAILURE - EXITING!!!" << std::endl;
    exit(-1);
}
Пример #5
0
/**
 * Seeks to absolute offset `pos` and reads `bytes` bytes from there.
 *
 * The file is opened for reading first if no descriptor is open yet.
 *
 * @param pos   offset from the start of the file to seek to.
 * @param bytes number of bytes to read, passed on to read().
 * @return whatever read(bytes) returns.
 * @throws FileError if the seek fails; read() may throw as well.
 */
String File::readFromPos(long unsigned pos, long unsigned bytes) const
{
	if(file < 0)
		openForRead();

	const off_t newOffset = lseek(file, pos, SEEK_SET);
	if(newOffset < 0)
		throw FileError(errno, "Could not seek in file %.", fileName);

	return read(bytes);
}
Пример #6
0
// Constructor: opens `filename` for reading.  If the open fails, the status
// message is printed to stderr and the process exits with -1.
GlfFileReader::GlfFileReader(const char* filename)
{
    const bool opened = openForRead(filename);
    if(opened)
    {
        return;
    }

    // Failed to open for reading - print error and abort.
    fprintf(stderr, "%s\n", getStatusMessage());
    std::cerr << "FAILURE - EXITING!!!" << std::endl;
    exit(-1);
}
Пример #7
0
// Opens the glf file `filename` for reading and then reads its header into
// `header`.  Returns true only if both steps succeed; the header is not read
// when the open fails.
bool GlfFile::openForRead(const char * filename, GlfHeader& header)
{
    // Short-circuit evaluation keeps readHeader() from running after a
    // failed open.
    return openForRead(filename) && readHeader(header);
}
Пример #8
0
bool CsvFile :: readRow()
 {
  if (!file.isOpen()) openForRead();
  while (1)
   {
    if (file.atEnd())
     {
      currentRow.clear();
      return false;
     }
    QString str = file.readLine().trimmed();
    currentRow = str.split(';');
    if (!str.isEmpty()) return true;
   }
 }
Пример #9
0
/*
 * Hashes the whole contents of the file `fname` with Skein and returns the
 * digest in a freshly allocated buffer of state_size/64 uint64_t words.
 *
 * The file is consumed in chunks of at most HASH_BUFFER_SIZE bytes; each
 * chunk is obtained from readBytes(), fed to skeinUpdate() and then freed.
 *
 * Returns NULL if the file cannot be opened, if getFileSize() reports 0,
 * if the digest buffer cannot be allocated, or if readBytes() returns NULL.
 * On success the caller owns the returned buffer and must free() it.
 */
uint64_t* hashKeyFromFile(const uint8_t* fname, const SkeinSize_t state_size)
{
   int64_t fd = openForRead(fname);
   if(fd < 0) { return NULL; }

   /* NOTE(review): fd is not closed on any path out of this function; the
    * routine that pairs with openForRead() is not visible here. Confirm and
    * add the matching close on every return. */

   uint64_t bytes_to_hash = getFileSize(fname);
   if(bytes_to_hash == 0) { return NULL; }

   /* presumably state_size is in bits, hence /64 words - TODO confirm */
   uint64_t* key = (uint64_t*)calloc(state_size/64, sizeof(uint64_t));
   if(key == NULL) { return NULL; } /* skeinFinal() must not write through NULL */

   struct SkeinCtx skein_state;
   skeinCtxPrepare(&skein_state, state_size); /* Set up the context */
   /* Init Skein and tell it how big the digest will be */
   skeinInit(&skein_state, state_size);

   /* Iterate through the file and run its contents through Skein */
   while (bytes_to_hash > 0)
   {
       /* A full buffer, or whatever is left for the final chunk. */
       const uint64_t chunk_size = (bytes_to_hash < HASH_BUFFER_SIZE)
                                       ? bytes_to_hash
                                       : (uint64_t)HASH_BUFFER_SIZE;

       uint64_t* hash_chunk = (uint64_t*)readBytes(chunk_size, fd);
       if (hash_chunk == NULL)
       {
           free(key);
           return NULL;
       }

       skeinUpdate(&skein_state, (uint8_t*)hash_chunk, chunk_size);
       free(hash_chunk);
       bytes_to_hash -= chunk_size; /* decrement the counter */
   }

   skeinFinal(&skein_state, (uint8_t*)key); /* get the digest and return it */

   return key;
}
Пример #10
0
/**
 * Returns the size of the file in bytes.
 *
 * The file is opened for reading first if no descriptor is open yet.
 * The size is found by seeking to the end of the file; the offset that was
 * current on entry is restored before returning.
 *
 * @return the offset of the end of the file.
 * @throws FileError if any of the three lseek() calls fails.
 */
long unsigned File::size() const
{
	if(file < 0)
		openForRead();

	// Kept as off_t (not long) so the saved offset cannot be truncated on
	// platforms where off_t is wider than long.
	const off_t lastPos = lseek(file, 0, SEEK_CUR);
	if(lastPos < 0)
		throw FileError(errno, "Could not use lseek to get position in file %.", fileName);

	const off_t size = lseek(file, 0, SEEK_END);
	if(size < 0)
		throw FileError(errno, "Could not seek to end of file %.", fileName);

	// Put the offset back where the caller left it.
	if(lseek(file, lastPos, SEEK_SET) < 0)
		throw FileError(errno, "Could not seek in file %.", fileName);

	return size;
}
Пример #11
0
/*
 * Runs a complete query pass described by AAs.
 *
 * Steps, in order:
 *   1. Open the query file, the genome file and the output file(s).
 *   2. Load and normalize the base sequences from the genome file and record
 *      them in AAs.
 *   3. Open the index file, check its version, take wordLen from its header,
 *      lower AAs->maxHits to the index's value if needed, and point
 *      AAs->startingOffs / AAs->ROAPtr at the right places inside the file
 *      image.
 *   4. Read the first query to get an initial maxQueryLength.
 *   5. Spawn up to AAs->numThreads - 1 worker threads (capped by the number
 *      of processors) that each run processQueries(), and run
 *      processQueries() in the calling thread as well.
 *   6. Join the workers, close all files and dispose of the base sequences.
 *
 * An index file with the wrong version is reported through fatalError().
 * Failure to create a worker thread (or to allocate the thread array) only
 * reduces the number of threads used.
 */
void processQueryFile(AlignmentArgs_t * AAs)
{

/////
//     Open files, and initialize structures
//     The compressed genome and index structures are read only and shared amongst threads via AAs.
/////

    // Open the query file.
    openQueryFile(AAs);

    // Open the genome file.
    char *gfilePtr;
    FSIZE gfileSize;
    FDES gfp = openForRead(AAs->gfileName, &gfileSize, &gfilePtr, TRUE);

    // Read in the .nib2 file header.
    BaseSequences_t * BSs = loadBaseSequences(gfilePtr);
    normalizeBaseSequences(BSs);
    if (AAs->verbose) fprintf(stderr, "Read in %d reference sequences from %s.\n", BSs->curCount, AAs->gfileName);
    AAs->BSs = BSs;
    AAs->basePtr = BSs->basePtr;
    AAs->maxROff = baseSequencesMaxROff(BSs);

    // Open the output alignment file.
    // We have no idea how much output we will generate.
    // It will be all written sequentially.
    // So, no fancy memory mapped IO for this output.
    // Just do formatted output.
    AAs->outFile = openForPrint(AAs->ofileName);

#ifdef QUERYSTATS
    if (AAs->queryStats)
    {
        AAs->qsFile = openForPrint(AAs->qsfileName);
    }
#endif

    // Some of the file formats have headers.
    outputFileHeader(AAs);

    // Open the index file.
    // This is harder as it requires putting all pointers to various pieces back together.
    char *xfilePtr;
    FSIZE xfileSize;
    FDES xfp = openForRead(AAs->xfileName, &xfileSize, &xfilePtr, TRUE);

    // Start by getting the wordLen and totalCount from the file Header.
    // Header layout as read here: [0]=version, [1]=wordLen, [2]=maxHits.
    UINT *xuintPtr = (UINT *)xfilePtr;
    int version = xuintPtr[0];
    if (version != CURRENT_INDEX_FILE_VERSION)
        fatalError("Index file version is out of date.\nPlease remake index file and try again.");
    AAs->wordLen = xuintPtr[1];
    int fileMaxHits = xuintPtr[2];
    if (fileMaxHits < AAs->maxHits)
    {
        fprintf(stderr, "WARNING: Index file made with maxHits of %d, while %d specified for this query run.\n"
                "Mimimum of two (%d) will be used.\n", fileMaxHits, AAs->maxHits, fileMaxHits);
        AAs->maxHits = fileMaxHits;
    }
    // Now we can recalculate the hash table size.
    UINT HTsize = iexp(4, AAs->wordLen);

    // Now we will walk through the various structures, assigning pointers of right type to right spots.
    FSIZE sizeofUINT = sizeof(UINT);
    FSIZE inputSize;
    // Space for four numbers in header.
    inputSize = 4*sizeofUINT;
    FSIZE startingOffBase = inputSize;
    // Space for reference starting Offsets array.
    inputSize += sizeofUINT * (HTsize + 1);
    FSIZE referenceBase = inputSize;

    // We've calculated all the offsets, now get the pointers.
    AAs->startingOffs = (ROFF *)(xfilePtr + startingOffBase);
    AAs->ROAPtr = (ROFF *)(xfilePtr + referenceBase);

#ifdef DEBUG
    fprintf(stderr, "Number of Hash Tables entries is %u, and ROA has %u entries.\n",
            HTsize, (UINT)((xfileSize-inputSize)/sizeof(ROFF)));
    fprintf(stderr, "Hash Table sizes are as follows: offs=%zd, refs=%zd.\n", sizeof(UINT)*(HTsize + 1), xfileSize-inputSize);
    // The pointers live in AAs; there are no locals named startingOffs/refs.
    fprintf(stderr, "The index pointers are: xfilePtr=%p, sOffsPtr=%p,  and refPtr=%p\n",
            (void *)xfilePtr, (void *)AAs->startingOffs, (void *)AAs->ROAPtr);
#endif

    // Read in first query to get an estimate of the size of the queries in the file.
    QueryState_t * QS = makeQueryState(AAs);
    readNextQuery(AAs, QS);
    calcNewMaxQueryLength(QS);
    QOFF maxQueryLength = QS->maxQueryLength;
    initializeQueries(QS);

    // Create extra threads.
    // We will use the main thread as well, so only need to spawn one less than requested.
    int numthreads = AAs->numThreads - 1;
    pthread_t * threads = NULL;
    if (numthreads > 0)
    {
        // Keep the thread count to something the machine can handle.
        int nprocs = get_nprocs();
        if (numthreads + 1 > nprocs)
        {
            fprintf(stderr, "Warning.  Requested number of threads (%d) is greater than number of processors."
                    "  %d threads will be used.\n", AAs->numThreads, nprocs);
            numthreads = nprocs - 1;
        }

        // Do the storage allocation for threads.
        // We allocate the thread local storage here as well to avoid malloc contention while spawning threads.
        threads = (pthread_t *)malloc(numthreads * sizeof(pthread_t));
        if (threads == NULL && numthreads > 0)
        {
            // Without the array there is nowhere to keep thread handles, so
            // degrade to running in this thread only rather than crashing.
            fprintf(stderr, "Warning.  Could not allocate storage for %d threads.  1 thread will be used.\n", numthreads);
            numthreads = 0;
        }
        for (int i=0; i<numthreads; i++)
        {
            QueryState_t * threadQS = makeQueryState(AAs);
            // Set in an initial value for maxQueryLength based on first query read.
            threadQS->maxQueryLength = maxQueryLength;
            initializeQueries(threadQS);
            pthread_t * thread = threads+i;
            int err = pthread_create(thread, NULL, &processQueries, threadQS);

            // If we fail to create the thread for any reason, bail from trying to make more.
            if (err != 0)
            {
                // i workers were started before this one; plus this thread makes i+1.
                fprintf(stderr, "Warning.  Thread creation resulted in error %d.  %d threads will be used.\n", err, i+1);
                numthreads = i;
                // Deallocate the unused thread local structures.
                finalizeQueries(threadQS);
                disposeQueryState(threadQS);
                break;
            }
        }
    }

    // Now also do work in this thread.
    // This won't return until the query file is exhausted.
    processQueries(QS);

    // Wait for all the rest of the threads (if any) to finish.
    for (int i=0; i<numthreads; i++)
    {
        pthread_join(threads[i], NULL);
    }
    if (threads != NULL) free(threads);

    // Free resources.
    // First close files.
    closeQueryFile(AAs);
    closeForRead(gfp, gfileSize, gfilePtr);
    closeForRead(xfp, xfileSize, xfilePtr);
    closeForPrint(AAs->outFile);

#ifdef QUERYSTATS
    if (AAs->queryStats)
    {
        closeForPrint(AAs->qsFile);
    }
#endif

    // Free allocated structures.
    disposeBaseSequences(BSs);
}