uint64_t FileData::Size() const { #ifdef OMIM_OS_TIZEN Tizen::Io::FileAttributes attr; result const error = Tizen::Io::File::GetAttributes(m_FileName.c_str(), attr); if (IsFailed(error)) MYTHROW(Reader::SizeException, (m_FileName, m_Op, error)); return attr.GetFileSize(); #else int64_t const pos = ftell64(m_File); if (pos == INVALID_POS) MYTHROW(Reader::SizeException, (GetErrorProlog(), pos)); if (fseek64(m_File, 0, SEEK_END)) MYTHROW(Reader::SizeException, (GetErrorProlog())); int64_t const size = ftell64(m_File); if (size == INVALID_POS) MYTHROW(Reader::SizeException, (GetErrorProlog(), size)); if (fseek64(m_File, pos, SEEK_SET)) MYTHROW(Reader::SizeException, (GetErrorProlog(), pos)); ASSERT_GREATER_OR_EQUAL(size, 0, ()); return static_cast<uint64_t>(size); #endif }
/* A portable fseek() function return 0 on success, non-zero on failure (with errno set) */ int my_fseek (FILE *fp, my_off_t offset, int whence) { #if defined (HAVE_FSEEKO) && SIZEOF_OFF_T >= 8 return fseeko(fp, offset, whence); #elif defined (HAVE_FSEEK64) return fseek64(fp, offset, whence); #elif defined (__BEOS__) return _fseek(fp, offset, whence); #elif SIZEOF_FPOS_T >= 8 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos() and fgetpos() to implement fseek()*/ fpos_t pos; switch (whence) { case SEEK_END: #ifdef MS_WINDOWS fflush (fp); if (_lseeki64 (fileno(fp), 0, 2) == -1) return -1; #else if (fseek (fp, 0, SEEK_END) != 0) return -1; #endif /* fall through */ case SEEK_CUR: if (fgetpos (fp, &pos) != 0) return -1; offset += pos; break; /* case SEEK_SET: break; */ } return fsetpos(fp, &offset); #else #error "Large file support, but no way to fseek." #endif }
// TODO: Remove this ASAP. Rewrite the decompression routines to allow easy decompression of multiple patch file versions. To future me: Consider using a C++ class for decompression. bool ZPatcher::FileDecompress_Version_1(CLzma2Dec* decoder, FILE* sourceFile, FILE* destFile) { ELzmaStatus status; const SizeT buffer_size = 1 << 16; Byte sourceBuffer[buffer_size]; Byte destBuffer[buffer_size]; SizeT sourceLen = 0; SizeT destLen = buffer_size; int64_t sourceFilePos = ftell64(sourceFile); // We must reinitialize every time we want a decode a new file. Lzma2Dec_Init(decoder); while (true) { sourceLen = fread(sourceBuffer, 1, buffer_size, sourceFile); SRes res = Lzma2Dec_DecodeToBuf(decoder, destBuffer, &destLen, sourceBuffer, &sourceLen, LZMA_FINISH_ANY, &status); assert(res == SZ_OK); fwrite(destBuffer, 1, destLen, destFile); sourceFilePos += sourceLen; res = fseek64(sourceFile, sourceFilePos, SEEK_SET); assert(res == 0); if (res == SZ_OK && status == LZMA_STATUS_FINISHED_WITH_MARK) break; } return true; }
// Returns `length` characters of sequence `seqname` beginning at offset
// `start`, with the line breaks stored in the FASTA file stripped out.
//
// Prints an error to cerr and terminates the process with exit(1) when
// `start` is negative, `length` is below 1, or the file cannot be positioned
// at the requested offset. If fewer bytes than expected can be read, the
// result contains only what was read.
string FastaReference::getSubSequence(string seqname, int start, int length) {

    FastaIndexEntry entry = index->entry(seqname);

    if (start < 0 || length < 1) {
        cerr << "Error: cannot construct subsequence with negative offset or length < 1" << endl;
        exit(1);
    }

    // we have to handle newlines
    // approach: count newlines before start
    //           count newlines by end of read
    //           subtracting newlines before start find count of embedded newlines
    int newlines_before = start > 0 ? (start - 1) / entry.line_blen : 0;
    int newlines_by_end = (start + length - 1) / entry.line_blen;
    int newlines_inside = newlines_by_end - newlines_before;
    int seqlen = length + newlines_inside;

    if (fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET) != 0) {
        cerr << "Error: cannot seek to the requested subsequence of " << seqname << endl;
        exit(1);
    }

    // Read straight into a string so no manual allocation has to be checked
    // or released.
    string s(seqlen, '\0');
    size_t nread = fread(&s[0], sizeof(char), seqlen, file);
    s.resize(nread);

    // Drop the embedded line breaks and any NUL bytes.
    s.erase(remove(s.begin(), s.end(), '\n'), s.end());
    s.erase(remove(s.begin(), s.end(), '\0'), s.end());
    return s;
}
// Returns up to `length` characters of sequence `seqname` beginning at offset
// `start`, with the line breaks stored in the FASTA file stripped out.
//
// `length` is first limited to entry.length - start. An empty string is
// returned when `start` is negative, when the limited length is below 1, when
// the file cannot be positioned, or when nothing could be read.
string FastaReference::getSubSequence(string seqname, int start, int length) {

    FastaIndexEntry entry = index->entry(seqname);

    length = min(length, entry.length - start);
    if (start < 0 || length < 1) {
        //cerr << "Empty sequence" << endl;
        return "";
    }

    // we have to handle newlines
    // approach: count newlines before start
    //           count newlines by end of read
    //           subtracting newlines before start find count of embedded newlines
    int newlines_before = start > 0 ? (start - 1) / entry.line_blen : 0;
    int newlines_by_end = (start + length - 1) / entry.line_blen;
    int newlines_inside = newlines_by_end - newlines_before;
    int seqlen = length + newlines_inside;

    if (fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET) != 0)
        return "";

    // Read straight into the result string: there is no separately allocated
    // buffer that could leak when the read returns nothing.
    string s(seqlen, '\0');
    size_t nread = fread(&s[0], sizeof(char), seqlen, file);
    s.resize(nread);

    // Drop the embedded line breaks and any NUL bytes.
    s.erase(remove(s.begin(), s.end(), '\n'), s.end());
    s.erase(remove(s.begin(), s.end(), '\0'), s.end());
    return s;
}
/* Moves the stream of p_ctx to the absolute byte position `offset`.
 *
 * The resulting status is stored in p_ctx->status and also returned:
 * VC_CONTAINER_SUCCESS when the seek worked, VC_CONTAINER_ERROR_EOS when the
 * offset cannot be represented for fseek() or the stream reports end-of-file
 * after a failed seek, VC_CONTAINER_ERROR_FAILED for any other seek failure.
 */
static VC_CONTAINER_STATUS_T io_file_seek(VC_CONTAINER_IO_T *p_ctx, int64_t offset)
{
   VC_CONTAINER_STATUS_T status = VC_CONTAINER_SUCCESS;
   int ret;

   //FIXME: large file support
#ifdef _VIDEOCORE
   extern int fseek64(FILE *fp, int64_t offset, int whence);
   ret = fseek64(p_ctx->module->stream, offset, SEEK_SET);
#else
   /* fseek() takes a long, so that is the limit to test against. Using
      UINT_MAX let values above LONG_MAX wrap negative where long is 32 bits
      and needlessly rejected offsets above 4 GiB where long is 64 bits. */
   if (offset > (int64_t)LONG_MAX)
   {
      p_ctx->status = VC_CONTAINER_ERROR_EOS;
      return VC_CONTAINER_ERROR_EOS;
   }
   ret = fseek(p_ctx->module->stream, (long)offset, SEEK_SET);
#endif

   if(ret)
   {
      if( feof(p_ctx->module->stream) ) status = VC_CONTAINER_ERROR_EOS;
      else status = VC_CONTAINER_ERROR_FAILED;
   }

   p_ctx->status = status;
   return status;
}
int imgwriter::write(int lba, int scnt, int ssz, void* buff) { int res=0; #if defined(HAVE_FSEEKO) && defined(OFFT_64BIT) off_t offs = ssz*(off_t)lba; #else int64_t offs = ssz*(int64_t)lba; #endif mutex->lock(); iso = fopen64(fname, "r+"); if (iso) { #if defined(HAVE_FSEEKO) && defined(OFFT_64BIT) if (fseeko(iso, offs, SEEK_SET)) #elif defined(HAVE_FSEEK64) if (fseek64(iso, offs, SEEK_SET)) #else if (fseek(iso, offs, SEEK_SET)) #endif { printf("\nseek() failed! Offs: %lld (%08LX)\n", offs, offs); mutex->unlock(); return 0; } res = fwrite(buff, ssz, scnt, iso); // printf("\nwrote: %ld of %ld\n", res, scnt); fclose(iso); } mutex->unlock(); return res; }
int FSEEK64(FILE *stream,long long offset,int ptrname) { int ret=-1; if (stream==NULL) programmer_error("NULL file pointer passed to FSEEK64.\n"); #if defined(irix) ret=fseek64(stream,offset,ptrname); #elif defined(cygwin) || defined(darwin) // On cygwin and darwin, the fseeko function is 64-bit ready ret=fseeko(stream,offset,ptrname); #elif defined(mingw) // On MinGW, use fseeko64 (Windows native) ret=fseeko64(stream,offset,ptrname); #else ret=fseeko64(stream,offset,ptrname); #endif if (ret==-1) { fprintf(stderr, "The file offset passed is: %lld\n", offset); if (fLog!=NULL) fprintf(fLog, "The file offset passed is: %lld\n", offset); if (caplib_behavior_on_error == BEHAVIOR_ON_ERROR_ABORT) programmer_error("Stream passed to FSEEK64 is not seekable.\n"); else { fprintf(stderr, "Stream passed to FSEEK64 is not seekable.\n"); if (fLog!=NULL) fprintf(fLog, "Stream passed to FSEEK64 is not seekable.\n"); } } return ret; }
// returns the file size for the specified filename off_type GetFileSize(const char* filename) { FILE* FILEHANDLE = NULL; off_type fileSize = 0; if(fopen_s(&FILEHANDLE, filename, "rb") != 0) { cout << "ERROR: Unable to open file (" << filename << ") when getting file size." << endl; exit(1); } if(FILEHANDLE) { if(fseek64(FILEHANDLE, 0, SEEK_END) != 0) { cout << "ERROR: Unable to go to the end of the file (" << filename << ")" << endl; exit(1); } fileSize = ftell64(FILEHANDLE); fclose(FILEHANDLE); } if ( FILEHANDLE != NULL ) fclose( FILEHANDLE ); return fileSize; }
// Seeks `aStream` to `aOffset` relative to `aOrigin` with a 64-bit offset,
// forwarding to _fseeki64 on Windows and to fseek64 elsewhere. Returns the
// result of the underlying call.
int large_file_seek(FILE* aStream, long long aOffset, int aOrigin)
{
#ifdef _WIN32
	return _fseeki64(aStream, aOffset, aOrigin);
#else
	// The stream argument is the aStream parameter (it was misspelled
	// "aString", which is not declared anywhere in this function).
	return fseek64(aStream, aOffset, aOrigin);
#endif
}
// Public library function. Patches the 4 bytes at `offset` in the file `path`
// with a value computed so that the CRC-32 of the whole file becomes `newcrc`,
// then re-reads the file to confirm the result. Progress messages go to
// stdout when `printstatus` is true.
// Returns NULL on success. On an I/O failure the result is a string starting
// with "I/O error: " (please see perror()); any other failure yields a
// different descriptive string.
const char *modify_file_crc32(const char *path, uint64_t offset, uint32_t newcrc, bool printstatus) {
	FILE *f = fopen(path, "r+b");
	if (f == NULL)
		return "I/O error: fopen";

	// Read entire file and calculate original CRC-32 value.
	// Note: We can't use fseek(f, 0, SEEK_END) + ftell(f) to determine the length of the file, due to undefined behavior.
	// To be portable, we also avoid using POSIX fseeko()+ftello() or Windows GetFileSizeEx()/_filelength().
	uint64_t length;
	uint32_t crc = get_crc32_and_length(f, &length);

	// The 4 bytes to patch must lie inside the file; the first test keeps
	// offset + 4 from wrapping around.
	if (offset > UINT64_MAX - 4 || offset + 4 > length) {
		fclose(f);
		return "Error: Byte offset plus 4 exceeds file length";
	}
	if (printstatus)
		fprintf(stdout, "Original CRC-32: %08" PRIX32 "\n", reverse_bits(crc));

	// Compute the change to make
	uint32_t delta = crc ^ newcrc;
	delta = (uint32_t)multiply_mod(reciprocal_mod(pow_mod(2, (length - offset) * 8)), delta);

	// Patch 4 bytes in the file: read each byte, XOR it with the matching byte
	// of the patch value, step back one byte and write the result in place.
	fseek64(f, offset);
	const char *failure = NULL;
	for (int i = 0; i < 4 && failure == NULL; i++) {
		int b = fgetc(f);
		if (b == EOF) {
			failure = "I/O error: fgetc";
		} else {
			b ^= (int)((reverse_bits(delta) >> (i * 8)) & 0xFF);
			if (fseek(f, -1, SEEK_CUR) != 0)
				failure = "I/O error: fseek";
			else if (fputc(b, f) == EOF)
				failure = "I/O error: fputc";
			else if (fflush(f) == EOF)
				failure = "I/O error: fflush";
		}
	}
	if (failure != NULL) {
		fclose(f);
		return failure;
	}
	if (printstatus)
		fprintf(stdout, "Computed and wrote patch\n");

	// Recheck entire file
	bool match = get_crc32_and_length(f, &length) == newcrc;
	fclose(f);
	if (!match)
		return "Assertion error: Failed to update CRC-32 to desired value";

	if (printstatus)
		fprintf(stdout, "New CRC-32 successfully verified\n");
	return NULL;  // Success
}
uint64_t FileHandle::Size() const { int64_t const pos = ftell64(m_file); if (pos == INVALID_POS) THROWEX(FileException, (E2S(), pos)); if (fseek64(m_file, 0, SEEK_END)) THROWEX(FileException, (E2S())); int64_t const size = ftell64(m_file); if (size == INVALID_POS) THROWEX(FileException, (E2S(), size)); if (fseek64(m_file, pos, SEEK_SET)) THROWEX(FileException, (E2S(), pos)); ASSERT(size >= 0, ()); return static_cast<uint64_t>(size); }
/* fseek() wrapper selecting a large-file variant at compile time.
 *
 * When both HAVE_EXPLICIT_LARGEFILE_SUPPORT and LARGE_SMB_OFF_T are defined,
 * fseek64() is used if available, otherwise fseeko64() if available. In all
 * other configurations plain fseek() is called. The chosen function's return
 * value is passed through.
 */
int sys_fseek(FILE *fp, SMB_OFF_T offset, int whence)
{
#if defined(HAVE_EXPLICIT_LARGEFILE_SUPPORT) && defined(LARGE_SMB_OFF_T)
#  if defined(HAVE_FSEEK64)
	return fseek64(fp, offset, whence);
#  elif defined(HAVE_FSEEKO64)
	return fseeko64(fp, offset, whence);
#  else
	return fseek(fp, offset, whence);
#  endif
#else
	return fseek(fp, offset, whence);
#endif
}
// Moves the file cursor to the end of the file.
// Returns true on success. Logs through FATAL and returns false when no file
// is open or the seek fails.
bool File::SeekEnd() {
	if (_pFile != NULL) {
		const bool moved = (fseek64(_pFile, 0, SEEK_END) == 0);
		if (!moved) {
			FATAL("Unable to seek to the end of file");
		}
		return moved;
	}

	FATAL("File not opened");
	return false;
}
// Moves the file cursor to the beginning of the file.
// Returns true on success. Logs through FATAL and returns false when no file
// is open or the seek fails.
bool File::SeekBegin() {
	if (_pFile != NULL) {
		const bool moved = (fseek64(_pFile, 0, SEEK_SET) == 0);
		if (!moved) {
			FATAL("Unable to seek to the beginning of file");
		}
		return moved;
	}

	FATAL("File not opened");
	return false;
}
void FileData::Seek(uint64_t pos) { ASSERT_NOT_EQUAL(m_Op, OP_APPEND, (m_FileName, m_Op, pos)); #ifdef OMIM_OS_TIZEN result const error = m_File->Seek(Tizen::Io::FILESEEKPOSITION_BEGIN, pos); if (IsFailed(error)) MYTHROW(Writer::SeekException, (m_FileName, m_Op, error, pos)); #else if (fseek64(m_File, pos, SEEK_SET)) MYTHROW(Writer::SeekException, (GetErrorProlog(), pos)); #endif }
/* Function: GSI64GetRecord()
 *
 * Purpose:  Each non-header record of a GSI64 index file consists
 *           of 42 bytes: 32 bytes of character string, a 2 byte
 *           short, and an 8 byte long long. This function reads the
 *           three fields in that order; a field whose output pointer
 *           is NULL is skipped over in the file instead of being read.
 *
 * Args:     gsi - open GSI64 index file, correctly positioned at a record
 *           f1  - char[32], allocated by caller (or NULL if unwanted)
 *           f2  - pointer to short (or NULL if unwanted)
 *           f3  - pointer to long long (or NULL if unwanted)
 *
 * Return:   1 on success; 0 on failure, with squid_errno set to
 *           SQERR_NODATA.
 */
int
GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3)
{
  /* key string */
  if (f1 != NULL) {
    if (fread(f1, GSI64_KEYSIZE, 1, gsi->gsifp) == 0)
      { squid_errno = SQERR_NODATA; return 0; }
  } else {
    fseek64(gsi->gsifp, GSI64_KEYSIZE, SEEK_CUR);
  }

  /* 2-byte field */
  if (f2 != NULL) {
    if (fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp) == 0)
      { squid_errno = SQERR_NODATA; return 0; }
  } else {
    fseek64(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR);
  }

  /* 8-byte field */
  if (f3 != NULL) {
    if (fread(f3, sizeof(sqd_uint64), 1, gsi->gsifp) == 0)
      { squid_errno = SQERR_NODATA; return 0; }
  } else {
    fseek64(gsi->gsifp, sizeof(sqd_uint64), SEEK_CUR);
  }

#if 0 /* no byteswap yet! HACK! */
  if (f2 != NULL) *f2 = sre_ntohs(*f2);
  if (f3 != NULL) *f3 = sre_ntohl(*f3);
#endif

  return 1;
}
/* Function: GSI64GetOffset()
 *
 * Purpose:  From a key (sequence name), find a disk offset
 *           in an open general sequence index file by binary
 *           search. Presumably GSI64 indexing could be even faster
 *           if we used hashing.
 *
 * Args:     gsi         - GSI64 index file, opened by GSI64Open()
 *           key         - name of key to retrieve indices for
 *           ret_seqfile - pre-alloced char[32] array for seqfile name
 *           ret_format  - return: format of seqfile
 *           ret_offset  - return: disk offset in seqfile.
 *
 * Return:   1 if the key was found and the return arguments were filled in;
 *           0 if the key is not in the index or a record could not be read.
 */
int
GSI64GetOffset(GSI64FILE *gsi, char *key, char *ret_seqfile,
	       int *ret_format, long long *ret_offset)
{
  sqd_uint64  left, right, mid;
  int         cmp;
  int         found = 0;
  char        name[GSI64_KEYSIZE + 1];
  sqd_uint64  offset;
  sqd_uint16  filenum;
  sqd_uint64  fmt;

  name[GSI64_KEYSIZE] = '\0';

  /* The key records searched here are numbered nfiles+1 .. nfiles+recnum. */
  left  = gsi->nfiles + 1;
  right = gsi->nfiles + gsi->recnum;
  mid   = (left + right) / 2;
  fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET);

  while (GSI64GetRecord(gsi, name, &filenum, &offset))
    {
      cmp = strcmp(name, key);
      if (cmp == 0)           { found = 1; break; }  /* found it!              */
      else if (left >= right) return 0;              /* oops, missed it; fail. */
      else if (cmp < 0)       left  = mid + 1;       /* it's right of mid      */
      else                    right = mid - 1;       /* it's left of mid       */
      mid = (left + right) / 2;
      fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET);
    }

  /* The loop also ends when a record cannot be read. In that case filenum
   * and offset were never filled in, so this must not be reported as a hit.
   */
  if (!found) return 0;

  /* Using file number, look up the sequence file and format. */
  fseek64(gsi->gsifp, filenum * GSI64_RECSIZE, SEEK_SET);
  if (!GSI64GetRecord(gsi, ret_seqfile, NULL, &fmt)) return 0;

  *ret_format = (int) fmt;
  *ret_offset = (long long) offset;

  return 1;
}
// Returns the whole sequence `seqname` with the line breaks stored in the
// FASTA file stripped out.
//
// The number of bytes to read is entry.length plus one line break per
// entry.line_blen characters, starting at entry.offset. If fewer bytes can be
// read, the result contains only what was read.
string FastaReference::getSequence(string seqname) {
    FastaIndexEntry entry = index->entry(seqname);
    int newlines_in_sequence = entry.length / entry.line_blen;
    int seqlen = newlines_in_sequence  + entry.length;

    fseek64(file, entry.offset, SEEK_SET);

    // Read straight into a string so no manual allocation has to be checked
    // or released.
    string s(seqlen, '\0');
    size_t nread = fread(&s[0], sizeof(char), seqlen, file);
    s.resize(nread);

    // Drop the embedded line breaks and any NUL bytes.
    s.erase(remove(s.begin(), s.end(), '\n'), s.end());
    s.erase(remove(s.begin(), s.end(), '\0'), s.end());
    return s;
}
/* Reads `sz` bytes at offset `off` of the RomFS region of in_fd and writes
 * the bytes actually read to emulated memory at `ptr` via mem_Write(). When
 * loader_encrypted is set, the data is passed through the CTR cipher first.
 *
 * *read_out receives the number of bytes read (0 on failure).
 * Returns 0 on success and -1 (as u32) for invalid parameters, a failed
 * seek, a failed allocation or a failed mem_Write().
 */
static u32 rawromfs_Read(file_type* self, u32 ptr, u32 sz, u64 off, u32* read_out)
{
    *read_out = 0;

    if((off >> 32) || (off >= romfs_sz) || ((off+sz) >= romfs_sz)) {
        ERROR("Invalid read params.\n");
        return -1;
    }

    if(fseek64(in_fd, romfs_off + off, SEEK_SET) == -1) {
        ERROR("fseek failed.\n");
        return -1;
    }

    u8* b = malloc(sz);
    if(b == NULL) {
        ERROR("Not enough mem.\n");
        return -1;
    }

    u32 read = fread(b, 1, sz, in_fd);

    ctr_aes_context ctx;

    if (loader_encrypted) {
        /* The data is placed (off & 0xF) bytes into the scratch buffer
           before the cipher runs over it, then copied back out. */
        u8* temp = calloc(sz + (off & 0xF) + (sz & 0xF), sizeof(u8));
        if(temp == NULL) {
            /* Previously unchecked: a failed allocation was dereferenced. */
            ERROR("Not enough mem.\n");
            free(b);
            return -1;
        }
        memcpy(temp + (off & 0xF), b, sz);
        ncch_extract_prepare(&ctx, &loader_h, NCCHTYPE_ROMFS, loader_key);
        ctr_add_counter(&ctx, (u32)((0x1000 + off) / 0x10)); //this is from loader
        ctr_crypt_counter(&ctx, temp, temp, sz + (off & 0xF));
        memcpy(b, temp + (off & 0xF), sz);
        free(temp);
    }

    if(mem_Write(b, ptr, read) != 0) {
        ERROR("mem_Write failed.\n");
        free(b);
        return -1;
    }

    *read_out = read;
    free(b);

    return 0; // Result
}
// Moves the file cursor to the absolute offset `position`.
// Returns true on success. Logs through FATAL and returns false when no file
// is open, when `position` is larger than _size, or when the seek fails.
bool File::SeekTo(uint64_t position) {
	if (_pFile == NULL) {
		FATAL("File not opened");
		return false;
	}

	if (_size < position) {
		FATAL("End of file will be reached");
		return false;
	}

	if (fseek64(_pFile, (PIOFFT) position, SEEK_SET) != 0) {
		// The space before PRIu64 is required: since C++11 a macro glued to
		// a string literal is lexed as a user-defined-literal suffix.
		FATAL("Unable to seek to position %" PRIu64, position);
		return false;
	}

	return true;
}
void FileData::Read(uint64_t pos, void * p, size_t size) { #ifdef OMIM_OS_TIZEN result error = m_File->Seek(Tizen::Io::FILESEEKPOSITION_BEGIN, pos); if (IsFailed(error)) MYTHROW(Reader::ReadException, (error, pos)); int const bytesRead = m_File->Read(p, size); error = GetLastResult(); if (static_cast<size_t>(bytesRead) != size || IsFailed(error)) MYTHROW(Reader::ReadException, (m_FileName, m_Op, error, bytesRead, pos, size)); #else if (fseek64(m_File, pos, SEEK_SET)) MYTHROW(Reader::ReadException, (GetErrorProlog(), pos)); size_t const bytesRead = fread(p, 1, size, m_File); if (bytesRead != size || ferror(m_File)) MYTHROW(Reader::ReadException, (GetErrorProlog(), bytesRead, pos, size)); #endif }
// closes the read archive void CReadWriter::Close(void) { // prevent the archive from being updated elsewhere mIsOpen = false; // flush the buffer if(mPartitionMembers > 0) WritePartition(); // ================= // update the header // ================= // update the number of reads in the archive fseek64(mOutStream, UPDATE_HEADER_OFFSET, SEEK_SET); fwrite((char*)&mNumReads, SIZEOF_UINT64, 1, mOutStream); // update the number of bases in the archive fwrite((char*)&mNumBases, SIZEOF_UINT64, 1, mOutStream); // close the file stream fclose(mOutStream); }
// Moves the file cursor `count` bytes forward from its current position.
// Returns true on success. Logs through FATAL and returns false when no file
// is open, when `count` is negative, when the move would pass _size, or when
// the seek fails.
bool File::SeekAhead(int64_t count) {
	if (_pFile == NULL) {
		FATAL("File not opened");
		return false;
	}

	if (count < 0) {
		FATAL("Invalid count");
		return false;
	}

	if (count + Cursor() > _size) {
		FATAL("End of file will be reached");
		return false;
	}

	if (fseek64(_pFile, (PIOFFT) count, SEEK_CUR) != 0) {
		// The spaces around PRId64 are required: since C++11 a macro glued
		// to a string literal is lexed as a user-defined-literal suffix.
		FATAL("Unable to seek ahead %" PRId64 " bytes", count);
		return false;
	}

	return true;
}
// Moves the file cursor `count` bytes backward from its current position.
// Returns true on success. Logs through FATAL and returns false when no file
// is open, when `count` is negative, when `count` is larger than the current
// cursor position, or when the seek fails.
bool File::SeekBehind(int64_t count) {
	if (_pFile == NULL) {
		FATAL("File not opened");
		return false;
	}

	if (count < 0) {
		FATAL("Invalid count");
		return false;
	}

	if (Cursor() < (uint64_t) count) {
		FATAL("End of file will be reached");
		return false;
	}

	if (fseek64(_pFile, (PIOFFT) (-1 * count), SEEK_CUR) != 0) {
		// The spaces around PRId64 are required: since C++11 a macro glued
		// to a string literal is lexed as a user-defined-literal suffix.
		FATAL("Unable to seek behind %" PRId64 " bytes", count);
		return false;
	}

	return true;
}
main(int argc, char **argv) { FILE *infp=stdin, *outfp=stdout; usghed usgh; int ierr; float *grid, *g, *g1; int i1, i2, i3; int k1, k2, k3; int h1, h2, h3; int n1, n2, n3; int j1, j2, j3; int m1, m2, m3; int i; float gmin, gmax; float tmp; int op, w1, w2, w3; int nz, iz; int si1, si2, si3, ni1, ni2, ni3; /* initialization */ initargs(argc,argv); askdoc(1); /* large than 2 GB files */ file2g(infp); file2g(infp); /* read in the grid header */ ierr = fgetusghdr(infp, &usgh); if(ierr!=0) err("non standard grid header input "); /* get the dimensions of input grid */ n1 = usgh.n1; n2 = usgh.n2; n3 = usgh.n3; /* get input parameters */ if (!getparint("op",&op)) op = 0; if (!getparint("w1",&w1)) w1 = 5; if (!getparint("w2",&w2)) w2 = 5; if (!getparint("w3",&w3)) w3 = 5; if(w1<1) w1 = 1; if(w1>n1) w1 = n1; if(w2<1) w2 = 1; if(w2>n2) w2 = n2; if(w3<1) w3 = 1; if(w3>n3) w3 = n3; if (!getparint("si1",&si1)) si1 = 1; if(si1<1)si1=1; if(si1>n1)si1=n1; if (!getparint("si2",&si2)) si2 = 1; if(si2<1)si2=1; if(si2>n2)si2=n2; if (!getparint("si3",&si3)) si3 = 1; if(si3<1)si3=1; if(si3>n3)si3=n3; if (!getparint("ni1",&ni1)) ni1 = n1; if(ni1<1)ni1=1;if(ni1>n1)ni1=n1; if (!getparint("ni2",&ni2)) ni2 = n2; if(ni2<1)ni2=1;if(ni2>n2)ni2=n2; if (!getparint("ni3",&ni3)) ni3 = n3; if(ni3<1)ni3=1;if(ni3>n3)ni3=n3; /* memory allocations */ if(n1*n2*n3>1024*1024*1024/4) err(" input grid too big; subsample recommended \n"); grid = (float*)emalloc(n1*n2*n3*sizeof(float)); g = (float*)emalloc(w1*w2*w3*sizeof(float)); g1 = (float*) emalloc(n1*sizeof(float)); fseek64(infp,0,0); efread(grid,sizeof(float),n1*n2*n3,infp); h1 = w1/2; h2 = w2/2; h3 = w3/2; nz = w1*w2*w3; iz = (50*nz/100.); si1 = si1 - 1; si2 = si2 - 1; si3 = si3 - 1; for (i3=0;i3<n3;i3++) { for (i2=0;i2<n2;i2++) { k3 = i3 - h3; k2 = i2 - h2; for(i1=0;i1<n1;i1++) g1[i1] = grid[i1+i2*n1+i3*n1*n2]; if( i2>=si2 && i2<ni2 && i3>=si3 && i3<ni3 ) { for (i1=si1-1;i1<ni1;i1++) { i = 0; k1 = i1 - h1; for (j3=k3;j3<k3+w3;j3++) { m3=j3; if(m3<0)m3=0; 
if(m3>n3-1)m3=n3-1; for (j2=k2;j2<k2+w2;j2++) { m2=j2; if(m2<0)m2=0; if(m2>n2-1)m2=n2-1; for (j1=k1;j1<k1+w1;j1++) { m1=j1; if(m1<0)m1=0; if(m1>n1-1)m1=n1-1; g[i] = grid[m1+m2*n1+m3*n1*n2]; i = i + 1; } } } if(op==0) { qkfind(iz,nz,g); g1[i1] = g[iz]; } else if(op==1) { tmp = 0.; for(i=0;i<nz;i++) { tmp = tmp + g[i]; } g1[i1] = tmp/nz; } } } if(i2==0 && i3==0) { gmin = g1[0]; gmax = g1[0]; } for(i1=0;i1<n1;i1++) { if(gmin>g1[i1]) gmin = g1[i1]; if(gmax<g1[i1]) gmax = g1[i1]; } fwrite(g1,sizeof(float),n1,outfp); } } /* update the output gridheader header */ usgh.gmin = gmin; usgh.gmax = gmax; /* output the grid header */ ierr = fputusghdr(outfp, &usgh); if(ierr!=0) err("output grid header error "); free(grid); free(g); free(g1); exit(0); }
// opens the alignment archive void CAlignmentReader::Open(const string& filename) { if(mIsOpen) { cout << "ERROR: An attempt was made to open an already open alignment archive." << endl; exit(1); } mInputFilename = filename; mInStream = NULL; if(fopen_s(&mInStream, filename.c_str(), "rb") != 0) { cout << "ERROR: Could not open the compressed alignment archive (" << mInputFilename << ") for reading." << endl; exit(1); } mIsOpen = true; // =============== // read the header // =============== // MOSAIK_SIGNATURE[6] 0 - 5 // STATUS[1] 6 - 6 // SEQUENCE_TECHNOLOGY[2] 7 - 8 // ARCHIVE_DATE[8] 9 - 16 // NUM_REFERENCE_SEQS[4] 17 - 20 // NUM_READ_GROUPS[4] 21 - 24 // NUM_READS[8] 25 - 32 // NUM_BASES[8] 33 - 40 // REFERENCES_OFFSET[8] 41 - 48 // REFERENCE_GAP_OFFSET[8] 49 - 57 // INDEX_OFFSET[8] 58 - 63 // NUM_READ_GROUP_TAGS[1] 64 - 64 // READ_GROUPS[*] // check the MOSAIK signature char signature[SIGNATURE_LENGTH + 1]; signature[SIGNATURE_LENGTH] = 0; fread( signature, SIGNATURE_LENGTH, 1, mInStream ); // check if the read signatures match //if(strncmp(signature, MOSAIK_SIGNATURE, 5) != 0) { if ( ( strncmp( signature, ALIGNER_SIGNATURE, SIGNATURE_LENGTH - 1 ) != 0 ) && ( strncmp( signature, SORT_SIGNATURE, SIGNATURE_LENGTH - 1 ) != 0 ) ) { //if(strncmp(signature, ALIGNER_SIGNATURE, 5) != 0) { printf("ERROR: It seems that the input file (%s) is not in the MOSAIK alignment format.\n", filename.c_str()); exit(1); } //if(MOSAIK_SIGNATURE[5] != signature[5]) { if ( ( signature[5] != ALIGNER_SIGNATURE[5] ) && ( signature[5] != ALIGNER_SIGNATURE5[5] ) && ( signature[5] != SORT_SIGNATURE[5] ) ) { //if ( ( signature[5] != ALIGNER_SIGNATURE[5] ) && ( signature[5] != ALIGNER_SIGNATURE5[5] ) ) { //char version = ( strncmp( signature, ALIGNER_SIGNATURE, SIGNATURE_LENGTH - 1 ) == 0 ) ? ALIGNER_SIGNATURE[5] : SORT_SIGNATURE[5]; //printf("ERROR: It seems that the input file (%s) was created in another version of MosaikAligner. 
" // "This version of MOSAIK expected to find an alignment archive using version: %hu, but the " // "alignment archive uses version: %hu. A new alignment archive is required.\n", // filename.c_str(), version, signature[5]); printf("ERROR: It seems that the input file (%s) was created in another version of MosaikAligner. " "This version of MOSAIK expected to find an alignment archive using version: 4 or 5, but the " "alignment archive uses version: %hu. A new alignment archive is required.\n", filename.c_str(), signature[5]); exit(1); } MosaikSignature = new char [ SIGNATURE_LENGTH + 1 ]; memcpy( MosaikSignature, signature, SIGNATURE_LENGTH ); MosaikSignature[ SIGNATURE_LENGTH ] = 0; // retrieve the alignment file status mStatus = (AlignmentStatus)fgetc(mInStream); // retrieve the sequencing technology fread((char*)&mSeqTech, SIZEOF_SHORT, 1, mInStream); // skip the archive date fseek64(mInStream, SIZEOF_UINT64, SEEK_CUR); // retrieve the number of reference sequences fread((char*)&mNumRefSeqs, SIZEOF_INT, 1, mInStream); // retrieve the number of read groups unsigned int numReadGroups; fread((char*)&numReadGroups, SIZEOF_INT, 1, mInStream); // retrieve the number of reads fread((char*)&mNumReads, SIZEOF_UINT64, 1, mInStream); if(mNumReads == 0) { printf("ERROR: The alignment archive header indicates that no reads are contained in\n"); printf(" this file. This might happen when the file was not closed properly -\n"); printf(" usually from a killed process or a crash. 
Your only recourse is to\n"); printf(" realign this data set.\n"); printf(" filename: [%s]\n", filename.c_str()); exit(1); } // retrieve the number of bases fread((char*)&mNumBases, SIZEOF_UINT64, 1, mInStream); // retrieve the references offset off_type referencesOffset = 0; fread((char*)&referencesOffset, SIZEOF_OFF_TYPE, 1, mInStream); // retrieve the reference gaps offset fread((char*)&mReferenceGapOffset, SIZEOF_OFF_TYPE, 1, mInStream); // retrieve the index offset fread((char*)&mIndexOffset, SIZEOF_OFF_TYPE, 1, mInStream); // retrieve the number of header tags const unsigned char numHeaderTags = (unsigned char)fgetc(mInStream); if(numHeaderTags != 0) { for(unsigned char j = 0; j < numHeaderTags; j++) { Tag tag; ReadTag(tag); mHeaderTags[tag.ID] = tag; } } // DEBUG //cout << "mStatus: " << (short)mStatus << endl; //cout << "mSeqTech: " << mSeqTech << endl; //cout << "mNumRefSeqs: " << mNumRefSeqs << endl; //cout << "numReadGroups: " << numReadGroups << endl; //cout << "mNumReads: " << mNumReads << endl; //cout << "mNumBases: " << mNumBases << endl; //cout << "referencesOffset: " << referencesOffset << endl; //cout << "mReferenceGapOffset: " << mReferenceGapOffset << endl; //cout << "mIndexOffset: " << mIndexOffset << endl; //cout << "numHeaderTags: " << (unsigned short)numHeaderTags << endl << endl; // retrieve the read groups mReadGroups.resize(numReadGroups); vector<ReadGroup>::iterator rgIter; for(rgIter = mReadGroups.begin(); rgIter != mReadGroups.end(); ++rgIter) { // read the metadata string lengths const unsigned char centerNameLen = (unsigned char)fgetc(mInStream); const unsigned char libraryNameLen = (unsigned char)fgetc(mInStream); const unsigned char platformUnitLen = (unsigned char)fgetc(mInStream); const unsigned char readGroupIDLen = (unsigned char)fgetc(mInStream); const unsigned char sampleNameLen = (unsigned char)fgetc(mInStream); unsigned short descriptionLen = 0; fread((char*)&descriptionLen, SIZEOF_SHORT, 1, mInStream); 
fread((char*)&rgIter->SequencingTechnology, SIZEOF_SHORT, 1, mInStream); fread((char*)&rgIter->MedianFragmentLength, SIZEOF_INT, 1, mInStream); rgIter->CenterName.resize(centerNameLen); rgIter->LibraryName.resize(libraryNameLen); rgIter->PlatformUnit.resize(platformUnitLen); rgIter->ReadGroupID.resize(readGroupIDLen); rgIter->SampleName.resize(sampleNameLen); rgIter->Description.resize(descriptionLen); // read the metadata strings fread((void*)rgIter->CenterName.data(), centerNameLen, 1, mInStream); fread((void*)rgIter->Description.data(), descriptionLen, 1, mInStream); fread((void*)rgIter->LibraryName.data(), libraryNameLen, 1, mInStream); fread((void*)rgIter->PlatformUnit.data(), platformUnitLen, 1, mInStream); fread((void*)rgIter->ReadGroupID.data(), readGroupIDLen, 1, mInStream); fread((void*)rgIter->SampleName.data(), sampleNameLen, 1, mInStream); // set the read group code rgIter->ReadGroupCode = ReadGroup::GetCode(*rgIter); // add the read group to our LUT mReadGroupLUT[rgIter->ReadGroupCode] = *rgIter; // retrieve the number of read group tags const unsigned char numReadGroupTags = (unsigned char)fgetc(mInStream); if(numReadGroupTags != 0) { printf("ERROR: Found %u read group tags, but support for read group tags has not been implemented yet.\n", numReadGroupTags); exit(1); } //// DEBUG //cout << "center name: " << rgIter->CenterName << endl; //cout << "description: " << rgIter->Description << endl; //cout << "library name: " << rgIter->LibraryName << endl; //cout << "platform unit: " << rgIter->PlatformUnit << endl; //cout << "read group ID: " << rgIter->ReadGroupID << endl; //cout << "sample name: " << rgIter->SampleName << endl; //cout << "sequencing technology: " << rgIter->SequencingTechnology << endl; //cout << "median fragment length: " << rgIter->MedianFragmentLength << endl << endl; } // store the reads offset mReadsOffset = ftell64(mInStream); // ============================ // read the reference sequences // ============================ // jump 
to the reference sequence section fseek64(mInStream, referencesOffset, SEEK_SET); mReferenceSequences.resize(mNumRefSeqs); //mRefSeqLUT = new char*[mNumRefSeqs]; mRefSeqLUT.resize( mNumRefSeqs ); unsigned int currentRefSeq = 0; vector<ReferenceSequence>::iterator rsIter; for(rsIter = mReferenceSequences.begin(); rsIter != mReferenceSequences.end(); ++rsIter, ++currentRefSeq) { // REFERENCE_SEQ_NAME_LEN[1] 0 - 0 // REFERENCE_SEQ_SPECIES_LEN[1] 1 - 1 // REFERENCE_SEQ_GENOME_ASSEMBLY_ID_LEN[1] 2 - 2 // REFERENCE_SEQ_URI_LEN[1] 3 - 3 // REFERENCE_SEQ_NUM_BASES[4] 4 - 7 // REFERENCE_SEQ_SEQ_OFFSET[8] 8 - 15 // REFERENCE_SEQ_MD5[16] 16 - 31 // REFERENCE_SEQ_NAME[X] 32 - XX // REFERENCE_SEQ_SPECIES[X] // REFERENCE_SEQ_GENOME_ASSEMBLY_ID[X] // REFERENCE_SEQ_URI[X] // read the name length const unsigned char nameLen = fgetc(mInStream); // read the species length const unsigned char speciesLen = fgetc(mInStream); // read the genome assembly id length const unsigned char genomeAssemblyIDLen = fgetc(mInStream); // read the uri length const unsigned char uriLen = fgetc(mInStream); // read the number of bases fread((char*)&rsIter->NumBases, SIZEOF_INT, 1, mInStream); // write the number of aligned reads fread((char*)&rsIter->NumAligned, SIZEOF_UINT64, 1, mInStream); // read the MD5 checksum rsIter->MD5.resize(32); char* pBuffer = (char*)rsIter->MD5.data(); fread(pBuffer, 32, 1, mInStream); // read the reference name rsIter->Name.resize(nameLen); pBuffer = (char*)rsIter->Name.data(); fread(pBuffer, nameLen, 1, mInStream); //mRefSeqLUT[currentRefSeq] = new char[nameLen + 1]; //mRefSeqLUT[currentRefSeq].resize( nameLen + 1 ); //memcpy(mRefSeqLUT[currentRefSeq], pBuffer, nameLen); mRefSeqLUT[currentRefSeq].insert( 0, pBuffer, nameLen ); //mRefSeqLUT[currentRefSeq][nameLen] = 0; mRefSeqLUT[currentRefSeq].push_back(0); // read the species name if(speciesLen > 0) { rsIter->Species.resize(speciesLen); pBuffer = (char*)rsIter->Species.data(); fread(pBuffer, speciesLen, 1, mInStream); } 
// read the genome assembly ID if(genomeAssemblyIDLen > 0) { rsIter->GenomeAssemblyID.resize(genomeAssemblyIDLen); pBuffer = (char*)rsIter->GenomeAssemblyID.data(); fread(pBuffer, genomeAssemblyIDLen, 1, mInStream); } // read the URI if(uriLen > 0) { rsIter->URI.resize(uriLen); pBuffer = (char*)rsIter->URI.data(); fread(pBuffer, uriLen, 1, mInStream); } // retrieve the number of reference sequence tags const unsigned char numReferenceSequenceTags = (unsigned char)fgetc(mInStream); if(numReferenceSequenceTags != 0) { printf("ERROR: Found reference sequence tags, but support for reference sequence tags has not been implemented yet.\n"); exit(1); } //// DEBUG //cout << "# bases: " << rsIter->NumBases << endl; //cout << "md5: " << rsIter->MD5 << endl; //cout << "name: " << rsIter->Name << endl; //cout << "species: " << rsIter->Species << endl; //cout << "genome assembly ID: " << rsIter->GenomeAssemblyID << endl; //cout << "URI: " << rsIter->URI << endl; } // ================================ // read the reference sequence gaps // ================================ CFastLZIO fio; if(mReferenceGapOffset != 0) { // jump to the reference gap location fseek64(mInStream, mReferenceGapOffset, SEEK_SET); // read the reference gaps vector fio.Read(mBuffer, mBufferLen, mInStream); unsigned int bufferOffset = 0; vector<GapInfo>::iterator gvIter; vector<vector<GapInfo> >::iterator rsgIter; mRefSeqGaps.resize(mNumRefSeqs); for(rsgIter = mRefSeqGaps.begin(); rsgIter != mRefSeqGaps.end(); ++rsgIter) { // retrieve the number of gaps for this reference sequence unsigned int numGaps = 0; memcpy((char*)&numGaps, mBuffer + bufferOffset, SIZEOF_INT); bufferOffset += SIZEOF_INT; // pre-allocate the reference gap vector rsgIter->resize(numGaps); for(gvIter = rsgIter->begin(); gvIter != rsgIter->end(); ++gvIter) { // retrieve the reference gap position memcpy((char*)&gvIter->Position, mBuffer + bufferOffset, SIZEOF_INT); bufferOffset += SIZEOF_INT; // retrieve the reference gap length 
memcpy((char*)&gvIter->Length, mBuffer + bufferOffset, SIZEOF_SHORT); bufferOffset += SIZEOF_SHORT; } } } // restore our file position Rewind(); }
// jumps to the block containing the specified reference index and position void CAlignmentReader::Jump(const unsigned int referenceIndex, const unsigned int referencePosition) { // =============== // parse the index // =============== if(mIndexOffset == 0) { cout << "ERROR: Cannot jump to the desired compressed block because the index offset was not set." << endl; exit(1); } // jump to the index offset and read the number of entries fseek64(mInStream, mIndexOffset, SEEK_SET); unsigned int numIndexEntries = 0; fread((char*)&numIndexEntries, SIZEOF_INT, 1, mInStream); // load the index CFastLZIO fio; char* pBuffer = mBuffer; fio.Read(pBuffer, mBufferLen, mInStream); mBuffer = pBuffer; // find the block containing the specified reference index and position unsigned int bufferOffset = 0; unsigned int index = 0; unsigned int position = 0; off_type offset = 0; bool foundBlock = false; for(unsigned int i = 0; i < numIndexEntries; ++i) { // retrieve the reference index memcpy((char*)&index, mBuffer + bufferOffset, SIZEOF_INT); bufferOffset += SIZEOF_INT; // store the reference position memcpy((char*)&position, mBuffer + bufferOffset, SIZEOF_INT); bufferOffset += SIZEOF_INT; // store the file offset memcpy((char*)&offset, mBuffer + bufferOffset, SIZEOF_UINT64); bufferOffset += SIZEOF_UINT64; // keep going until we find a compression block that is past our desired index and position if(index > referenceIndex) foundBlock = true; if((index == referenceIndex) && (position >= referencePosition)) foundBlock = true; if(foundBlock) break; } if(!foundBlock) { cout << "ERROR: A suitable compression block was not found in the index." << endl; exit(1); } fseek64(mInStream, offset, SEEK_SET); mCurrentRead = 0; mPartitionMembers = 0; mPartitionSize = 0; }
// sets the file pointer to the beginning of the read data void CAlignmentReader::Rewind(void) { fseek64(mInStream, mReadsOffset, SEEK_SET); mCurrentRead = 0; mPartitionMembers = 0; mPartitionSize = 0; }
// opens the read archive void CReadWriter::Open(const string& filename, const ReadStatus rs, const ReadGroup& readGroup) { if(mIsOpen) { cout << "ERROR: An attempt was made to open an already open read archive." << endl; exit(1); } mOutputFilename = filename; if(fopen_s(&mOutStream, filename.c_str(), "wb") != 0) { cout << "ERROR: Could not open the compressed read archive (" << mOutputFilename << ") for writing." << endl; exit(1); } mIsOpen = true; // initialization mBufferPosition = 0; mPartitionMembers = 0; // ================ // write the header // ================ // MOSAIK_SIGNATURE[6] 0 - 5 // STATUS[1] 6 - 6 // SEQUENCING_TECHNOLOGY[1] 7 - 7 // ARCHIVE_DATE[8] 8 - 15 // NUM_READS[8] 16 - 23 // NUM_BASES[8] 24 - 31 // MEDIAN_FRAGMENT_LENGTH[4] 32 - 35 // CENTER_NAME_LEN[1] 36 - 36 // LIBRARY_NAME_LEN[1] 37 - 37 // PLATFORM_UNIT_LEN[1] 38 - 38 // READ_GROUP_ID_LEN[1] 39 - 39 // SAMPLE_NAME_LEN[1] 40 - 40 // DESCRIPTION_LEN[2] 41 - 42 // RESERVED[8] 43 - 50 // CENTER_NAME[*] 51 // DESCRIPTION[*] // LIBRARY_NAME[*] // PLATFORM_UNIT[*] // READ_GROUP_ID[*] // SAMPLE_NAME[*] // write the MOSAIK signature const unsigned char SIGNATURE_LENGTH = 6; const char* MOSAIK_SIGNATURE = "MSKRA\1"; fwrite(MOSAIK_SIGNATURE, SIGNATURE_LENGTH, 1, mOutStream); // write the read status (currently single end or paired end) fputc((unsigned char)rs, mOutStream); // write the sequencing technology fputc((unsigned char)readGroup.SequencingTechnology, mOutStream); if(readGroup.SequencingTechnology == ST_SOLID) mIsSOLiD = true; // write the archive date uint64_t currentTime = CTimeSupport::GetSystemTime(); fwrite((char*)¤tTime, SIZEOF_UINT64, 1, mOutStream); // skip the number of reads and bases fseek64(mOutStream, 2 * SIZEOF_UINT64, SEEK_CUR); // write the median fragment length fwrite((char*)&readGroup.MedianFragmentLength, SIZEOF_INT, 1, mOutStream); // write the metadata string lengths: the lengths are checked in BuildMain.cpp const unsigned char centerNameLen = (unsigned 
char)readGroup.CenterName.size(); const unsigned char libraryNameLen = (unsigned char)readGroup.LibraryName.size(); const unsigned char platformUnitLen = (unsigned char)readGroup.PlatformUnit.size(); const unsigned char readGroupIDLen = (unsigned char)readGroup.ReadGroupID.size(); const unsigned char sampleNameLen = (unsigned char)readGroup.SampleName.size(); const unsigned short descriptionLen = (unsigned short)readGroup.Description.size(); fputc(centerNameLen, mOutStream); fputc(libraryNameLen, mOutStream); fputc(platformUnitLen, mOutStream); fputc(readGroupIDLen, mOutStream); fputc(sampleNameLen, mOutStream); fwrite((char*)&descriptionLen, SIZEOF_SHORT, 1, mOutStream); // write the reserved bytes const uint64_t reserved = 0; fwrite((char*)&reserved, SIZEOF_UINT64, 1, mOutStream); // convert the center name to lowercase string centerName = readGroup.CenterName; CSequenceUtilities::LowercaseSequence(centerName); // write the metadata strings fwrite(centerName.c_str(), centerNameLen, 1, mOutStream); fwrite(readGroup.Description.c_str(), descriptionLen, 1, mOutStream); fwrite(readGroup.LibraryName.c_str(), libraryNameLen, 1, mOutStream); fwrite(readGroup.PlatformUnit.c_str(), platformUnitLen, 1, mOutStream); fwrite(readGroup.ReadGroupID.c_str(), readGroupIDLen, 1, mOutStream); fwrite(readGroup.SampleName.c_str(), sampleNameLen, 1, mOutStream); }