// Load the per-anchor records of an anchor information object from a library file.
//
// On entry pInfo->size must hold the number of anchors to read, and the
// pLength, pMd5s and pAnchors arrays must have room for that many entries.
// The data is read in this order: "size" int32_t lengths, "size" MD5 strings
// of MD5_STR_LEN characters each, and then for every anchor a uint32_t name
// length followed by that many name characters (no terminator in the file).
//
// Every name is stored in a newly allocated, null-terminated string and
// registered in pInfo->pAnchorHash with the anchor index as its value.
// A short read, an invalid name length or an allocation failure is reported
// through SR_ErrQuit.
static void SR_AnchorInfoRead(SR_AnchorInfo* pInfo, FILE* libFile)
{
    // fread reports its item count as size_t, so keep the full width
    size_t readSize = 0;

    readSize = fread(pInfo->pLength, sizeof(int32_t), pInfo->size, libFile);
    if (readSize != pInfo->size)
        SR_ErrQuit("ERROR: Cannot read the length of anchors from the library file.\n");

    readSize = fread(pInfo->pMd5s, sizeof(char), pInfo->size * MD5_STR_LEN, libFile);
    if (readSize != pInfo->size * MD5_STR_LEN)
        SR_ErrQuit("ERROR: Cannot read the md5 strings from the library file.\n");

    for (unsigned int i = 0; i != pInfo->size; ++i)
    {
        uint32_t anchorNameLen = 0;
        readSize = fread(&anchorNameLen, sizeof(uint32_t), 1, libFile);
        if (readSize != 1)
            SR_ErrQuit("ERROR: Cannot read the length of anchor name from the library file.\n");

        // The length comes straight from the file. "anchorNameLen + 1" evaluated
        // in 32 bits wraps to zero for UINT32_MAX, which would make the
        // terminator below land far outside the allocated buffer.
        if (anchorNameLen == UINT32_MAX)
            SR_ErrQuit("ERROR: Invalid anchor name length in the library file.\n");

        pInfo->pAnchors[i] = (char*) malloc(((size_t) anchorNameLen + 1) * sizeof(char));
        if (pInfo->pAnchors[i] == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of the anchor name.\n");

        pInfo->pAnchors[i][anchorNameLen] = '\0';

        readSize = fread(pInfo->pAnchors[i], sizeof(char), anchorNameLen, libFile);
        if (readSize != anchorNameLen)
            SR_ErrQuit("ERROR: Cannot read the anchor name from the library file.\n");

        // map the anchor name to its index
        int ret = 0;
        khiter_t khIter = kh_put(name, pInfo->pAnchorHash, pInfo->pAnchors[i], &ret);
        kh_value((khash_t(name)*) pInfo->pAnchorHash, khIter) = i;
    }
}
// Load the sample names of a sample information object from a library file.
//
// On entry pInfo->size must hold the number of samples to read and pSamples
// must have room for that many entries. For every sample the file provides a
// uint32_t name length followed by that many name characters (no terminator).
//
// Every name is stored in a newly allocated, null-terminated string and
// registered in pInfo->pSampleHash with the sample index as its value.
// A short read, an invalid name length or an allocation failure is reported
// through SR_ErrQuit.
static void SR_SampleInfoRead(SR_SampleInfo* pInfo, FILE* libFile)
{
    // fread reports its item count as size_t, so keep the full width
    size_t readSize = 0;

    for (unsigned int i = 0; i != pInfo->size; ++i)
    {
        uint32_t sampleNameLen = 0;
        readSize = fread(&sampleNameLen, sizeof(uint32_t), 1, libFile);
        if (readSize != 1)
            SR_ErrQuit("ERROR: Cannot read the length of sample name from the library file.\n");

        // The length comes straight from the file. "sampleNameLen + 1" evaluated
        // in 32 bits wraps to zero for UINT32_MAX, which would make the
        // terminator below land far outside the allocated buffer.
        if (sampleNameLen == UINT32_MAX)
            SR_ErrQuit("ERROR: Invalid sample name length in the library file.\n");

        pInfo->pSamples[i] = (char*) malloc(((size_t) sampleNameLen + 1) * sizeof(char));
        if (pInfo->pSamples[i] == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of the sample name.\n");

        pInfo->pSamples[i][sampleNameLen] = '\0';

        readSize = fread(pInfo->pSamples[i], sizeof(char), sampleNameLen, libFile);
        if (readSize != sampleNameLen)
            SR_ErrQuit("ERROR: Cannot read the sample name from the library file.\n");

        // map the sample name to its index
        int ret = 0;
        khiter_t khIter = kh_put(name, pInfo->pSampleHash, pInfo->pSamples[i], &ret);
        kh_value((khash_t(name)*) pInfo->pSampleHash, khIter) = i;
    }
}
SR_AnchorInfo* SR_AnchorInfoAlloc(uint32_t capacity) { SR_AnchorInfo* pNewInfo = (SR_AnchorInfo*) malloc(sizeof(SR_AnchorInfo)); if (pNewInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for an anchor information object.\n"); pNewInfo->pAnchors = (char**) malloc(capacity * sizeof(char*)); if (pNewInfo->pAnchors == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of anchor names in the anchor information object.\n"); pNewInfo->pLength = (int32_t*) malloc(capacity * sizeof(int32_t)); if (pNewInfo->pLength == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of anchor length in the anchor information object.\n"); pNewInfo->pMd5s = (char*) malloc(capacity * MD5_STR_LEN * sizeof(char)); if (pNewInfo->pMd5s == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of md5 string in the anchor information object.\n"); pNewInfo->size = 0; pNewInfo->capacity = capacity; pNewInfo->pAnchorHash = kh_init(name); kh_resize(name, pNewInfo->pAnchorHash, 2 * capacity); return pNewInfo; }
void SR_LibInfoTableWrite(const SR_LibInfoTable* pTable, FILE* libFile) { unsigned int writeSize = 0; writeSize = fwrite(&(pTable->pAnchorInfo->size), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the number of anchors into the information file.\n"); writeSize = fwrite(&(pTable->pSampleInfo->size), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the number of samples into the information file.\n"); writeSize = fwrite(&(pTable->size), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the number of read groups into the information file.\n"); SR_AnchorInfoWrite(pTable->pAnchorInfo, libFile); SR_SampleInfoWrite(pTable->pSampleInfo, libFile); writeSize = fwrite(pTable->pSampleMap, sizeof(int32_t), pTable->size, libFile); if (writeSize != pTable->size) SR_ErrQuit("ERROR: Cannot write the read-group-to-sample map into the information file.\n"); for (unsigned int i = 0; i != pTable->size; ++i) { uint32_t readGrpNameLen = strlen(pTable->pReadGrps[i]); writeSize = fwrite(&(readGrpNameLen), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the readGrp name length into the information file.\n"); writeSize = fwrite(pTable->pReadGrps[i], sizeof(char), readGrpNameLen, libFile); if (writeSize != readGrpNameLen) SR_ErrQuit("ERROR: Cannot write the readGrp name into the information file.\n"); } writeSize = fwrite(&(pTable->fragLenMax), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the maximum fragment length into the library file.\n"); writeSize = fwrite(&(pTable->cutoff), sizeof(double), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the cutoff into the library information file.\n"); writeSize = fwrite(&(pTable->trimRate), sizeof(double), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the trim rate into the library information file.\n"); writeSize = fwrite(pTable->pLibInfo, 
sizeof(SR_LibInfo), pTable->size, libFile); if (writeSize != pTable->size) SR_ErrQuit("ERROR: Cannot write the library information into the output file.\n"); fflush(libFile); }
// Register the sample named by the "SM:" field of a read group header line.
//
// pSampleID : receives the index of the sample (existing or newly added)
// pTable    : table whose sample information object is updated
// tagPos    : start of the "@RG" line inside the header text
// lineEnd   : end of that line; NULL is treated as "the line runs to the end
//             of the text"
//
// The sample name ends at the first space, tab or newline (or at the end of
// the text). If the name is already in the sample hash its index is returned;
// otherwise the name is appended to pSamples (growing the array if needed)
// and added to the hash.
//
// Returns SR_OK on success, SR_ERR if the line has no "SM:" field or the
// sample name is empty. Allocation failures are reported through SR_ErrQuit.
static SR_Status SR_LibInfoTableAddSample(int* pSampleID, SR_LibInfoTable* pTable, const char* tagPos, const char* lineEnd)
{
    const char* sampleNamePos = strstr(tagPos, "SM:");

    // no "SM:" at all, or the first one belongs to a later line
    if (sampleNamePos == NULL || (lineEnd != NULL && sampleNamePos >= lineEnd))
        return SR_ERR;

    sampleNamePos += 3;

    // the field may be the very last thing in the text, with no delimiter after it
    const char* sampleNameEnd = strpbrk(sampleNamePos, " \t\n");
    if (sampleNameEnd == NULL)
        sampleNameEnd = sampleNamePos + strlen(sampleNamePos);

    const size_t sampleNameLen = (size_t) (sampleNameEnd - sampleNamePos);
    if (sampleNameLen == 0)
        return SR_ERR;

    SR_SampleInfo* pSampleInfo = pTable->pSampleInfo;

    char* buff = (char*) malloc((sampleNameLen + 1) * sizeof(char));
    if (buff == NULL)
        SR_ErrQuit("ERROR: Not enought memory for the read group ID in the fragment length distribution object.\n");

    memcpy(buff, sampleNamePos, sampleNameLen);
    buff[sampleNameLen] = '\0';

    int ret = 0;
    khash_t(name)* pSampleHash = pSampleInfo->pSampleHash;
    khiter_t khIter = kh_put(name, pSampleHash, buff, &ret);

    if (ret == 0)
    {
        // the sample is already known: the hash keeps its original key
        free(buff);
        *pSampleID = kh_value(pSampleHash, khIter);
        return SR_OK;
    }

    if (pSampleInfo->size == pSampleInfo->capacity)
    {
        // doubling a capacity of zero would never make room
        pSampleInfo->capacity = pSampleInfo->capacity > 0 ? pSampleInfo->capacity * 2 : 1;

        // NOTE(review): pReadFraction is allocated with the same capacity in
        // SR_SampleInfoAlloc but is not grown here - confirm that its users
        // re-allocate it before indexing by sample ID.
        pSampleInfo->pSamples = (char**) realloc(pSampleInfo->pSamples, pSampleInfo->capacity * sizeof(char*));
        if (pSampleInfo->pSamples == NULL)
            SR_ErrQuit("ERROR: Not enought memory for the sample names in the fragment length distribution object.\n");
    }

    // the new sample takes the next free index; the table now owns "buff"
    *pSampleID = pSampleInfo->size;
    kh_value(pSampleHash, khIter) = pSampleInfo->size;
    pSampleInfo->pSamples[pSampleInfo->size] = buff;
    ++(pSampleInfo->size);

    return SR_OK;
}
static void SR_SampleInfoWrite(const SR_SampleInfo* pInfo, FILE* libFile) { unsigned int writeSize = 0; for (unsigned int i = 0; i != pInfo->size; ++i) { uint32_t sampleNameLen = strlen(pInfo->pSamples[i]); writeSize = fwrite(&(sampleNameLen), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the sample name length into the information file.\n"); writeSize = fwrite(pInfo->pSamples[i], sizeof(char), sampleNameLen, libFile); if (writeSize != sampleNameLen) SR_ErrQuit("ERROR: Cannot write the sample name into the information file.\n"); } }
// read the header of a bam file SR_BamHeader* SR_BamInStreamLoadHeader(SR_BamInStream* pBamInStream) { bam_header_t* pOrigHeader = bam_header_read(pBamInStream->fpBamInput); if (pOrigHeader == NULL) return NULL; SR_BamHeader* pBamHeader = SR_BamHeaderAlloc(); pBamHeader->pOrigHeader = pOrigHeader; pBamHeader->pMD5s = (const char**) calloc(pOrigHeader->n_targets, sizeof(char*)); if (pBamHeader->pMD5s == NULL) SR_ErrQuit("ERROR: Not enough memory for md5 string"); unsigned int numMD5 = 0; for (const char* md5Pos = pOrigHeader->text; numMD5 <= pOrigHeader->n_targets && (md5Pos = strstr(md5Pos, "M5:")) != NULL; ++numMD5, ++md5Pos) { pBamHeader->pMD5s[numMD5] = md5Pos + 3; } if (numMD5 != pOrigHeader->n_targets) { free(pBamHeader->pMD5s); pBamHeader->pMD5s = NULL; if (numMD5 != 0) SR_ErrMsg("WARNING: Number of MD5 string is not consistent with number of chromosomes."); } return pBamHeader; }
SR_BamHeader* SR_BamHeaderAlloc(void) { SR_BamHeader* pNewHeader = (SR_BamHeader*) calloc(1, sizeof(SR_BamHeader)); if (pNewHeader == NULL) SR_ErrQuit("ERROR: Not enough memory for a bam header object"); return pNewHeader; }
SR_ReadPairAttrbtArray* SR_ReadPairAttrbtArrayAlloc(uint32_t numReadGrp) { SR_ReadPairAttrbtArray* pAttrbtArray = (SR_ReadPairAttrbtArray*) malloc(sizeof(SR_ReadPairAttrbtArray)); if (pAttrbtArray == NULL) SR_ErrQuit("ERROR: Not enough memory for a read pair attribute array object.\n"); pAttrbtArray->data = (SR_ReadPairAttrbt*) malloc(DEFAULT_RP_ATTRB_CAPACITY * sizeof(SR_ReadPairAttrbt)); if (pAttrbtArray->data == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of the read pair attributes in the read pair attribute array object.\n"); pAttrbtArray->pBoundaries = (double (*)[2]) malloc(sizeof(double) * 2 * numReadGrp); if (pAttrbtArray->pBoundaries == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of the boundaries in the read pair attribute array object.\n"); pAttrbtArray->numReadGrp = numReadGrp; pAttrbtArray->size = 0; pAttrbtArray->capacity = DEFAULT_RP_ATTRB_CAPACITY; return pAttrbtArray; }
SR_SampleInfo* SR_SampleInfoAlloc(uint32_t capacity) { SR_SampleInfo* pNewInfo = (SR_SampleInfo*) malloc(sizeof(SR_SampleInfo)); if (pNewInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for a sample information object.\n"); pNewInfo->pSamples = (char**) malloc(capacity * sizeof(char*)); if (pNewInfo->pSamples == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of sample names in the sample information object.\n"); pNewInfo->pReadFraction = (double*) malloc(capacity * sizeof(double)); if (pNewInfo->pReadFraction == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of read fraction in the sample information object.\n"); pNewInfo->pSampleHash = kh_init(name); kh_resize(name, pNewInfo->pSampleHash, 2 * capacity); pNewInfo->size = 0; pNewInfo->capacity = capacity; return pNewInfo; }
// Empty a read pair attribute array and make sure it can hold "newCapacity"
// entries.
//
// The size is always reset to 0. The storage is only replaced when the
// requested capacity exceeds the current one; the old contents are discarded,
// not copied. An allocation failure is reported through SR_ErrQuit.
void SR_ReadPairAttrbtArrayReInit(SR_ReadPairAttrbtArray* pAttrbtArray, uint64_t newCapacity)
{
    pAttrbtArray->size = 0;

    // the current storage is already large enough
    if (newCapacity <= pAttrbtArray->capacity)
        return;

    free(pAttrbtArray->data);

    pAttrbtArray->data = (SR_ReadPairAttrbt*) malloc(newCapacity * sizeof(SR_ReadPairAttrbt));
    if (NULL == pAttrbtArray->data)
        SR_ErrQuit("ERROR: Not enough memory for the storage of the read pair attributes in the read pair attribute array object.\n");

    pAttrbtArray->capacity = newCapacity;
}
SR_LibInfoTable* SR_LibInfoTableAlloc(uint32_t capAnchor, uint32_t capSample, uint32_t capReadGrp) { SR_LibInfoTable* pNewTable = (SR_LibInfoTable*) malloc(sizeof(SR_LibInfoTable)); if (pNewTable == NULL) SR_ErrQuit("ERROR: Not enough memory for a library information table object.\n"); pNewTable->pSampleInfo = SR_SampleInfoAlloc(capSample); pNewTable->pLibInfo = (SR_LibInfo*) malloc(capReadGrp * sizeof(SR_LibInfo)); if (pNewTable->pLibInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of library information in an library table object.\n"); pNewTable->pReadGrps = (char**) malloc(capReadGrp * sizeof(char*)); if (pNewTable->pLibInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of read group names in an library table object.\n"); pNewTable->pSampleMap = (int32_t*) malloc(capReadGrp * sizeof(int32_t)); if (pNewTable->pSampleMap == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of read-group-to-sample map in an library table object.\n"); pNewTable->pSeqTech = (int8_t*) malloc(capReadGrp * sizeof(int8_t)); if (pNewTable->pSeqTech == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of sequencing technologies in an library table object.\n"); pNewTable->pAnchorInfo = SR_AnchorInfoAlloc(capAnchor); pNewTable->pReadGrpHash = kh_init(name); kh_resize(name, pNewTable->pReadGrpHash, 2 * capReadGrp); pNewTable->size = 0; pNewTable->capacity = capReadGrp; pNewTable->fragLenMax = 0; pNewTable->cutoff = 0.0; pNewTable->trimRate = 0.0; return pNewTable; }
static void SR_AnchorInfoWrite(const SR_AnchorInfo* pInfo, FILE* libFile) { unsigned int writeSize = 0; writeSize = fwrite(pInfo->pLength, sizeof(int32_t), pInfo->size, libFile); if (writeSize != pInfo->size) SR_ErrQuit("ERROR: Cannot write the length of anchors into the information file.\n"); writeSize = fwrite(pInfo->pMd5s, sizeof(char), pInfo->size * MD5_STR_LEN, libFile); if (writeSize != pInfo->size * MD5_STR_LEN) SR_ErrQuit("ERROR: Cannot write the md5 string into the information file.\n"); for (unsigned int i = 0; i != pInfo->size; ++i) { uint32_t anchorNameLen = strlen(pInfo->pAnchors[i]); writeSize = fwrite(&(anchorNameLen), sizeof(uint32_t), 1, libFile); if (writeSize != 1) SR_ErrQuit("ERROR: Cannot write the anchor name length into the information file.\n"); writeSize = fwrite(pInfo->pAnchors[i], sizeof(char), anchorNameLen, libFile); if (writeSize != anchorNameLen) SR_ErrQuit("ERROR: Cannot write the sample name into the information file.\n"); } }
// Add the reference sequences and read groups of a bam header to the table.
//
// pTable     : table to update
// oldSize    : receives the number of read groups in the table before the call
// pBamHeader : header whose text is scanned for "@RG" tags
//
// The anchors are added (or checked for consistency) first; if that fails
// SR_ERR is returned. Then, for every "@RG" occurrence in the header text, the
// sample and the read group are added. A read group that lacks a required
// field only triggers an error message and is skipped. If the read group
// capacity grew, the library information storage is enlarged to match.
//
// Returns SR_OK unless the anchors could not be added. Allocation failures
// are reported through SR_ErrQuit.
SR_Status SR_LibInfoTableSetRG(SR_LibInfoTable* pTable, unsigned int* oldSize, const SR_BamHeader* pBamHeader)
{
    SR_Status status = SR_OK;

    *oldSize = pTable->size;
    unsigned int oldCapacity = pTable->capacity;

    const char* tagPos = pBamHeader->pOrigHeader->text;

    status = SR_LibInfoTableAddAnchor(pTable, pBamHeader);
    if (status != SR_OK)
        return SR_ERR;

    while ((tagPos = strstr(tagPos, "@RG")) != NULL)
    {
        // the last line of the header may not be newline-terminated; in that
        // case the line ends where the text ends
        const char* lineEnd = strpbrk(tagPos, "\n");
        if (lineEnd == NULL)
            lineEnd = tagPos + strlen(tagPos);

        // SR_LibInfoTableAddSample takes an "int*", so use a plain int
        int sampleID = 0;
        status = SR_LibInfoTableAddSample(&sampleID, pTable, tagPos, lineEnd);
        if (status != SR_OK)
        {
            SR_ErrMsg("ERROR: the \"SM\" field is not found under the read group tag in the bam header.\n");
            ++tagPos;
            continue;
        }

        status = SR_LibInfoTableAddReadGrp(pTable, tagPos, lineEnd, sampleID);
        if (status != SR_OK)
            SR_ErrMsg("ERROR: the \"ID\" field is required under the read group tag.\n");

        // step past the current tag so the next search finds the following one
        ++tagPos;
    }

    // keep the library information storage in step with the read group capacity
    if (pTable->capacity > oldCapacity)
    {
        pTable->pLibInfo = (SR_LibInfo*) realloc(pTable->pLibInfo, sizeof(SR_LibInfo) * pTable->capacity);
        if (pTable->pLibInfo == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of library summary in the library information object.\n");
    }

    return SR_OK;
}
// Read the next bam record from the bam file and store it in pBamInStream->pNewNode static inline int SR_BamInStreamLoadNext(SR_BamInStream* pBamInStream) { if (pBamInStream->bam_cur_status < 0) return -1; // for the bam alignment array, if we need to expand its space // we have to initialize those newly created bam alignment // and update the query name hash since the address of those // bam alignments are changed after expanding pBamInStream->pNewNode = SR_BamNodeAlloc(pBamInStream->pMemPool); if (pBamInStream->pNewNode == NULL) SR_ErrQuit("ERROR: Too many unpaired reads are stored in the memory. Please use smaller bin size or disable searching pair genomically.\n"); int ret; if (pBamInStream->pBamIterator != NULL) ret = bam_iter_read(pBamInStream->fpBamInput, *(pBamInStream->pBamIterator), &(pBamInStream->pNewNode->alignment)); else ret = bam_read1(pBamInStream->fpBamInput, &(pBamInStream->pNewNode->alignment)); pBamInStream->bam_cur_status = ret; return ret; }
SR_BamInStream* SR_BamInStreamAlloc(const char* bamFilename, uint32_t binLen, unsigned int numThreads, unsigned int buffCapacity, unsigned int reportSize, const SR_StreamMode* pStreamMode) { SR_BamInStream* pBamInStream = (SR_BamInStream*) calloc(1, sizeof(SR_BamInStream)); if (pBamInStream == NULL) SR_ErrQuit("ERROR: Not enough memory for a bam input stream object."); pBamInStream->bam_cur_status = -1; pBamInStream->fpBamInput = bam_open(bamFilename, "r"); if (pBamInStream->fpBamInput == NULL) SR_ErrQuit("ERROR: Cannot open bam file %s for reading.\n", bamFilename); if ((pStreamMode->controlFlag & SR_USE_BAM_INDEX) != 0) { pBamInStream->pBamIndex = bam_index_load(bamFilename); if (pBamInStream->pBamIndex == NULL) { SR_ErrMsg("WARNING: Cannot open bam index file for reading. Creating it......"); bam_index_build(bamFilename); SR_ErrMsg(" The bam index is created."); pBamInStream->pBamIndex = bam_index_load(bamFilename); } } pBamInStream->filterFunc = pStreamMode->filterFunc; pBamInStream->filterData = pStreamMode->filterData; pBamInStream->numThreads = numThreads; pBamInStream->reportSize = reportSize; pBamInStream->currRefID = NO_QUERY_YET; pBamInStream->currBinPos = NO_QUERY_YET; pBamInStream->binLen = binLen; pBamInStream->pNewNode = NULL; pBamInStream->pBamIterator = NULL; if (numThreads > 0) { pBamInStream->pRetLists = (SR_BamList*) calloc(numThreads, sizeof(SR_BamList)); if (pBamInStream->pRetLists == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of retrun alignment lists in the bam input stream object.\n"); pBamInStream->pAlgnTypes = (SR_AlgnType*) malloc(numThreads * reportSize * sizeof(SR_AlgnType)); if (pBamInStream->pAlgnTypes == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of pair alignment type in the bam input stream object.\n"); } else { pBamInStream->pRetLists = NULL; pBamInStream->pAlgnTypes = NULL; pBamInStream->reportSize = 0; } if ((pStreamMode->controlFlag & SR_PAIR_GENOMICALLY) == 0) { 
pBamInStream->pNameHashes[PREV_BIN] = kh_init(queryName); kh_resize(queryName, pBamInStream->pNameHashes[PREV_BIN], reportSize); } else { pBamInStream->pNameHashes[PREV_BIN] = NULL; pBamInStream->binLen = SR_MAX_BIN_LEN; } pBamInStream->pNameHashes[CURR_BIN] = kh_init(queryName); kh_resize(queryName, pBamInStream->pNameHashes[CURR_BIN], reportSize); pBamInStream->pMemPool = SR_BamMemPoolAlloc(buffCapacity); pBamInStream->bam_cur_status = 1; return pBamInStream; }
SR_LibInfoTable* SR_LibInfoTableRead(FILE* libFile) { unsigned int readSize = 0; uint32_t sizeAC = 0; uint32_t sizeSM = 0; uint32_t sizeRG = 0; readSize = fread(&sizeAC, sizeof(uint32_t), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the number of anchors from the library file.\n"); readSize = fread(&sizeSM, sizeof(uint32_t), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the number of sample from the library file.\n"); readSize = fread(&sizeRG, sizeof(uint32_t), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the number of read group from the library file.\n"); SR_LibInfoTable* pTable = SR_LibInfoTableAlloc(sizeAC, sizeSM, sizeRG); pTable->pAnchorInfo->size = sizeAC; pTable->pSampleInfo->size = sizeSM; pTable->size = sizeRG; pTable->pAnchorInfo->capacity = sizeAC; pTable->pSampleInfo->capacity = sizeSM; pTable->capacity = sizeRG; SR_AnchorInfoRead(pTable->pAnchorInfo, libFile); SR_SampleInfoRead(pTable->pSampleInfo, libFile); readSize = fread(pTable->pSampleMap, sizeof(int32_t), sizeRG, libFile); if (readSize != sizeRG) SR_ErrQuit("ERROR: Cannot read the read-group-to-sample map from the library file.\n"); uint32_t readGrpNameLen = 0; for (unsigned int i = 0; i != sizeRG; ++i) { readSize = fread(&readGrpNameLen, sizeof(uint32_t), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the length of read group name from the library file.\n"); pTable->pReadGrps[i] = (char*) malloc((readGrpNameLen + 1) * sizeof(char)); if (pTable->pReadGrps[i] == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of the sample name.\n"); pTable->pReadGrps[i][readGrpNameLen] = '\0'; readSize = fread(pTable->pReadGrps[i], sizeof(char), readGrpNameLen, libFile); if (readSize != readGrpNameLen) SR_ErrQuit("ERROR: Cannot read the read group name from the library file.\n"); int ret = 0; khiter_t khIter = kh_put(name, pTable->pReadGrpHash, pTable->pReadGrps[i], &ret); kh_value((khash_t(name)*) pTable->pReadGrpHash, khIter) = i; 
} readSize = fread(&(pTable->fragLenMax), sizeof(uint32_t), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the maximum fragment length from the library file.\n"); readSize = fread(&(pTable->cutoff), sizeof(double), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the cutoff from the library file.\n"); readSize = fread(&(pTable->trimRate), sizeof(double), 1, libFile); if (readSize != 1) SR_ErrQuit("ERROR: Cannot read the trim rate from the library file.\n"); readSize = fread(pTable->pLibInfo, sizeof(SR_LibInfo), pTable->size, libFile); if (readSize != pTable->size) SR_ErrQuit("ERROR: Cannot read the library information from the library file.\n"); return pTable; }
// Add the reference sequences (anchors) of a bam header to the table, or
// verify them against the anchors that are already stored.
//
// If the table holds no anchors yet, the name, length and MD5 string of every
// reference in the header are copied in and the names are added to the anchor
// hash. References whose name starts with "GL0", "NC_", "NT_" or "hs" get a
// length of -1. When the header carries no usable MD5 strings (pMD5s is NULL)
// the stored MD5 entries are zero-filled.
//
// If anchors are already loaded, the header must list the same number of
// references, with the same names at the same indices and the same lengths
// (stored lengths that are not positive are not compared). MD5 strings are
// compared only when both the header and the table have one for that anchor.
//
// Returns SR_OK on success and SR_ERR (after printing a message) on any
// inconsistency. Allocation failures are reported through SR_ErrQuit.
static SR_Status SR_LibInfoTableAddAnchor(SR_LibInfoTable* pTable, const SR_BamHeader* pBamHeader)
{
    const bam_header_t* pOrigHeader = pBamHeader->pOrigHeader;
    const SR_Bool isLoaded = pTable->pAnchorInfo->size == 0 ? FALSE : TRUE;

    if (isLoaded)
    {
        SR_AnchorInfo* pAnchorInfo = pTable->pAnchorInfo;
        khash_t(name)* pAnchorHash = pAnchorInfo->pAnchorHash;

        if (pOrigHeader->n_targets != pAnchorInfo->size)
        {
            SR_ErrMsg("ERROR: The number of reference sequences in this bam file is inconsistent with that in previous bam files.\n");
            return SR_ERR;
        }

        for (unsigned int i = 0; i != pOrigHeader->n_targets; ++i)
        {
            // Pure lookup: inserting here would leave a key owned by the bam
            // header inside the table's hash whenever the name is unknown.
            khiter_t khIter = kh_get(name, pAnchorHash, pOrigHeader->target_name[i]);
            if (khIter == kh_end(pAnchorHash))
            {
                SR_ErrMsg("ERROR: Found a reference sequence that is not in the previous bam headers.\n");
                return SR_ERR;
            }

            unsigned int refIndex = kh_value(pAnchorHash, khIter);
            if (refIndex != i)
            {
                SR_ErrMsg("ERROR: Reference ID in this bam file is inconsistent with that in the previous bam files.\n");
                return SR_ERR;
            }

            if (pAnchorInfo->pLength[i] > 0 && pAnchorInfo->pLength[i] != pOrigHeader->target_len[i])
            {
                SR_ErrMsg("ERROR: The length of the reference sequence in this bam file is inconsistent with that in previous bam files.\n");
                return SR_ERR;
            }

            // pMD5s is NULL when the header has no complete set of M5 tags, and
            // a stored entry is empty when it was recorded from such a header
            const char* storedMd5 = pAnchorInfo->pMd5s + MD5_STR_LEN * i;
            if (pBamHeader->pMD5s != NULL && storedMd5[0] != '\0'
                && strncmp(storedMd5, pBamHeader->pMD5s[i], MD5_STR_LEN) != 0)
            {
                SR_ErrMsg("ERROR: MD5 string in this bam file is inconsistent with that in previous bam files.\n");
                return SR_ERR;
            }
        }

        return SR_OK;
    }

    // first header: replace the anchor object if it is too small
    if (pOrigHeader->n_targets > pTable->pAnchorInfo->capacity)
    {
        SR_AnchorInfoFree(pTable->pAnchorInfo);
        pTable->pAnchorInfo = SR_AnchorInfoAlloc(pOrigHeader->n_targets);
    }

    SR_AnchorInfo* pAnchorInfo = pTable->pAnchorInfo;
    khash_t(name)* pAnchorHash = pAnchorInfo->pAnchorHash;

    pAnchorInfo->size = pOrigHeader->n_targets;

    for (unsigned int i = 0; i != pOrigHeader->n_targets; ++i)
    {
        // the table keeps its own copy of the reference name
        unsigned int nameLen = strlen(pOrigHeader->target_name[i]);
        pAnchorInfo->pAnchors[i] = (char*) calloc(nameLen + 1, sizeof(char));
        if (pAnchorInfo->pAnchors[i] == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of reference name.\n");

        memcpy(pAnchorInfo->pAnchors[i], pOrigHeader->target_name[i], nameLen);

        int ret = 0;
        khiter_t khIter = kh_put(name, pAnchorHash, pAnchorInfo->pAnchors[i], &ret);
        kh_value(pAnchorHash, khIter) = i;

        // set the length of those unused references to -1 so that
        // we will ignore any reads aligned to them
        pAnchorInfo->pLength[i] = pOrigHeader->target_len[i];
        if (strncmp("GL0", pAnchorInfo->pAnchors[i], 3) == 0
            || strncmp("NC_", pAnchorInfo->pAnchors[i], 3) == 0
            || strncmp("NT_", pAnchorInfo->pAnchors[i], 3) == 0
            || strncmp("hs", pAnchorInfo->pAnchors[i], 2) == 0)
        {
            pAnchorInfo->pLength[i] = -1;
        }

        // without MD5 strings in the header, record an empty entry instead of
        // dereferencing a NULL array
        if (pBamHeader->pMD5s != NULL)
            strncpy(pAnchorInfo->pMd5s + MD5_STR_LEN * i, pBamHeader->pMD5s[i], MD5_STR_LEN);
        else
            memset(pAnchorInfo->pMd5s + MD5_STR_LEN * i, 0, MD5_STR_LEN);
    }

    return SR_OK;
}
// Add the read group described by one "@RG" header line to the table.
//
// pTable   : table to update
// tagPos   : start of the "@RG" line inside the header text
// lineEnd  : end of that line; NULL is treated as "the line runs to the end
//            of the text"
// sampleID : index of the sample this read group belongs to
//
// The line must contain both an "ID:" and a "PL:" field. The per-read-group
// arrays are grown when they are full, the sequencing technology is recorded
// through SR_LibInfoTableAddSeqTech, and the read group name (which ends at
// the first space, tab or newline, or at the end of the text) is copied and
// added to the read group hash.
//
// Returns SR_OK when the read group was added. Returns SR_ERR when a required
// field is missing, the sequencing technology is rejected, the name is empty,
// or the read group ID is already in the table (a message is printed for the
// duplicate case). Allocation failures are reported through SR_ErrQuit.
static SR_Status SR_LibInfoTableAddReadGrp(SR_LibInfoTable* pTable, const char* tagPos, const char* lineEnd, int sampleID)
{
    // a line without a trailing newline ends where the text ends
    if (lineEnd == NULL)
        lineEnd = tagPos + strlen(tagPos);

    // get the name of the current read group
    const char* readGrpNamePos = strstr(tagPos, "ID:");
    if (readGrpNamePos == NULL || readGrpNamePos >= lineEnd)
        return SR_ERR;

    readGrpNamePos += 3;

    const char* platformPos = strstr(tagPos, "PL:");
    if (platformPos == NULL || platformPos >= lineEnd)
        return SR_ERR;

    platformPos += 3;

    // expand the array if necessary
    if (pTable->size == pTable->capacity)
    {
        // doubling a capacity of zero would never make room
        pTable->capacity = pTable->capacity > 0 ? pTable->capacity * 2 : 1;

        pTable->pReadGrps = (char**) realloc(pTable->pReadGrps, pTable->capacity * sizeof(char*));
        if (pTable->pReadGrps == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of read group names in the library information object.\n");

        pTable->pSampleMap = (int32_t*) realloc(pTable->pSampleMap, pTable->capacity * sizeof(int32_t));
        if (pTable->pSampleMap == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of the sample ID map in the library information object.\n");

        pTable->pSeqTech = (int8_t*) realloc(pTable->pSeqTech, pTable->capacity * sizeof(int8_t));
        if (pTable->pSeqTech == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of the sequencing technology in the library information object.\n");
    }

    SR_Status status = SR_LibInfoTableAddSeqTech(pTable, platformPos);
    if (status != SR_OK)
        return SR_ERR;

    // the field may be the very last thing in the text, with no delimiter after it
    const char* readGrpNameEnd = strpbrk(readGrpNamePos, " \t\n");
    if (readGrpNameEnd == NULL)
        readGrpNameEnd = readGrpNamePos + strlen(readGrpNamePos);

    size_t readGrpNameLen = (size_t) (readGrpNameEnd - readGrpNamePos);
    if (readGrpNameLen == 0)
        return SR_ERR;

    // calloc leaves the terminator in place after the copy
    pTable->pReadGrps[pTable->size] = (char*) calloc(readGrpNameLen + 1, sizeof(char));
    if (pTable->pReadGrps[pTable->size] == NULL)
        SR_ErrQuit("ERROR: Not enough memory for the storage of read group name in the fragment length distribution object.\n");

    memcpy(pTable->pReadGrps[pTable->size], readGrpNamePos, readGrpNameLen);

    int ret = 0;
    khash_t(name)* pReadGrpHash = pTable->pReadGrpHash;
    khiter_t khIter = kh_put(name, pReadGrpHash, pTable->pReadGrps[pTable->size], &ret);

    if (ret == 0)
    {
        // the ID is already in the hash: drop the copy and leave the table size unchanged
        free(pTable->pReadGrps[pTable->size]);
        pTable->pReadGrps[pTable->size] = NULL;

        SR_ErrMsg("ERROR: Found a duplicated read group ID.\n");
        return SR_ERR;
    }

    pTable->pSampleMap[pTable->size] = sampleID;
    kh_value(pReadGrpHash, khIter) = pTable->size;
    ++(pTable->size);

    return SR_OK;
}