SR_AnchorInfo* SR_AnchorInfoAlloc(uint32_t capacity) { SR_AnchorInfo* pNewInfo = (SR_AnchorInfo*) malloc(sizeof(SR_AnchorInfo)); if (pNewInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for an anchor information object.\n"); pNewInfo->pAnchors = (char**) malloc(capacity * sizeof(char*)); if (pNewInfo->pAnchors == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of anchor names in the anchor information object.\n"); pNewInfo->pLength = (int32_t*) malloc(capacity * sizeof(int32_t)); if (pNewInfo->pLength == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of anchor length in the anchor information object.\n"); pNewInfo->pMd5s = (char*) malloc(capacity * MD5_STR_LEN * sizeof(char)); if (pNewInfo->pMd5s == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of md5 string in the anchor information object.\n"); pNewInfo->size = 0; pNewInfo->capacity = capacity; pNewInfo->pAnchorHash = kh_init(name); kh_resize(name, pNewInfo->pAnchorHash, 2 * capacity); return pNewInfo; }
/*
 * Create a new Hash object whose table has been resized for `capa` entries.
 *
 * The object is allocated through mrb_obj_alloc with type MRB_TT_HASH and
 * the interpreter's hash_class; its `iv` field is set to 0. The object is
 * returned wrapped by mrb_obj_value.
 */
mrb_value
mrb_hash_new_capa(mrb_state *mrb, size_t capa)
{
  struct RHash *rhash = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);

  /* create the backing table, then resize it to the requested capacity */
  rhash->ht = kh_init(ht, mrb);
  kh_resize(ht, rhash->ht, capa);

  rhash->iv = 0;

  return mrb_obj_value(rhash);
}
static void worker_post(void *g, long i, int tid) { int j, start_a, start_p, n, n_keys; idxhash_t *h; mm_idx_t *mi = (mm_idx_t*)g; mm_idx_bucket_t *b = &mi->B[i]; if (b->a.n == 0) return; // sort by minimizer radix_sort_128x(b->a.a, b->a.a + b->a.n); // count and preallocate for (j = 1, n = 1, n_keys = 0, b->n = 0; j <= b->a.n; ++j) { if (j == b->a.n || b->a.a[j].x != b->a.a[j-1].x) { ++n_keys; if (n > 1) b->n += n; n = 1; } else ++n; } h = kh_init(idx); kh_resize(idx, h, n_keys); b->p = (uint64_t*)calloc(b->n, 8); // create the hash table for (j = 1, n = 1, start_a = start_p = 0; j <= b->a.n; ++j) { if (j == b->a.n || b->a.a[j].x != b->a.a[j-1].x) { khint_t itr; int absent; mm128_t *p = &b->a.a[j-1]; itr = kh_put(idx, h, p->x>>mi->b<<1, &absent); assert(absent && j - start_a == n); if (n == 1) { kh_key(h, itr) |= 1; kh_val(h, itr) = p->y; } else { int k; for (k = 0; k < n; ++k) b->p[start_p + k] = b->a.a[start_a + k].y; kh_val(h, itr) = (uint64_t)start_p<<32 | n; start_p += n; } start_a = j, n = 1; } else ++n;
// Prepare a fragment length histogram array to hold "newSize" histograms.
//
// The array is first passed to TGM_FragLenHistArrayClear. When "newSize"
// exceeds the current capacity, the storage is resized to twice "newSize"
// and the elements from index "size" up to the new capacity are zeroed.
// Afterwards each of the first "newSize" elements that has no raw
// histogram gets a fresh hash resized to DEFAULT_NUM_HIST_ELMNT, and the
// array size is set to "newSize".
void TGM_FragLenHistArrayInit(TGM_FragLenHistArray* pHistArray, unsigned int newSize)
{
    TGM_FragLenHistArrayClear(pHistArray);

    if (newSize > pHistArray->capacity)
    {
        TGM_ARRAY_RESIZE(pHistArray, newSize * 2, TGM_FragLenHist);

        // zero everything from the current size up to the new capacity
        memset(pHistArray->data + pHistArray->size, 0, (newSize * 2 - pHistArray->size) * sizeof(TGM_FragLenHist));
    }

    for (unsigned int histIdx = 0; histIdx < newSize; ++histIdx)
    {
        TGM_FragLenHist* pHist = &(pHistArray->data[histIdx]);

        // keep a raw histogram that already exists
        if (pHist->rawHist != NULL)
            continue;

        pHist->rawHist = kh_init(fragLen);
        kh_resize(fragLen, pHist->rawHist, DEFAULT_NUM_HIST_ELMNT);
    }

    pHistArray->size = newSize;
}
SR_SampleInfo* SR_SampleInfoAlloc(uint32_t capacity) { SR_SampleInfo* pNewInfo = (SR_SampleInfo*) malloc(sizeof(SR_SampleInfo)); if (pNewInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for a sample information object.\n"); pNewInfo->pSamples = (char**) malloc(capacity * sizeof(char*)); if (pNewInfo->pSamples == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of sample names in the sample information object.\n"); pNewInfo->pReadFraction = (double*) malloc(capacity * sizeof(double)); if (pNewInfo->pReadFraction == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of read fraction in the sample information object.\n"); pNewInfo->pSampleHash = kh_init(name); kh_resize(name, pNewInfo->pSampleHash, 2 * capacity); pNewInfo->size = 0; pNewInfo->capacity = capacity; return pNewInfo; }
// Record one fragment length observation in a histogram of the array.
//
// The target histogram is addressed from the back of the array, i.e. it is
// data[size - backHistIndex], so valid values of "backHistIndex" are
// 1..size. A "fragLen" of 0 marks an invalid pair mode: only the invalid
// pair counter of that histogram is incremented. Otherwise the count for
// "fragLen" in the raw histogram is incremented (the raw histogram is
// created on first use) together with modeCount[0].
//
// Returns TGM_OK on success. Returns TGM_ERR when "backHistIndex" does not
// address a stored histogram, when the raw histogram cannot be created, or
// when the hash insertion reports a failure.
TGM_Status TGM_FragLenHistArrayUpdate(TGM_FragLenHistArray* pHistArray, unsigned int backHistIndex, uint32_t fragLen)
{
    // an index of 0 would address data[size], one past the last stored
    // histogram, so it is rejected together with indices beyond the front
    if (backHistIndex == 0 || backHistIndex > pHistArray->size)
        return TGM_ERR;

    TGM_FragLenHist* pCurrHist = pHistArray->data + (pHistArray->size - backHistIndex);

    // if the pair mode is not valid
    // we only update the count of the invalid pairs and return
    if (fragLen == 0)
    {
        ++(pCurrHist->modeCount[INVALID_PAIR_MODE_SET_INDEX]);
        return TGM_OK;
    }

    // create the raw histogram on first use; it is attached to the
    // histogram right away so it is never lost on a later error return
    if (pCurrHist->rawHist == NULL)
    {
        pCurrHist->rawHist = kh_init(fragLen);
        if (pCurrHist->rawHist == NULL)
            return TGM_ERR;

        kh_resize(fragLen, pCurrHist->rawHist, 20);
    }

    khash_t(fragLen)* pCurrHash = pCurrHist->rawHist;

    int ret = 0;
    khiter_t khIter = kh_put(fragLen, pCurrHash, fragLen, &ret);

    // NOTE(review): khash versions that can fail an insertion report it
    // with a negative code; the iterator must not be used in that case
    if (ret < 0)
        return TGM_ERR;

    // ret == 0 means the fragment length was already in the histogram
    if (ret == 0)
        kh_value(pCurrHash, khIter) += 1;
    else
        kh_value(pCurrHash, khIter) = 1;

    ++(pCurrHist->modeCount[0]);

    return TGM_OK;
}
SR_LibInfoTable* SR_LibInfoTableAlloc(uint32_t capAnchor, uint32_t capSample, uint32_t capReadGrp) { SR_LibInfoTable* pNewTable = (SR_LibInfoTable*) malloc(sizeof(SR_LibInfoTable)); if (pNewTable == NULL) SR_ErrQuit("ERROR: Not enough memory for a library information table object.\n"); pNewTable->pSampleInfo = SR_SampleInfoAlloc(capSample); pNewTable->pLibInfo = (SR_LibInfo*) malloc(capReadGrp * sizeof(SR_LibInfo)); if (pNewTable->pLibInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of library information in an library table object.\n"); pNewTable->pReadGrps = (char**) malloc(capReadGrp * sizeof(char*)); if (pNewTable->pLibInfo == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of read group names in an library table object.\n"); pNewTable->pSampleMap = (int32_t*) malloc(capReadGrp * sizeof(int32_t)); if (pNewTable->pSampleMap == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of read-group-to-sample map in an library table object.\n"); pNewTable->pSeqTech = (int8_t*) malloc(capReadGrp * sizeof(int8_t)); if (pNewTable->pSeqTech == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of sequencing technologies in an library table object.\n"); pNewTable->pAnchorInfo = SR_AnchorInfoAlloc(capAnchor); pNewTable->pReadGrpHash = kh_init(name); kh_resize(name, pNewTable->pReadGrpHash, 2 * capReadGrp); pNewTable->size = 0; pNewTable->capacity = capReadGrp; pNewTable->fragLenMax = 0; pNewTable->cutoff = 0.0; pNewTable->trimRate = 0.0; return pNewTable; }
SR_BamInStream* SR_BamInStreamAlloc(const char* bamFilename, uint32_t binLen, unsigned int numThreads, unsigned int buffCapacity, unsigned int reportSize, const SR_StreamMode* pStreamMode) { SR_BamInStream* pBamInStream = (SR_BamInStream*) calloc(1, sizeof(SR_BamInStream)); if (pBamInStream == NULL) SR_ErrQuit("ERROR: Not enough memory for a bam input stream object."); pBamInStream->bam_cur_status = -1; pBamInStream->fpBamInput = bam_open(bamFilename, "r"); if (pBamInStream->fpBamInput == NULL) SR_ErrQuit("ERROR: Cannot open bam file %s for reading.\n", bamFilename); if ((pStreamMode->controlFlag & SR_USE_BAM_INDEX) != 0) { pBamInStream->pBamIndex = bam_index_load(bamFilename); if (pBamInStream->pBamIndex == NULL) { SR_ErrMsg("WARNING: Cannot open bam index file for reading. Creating it......"); bam_index_build(bamFilename); SR_ErrMsg(" The bam index is created."); pBamInStream->pBamIndex = bam_index_load(bamFilename); } } pBamInStream->filterFunc = pStreamMode->filterFunc; pBamInStream->filterData = pStreamMode->filterData; pBamInStream->numThreads = numThreads; pBamInStream->reportSize = reportSize; pBamInStream->currRefID = NO_QUERY_YET; pBamInStream->currBinPos = NO_QUERY_YET; pBamInStream->binLen = binLen; pBamInStream->pNewNode = NULL; pBamInStream->pBamIterator = NULL; if (numThreads > 0) { pBamInStream->pRetLists = (SR_BamList*) calloc(numThreads, sizeof(SR_BamList)); if (pBamInStream->pRetLists == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of retrun alignment lists in the bam input stream object.\n"); pBamInStream->pAlgnTypes = (SR_AlgnType*) malloc(numThreads * reportSize * sizeof(SR_AlgnType)); if (pBamInStream->pAlgnTypes == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of pair alignment type in the bam input stream object.\n"); } else { pBamInStream->pRetLists = NULL; pBamInStream->pAlgnTypes = NULL; pBamInStream->reportSize = 0; } if ((pStreamMode->controlFlag & SR_PAIR_GENOMICALLY) == 0) { 
pBamInStream->pNameHashes[PREV_BIN] = kh_init(queryName); kh_resize(queryName, pBamInStream->pNameHashes[PREV_BIN], reportSize); } else { pBamInStream->pNameHashes[PREV_BIN] = NULL; pBamInStream->binLen = SR_MAX_BIN_LEN; } pBamInStream->pNameHashes[CURR_BIN] = kh_init(queryName); kh_resize(queryName, pBamInStream->pNameHashes[CURR_BIN], reportSize); pBamInStream->pMemPool = SR_BamMemPoolAlloc(buffCapacity); pBamInStream->bam_cur_status = 1; return pBamInStream; }