Exemplo n.º 1
0
// Allocate an anchor information object with room for "capacity" anchors.
//
// Three parallel buffers are allocated: anchor name pointers, anchor lengths
// and md5 strings (MD5_STR_LEN chars per anchor). The name hash is created
// and pre-sized to twice the capacity. The returned object starts with
// size 0. Every allocation failure is reported through SR_ErrQuit.
SR_AnchorInfo* SR_AnchorInfoAlloc(uint32_t capacity)
{
    SR_AnchorInfo* pInfo = (SR_AnchorInfo*) malloc(sizeof(SR_AnchorInfo));
    if (!pInfo)
    {
        SR_ErrQuit("ERROR: Not enough memory for an anchor information object.\n");
    }

    // anchor names
    pInfo->pAnchors = (char**) malloc(capacity * sizeof(char*));
    if (!pInfo->pAnchors)
    {
        SR_ErrQuit("ERROR: Not enough memory for the storage of anchor names in the anchor information object.\n");
    }

    // anchor lengths
    pInfo->pLength = (int32_t*) malloc(capacity * sizeof(int32_t));
    if (!pInfo->pLength)
    {
        SR_ErrQuit("ERROR: Not enough memory for the storage of anchor length in the anchor information object.\n");
    }

    // md5 strings, stored back to back in one flat buffer
    pInfo->pMd5s = (char*) malloc(capacity * MD5_STR_LEN * sizeof(char));
    if (!pInfo->pMd5s)
    {
        SR_ErrQuit("ERROR: Not enough memory for the storage of md5 string in the anchor information object.\n");
    }

    pInfo->capacity = capacity;
    pInfo->size = 0;

    // name hash, pre-sized to twice the requested capacity
    pInfo->pAnchorHash = kh_init(name);
    kh_resize(name, pInfo->pAnchorHash, 2 * capacity);

    return pInfo;
}
Exemplo n.º 2
0
Arquivo: hash.c Projeto: Zyxwvu/mruby
/*
 * Create a new Hash object of class mrb->hash_class whose underlying
 * khash table is resized to `capa` before it is returned.
 * The object's iv field is cleared.
 */
mrb_value
mrb_hash_new_capa(mrb_state *mrb, size_t capa)
{
  struct RHash *hash = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);

  /* create the table first, then reserve room for the requested capacity */
  hash->ht = kh_init(ht, mrb);
  kh_resize(ht, hash->ht, capa);

  hash->iv = 0;

  return mrb_obj_value(hash);
}
Exemplo n.º 3
0
/*
 * Per-bucket post-processing worker: builds the hash table for bucket i of
 * the index passed through g.
 *
 *   g   - opaque pointer, cast to mm_idx_t*
 *   i   - index of the bucket in mi->B to process
 *   tid - not used in the visible part of this function
 *
 * The bucket's entries (b->a) are sorted, then grouped into runs that share
 * the same .x value. A run of length 1 is stored directly in the hash value;
 * longer runs have their .y values copied into b->p and the hash value
 * records where they live.
 *
 * NOTE(review): this definition is truncated in the visible source (the
 * closing of the loop and of the function is not shown).
 */
static void worker_post(void *g, long i, int tid)
{
	int j, start_a, start_p, n, n_keys;
	idxhash_t *h;
	mm_idx_t *mi = (mm_idx_t*)g;
	mm_idx_bucket_t *b = &mi->B[i];
	if (b->a.n == 0) return; // empty bucket: nothing to build

	// sort by minimizer
	radix_sort_128x(b->a.a, b->a.a + b->a.n);

	// count and preallocate
	// n is the length of the current run of equal .x values; n_keys counts the
	// runs, and b->n accumulates the total length of runs longer than one
	// (only those need slots in b->p).
	for (j = 1, n = 1, n_keys = 0, b->n = 0; j <= b->a.n; ++j) {
		if (j == b->a.n || b->a.a[j].x != b->a.a[j-1].x) {
			++n_keys;
			if (n > 1) b->n += n;
			n = 1;
		} else ++n;
	}
	h = kh_init(idx);
	kh_resize(idx, h, n_keys); // reserve room for one hash entry per distinct key
	// 8 bytes per slot, matching sizeof(uint64_t); zero-initialized.
	// NOTE(review): the calloc result is not checked before b->p is written below.
	b->p = (uint64_t*)calloc(b->n, 8);

	// create the hash table
	// start_a: index in b->a.a where the current run begins
	// start_p: next free slot in b->p
	for (j = 1, n = 1, start_a = start_p = 0; j <= b->a.n; ++j) {
		if (j == b->a.n || b->a.a[j].x != b->a.a[j-1].x) {
			// end of a run: b->a.a[start_a .. j-1] all share the same .x
			khint_t itr;
			int absent;
			mm128_t *p = &b->a.a[j-1];
			// key is x shifted right by mi->b, then left by one so bit 0 is
			// free to act as the "single entry" flag set below
			itr = kh_put(idx, h, p->x>>mi->b<<1, &absent);
			// each key must be new, and the run length must match the span walked
			assert(absent && j - start_a == n);
			if (n == 1) {
				// single entry: flag the key with bit 0 and store .y directly
				kh_key(h, itr) |= 1;
				kh_val(h, itr) = p->y;
			} else {
				// multiple entries: copy their .y values into b->p and store
				// (offset into b->p) in the high 32 bits, count in the low bits
				int k;
				for (k = 0; k < n; ++k)
					b->p[start_p + k] = b->a.a[start_a + k].y;
				kh_val(h, itr) = (uint64_t)start_p<<32 | n;
				start_p += n;
			}
			start_a = j, n = 1;
		} else ++n;
Exemplo n.º 4
0
// Reset a fragment length histogram array and prepare it to hold "newSize"
// histograms.
//
// The array is cleared first. If newSize exceeds the current capacity the
// storage is resized to twice newSize and the region starting at the current
// size is zero-filled. Every one of the first newSize slots that has no raw
// histogram yet gets a fresh hash pre-sized to DEFAULT_NUM_HIST_ELMNT.
// Finally the array size is set to newSize.
void TGM_FragLenHistArrayInit(TGM_FragLenHistArray* pHistArray, unsigned int newSize)
{
    TGM_FragLenHistArrayClear(pHistArray);

    if (newSize > pHistArray->capacity)
    {
        const unsigned int grownCapacity = newSize * 2;

        TGM_ARRAY_RESIZE(pHistArray, grownCapacity, TGM_FragLenHist);

        // zero the tail so that the rawHist pointers in it read as NULL below
        memset(pHistArray->data + pHistArray->size, 0, (grownCapacity - pHistArray->size) * sizeof(TGM_FragLenHist));
    }

    for (unsigned int histIdx = 0; histIdx < newSize; ++histIdx)
    {
        TGM_FragLenHist* pHist = &(pHistArray->data[histIdx]);

        // keep hashes that already exist
        if (pHist->rawHist != NULL)
            continue;

        pHist->rawHist = kh_init(fragLen);
        kh_resize(fragLen, pHist->rawHist, DEFAULT_NUM_HIST_ELMNT);
    }

    pHistArray->size = newSize;
}
Exemplo n.º 5
0
// Allocate a sample information object with room for "capacity" samples.
//
// Two parallel buffers are allocated (sample name pointers and read
// fractions) and the sample name hash is created and pre-sized to twice the
// capacity. The returned object starts with size 0. Every allocation failure
// is reported through SR_ErrQuit.
SR_SampleInfo* SR_SampleInfoAlloc(uint32_t capacity)
{
    SR_SampleInfo* pInfo = (SR_SampleInfo*) malloc(sizeof(SR_SampleInfo));
    if (!pInfo)
    {
        SR_ErrQuit("ERROR: Not enough memory for a sample information object.\n");
    }

    // sample names
    pInfo->pSamples = (char**) malloc(capacity * sizeof(char*));
    if (!pInfo->pSamples)
    {
        SR_ErrQuit("ERROR: Not enough memory for the storage of sample names in the sample information object.\n");
    }

    // per-sample read fractions
    pInfo->pReadFraction = (double*) malloc(capacity * sizeof(double));
    if (!pInfo->pReadFraction)
    {
        SR_ErrQuit("ERROR: Not enough memory for the storage of read fraction in the sample information object.\n");
    }

    // name hash, pre-sized to twice the requested capacity
    pInfo->pSampleHash = kh_init(name);
    kh_resize(name, pInfo->pSampleHash, 2 * capacity);

    pInfo->capacity = capacity;
    pInfo->size = 0;

    return pInfo;
}
Exemplo n.º 6
0
// Record one fragment length observation in a histogram of the array.
//
// pHistArray    : the histogram array to update
// backHistIndex : position of the target histogram counted from the end of
//                 the array; the element used is data[size - backHistIndex],
//                 so valid values are 1..size
// fragLen       : the observed fragment length; 0 marks an invalid pair mode
//
// Returns TGM_OK on success. Returns TGM_ERR if backHistIndex is out of
// range, or if the histogram hash cannot be created or extended.
TGM_Status TGM_FragLenHistArrayUpdate(TGM_FragLenHistArray* pHistArray, unsigned int backHistIndex, uint32_t fragLen)
{
    // backHistIndex == 0 would address data[size], one past the last valid
    // histogram, so it is rejected together with indices larger than size
    if (backHistIndex == 0 || backHistIndex > pHistArray->size)
        return TGM_ERR;

    TGM_FragLenHist* pCurrHist = pHistArray->data + (pHistArray->size - backHistIndex);

    // if the pair mode is not valid
    // we only update the count of the invalid pair and return
    if (fragLen == 0)
    {
        ++(pCurrHist->modeCount[INVALID_PAIR_MODE_SET_INDEX]);
        return TGM_OK;
    }

    khash_t(fragLen)* pCurrHash = pCurrHist->rawHist;

    // create the hash lazily the first time this histogram is updated
    if (pCurrHash == NULL)
    {
        pCurrHash = kh_init(fragLen);
        if (pCurrHash == NULL)
            return TGM_ERR;

        kh_resize(fragLen, pCurrHash, 20);

        // attach the hash right away so it is not lost on an early return
        pCurrHist->rawHist = pCurrHash;
    }

    int ret = 0;
    khiter_t khIter = kh_put(fragLen, pCurrHash, fragLen, &ret);

    // a negative status means the insertion failed and khIter is not a
    // valid bucket, so it must not be written through
    if (ret < 0)
        return TGM_ERR;

    // ret == 0: the fragment length is already in the hash, bump its count
    // otherwise: a new entry was created, start counting from one
    if (ret == 0)
        kh_value(pCurrHash, khIter) += 1;
    else
        kh_value(pCurrHash, khIter) = 1;

    ++(pCurrHist->modeCount[0]);
    pCurrHist->rawHist = pCurrHash;

    return TGM_OK;
}
Exemplo n.º 7
0
// Allocate a library information table.
//
// capAnchor  : capacity passed to SR_AnchorInfoAlloc for the anchor information
// capSample  : capacity passed to SR_SampleInfoAlloc for the sample information
// capReadGrp : number of read groups the table can hold; sizes the library
//              information, read group name, sample map and sequencing
//              technology arrays, and (doubled) the read group name hash
//
// The returned table starts with size 0 and with fragLenMax, cutoff and
// trimRate set to zero. Every allocation failure is reported through
// SR_ErrQuit.
SR_LibInfoTable* SR_LibInfoTableAlloc(uint32_t capAnchor, uint32_t capSample, uint32_t capReadGrp)
{
    SR_LibInfoTable* pNewTable = (SR_LibInfoTable*) malloc(sizeof(SR_LibInfoTable));
    if (pNewTable == NULL)
        SR_ErrQuit("ERROR: Not enough memory for a library information table object.\n");

    pNewTable->pSampleInfo = SR_SampleInfoAlloc(capSample);

    pNewTable->pLibInfo = (SR_LibInfo*) malloc(capReadGrp * sizeof(SR_LibInfo));
    if (pNewTable->pLibInfo == NULL)
        SR_ErrQuit("ERROR: Not enough memory for the storage of library information in an library table object.\n");

    // check the pointer that was just allocated (pReadGrps), not pLibInfo,
    // otherwise a failed allocation here would go unnoticed
    pNewTable->pReadGrps = (char**) malloc(capReadGrp * sizeof(char*));
    if (pNewTable->pReadGrps == NULL)
        SR_ErrQuit("ERROR: Not enough memory for the storage of read group names in an library table object.\n");

    pNewTable->pSampleMap = (int32_t*) malloc(capReadGrp * sizeof(int32_t));
    if (pNewTable->pSampleMap == NULL)
        SR_ErrQuit("ERROR: Not enough memory for the storage of read-group-to-sample map in an library table object.\n");

    pNewTable->pSeqTech = (int8_t*) malloc(capReadGrp * sizeof(int8_t));
    if (pNewTable->pSeqTech == NULL)
        SR_ErrQuit("ERROR: Not enough memory for the storage of sequencing technologies in an library table object.\n");

    pNewTable->pAnchorInfo = SR_AnchorInfoAlloc(capAnchor);

    // read group name hash, pre-sized to twice the read group capacity
    pNewTable->pReadGrpHash = kh_init(name);
    kh_resize(name, pNewTable->pReadGrpHash, 2 * capReadGrp);

    pNewTable->size = 0;
    pNewTable->capacity = capReadGrp;
    pNewTable->fragLenMax = 0;
    pNewTable->cutoff = 0.0;
    pNewTable->trimRate = 0.0;

    return pNewTable;
}
Exemplo n.º 8
0
// Allocate and initialize a bam input stream.
//
// bamFilename  : path of the bam file to open for reading
// binLen       : bin length stored in the stream; replaced by SR_MAX_BIN_LEN
//                when SR_PAIR_GENOMICALLY is set in the control flag
// numThreads   : number of return lists to allocate; when 0, no return lists
//                or alignment type storage are allocated and the stored
//                report size is set to 0
// buffCapacity : capacity passed to SR_BamMemPoolAlloc
// reportSize   : sizes the alignment type storage (per thread) and the query
//                name hashes
// pStreamMode  : supplies the filter function, its data and the control flag
//
// When SR_USE_BAM_INDEX is set, the bam index is loaded; if it cannot be
// loaded it is built and loaded again. Failure to open the bam file, to
// obtain a requested index or to allocate memory is reported through
// SR_ErrQuit.
SR_BamInStream* SR_BamInStreamAlloc(const char* bamFilename, uint32_t binLen, unsigned int numThreads, unsigned int buffCapacity,
                                    unsigned int reportSize, const SR_StreamMode* pStreamMode)
{
    SR_BamInStream* pBamInStream = (SR_BamInStream*) calloc(1, sizeof(SR_BamInStream));
    if (pBamInStream == NULL)
        SR_ErrQuit("ERROR: Not enough memory for a bam input stream object.");

    // marked as not ready until the whole object has been set up
    pBamInStream->bam_cur_status = -1;

    pBamInStream->fpBamInput = bam_open(bamFilename, "r");
    if (pBamInStream->fpBamInput == NULL)
        SR_ErrQuit("ERROR: Cannot open bam file %s for reading.\n", bamFilename);

    if ((pStreamMode->controlFlag & SR_USE_BAM_INDEX) != 0)
    {
        pBamInStream->pBamIndex = bam_index_load(bamFilename);
        if (pBamInStream->pBamIndex == NULL)
        {
            SR_ErrMsg("WARNING: Cannot open bam index file for reading. Creating it......");
            bam_index_build(bamFilename);

            // the index was explicitly requested, so do not continue with a
            // NULL index if building or re-loading it failed
            pBamInStream->pBamIndex = bam_index_load(bamFilename);
            if (pBamInStream->pBamIndex == NULL)
                SR_ErrQuit("ERROR: Cannot create or load the bam index for bam file %s.\n", bamFilename);

            SR_ErrMsg("         The bam index is created.");
        }
    }

    pBamInStream->filterFunc = pStreamMode->filterFunc;
    pBamInStream->filterData = pStreamMode->filterData;
    pBamInStream->numThreads = numThreads;
    pBamInStream->reportSize = reportSize;
    pBamInStream->currRefID = NO_QUERY_YET;
    pBamInStream->currBinPos = NO_QUERY_YET;
    pBamInStream->binLen = binLen;
    pBamInStream->pNewNode = NULL;
    pBamInStream->pBamIterator = NULL;

    if (numThreads > 0)
    {
        // one return list per thread, zero-initialized
        pBamInStream->pRetLists = (SR_BamList*) calloc(numThreads, sizeof(SR_BamList));
        if (pBamInStream->pRetLists == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of retrun alignment lists in the bam input stream object.\n");

        // reportSize alignment types for each thread
        pBamInStream->pAlgnTypes = (SR_AlgnType*) malloc(numThreads * reportSize * sizeof(SR_AlgnType));
        if (pBamInStream->pAlgnTypes == NULL)
            SR_ErrQuit("ERROR: Not enough memory for the storage of pair alignment type in the bam input stream object.\n");
    }
    else
    {
        pBamInStream->pRetLists = NULL;
        pBamInStream->pAlgnTypes = NULL;
        pBamInStream->reportSize = 0;
    }

    // the hash for the previous bin is only created when pairs are not
    // matched genomically; otherwise the bin length is set to SR_MAX_BIN_LEN
    if ((pStreamMode->controlFlag & SR_PAIR_GENOMICALLY) == 0)
    {
        pBamInStream->pNameHashes[PREV_BIN] = kh_init(queryName);
        kh_resize(queryName, pBamInStream->pNameHashes[PREV_BIN], reportSize);
    }
    else
    {
        pBamInStream->pNameHashes[PREV_BIN] = NULL;
        pBamInStream->binLen = SR_MAX_BIN_LEN;
    }

    pBamInStream->pNameHashes[CURR_BIN] = kh_init(queryName);
    kh_resize(queryName, pBamInStream->pNameHashes[CURR_BIN], reportSize);

    pBamInStream->pMemPool = SR_BamMemPoolAlloc(buffCapacity);

    // setup is complete
    pBamInStream->bam_cur_status = 1;

    return pBamInStream;
}