Beispiel #1
0
    void set(const KeyType &key, const ValueType &value) {
        auto index = get_hash_index(key);

        if (hashTable_[index] == nullptr) {
            auto record = new HashTableRecord{nullptr, nullptr, key, value};

            hashTable_[index] = record;

            size_++;
        }
        else {
            HashTableRecord *record = hashTable_[index];
            while (record->tail != nullptr && record->key != key) {
                record = record->tail;
            }

            if (record->key == key) {
                record->value = value;
            }
            else {
                auto new_record = new HashTableRecord{record, nullptr, key, value};
                record->tail = new_record;
                size_++;
            }
        }
    }
Beispiel #2
0
    /**
     * Removes the record stored under `key` and returns the value it held.
     *
     * The value is returned by copy: the record that owns it is deleted
     * before this function returns, so a reference into it would dangle.
     *
     * @param key key of the record to remove
     * @return a copy of the value that was stored under `key`
     * @throws HashTableException if get_record(key) finds no record
     */
    ValueType remove(const KeyType &key) {
        auto record = get_record(key);

        if (record == nullptr) {
            throw HashTableException();
        }

        // Copy first, so a throwing copy leaves the table untouched.
        ValueType value = record->value;

        // Detach from the predecessor, or from the bucket slot when the
        // record is the first one of its chain.
        if (record->head == nullptr) {
            hashTable_[get_hash_index(key)] = record->tail;
        }
        else {
            record->head->tail = record->tail;
        }

        // Detach from the successor. This must also happen when the record
        // was first in the chain, otherwise the new first record keeps a
        // `head` pointer to the record deleted below.
        if (record->tail != nullptr) {
            record->tail->head = record->head;
        }

        size_--;

        delete record;

        return value;
    }
Beispiel #3
0
    /**
     * Finds the record that holds `key`.
     *
     * Walks the chain of the bucket selected by get_hash_index(key),
     * following `tail` links.
     *
     * @param key key to look up
     * @return the matching record, or nullptr when the chain ends without a match
     */
    HashTableRecord *get_record(const KeyType &key) const {
        auto node = hashTable_[get_hash_index(key)];

        // Advance until the chain ends or the key matches.
        for (; node != nullptr && node->key != key; node = node->tail) {
        }

        return node;
    }
Beispiel #4
0
/*
 * Look up elem in the hash list hashlist.
 *
 * The bucket is computed with get_hash_subindex() when the table has
 * 1<<subdb_order buckets and with get_hash_index() when it has
 * 1<<db_order buckets. Any other bucket count yields NULL.
 * Otherwise the result of list_find_uuidelem() on the chosen bucket
 * is returned.
 */
Record_List * _hashlist_find_elem(void * hashlist,void * elem)
{
	UUID_LIST *table = (UUID_LIST *)hashlist;
	int bucket;

	if (table->hash_num == (1 << subdb_order)) {
		bucket = get_hash_subindex(elem);
	} else if (table->hash_num == (1 << db_order)) {
		bucket = get_hash_index(elem);
	} else {
		return NULL;
	}

	return list_find_uuidelem(&table->hash_table[bucket], elem);
}
Beispiel #5
0
//read kmer file, the first line of the kmer file contains the number of (distinct) kmers as kmer_index (Note the total number of kmers would be (2*kmer_index) including reverse complementary pairs)
//for a pair of reverse complementary kmers kmer1 and kmer2, if kmer1 < kmer2 (dictionary order) then (the index of kmer2) = (the index of kmer1) + kmer_index
//
//Each following line is parsed as "<sequence> <coverage>". The k-mer read from the t-th data line
//(t starts at 1) is stored in (*kmer)[t]; its reverse complement, with the same coverage, is stored
//in (*kmer)[t + kmer_index]. hashtable[h] receives the first t whose sequence hashes to h.
//
//kmer_file   path of the file to read
//kmer        receives the malloc'ed table of 2*kmer_index+2 entries; the caller owns it
//hashtable   initialised with init_hashtable() and then filled; slot 1048576 is written as well
//kmerlength  number of characters per k-mer; seq[kmerlength] is set to '\0'
//kmer_index1 receives the k-mer count from the header line, or 0 on failure
//
//Returns 0 on success, -1 when the file cannot be opened, the header line cannot be parsed,
//or the table cannot be allocated.
int read_kmer_file(char * kmer_file, struct kmer_state ** kmer, int* hashtable, int kmerlength, int * kmer_index1){
	FILE * fr_kmer;
	int temp_kmer_index, temp_hash_index, i;
	int kmer_index = 0;
	char temp_char[1000];
	char line[Length];

	// Never hand an indeterminate count back to the caller.
	(*kmer_index1) = 0;

	fr_kmer = fopen(kmer_file, "r");
	if (fr_kmer == NULL)
		return -1;

	init_hashtable(hashtable);

	// Header: "<token> <count>". The %999s width keeps the token inside temp_char.
	if (fgets(line, Length, fr_kmer) == NULL
	    || sscanf(line, "%999s %d", temp_char, &kmer_index) != 2
	    || kmer_index < 0){
		fclose(fr_kmer);
		return -1;
	}

	// Size computed in size_t so that a large count cannot overflow int arithmetic.
	(*kmer) = (struct kmer_state *) malloc(((size_t)kmer_index*2+2)*sizeof(struct kmer_state));
	if ((*kmer) == NULL){
		fclose(fr_kmer);
		return -1;
	}
	init_kmer((*kmer), kmer_index);

	// Forward k-mers occupy slots 1..kmer_index. Reading more lines than the header announced
	// would overwrite the reverse-complement half and then run past the allocation, so stop there.
	temp_kmer_index = 1;
	while (temp_kmer_index <= kmer_index){
		struct kmer_state * fwd;
		struct kmer_state * rev;

		// 'X' is a marker: if fgets stores nothing, line[0] is still 'X' afterwards.
		line[0] = 'X';
		if (fgets(line, Length, fr_kmer) == NULL || line[0] == 'X')
			break;

		fwd = &(*kmer)[temp_kmer_index];
		rev = &(*kmer)[temp_kmer_index+kmer_index];

		// NOTE(review): %s is not bounded by the size of seq, which is declared outside this
		// function; a line with an over-long first token would overflow it. The parse result
		// is not checked either, as in the original.
		sscanf(line, "%s %d", fwd->seq, &fwd->cov);
		fwd->seq[kmerlength] = '\0';

		temp_hash_index = get_hash_index(fwd->seq);
		if (hashtable[temp_hash_index] == -1){
			hashtable[temp_hash_index] = temp_kmer_index;
		}

		// Reverse complement: walk the forward sequence backwards and swap A<->T, C<->G.
		// Any other character leaves the destination position as init_kmer() left it.
		for (i=0;i<kmerlength;i++){
			switch (fwd->seq[kmerlength-1-i]){
			case 'A': rev->seq[i] = 'T'; break;
			case 'T': rev->seq[i] = 'A'; break;
			case 'C': rev->seq[i] = 'G'; break;
			case 'G': rev->seq[i] = 'C'; break;
			default: break;
			}
		}
		rev->seq[kmerlength] = '\0';
		rev->cov = fwd->cov;

		temp_kmer_index++;
		// This condition is never true for a positive index, so the progress line is
		// effectively disabled; kept as in the original.
		if (temp_kmer_index % 10000 == -1)
			printf("Kmer %d %s count %d\n", temp_kmer_index-1, (*kmer)[temp_kmer_index-1].seq, (*kmer)[temp_kmer_index-1].cov);
	}

	// Extra slot written after the regular buckets.
	// NOTE(review): presumably an end marker for range lookups - confirm the intended value.
	hashtable[1048576] = kmer_index-1;
	fclose(fr_kmer);

	(*kmer_index1) = kmer_index;
	return 0;
}
Beispiel #6
0
/*
 * Append elem to the hash list hashlist.
 *
 * The bucket is computed with get_hash_subindex() when the table has 256
 * buckets and with get_hash_index() when it has 1024. A new Record_List
 * node pointing at elem is then added to the tail of that bucket's list.
 *
 * Returns 0 on success, -EINVAL when hash_num is neither 256 nor 1024,
 * and -ENOMEM when the node cannot be allocated.
 */
int hashlist_add_elem(void * hashlist,void * elem)
{
	Record_List * new_record;
	UUID_LIST * uuid_list= (UUID_LIST *)hashlist;
	int hindex;

	/*
	 * Choose the bucket before allocating, so that an unsupported table
	 * size is rejected without leaving an orphaned node behind.
	 */
	if(uuid_list->hash_num==256)
		hindex=get_hash_subindex(elem);
	else if(uuid_list->hash_num==1024)
		hindex=get_hash_index(elem);
	else
		return -EINVAL;

	new_record=Calloc(sizeof(Record_List));
	if(new_record==NULL)
		return -ENOMEM;
	INIT_LIST_HEAD(&(new_record->list));
	new_record->record=elem;

	List_add_tail(&new_record->list,&uuid_list->hash_table[hindex].list);
	return 0;
}
Beispiel #7
0
//Index reads from the kmer table
//
//Reads are handled two at a time: entry i together with entry i+1, for i = 0, 2, 4, ...
//For every window position j of read i two strings of kmerlength characters are built:
//temp_kmer1 from Reads[i].nc in forward order and temp_kmer2 from Reads[i+1].nc taken from its
//end backwards. The smaller of the two (strcmp order) is looked up in the kmer table through
//hashtable; when temp_kmer1 is the larger one the hit is shifted by kmer_index.
//A hit is recorded in Reads[i] (kmer_index/kmer_pos) and in the kmer entry (reads/reads_pos/true_cov),
//limited to 24 recorded occurrences per kmer. Afterwards the hits of read i are mirrored, in reverse
//order and with the partner kmer index, into Reads[i+1].
//
//Reads       array of reads, updated in place
//kmer        kmer table, updated in place
//hashtable   maps a hash index to the first kmer table index of that bucket, -1 when empty
//read_index  number of entries in Reads to process
//            NOTE(review): Reads[i+1] is accessed for every even i, so an even count is assumed - confirm
//kmer_index  offset between a kmer and its partner entry in the kmer table
//kmerlength  window length in characters
//
//Always returns 0.
int indexing_Reads_kmer(struct read_state * Reads, struct kmer_state * kmer, int* hashtable, int read_index, int kmer_index, int kmerlength){
	int i, j, j1;
	int temp_hash_index, temp_table_index1, temp_table_index2;
	char temp_kmer1[KmerSize];
	char temp_kmer2[KmerSize];
	char *lookup_kmer;
	int cmp;
	int temp_gap, max_gap;
	int tag_overflow;
	int read_kmer_count, mate_slot;
	int temp_kmer_pos, temp_kmer_index;

	for (i = 0; i < read_index; i=i+2){
		read_kmer_count = 0;
		max_gap = 0; //for test
		temp_gap = 0; //for test
		// NOTE(review): this bound stops two windows before the last full window
		// (len-kmerlength+1 windows exist) - confirm whether that is intended.
		for (j = 0; j < Reads[i].len-kmerlength-1 ; j++){
			for (j1 = j; j1 < j+kmerlength ; j1++){
				temp_kmer1[j1-j] = Reads[i].nc[j1];
				temp_kmer2[j+kmerlength-1-j1] = Reads[i+1].nc[Reads[i+1].len-1-j1];
			}
			temp_kmer1[kmerlength] = '\0';
			temp_kmer2[kmerlength] = '\0';

			// Compare once; the buffers do not change for the rest of this iteration.
			cmp = strcmp(temp_kmer1, temp_kmer2);
			lookup_kmer = (cmp < 0) ? temp_kmer1 : temp_kmer2;

			temp_hash_index = get_hash_index(lookup_kmer);
			if (hashtable[temp_hash_index] == -1){
				temp_kmer_index = -1;
			}else{
				// from temp_hash_index get the range [temp_table_index1, temp_table_index2] in the kmer table
				// get temp_kmer_index as a result
				temp_table_index1 = hashtable[temp_hash_index];
				temp_hash_index++;
				// Skip empty buckets; this relies on a non-empty slot existing further on.
				while(hashtable[temp_hash_index] == -1)
					temp_hash_index++;
				temp_table_index2 = hashtable[temp_hash_index];
				temp_kmer_index = get_index(lookup_kmer, kmer, temp_table_index1, temp_table_index2);
			}

			if (temp_kmer_index != -1){
				// The lookup used temp_kmer2 unless temp_kmer1 was smaller; when temp_kmer1
				// is strictly larger, move to the partner entry.
				if (cmp > 0){
					temp_kmer_index = temp_kmer_index + kmer_index;
				}
				// downsampling to 24
				if (kmer[temp_kmer_index].cov < 24){
					tag_overflow = 0;
				}else if (kmer[temp_kmer_index].true_cov > 23){
					tag_overflow = 1;
				}else{
					// cov is at least 24 here, so the modulo cannot divide by zero.
					if (rand() % kmer[temp_kmer_index].cov < 24){
						tag_overflow = 0;
					}else{
						tag_overflow = 1;
					}
				}

				if(tag_overflow == 0){
					if (kmer[temp_kmer_index].true_cov < 24){
						Reads[i].kmer_index[read_kmer_count] = temp_kmer_index;
						Reads[i].kmer_pos[read_kmer_count] = j;
						read_kmer_count++;
						kmer[temp_kmer_index].reads[kmer[temp_kmer_index].true_cov] = i;
						kmer[temp_kmer_index].reads_pos[kmer[temp_kmer_index].true_cov] = j;
						kmer[temp_kmer_index].true_cov++;
						temp_gap = 0;
					}
				}
			}else{
				temp_gap++;
				if (temp_gap > max_gap)
					max_gap = temp_gap;
			}
		}
		if (i %1000 == 0)
			printf("Reads[%d], kmer %d, gap %d\n", i, read_kmer_count, max_gap);
		Reads[i].kmer_count = read_kmer_count;
		Reads[i+1].kmer_count = read_kmer_count;

		// Mirror the hits of read i into read i+1: reverse order, partner kmer index,
		// position counted from the other end.
		for (j = Reads[i].kmer_count -1; j>=0 ; j--){
			temp_kmer_index = Reads[i].kmer_index[j];
			if (temp_kmer_index < kmer_index)
				temp_kmer_index = temp_kmer_index + kmer_index;
			else
				temp_kmer_index = temp_kmer_index - kmer_index;
			mate_slot = Reads[i].kmer_count -1 - j;
			temp_kmer_pos = Reads[i].len - Reads[i].kmer_pos[j] - kmerlength;
			// NOTE(review): when the partner kmer already has 24 recorded reads this slot of
			// Reads[i+1] is left unwritten although kmer_count covers it - confirm intended.
			if (kmer[temp_kmer_index].true_cov < 24){
				Reads[i+1].kmer_index[mate_slot] = temp_kmer_index;
				Reads[i+1].kmer_pos[mate_slot] = temp_kmer_pos;
				kmer[temp_kmer_index].reads[kmer[temp_kmer_index].true_cov] = i+1;
				kmer[temp_kmer_index].reads_pos[kmer[temp_kmer_index].true_cov] = temp_kmer_pos;
				kmer[temp_kmer_index].true_cov++;
			}
		}
	}
	return 0;
}
Beispiel #8
0
void
output_all_gnu_mo_files(void)
{
	struct catalog	*p, *op;
	struct messages	*m;
	size_t	id_len, str_len, id_off, str_off, ids_top, strs_top;
	unsigned int	*hash_tbl;
	unsigned int	hash_size;
	unsigned int	num = 0, fnum = 0, unum = 0;
	unsigned int	i, idx;
	char	*ids, *strs;
	struct msgtbl	*id_tbl, *str_tbl;
	struct gnu_msg_info	header;
	FILE	*out;

	p = catalog_head;

	while (p) {
		num += p->nmsg;
		fnum += p->fnum;
		unum += p->unum;


		free(p->thash);
		if (p->nmsg == 0) {
			/*
			 * no message in this file
			 * skip generating a mo
			 */
			goto skip;
		}

		if (p->header)
			num--;

		p->msg = (struct messages *)Xrealloc(p->msg,
			sizeof (struct messages) * p->nmsg);

		/*
		 * Sort the message array
		 */
		qsort(p->msg, p->nmsg, sizeof (struct messages),
			(int (*)(const void *, const void *))msg_cmp);


		hash_size = find_prime(p->nmsg);
		hash_tbl = (unsigned int *)Xcalloc(hash_size,
			sizeof (unsigned int));


		/* Setting Header info */
		header.magic = GNU_MAGIC;
		header.revision = GNU_REVISION;
		header.num_of_str = p->nmsg;
		header.off_msgid_tbl = sizeof (struct gnu_msg_info);
		header.off_msgstr_tbl = sizeof (struct gnu_msg_info) +
			p->nmsg * sizeof (struct msgtbl);
		header.sz_hashtbl = hash_size;
		header.off_hashtbl = header.off_msgstr_tbl +
			p->nmsg * sizeof (struct msgtbl);

		m = p->msg;

		id_len = 0;
		str_len = 0;
		for (i = 0; i < p->nmsg; i++) {
			id_len += m[i].id_len;
			str_len += m[i].str_len;
		}
		ids = (char *)Xmalloc(id_len);
		strs = (char *)Xmalloc(str_len);
		id_tbl = (struct msgtbl *)Xmalloc(sizeof (struct msgtbl) *
			p->nmsg);
		str_tbl = (struct msgtbl *)Xmalloc(sizeof (struct msgtbl) *
			p->nmsg);
		id_off = 0;
		str_off = 0;
		ids_top = header.off_hashtbl +
			sizeof (unsigned int) * hash_size;
		strs_top = ids_top + id_len;
		for (i = 0; i < p->nmsg; i++) {
			/*
			 * Set the hash table
			 */
			idx = get_hash_index(hash_tbl, m[i].hash, hash_size);
			hash_tbl[idx] = i + 1;

			/*
			 * rearrange msgid and msgstr
			 */
			id_tbl[i].len = m[i].id_len - 1;
			str_tbl[i].len = m[i].str_len - 1;
			id_tbl[i].offset = id_off + ids_top;
			str_tbl[i].offset = str_off + strs_top;
			(void) memcpy(ids + id_off, m[i].id, m[i].id_len);
			(void) memcpy(strs + str_off, m[i].str, m[i].str_len);
			id_off += m[i].id_len;
			str_off += m[i].str_len;
			free(m[i].id);
			free(m[i].str);
		}

		if ((out = fopen(p->fname, "w")) == NULL) {
			error(gettext(ERR_OPEN_FAILED), p->fname);
			/* NOTREACHED */
		}

		/* writing header */
		(void) fwrite(&header, sizeof (struct gnu_msg_info),
			1, out);

		/* writing msgid offset table */
		(void) fwrite(id_tbl, sizeof (struct msgtbl),
			p->nmsg, out);
		/* writing msgstr offset table */
		(void) fwrite(str_tbl, sizeof (struct msgtbl),
			p->nmsg, out);
		/* writing hash table */
		(void) fwrite(hash_tbl, sizeof (unsigned int),
			hash_size, out);
		/* writing msgid table */
		(void) fwrite(ids, id_len, 1, out);
		/* writing msgstr table */
		(void) fwrite(strs, str_len, 1, out);

		(void) fclose(out);
		free(id_tbl);
		free(str_tbl);
		free(hash_tbl);
		free(ids);
		free(strs);
skip:
		free(p->fname);
		free(p->msg);
		op = p->next;
		free(p);
		p = op;
	}
	if (verbose_flag) {
		diag(gettext(DIAG_RESULTS), num, fnum, unum);
	}
}