static hashtable* ReadReference(const char* const refname) { hashtable* reference = new_hashtable(12); sequence* sp = read_fasta_sequence(refname); while(sp != NULL){ // allocate a coverage array for the sequence chrcoverage* cov = ckallocz(sizeof(chrcoverage)); cov->length = strlen((char*)sp->sequence); cov->map = ckallocz(strlen((char*)sp->sequence)); cov->cov = ckallocz(strlen((char*)sp->sequence)); cov->seq = ckallocz(strlen((char*)sp->sequence)+1); memcpy(cov->seq, sp->sequence, cov->length); // if the name of the sequence has more than one tokens, just use the // first token in the name int i = 0; while((sp->header[i] != '\n') && (sp->header[i] != 0) && (sp->header[i] != '\t') && (sp->header[i] != 32)) i++; sp->header[i] = 0; add_hashtable(reference,(char*)sp->header,strlen((char*)sp->header),cov); sp = get_next_sequence(sp); } close_fasta_sequence(sp); return reference; }
/*
 * Walk the FASTA file `refName` record by record and, for each record, print
 * tab-separated bin lines to stdout using the chrcoverage entry stored in
 * `reference` under the record's name (first token of the header).
 *
 * Only positions whose map value is '1' count towards a bin. A bin is emitted
 * as soon as it has collected `binSize` such positions; a final, shorter bin
 * is emitted if any counted positions remain at the end of the record.
 *
 * Output columns:
 *   name, bin start, bin end (one past the last position covered),
 *   sum of cov over the counted positions, number of counted positions,
 *   value returned by CalculateGC for [start, end).
 */
static void PrintBinInfo(const char* const refName,
                         hashtable* const reference,
                         const int binSize)
{
    sequence* sp = read_fasta_sequence(refName);

    for (; sp != NULL; sp = get_next_sequence(sp)) {
        // Keep only the first token of the header (cut at newline, NUL, tab
        // or space) so it matches the key stored in the hashtable.
        int nameEnd = 0;
        for (;; nameEnd++) {
            const int ch = sp->header[nameEnd];
            if (ch == '\n' || ch == 0 || ch == '\t' || ch == 32) {
                break;
            }
        }
        sp->header[nameEnd] = 0;

        chrcoverage* chrcov = (chrcoverage*)must_find_hashtable(
            reference, sp->header, strlen(sp->header));

        int covTotal = 0;   // coverage accumulated in the current bin
        int counted  = 0;   // counted ('1') positions in the current bin
        u64 binStart = 0;   // first position of the current bin

        for (u64 pos = 0; pos < chrcov->length; pos++) {
            if (chrcov->map[pos] != '1') {
                continue;
            }

            covTotal += chrcov->cov[pos];
            counted  += 1;
            if (counted != binSize) {
                continue;
            }

            // The bin is full: report it and start a new one right after.
            const u64 binEnd = pos + 1;
            const int gc = CalculateGC(chrcov, binStart, binEnd);
            printf("%s\t%" PRIu64 "\t%" PRIu64 "\t%d\t%d\t%d\n",
                   sp->header, binStart, binEnd, covTotal, binSize, gc);

            counted  = 0;
            covTotal = 0;
            binStart = binEnd;
        }

        // Trailing partial bin, extended to the end of the record.
        if (counted > 0) {
            const int gc = CalculateGC(chrcov, binStart, chrcov->length);
            printf("%s\t%" PRIu64 "\t%" PRIu64 "\t%d\t%d\t%d\n",
                   sp->header, binStart, chrcov->length, covTotal, counted, gc);
        }
    }

    // sp is NULL once the loop has finished.
    close_fasta_sequence(sp);
}
bool reads_parsing::get_next_sequence(std::string &seq_read, int &len) { bool success = get_next_seq_from_file(seq_read,len,current_file_indx); if (success) return true; if ( current_file_indx < nb_files-1 ) { close_file(current_file_indx); current_file_indx++; open_file(current_file_indx); return get_next_sequence(seq_read,len); } return false; }