/*
 * Switch a sequence writer from its main writing pass to the pass in which
 * temporary keys are read back and alignment data is written per row.
 *
 * Steps performed on success:
 *   - close the main write cursor (the one registered by TableWriterSeq_Make
 *     and remembered in cself->cursor_id),
 *   - reset the cached tmp-key row-id range to "empty"
 *     (first = INT64_MAX, last = INT64_MIN),
 *   - create a reader over the writer's table using a copy of
 *     TableSeqReadTmpKey_cols,
 *   - register a second cursor for the columns starting at
 *     ewseq_cn_PRIMARY_ALIGNMENT_ID; its id is stored in alignd_cursor_id.
 *
 * Returns rcNull/rcParam when cself is NULL, rcViolated/rcParam when the
 * writer was created with ewseq_co_AlignData, otherwise the first failing
 * rc of the calls above (0 on success).
 */
LIB_EXPORT rc_t CC TableWriterSeq_TmpKeyStart(const TableWriterSeq* cself)
{
    rc_t rc = 0;

    if( cself == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
    }
    else if( cself->options & ewseq_co_AlignData ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
    }
    /* close the cursor this writer actually owns rather than a literal 0;
       this mirrors TableWriterAlgn_TmpKeyStart */
    else if( (rc = TableWriter_CloseCursor(cself->base, cself->cursor_id, NULL)) == 0 ) {
        TableWriterSeq* self = (TableWriterSeq*)cself;
        VTable* vtbl = NULL;

        /* an inverted range guarantees the first TmpKey lookup refreshes it */
        self->tmpKeyIdFirst = INT64_MAX;
        self->tmpKeyIdLast = INT64_MIN;
        memcpy(&self->cols_read_tmpkey, &TableSeqReadTmpKey_cols, sizeof(TableSeqReadTmpKey_cols));

        if( (rc = TableWriter_GetVTable(cself->base, &vtbl)) == 0 &&
            (rc = TableReader_Make(&self->tmpkey_reader, vtbl, self->cols_read_tmpkey, 50 * 1024 * 1024)) == 0 ) {
            memcpy(self->cols_alignd, &TableWriterSeq_cols[ewseq_cn_PRIMARY_ALIGNMENT_ID], sizeof(self->cols_alignd));
            rc = TableWriter_AddCursor(self->base, self->cols_alignd,
                                       sizeof(self->cols_alignd) / sizeof(self->cols_alignd[0]),
                                       &self->alignd_cursor_id);
        }
    }
    return rc;
}
/*
 * Write spot id, mate reference position and mate alignment id for an
 * existing alignment row through the spot-id cursor.
 *
 * cself     - alignment writer; must not be NULL
 * rowid     - row to update; 0 is rejected
 * spot_id   - value for cols_spotid[0]
 * mate_id   - value for cols_spotid[4]
 * ref_start - source of global_ref_start (cols_spotid[1]) and
 *             local.ref_id / local.ref_start (cols_spotid[2], [3]).
 *             NOTE(review): not checked for NULL here - confirm callers
 *             always supply it when those columns are active.
 *
 * Returns rcNull for a NULL writer or zero rowid, rcViolated when the writer
 * has ewalgn_co_SEQ_SPOT_ID set, otherwise the rc of the row operations.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_Write_SpotInfo(const TableWriterAlgn* cself, int64_t rowid, int64_t spot_id, int64_t mate_id, ReferenceStart const *ref_start)
{
    rc_t rc = 0;

    if( cself == NULL || rowid == 0 ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( cself->options & ewalgn_co_SEQ_SPOT_ID ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableWriter_OpenRowId(cself->base, rowid, cself->spotid_cursor_id);
    if( rc != 0 ) {
        return rc;
    }

    /* the TW_COL_* macros update the local 'rc' */
    TW_COL_WRITE_VAR(cself->base, cself->cols_spotid[0], spot_id);
    TW_COL_WRITE_VAR(cself->base, cself->cols_spotid[1], ref_start->global_ref_start);
    TW_COL_WRITE_VAR(cself->base, cself->cols_spotid[2], ref_start->local.ref_id);
    TW_COL_WRITE_VAR(cself->base, cself->cols_spotid[3], ref_start->local.ref_start);
    TW_COL_WRITE_VAR(cself->base, cself->cols_spotid[4], mate_id);

    if( rc == 0 ) {
        rc = TableWriter_CloseRow(cself->base);
    }
    return rc;
}
/*
 * Read the temporary key id stored for an alignment row.
 *
 * cself  - alignment writer whose tmp-key reader has been opened
 * rowid  - row to read; 0 is rejected
 * key_id - receives the value of the first tmp-key reader column
 *
 * Returns rcNull for NULL/zero arguments, rcNotOpen when no tmp-key reader
 * exists yet, otherwise the rc of TableReader_ReadRow.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_TmpKey(const TableWriterAlgn* cself, int64_t rowid, uint64_t* key_id)
{
    rc_t rc = 0;

    if( cself == NULL || rowid == 0 || key_id == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( cself->tmpkey_reader == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcMode, rcNotOpen);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableReader_ReadRow(cself->tmpkey_reader, rowid);
    if( rc == 0 ) {
        /* memcpy avoids assuming anything about the column buffer's alignment */
        memcpy(key_id, cself->cols_read_tmpkey[0].base.var, sizeof(*key_id));
    }
    return rc;
}
/*
 * Look up the RefSeq entry for an accession, creating it when absent.
 *
 * self      - manager owning the refSeq array
 * result    - receives the entry found at (or inserted at) the lookup slot
 * N         - length of 'accession'
 * accession - accession text (N characters)
 *
 * When FindAccession reports no match, AccessionType is consulted: a
 * non-zero type clears rc and a new entry is created via NewRefSeq; a zero
 * type yields the rc it reported, or rcUnexpected if it reported none.
 * Returns 0 on success; on failure *result is left untouched.
 */
static rc_t GetSeq(RefSeqMgr *const self, RefSeq **result, unsigned const N, char const accession[])
{
    rc_t rc = 0;
    bool found = false;
    unsigned const slot = FindAccession(self->nRefSeqs, (RefSeq const **)self->refSeq, N, accession, &found);

    if (!found) {
        int const kind = AccessionType(self->vmgr, N, accession, &rc);

        if (kind != 0) {
            rc = 0;
        }
        else if (rc == 0) {
            rc = RC(rcAlign, rcTable, rcAccessing, rcType, rcUnexpected);
        }

        if (rc != 0) {
            ALIGN_CF_DBG("failed to open %.*s", N, accession);
            ALIGN_DBGERR(rc);
            return rc;
        }

        rc = NewRefSeq(self, kind, slot, N, accession);
        if (rc != 0) {
            return rc;
        }
    }

    *result = self->refSeq[slot];
    return 0;
}
/*
 * Write PRIMARY_ALIGNMENT_ID / ALIGNMENT_COUNT for an already written
 * sequence row, using the cursor added by TableWriterSeq_TmpKeyStart.
 *
 * cself                - sequence writer; must not have ewseq_co_AlignData
 * rowid                - row to update; 0 means "just flush the cursor"
 * primary_alignment_id - data for cols_alignd[0]
 * alignment_count      - data for cols_alignd[1]
 *
 * The row is first read through the tmp-key reader; the length of its
 * second column must equal primary_alignment_id->elements, and both data
 * arguments must have the same element count, otherwise rcInconsistent is
 * returned. If a flush was requested (cself->flush), the cursor is flushed
 * before the row is opened. After a successful write the statistics are
 * updated via TableWriterSeq_CollectStatistics.
 */
LIB_EXPORT rc_t CC TableWriterSeq_WriteAlignmentData(const TableWriterSeq* cself, int64_t rowid, const TableWriterData* primary_alignment_id, const TableWriterData* alignment_count)
{
    rc_t rc = 0;

    if( cself == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( cself->options & ewseq_co_AlignData ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( rowid == 0 ) {
        return TableWriter_Flush(cself->base, cself->alignd_cursor_id);
    }
    if( primary_alignment_id == NULL || alignment_count == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableReader_ReadRow(cself->tmpkey_reader, rowid);
    if( rc != 0 || cself->cols_read_tmpkey[1].len != primary_alignment_id->elements ) {
        if( rc == 0 ) {
            rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        }
        ALIGN_DBGERRP("nreads and primary_alignment_id length %u <> %lu", rc, cself->cols_read_tmpkey[1].len, primary_alignment_id->elements);
        return rc;
    }
    if( primary_alignment_id->elements != alignment_count->elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("primary_alignment_id and alignment_count length %u <> %lu", rc, primary_alignment_id->elements, alignment_count->elements);
        return rc;
    }

    if( cself->flush ) {
        rc = TableWriter_Flush(cself->base, cself->alignd_cursor_id);
        /* the request is consumed even when the flush itself failed */
        ((TableWriterSeq*)cself)->flush = false;
    }
    if( rc != 0 ) {
        return rc;
    }

    rc = TableWriter_OpenRowId(cself->base, rowid, cself->alignd_cursor_id);
    if( rc != 0 ) {
        return rc;
    }

    TW_COL_WRITE(cself->base, cself->cols_alignd[0], *primary_alignment_id);
    TW_COL_WRITE(cself->base, cself->cols_alignd[1], *alignment_count);
    if( rc == 0 ) {
        rc = TableWriter_CloseRow(cself->base);
    }
    /* statistics are collected unconditionally (no ewseq_co_WantMateStats gate) */
    if( rc == 0 ) {
        rc = TableWriterSeq_CollectStatistics((TableWriterSeq *)cself, primary_alignment_id);
    }
    return rc;
}
/*
 * Write only the spot id (cols_spotid[0]) for an existing alignment row
 * through the spot-id cursor.
 *
 * Returns rcNull for a NULL writer or zero rowid, rcViolated when the writer
 * has ewalgn_co_SEQ_SPOT_ID set, otherwise the rc of the row operations.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_Write_SpotId(const TableWriterAlgn* cself, int64_t rowid, int64_t spot_id)
{
    rc_t rc = 0;

    if( cself == NULL || rowid == 0 ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( cself->options & ewalgn_co_SEQ_SPOT_ID ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableWriter_OpenRowId(cself->base, rowid, cself->spotid_cursor_id);
    if( rc != 0 ) {
        return rc;
    }

    TW_COL_WRITE_VAR(cself->base, cself->cols_spotid[0], spot_id);
    if( rc == 0 ) {
        rc = TableWriter_CloseRow(cself->base);
    }
    return rc;
}
/*
 * Report the reader's cached 'circular' flag.
 * Returns rcNull/rcParam when either argument is NULL, 0 otherwise.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_Circular(const TableReaderRefSeq* cself, bool* circular)
{
    rc_t rc = 0;

    if( cself != NULL && circular != NULL ) {
        *circular = cself->circular;
    }
    else {
        rc = RC(rcAlign, rcType, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Return a pointer to the reader's MD5 bytes, or NULL in *md5 when the
 * reader has no MD5 (has_md5 is false).
 * Returns rcNull/rcParam when either argument is NULL, 0 otherwise.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_MD5(const TableReaderRefSeq* cself, const uint8_t** md5)
{
    rc_t rc = 0;

    if( cself != NULL && md5 != NULL ) {
        if( cself->has_md5 ) {
            *md5 = cself->md5;
        }
        else {
            *md5 = NULL;
        }
    }
    else {
        rc = RC(rcAlign, rcType, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Report the reader's cached total sequence length.
 * Returns rcNull/rcParam when either argument is NULL, 0 otherwise.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_SeqLength(const TableReaderRefSeq* cself, INSDC_coord_len* len)
{
    rc_t rc = 0;

    if( cself != NULL && len != NULL ) {
        *len = cself->total_seq_len;
    }
    else {
        rc = RC(rcAlign, rcType, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Read the reference start stored for an alignment row.
 *
 * When tmp-key reader column 1 is flagged ewcol_Ignore, the local form is
 * filled in (ref_id from column 2, ref_start from column 3); otherwise the
 * global_ref_start is filled in from column 1. The other member(s) of *rslt
 * are left untouched.
 *
 * Returns rcNull for NULL/zero arguments, rcNotOpen when no tmp-key reader
 * exists yet, otherwise the rc of TableReader_ReadRow.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_RefStart(const TableWriterAlgn* cself, int64_t rowid, ReferenceStart *const rslt)
{
    rc_t rc = 0;

    if( cself == NULL || rowid == 0 || rslt == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( cself->tmpkey_reader == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcMode, rcNotOpen);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableReader_ReadRow(cself->tmpkey_reader, rowid);
    if( rc != 0 ) {
        return rc;
    }

    if( !(cself->cols_read_tmpkey[1].flags & ewcol_Ignore) ) {
        memcpy(&rslt->global_ref_start, cself->cols_read_tmpkey[1].base.var, sizeof(rslt->global_ref_start));
    }
    else {
        memcpy(&rslt->local.ref_id, cself->cols_read_tmpkey[2].base.var, sizeof(rslt->local.ref_id));
        memcpy(&rslt->local.ref_start, cself->cols_read_tmpkey[3].base.var, sizeof(rslt->local.ref_start));
    }
    return rc;
}
/*
 * Return the reader's sequence id string and its size as computed by
 * string_size(). The pointer refers to storage held by the reader.
 * Returns rcNull/rcParam when any argument is NULL, 0 otherwise.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_SeqId(const TableReaderRefSeq* cself, const char** id, uint32_t* id_sz)
{
    rc_t rc = 0;

    if( cself != NULL && id != NULL && id_sz != NULL ) {
        *id = cself->seq_id;
        *id_sz = string_size(cself->seq_id);
    }
    else {
        rc = RC(rcAlign, rcType, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Read the temporary key id stored for a sequence row.
 *
 * cself  - sequence writer whose tmp-key reader has been opened
 * rowid  - row to read; 0 is rejected
 * key_id - receives the value of the first tmp-key reader column
 *
 * When rowid lies outside the cached [tmpKeyIdFirst, tmpKeyIdLast] range,
 * the range is refreshed through TableReader_PageIdRange and the writer's
 * 'flush' flag is set to whether that refresh succeeded.
 *
 * Returns rcNull for NULL/zero arguments, rcNotOpen when no tmp-key reader
 * exists yet, otherwise the rc of the reader calls.
 */
LIB_EXPORT rc_t CC TableWriterSeq_TmpKey(const TableWriterSeq* cself, int64_t rowid, uint64_t* key_id)
{
    rc_t rc = 0;

    if( cself == NULL || rowid == 0 || key_id == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( cself->tmpkey_reader == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcMode, rcNotOpen);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableReader_ReadRow(cself->tmpkey_reader, rowid);
    if( rc != 0 ) {
        return rc;
    }

    memcpy(key_id, cself->cols_read_tmpkey[0].base.var, sizeof(*key_id));

    if( rowid < cself->tmpKeyIdFirst || cself->tmpKeyIdLast < rowid ) {
        /* bookkeeping fields are mutable state behind the const handle */
        TableWriterSeq *const self = (TableWriterSeq*)cself;

        rc = TableReader_PageIdRange(cself->tmpkey_reader, rowid, &self->tmpKeyIdFirst, &self->tmpKeyIdLast);
        self->flush = rc == 0;
    }
    return rc;
}
/*
 * Obtain the sequence length by dispatching to the object's vtable
 * 'length' entry. Returns rcNull/rcParam when cself is NULL, otherwise
 * whatever the vtable function returns.
 */
LIB_EXPORT rc_t CC RefSeq_SeqLength(const RefSeq* cself, INSDC_coord_len* len)
{
    rc_t rc = 0;

    if (cself != NULL) {
        rc = cself->vt->length(cself, len);
    }
    else {
        rc = RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Obtain the circularity flag by dispatching to the object's vtable
 * 'circular' entry. Returns rcNull/rcParam when cself is NULL, otherwise
 * whatever the vtable function returns.
 */
LIB_EXPORT rc_t CC RefSeq_Circular(const RefSeq* cself, bool* circular)
{
    rc_t rc = 0;

    if (cself != NULL) {
        rc = cself->vt->circular(cself, circular);
    }
    else {
        rc = RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Obtain the sequence name by dispatching to the object's vtable 'name'
 * entry and storing the returned pointer in *name.
 *
 * cself - sequence object; must not be NULL
 * name  - receives the name pointer; must not be NULL
 *
 * Returns rcNull/rcParam when either argument is NULL (previously a NULL
 * 'name' was written through unchecked), 0 otherwise.
 */
LIB_EXPORT rc_t CC RefSeq_Name(const RefSeq* cself, const char** name)
{
    rc_t rc = 0;

    if (cself == NULL || name == NULL) {
        rc = RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
    }
    else {
        RefSeq const *const self = (RefSeq *)cself;

        *name = self->vt->name(self);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Obtain the MD5 checksum pointer by dispatching to the object's vtable
 * 'checksum' entry. Returns rcNull/rcParam when cself is NULL, otherwise
 * whatever the vtable function returns.
 */
LIB_EXPORT rc_t CC RefSeq_MD5(const RefSeq* cself, const uint8_t** md5)
{
    rc_t rc = 0;

    if (cself != NULL) {
        rc = cself->vt->checksum(cself, md5);
    }
    else {
        rc = RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Write one alignment row through the writer's main cursor.
 *
 * cself - alignment writer; must not be NULL
 * data  - column values for the row; must not be NULL
 * rowid - passed through to TableWriter_OpenRow
 *
 * Every column is handed to a TW_COL_* macro (defined elsewhere), which
 * updates the local 'rc'. MATE_ALIGN_ID gets special handling: when exactly
 * one element is supplied, it is written with a length of 1 only if that
 * element is non-zero, and with a length of 0 otherwise.
 *
 * Returns rcNull/rcParam for NULL arguments, otherwise the rc of the row
 * operations.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_Write(const TableWriterAlgn* cself, const TableWriterAlgnData* data, int64_t* rowid)
{
    rc_t rc = 0;

    if( cself == NULL || data == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableWriter_OpenRow(cself->base, rowid, cself->cursor_id);
    if( rc != 0 ) {
        return rc;
    }

    TW_COL_WRITE_VAR(cself->base, cself->cols[ewalgn_cn_PLOIDY], data->ploidy);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_SEQ_SPOT_ID], data->seq_spot_id);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_SEQ_READ_ID], data->seq_read_id);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_READ_START], data->read_start);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_READ_LEN], data->read_len);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_TMP_KEY_ID], data->tmp_key_id);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_HAS_REF_OFFSET], data->has_ref_offset);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_REF_OFFSET], data->ref_offset);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_REF_OFFSET_TYPE], data->ref_offset_type);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_REF_ID], data->ref_id);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_REF_START], data->ref_start);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_GLOBAL_REF_START], data->global_ref_start);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_HAS_MISMATCH], data->has_mismatch);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_MISMATCH], data->mismatch);
    TW_COL_WRITE_VAR(cself->base, cself->cols[ewalgn_cn_REF_LEN], data->ref_len);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_REF_ORIENTATION], data->ref_orientation);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_REF_PLOIDY], data->ref_ploidy);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_MAPQ], data->mapq);

    if( data->mate_align_id.elements == 1 ) {
        /* a single zero id is stored as an empty cell */
        TW_COL_WRITE_BUF(cself->base, cself->cols[ewalgn_cn_MATE_ALIGN_ID],
                         data->mate_align_id.buffer,
                         ((const int64_t*)(data->mate_align_id.buffer))[0] ? 1 : 0);
    }
    else {
        TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_MATE_ALIGN_ID], data->mate_align_id);
    }

    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_MATE_REF_ORIENTATION], data->mate_ref_orientation);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_MATE_REF_ID], data->mate_ref_id);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_MATE_REF_POS], data->mate_ref_pos);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_TEMPLATE_LEN], data->template_len);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_EVIDENCE_ALIGNMENT_IDS], data->alingment_ids);
    TW_COL_WRITE(cself->base, cself->cols[ewalgn_cn_ALIGN_GROUP], data->align_group);

    if( rc == 0 ) {
        rc = TableWriter_CloseRow(cself->base);
    }
    return rc;
}
/*
 * Create a reference-sequence manager.
 *
 * cself          - receives the new manager on success; must not be NULL
 * vmgr           - database manager; a reference is added and kept
 * reader_options - stored in the manager as reader_options
 * cache          - stored in the manager as cache
 * keep_open_num  - stored in the manager as num_open_max
 *
 * The manager also gets its own KConfig via KConfigMake. On any failure the
 * partially built object is handed to RefSeqMgr_Release and *cself is not
 * written.
 */
LIB_EXPORT rc_t CC RefSeqMgr_Make( const RefSeqMgr** cself, const VDBManager* vmgr, uint32_t reader_options, size_t cache, uint32_t keep_open_num )
{
    rc_t rc = 0;
    RefSeqMgr* mgr = NULL;

    if ( cself == NULL || vmgr == NULL ) {
        rc = RC( rcAlign, rcIndex, rcConstructing, rcParam, rcNull );
    }
    else if ( ( mgr = calloc( 1, sizeof( *mgr ) ) ) == NULL ) {
        rc = RC( rcAlign, rcIndex, rcConstructing, rcMemory, rcExhausted );
    }
    else if ( ( rc = KConfigMake( &mgr->kfg, NULL ) ) == 0 &&
              ( rc = VDBManagerAddRef( vmgr ) ) == 0 ) {
        /* vmgr is recorded only after the reference was taken */
        mgr->vmgr = vmgr;
        mgr->cache = cache;
        mgr->num_open_max = keep_open_num;
        mgr->reader_options = reader_options;
    }

    if ( rc != 0 ) {
        RefSeqMgr_Release( mgr );
        ALIGN_DBGERR( rc );
        return rc;
    }

    *cself = mgr;
    return rc;
}
/*
 * Write the reverse complement of seq[0..len) into cmpl[0..len).
 *
 * seq  - input bases; must not be NULL
 * cmpl - output buffer of at least len elements; must not be NULL
 * len  - number of bases
 *
 * Recognised input characters are the IUPAC codes A C G T U Y R S W K M B D
 * H V N in either case (output is always upper case) and the digits '0'..'3',
 * which map to themselves. Any other input character stores '\0' at its
 * output position, stops the conversion and returns rcInvalid; output
 * positions for the inputs not yet processed are left untouched.
 *
 * Fixes relative to the previous version:
 *   - the NULL check now tests the 'cmpl' parameter (it used to test the
 *     static lookup table, which can never be NULL);
 *   - the table is indexed through unsigned char, so bytes >= 0x80 can no
 *     longer produce a negative index where char is signed;
 *   - the lookup table is a compile-time constant instead of being filled
 *     in on first call, removing the unsynchronised lazy initialisation.
 */
LIB_EXPORT rc_t CC DNAReverseCompliment(const INSDC_dna_text* seq, INSDC_dna_text* cmpl, uint32_t len)
{
    /* entries not listed are zero, which marks an invalid input character */
    static const INSDC_dna_text complement[256] = {
        ['A'] = 'T', ['a'] = 'T',
        ['T'] = 'A', ['t'] = 'A',
        ['U'] = 'A', ['u'] = 'A',
        ['G'] = 'C', ['g'] = 'C',
        ['C'] = 'G', ['c'] = 'G',
        ['Y'] = 'R', ['y'] = 'R',
        ['R'] = 'Y', ['r'] = 'Y',
        ['S'] = 'S', ['s'] = 'S',
        ['W'] = 'W', ['w'] = 'W',
        ['K'] = 'M', ['k'] = 'M',
        ['M'] = 'K', ['m'] = 'K',
        ['B'] = 'V', ['b'] = 'V',
        ['D'] = 'H', ['d'] = 'H',
        ['H'] = 'D', ['h'] = 'D',
        ['V'] = 'B', ['v'] = 'B',
        ['N'] = 'N', ['n'] = 'N',
        ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3'
    };
    rc_t rc = 0;

    if( seq == NULL || cmpl == NULL ) {
        rc = RC( rcAlign, rcType, rcConverting, rcParam, rcNull);
    }
    else {
        uint32_t i;
        INSDC_dna_text* out = cmpl + len; /* filled from the end backwards */

        for(i = 0; i < len; i++) {
            INSDC_dna_text const c = complement[(unsigned char)seq[i]];

            *--out = c;
            if( c == '\0' ) {
                rc = RC(rcAlign, rcFormatter, rcWriting, rcData, rcInvalid);
                break;
            }
        }
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Read up to 'len' bases starting at 'offset' into 'buffer'.
 *
 * GetReader is called first with the owning manager and this object; when it
 * succeeds the request is forwarded to the object's vtable 'read' entry.
 *
 * Returns rcNull/rcParam when cself, buffer or written is NULL, otherwise
 * the rc of GetReader or of the vtable call.
 */
LIB_EXPORT rc_t CC RefSeq_Read(const RefSeq* cself, INSDC_coord_zero offset, INSDC_coord_len len, uint8_t* buffer, INSDC_coord_len* written)
{
    rc_t rc = 0;

    if (cself != NULL && buffer != NULL && written != NULL) {
        RefSeq *const seq = (RefSeq *)cself;

        rc = GetReader((RefSeqMgr *)seq->mgr, seq);
        if (rc == 0) {
            rc = seq->vt->read(seq, offset, len, buffer, written);
        }
    }
    else {
        rc = RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
    }
    ALIGN_DBGERR(rc);
    return rc;
}
/*
 * Write one spot (row) to the sequence table.
 *
 * cself - sequence writer; must not be NULL
 * data  - column values for the spot; must not be NULL
 * rowid - passed to TableWriter_OpenRow and read back afterwards to record
 *         the first unaligned / first half-aligned row
 *
 * Before the row is opened the input is validated:
 *   - column defaults are written once (TableWriteSeq_WriteDefaults) if the
 *     writer is not yet initialised,
 *   - quality must be present and as long as the sequence,
 *   - read_start / read_len (and, with ewseq_co_AlignData,
 *     primary_alignment_id / alignment_count) must have nreads elements,
 *   - an optional no_quantize_mask must be as long as the quality,
 *   - unless ewseq_co_NoLabelData is set, label / label_start / label_len
 *     must be supplied all together or not at all.
 *
 * Unless ewseq_co_FullQuality is set, qualities are discretised through
 * cself->discrete_qual into the writer's scratch buffer qual_buf (values
 * flagged in no_quantize_mask are kept, masked to 6 bits).
 *
 * Fixes relative to the previous version:
 *   - when growing qual_buf fails, discretisation is skipped; previously the
 *     loops still wrote quality.elements bytes into the old, smaller (or
 *     NULL) buffer;
 *   - in the branch that stores quality for unaligned reads only, the data
 *     now comes from the discretised qual_buf; previously the sequence
 *     buffer was written into the QUALITY column.
 *
 * Returns 0 on success or the first error encountered.
 */
LIB_EXPORT rc_t CC TableWriterSeq_Write(const TableWriterSeq* cself, const TableWriterSeqData* data, int64_t* rowid)
{
    rc_t rc = 0;
    int lbl;

    if( cself == NULL || data == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
    }
    else if( !cself->init && (rc = TableWriteSeq_WriteDefaults(cself)) != 0 ) {
        ALIGN_DBGERR(rc);
    }
    else if( data->quality.buffer == NULL || data->sequence.elements != data->quality.elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, data->quality.buffer ? rcInconsistent : rcEmpty);
        ALIGN_DBGERRP("sequence and quality length %lu <> %lu", rc, data->sequence.elements, data->quality.elements);
    }
    else if( data->read_start.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and read_start length %u <> %lu", rc, data->nreads, data->read_start.elements);
    }
    else if( data->read_len.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent );
        ALIGN_DBGERRP("nreads and read_len length %u <> %lu", rc, data->nreads, data->read_len.elements);
    }
    else if( (cself->options & ewseq_co_AlignData) && data->primary_alignment_id.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and primary_alignment_id length %u <> %lu", rc, data->nreads, data->primary_alignment_id.elements);
    }
    else if( (cself->options & ewseq_co_AlignData) && data->alignment_count.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and alignment_count length %u <> %lu", rc, data->nreads, data->alignment_count.elements);
    }
    else if( data->no_quantize_mask.buffer && data->no_quantize_mask.elements != data->quality.elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("quality and no_quantize_mask length %u <> %lu", rc, data->quality.elements, data->no_quantize_mask.elements);
    }
    else if( !(cself->options & ewseq_co_NoLabelData) &&
             (lbl = ((data->label.buffer ? 1 : 0) + (data->label_start.buffer ? 1 : 0) + (data->label_len.buffer ? 1 : 0))) != 0 &&
             lbl != 3 ) {
        /* 1 or 2 of the three label buffers present: incomplete label data */
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("LABEL %s", rc, "incomplete");
    }
    else if( (rc = TableWriter_OpenRow(cself->base, rowid, cself->cursor_id)) == 0 ) {
        if( cself->options & ewseq_co_AlignData ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_PRIMARY_ALIGNMENT_ID], data->primary_alignment_id);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_ALIGNMENT_COUNT], data->alignment_count);
            if (rc == 0) {
                rc = TableWriterSeq_CollectStatistics((TableWriterSeq *)cself, &data->primary_alignment_id);
            }
        }
        {
            /* remember the first fully unaligned and first partially aligned rows */
            unsigned naligned = 0;
            unsigned i;
            const uint8_t* ac = data->alignment_count.buffer;

            for (i = 0; i < (unsigned)data->nreads; ++i) {
                if (ac[i] != 0) {
                    ++naligned;
                }
            }
            if (naligned == 0) {
                if (!cself->haveFirstUnaligned) {
                    ((TableWriterSeq *)cself)->firstUnaligned = *rowid;
                    ((TableWriterSeq *)cself)->haveFirstUnaligned = true;
                }
            }
            else if (naligned < (unsigned)data->nreads) {
                if (!cself->haveFirstHalfAligned) {
                    ((TableWriterSeq *)cself)->firstHalfAligned = *rowid;
                    ((TableWriterSeq *)cself)->haveFirstHalfAligned = true;
                }
            }
        }
        if( cself->options & ewseq_co_SaveRead ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ], data->sequence);
        }
        else {
            /* store bases only for reads that have no alignment */
            uint32_t i;
            const char* seq = data->sequence.buffer;
            const uint8_t* ac = data->alignment_count.buffer;
            const INSDC_coord_zero* rs = data->read_start.buffer;
            const INSDC_coord_len* rl = data->read_len.buffer;

            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_READ], NULL, 0);
            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_CSREAD], NULL, 0);
            for(i = 0; i < data->nreads; i++ ) {
                if( ac[i] == 0 ) {
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_READ], &seq[rs[i]], rl[i]);
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_CSREAD], &seq[rs[i]], rl[i]);
                }
            }
        }
        if( cself->options & ewseq_co_FullQuality ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_QUALITY], data->quality);
        }
        else {
            uint64_t i;
            const uint8_t* b = data->quality.buffer;
            bool qual_buf_ok = true; /* false once growing the scratch buffer failed */

            if( data->quality.elements > cself->qual_buf_sz ) {
                /* NOTE(review): the size is elements * element_bits, which looks
                   like more than the elements bytes used below - confirm units */
                uint8_t* p = realloc(cself->qual_buf, data->quality.elements * cself->cols[ewseq_cn_QUALITY].element_bits);
                if( p == NULL ) {
                    rc = RC(rcAlign, rcType, rcWriting, rcMemory, rcExhausted);
                    ALIGN_DBGERRP("quality %s", rc, "discretization");
                    qual_buf_ok = false;
                }
                else {
                    ((TableWriterSeq*)cself)->qual_buf_sz = data->quality.elements;
                    ((TableWriterSeq*)cself)->qual_buf = p;
                }
            }
            if( qual_buf_ok ) {
                if (data->no_quantize_mask.buffer) {
                    bool const *mask = data->no_quantize_mask.buffer;

                    for(i = 0; i < data->quality.elements; i++ ) {
                        uint8_t const q = b[i] & 0x3F;
                        cself->qual_buf[i] = mask[i] ? q : cself->discrete_qual[q];
                    }
                }
                else {
                    for(i = 0; i < data->quality.elements; i++ ) {
                        cself->qual_buf[i] = cself->discrete_qual[b[i]];
                    }
                }
                if (cself->options & ewseq_co_SaveQual) {
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], cself->qual_buf, data->quality.elements);
                }
                else {
                    /* store discretised quality only for reads that have no alignment */
                    uint32_t r;
                    uint8_t const *const qual = cself->qual_buf;
                    uint8_t const *const aligned = data->alignment_count.buffer;
                    INSDC_coord_zero const *const rs = data->read_start.buffer;
                    INSDC_coord_len const *const rl = data->read_len.buffer;

                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], NULL, 0);
                    for (r = 0; r < data->nreads; ++r) {
                        if (!aligned[r]) {
                            INSDC_coord_zero const readStart = rs[r];
                            INSDC_coord_len const readLen = rl[r];

                            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], &qual[readStart], readLen);
                        }
                    }
                }
            }
        }
        if( !(cself->options & ewseq_co_NoLabelData) ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL], data->label);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL_START], data->label_start);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL_LEN], data->label_len);
        }
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_TYPE], data->read_type);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_START], data->read_start);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_LEN], data->read_len);
        TW_COL_WRITE_VAR(cself->base, cself->cols[ewseq_cn_TMP_KEY_ID], data->tmp_key_id);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_SPOT_GROUP], data->spot_group);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_CSKEY], data->cskey);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_FILTER], data->read_filter);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_PLATFORM], data->platform);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_TI], data->ti);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_NAME], data->spot_name);
        if( rc == 0 ) {
            rc = TableWriter_CloseRow(cself->base);
        }
    }
    return rc;
}
/*
 * Create a writer for the SEQUENCE table (CS_SEQUENCE when
 * ewseq_co_ColorSpace is requested).
 *
 * cself             - receives the new writer on success; must not be NULL
 * db                - database to create the table in; must not be NULL
 * options           - ewseq_co_* flags; ewseq_co_SaveQual is always forced on
 * qual_quantization - "0" selects full quality; NULL or "1" selects
 *                     "1:10,10:20,20:30,30:-"; "2" selects "1:30,30:-"; any
 *                     other string is passed as-is to
 *                     TableWriterSeq_InitQuantMatrix
 *
 * The column set is copied from TableWriterSeq_cols and individual columns
 * are enabled or ignored according to 'options' before the cursor is added.
 * On failure the partially built writer is released with
 * TableWriterSeq_Whack and *cself is not written.
 */
LIB_EXPORT rc_t CC TableWriterSeq_Make(const TableWriterSeq** cself, VDatabase* db, uint32_t options, char const qual_quantization[])
{
    rc_t rc = 0;
    TableWriterSeq* self = NULL;
    char const *tblName = (options & ewseq_co_ColorSpace) ? "CS_SEQUENCE" : "SEQUENCE";

    options |= ewseq_co_SaveQual; /* TODO: remove when ready */

    if( cself == NULL || db == NULL ) {
        rc = RC(rcAlign, rcFormatter, rcConstructing, rcParam, rcNull);
    }
    else if( (self = calloc(1, sizeof(*self))) == NULL ) {
        rc = RC(rcAlign, rcFormatter, rcConstructing, rcMemory, rcExhausted);
    }
    else {
        memcpy(self->cols, TableWriterSeq_cols, sizeof(TableWriterSeq_cols));

        /* alignment data and the temporary key are mutually exclusive */
        if( options & ewseq_co_AlignData ) {
            self->cols[ewseq_cn_TMP_KEY_ID].flags |= ewcol_Ignore;
        }
        else {
            self->cols[ewseq_cn_PRIMARY_ALIGNMENT_ID].flags |= ewcol_Ignore;
            self->cols[ewseq_cn_ALIGNMENT_COUNT].flags |= ewcol_Ignore;
        }
        if( options & ewseq_co_NoLabelData ) {
            self->cols[ewseq_cn_LABEL].flags |= ewcol_Ignore;
            self->cols[ewseq_cn_LABEL_LEN].flags |= ewcol_Ignore;
            self->cols[ewseq_cn_LABEL_START].flags |= ewcol_Ignore;
        }
        if( options & ewseq_co_ColorSpace ) {
            self->cols[ewseq_cn_READ].flags |= ewcol_Ignore;
            self->cols[ewseq_cn_CSREAD].flags &= ~ewcol_Ignore;
            self->cols[ewseq_cn_CSKEY].flags &= ~ewcol_Ignore;
        }
        if( options & ewseq_co_SpotGroup ) {
            self->cols[ewseq_cn_SPOT_GROUP].flags &= ~ewcol_Ignore;
        }
        if( options & ewseq_co_TI ) {
            self->cols[ewseq_cn_TI].flags &= ~ewcol_Ignore;
        }
        if( options & ewseq_co_SpotName ) {
            self->cols[ewseq_cn_NAME].flags &= ~ewcol_Ignore;
        }

        rc = TableWriter_Make(&self->base, db, tblName, "SEQUENCE");
        if( rc == 0 ) {
            rc = TableWriter_AddCursor(self->base, self->cols, sizeof(self->cols)/sizeof(self->cols[0]), &self->cursor_id);
        }

        if( rc == 0 ) {
            self->options = options;
            if( qual_quantization && strcmp(qual_quantization, "0") == 0 ) {
                self->options |= ewseq_co_FullQuality;
            }
            if( !(self->options & ewseq_co_FullQuality) ) {
                char const *spec = qual_quantization;

                if( spec == NULL || strcmp(spec, "1") == 0 ) {
                    spec = "1:10,10:20,20:30,30:-";
                }
                else if( strcmp(spec, "2") == 0 ) {
                    spec = "1:30,30:-";
                }
                if( !TableWriterSeq_InitQuantMatrix(self->discrete_qual, spec) ) {
                    rc = RC(rcAlign, rcFormatter, rcConstructing, rcParam, rcInvalid);
                }
            }
        }
    }

    if( rc != 0 ) {
        TableWriterSeq_Whack(self, false, NULL);
        ALIGN_DBGERR(rc);
    }
    else {
        *cself = self;
        ALIGN_DBG("table %s", "created");
    }
    return rc;
}
/*
 * Switch an alignment writer from its main writing pass to the pass in
 * which temporary keys are read back and spot ids / mate info are written.
 *
 * Requires a writer created with ewalgn_co_TMP_KEY_ID and without
 * ewalgn_co_SEQ_SPOT_ID; otherwise rcViolated is returned.
 *
 * Steps performed on success:
 *   - close the main write cursor,
 *   - create a reader from a copy of TableAlgnReadTmpKey_cols; depending on
 *     whether GLOBAL_REF_START is ignored in the main column set, either
 *     reader columns 2 and 3 or reader column 1 get their flags cleared,
 *   - set up the five cols_spotid columns (SEQ_SPOT_ID,
 *     MATE_GLOBAL_REF_START, MATE_REF_ID, MATE_REF_START, MATE_ALIGN_ID);
 *     the four mate columns are enabled only with ewalgn_co_MATE_POSITION,
 *   - add the spot-id cursor and write a default of 0 for each column.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_TmpKeyStart(const TableWriterAlgn* cself)
{
    rc_t rc = 0;

    if( cself == NULL ) {
        rc = RC(rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if( (cself->options & ewalgn_co_SEQ_SPOT_ID) || !(cself->options & ewalgn_co_TMP_KEY_ID) ) {
        rc = RC(rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
        return rc;
    }

    rc = TableWriter_CloseCursor(cself->base, cself->cursor_id, NULL);
    if( rc == 0 ) {
        TableWriterAlgn* self = (TableWriterAlgn*)cself;
        VTable* vtbl = NULL;

        memcpy(&self->cols_read_tmpkey, &TableAlgnReadTmpKey_cols, sizeof(TableAlgnReadTmpKey_cols));
        if( self->cols[ewalgn_cn_GLOBAL_REF_START].flags & ewcol_Ignore ) {
            self->cols_read_tmpkey[2].flags = 0;
            self->cols_read_tmpkey[3].flags = 0;
        }
        else {
            self->cols_read_tmpkey[1].flags = 0;
        }

        rc = TableWriter_GetVTable(self->base, &vtbl);
        if( rc == 0 ) {
            rc = TableReader_Make(&self->tmpkey_reader, vtbl, self->cols_read_tmpkey, 50 * 1024 * 1024);
        }
        if( rc == 0 ) {
            /* source column for each cols_spotid slot, in slot order */
            int const source[5] = {
                ewalgn_cn_SEQ_SPOT_ID,
                ewalgn_cn_MATE_GLOBAL_REF_START,
                ewalgn_cn_MATE_REF_ID,
                ewalgn_cn_MATE_REF_START,
                ewalgn_cn_MATE_ALIGN_ID
            };
            int64_t v = 0;
            unsigned k;

            for( k = 0; k < 5; ++k ) {
                memcpy(self->cols_spotid + k, &TableWriterAlgn_cols[source[k]], sizeof(self->cols_spotid[0]));
            }

            self->cols_spotid[0].flags &= ~ewcol_Ignore;
            if( self->options & ewalgn_co_MATE_POSITION ) {
                if( self->cols[ewalgn_cn_GLOBAL_REF_START].flags & ewcol_Ignore ) {
                    self->cols_spotid[2].flags &= ~ewcol_Ignore;
                    self->cols_spotid[3].flags &= ~ewcol_Ignore;
                }
                else {
                    self->cols_spotid[1].flags &= ~ewcol_Ignore;
                }
                self->cols_spotid[4].flags &= ~ewcol_Ignore;
            }
            else {
                self->cols_spotid[1].flags |= ewcol_Ignore;
                self->cols_spotid[2].flags |= ewcol_Ignore;
                self->cols_spotid[3].flags |= ewcol_Ignore;
                self->cols_spotid[4].flags |= ewcol_Ignore;
            }

            rc = TableWriter_AddCursor(self->base, self->cols_spotid,
                                       sizeof(self->cols_spotid) / sizeof(self->cols_spotid[0]),
                                       &self->spotid_cursor_id);
            TW_COL_WRITE_DEF_VAR(self->base, self->spotid_cursor_id, self->cols_spotid[0], v);
            TW_COL_WRITE_DEF_VAR(self->base, self->spotid_cursor_id, self->cols_spotid[1], v);
            TW_COL_WRITE_DEF_VAR(self->base, self->spotid_cursor_id, self->cols_spotid[2], v);
            TW_COL_WRITE_DEF_VAR(self->base, self->spotid_cursor_id, self->cols_spotid[3], v);
            TW_COL_WRITE_DEF_VAR(self->base, self->spotid_cursor_id, self->cols_spotid[4], v);
        }
    }
    return rc;
}
/*
 * Copy up to 'len' bases starting at 'offset' into 'buffer'.
 *
 * cself   - reader; must not be NULL
 * offset  - start position; normalised by ReferenceSeq_ReOffset using the
 *           reader's circular flag and total length
 * len     - number of bases requested; 0 yields *written = 0 immediately
 * buffer  - destination; must not be NULL
 * written - receives the number of bases actually copied; must not be NULL
 *
 * The sequence is stored in rows of max_seq_len bases; rows are read one at
 * a time and the requested part of each is appended to the buffer. A row
 * shorter than max_seq_len is treated as the last one: reading stops there
 * unless the sequence is circular, in which case it continues at offset 0.
 * A row whose READ cell is empty is reported as rcNull/rcItem.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_Read(const TableReaderRefSeq* cself, INSDC_coord_zero offset, INSDC_coord_len len, uint8_t* buffer, INSDC_coord_len* written)
{
    rc_t rc = 0;

    if ( cself == NULL || buffer == NULL || written == NULL ) {
        rc = RC( rcAlign, rcType, rcReading, rcParam, rcNull );
    }
    else if ( len == 0 ) {
        *written = 0;
    }
    else if ( ( rc = ReferenceSeq_ReOffset( cself->circular, cself->total_seq_len, &offset ) ) == 0 ) {
        INSDC_coord_len chunk = 0;

        *written = 0;
        do {
            int64_t const row = offset / cself->max_seq_len + 1;
            INSDC_coord_zero const skip = offset % cself->max_seq_len;

            rc = TableReader_ReadRow( cself->base, row );
            if ( rc == 0 && ( cself->read->len == 0 || cself->read->base.str == NULL ) ) {
                /* TableReader_ReadRow() can return rc == 0 for an optional column;
                   in that case len/base.str are zero/NULL */
                rc = RC( rcAlign, rcType, rcReading, rcItem, rcNull );
            }
            if ( rc != 0 ) {
                break;
            }

            chunk = cself->seq_len->base.coord_len[0] - skip;
            if ( chunk > len ) {
                chunk = len;
            }
            memcpy( &buffer[*written], cself->read->base.str + skip, chunk );
            *written += chunk;
            offset += chunk;
            len -= chunk;

            /* SEQ_LEN < MAX_SEQ_LEN is last row unless it is CIRCULAR */
            if ( cself->seq_len->base.coord_len[ 0 ] < cself->max_seq_len ) {
                if ( !cself->circular ) {
                    break;
                }
                offset = 0;
            }
        } while ( chunk > 0 && len > 0 );
    }
    ALIGN_DBGERR( rc );
    return rc;
}
/*
 * Create a writer for one of the alignment tables.
 *
 * cself   - receives the new writer on success; must not be NULL
 * db      - database to create the table in; must not be NULL
 * type    - selects the table (PRIMARY_ALIGNMENT, SECONDARY_ALIGNMENT,
 *           EVIDENCE_INTERVAL or EVIDENCE_ALIGNMENT) and its type-specific
 *           column set
 * options - ewalgn_co_* flags; some table types force additional flags
 *           (EVIDENCE_INTERVAL: unsorted + PLOIDY, EVIDENCE_ALIGNMENT:
 *           unsorted)
 *
 * The column set is copied from TableWriterAlgn_cols and individual columns
 * are enabled, ignored or renamed according to 'type' and 'options' before
 * the cursor is added.
 *
 * An unrecognised 'type' yields rcUnrecognized and no table is created.
 * Previously that error was overwritten by the result of TableWriter_Make,
 * which was still called with the function name as the table name.
 *
 * On failure the partially built writer is released with
 * TableWriterAlgn_Whack and *cself is not written.
 */
LIB_EXPORT rc_t CC TableWriterAlgn_Make(const TableWriterAlgn** cself, VDatabase* db, ETableWriterAlgn_TableType type, uint32_t options)
{
    rc_t rc = 0;
    TableWriterAlgn* self = NULL;
    const char* tbl_nm = __func__; /* placeholder for log output until 'type' is decoded */

    if( cself == NULL || db == NULL ) {
        rc = RC(rcAlign, rcFormatter, rcConstructing, rcParam, rcNull);
    }
    else {
        self = calloc(1, sizeof(*self));
        if( self == NULL ) {
            rc = RC(rcAlign, rcFormatter, rcConstructing, rcMemory, rcExhausted);
        }
        else {
            self->ref_table_name = "REFERENCE";
            memcpy(self->cols, TableWriterAlgn_cols, sizeof(TableWriterAlgn_cols));

            switch(type) {
            case ewalgn_tabletype_PrimaryAlignment:
                tbl_nm = "PRIMARY_ALIGNMENT";
                self->cols[ewalgn_cn_ALIGN_GROUP].flags &= ~ewcol_Ignore;
                if (options & ewalgn_co_MISMATCH_QUALITY) {
                    self->cols[ewalgn_cn_MISMATCH_QUALITY].flags &= ~ewcol_Ignore;
                }
                break;
            case ewalgn_tabletype_SecondaryAlignment:
                tbl_nm = "SECONDARY_ALIGNMENT";
#if 0
                self->cols[ewalgn_cn_HAS_MISMATCH].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_MISMATCH].flags |= ewcol_Ignore;
#else
                self->cols[ewalgn_cn_MISMATCH].name = "TMP_MISMATCH";
                self->cols[ewalgn_cn_HAS_MISMATCH].name = "TMP_HAS_MISMATCH";
#endif
                self->cols[ewalgn_cn_MATE_REF_ORIENTATION].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_MATE_REF_ID].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_MATE_REF_POS].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_MATE_ALIGN_ID].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_TEMPLATE_LEN].flags &= ~ewcol_Ignore;
                break;
            case ewalgn_tabletype_EvidenceInterval:
                tbl_nm = "EVIDENCE_INTERVAL";
                options |= ewalgn_co_unsorted;
                options |= ewalgn_co_PLOIDY;
                self->cols[ewalgn_cn_SEQ_SPOT_ID].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_SEQ_READ_ID].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_EVIDENCE_ALIGNMENT_IDS].flags &= ~ewcol_Ignore;
                break;
            case ewalgn_tabletype_EvidenceAlignment:
                tbl_nm = "EVIDENCE_ALIGNMENT";
                self->ref_table_name = "EVIDENCE_INTERVAL";
                self->cols[ewalgn_cn_REF_PLOIDY].flags &= ~ewcol_Ignore;
#if 0
                self->cols[ewalgn_cn_HAS_MISMATCH].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_MISMATCH].flags |= ewcol_Ignore;
#else
                self->cols[ewalgn_cn_MISMATCH].name = "TMP_MISMATCH";
                self->cols[ewalgn_cn_HAS_MISMATCH].name = "TMP_HAS_MISMATCH";
#endif
                options |= ewalgn_co_unsorted;
                break;
            default:
                rc = RC(rcAlign, rcFormatter, rcConstructing, rcType, rcUnrecognized);
            }

            if( options & ewalgn_co_SEQ_SPOT_ID ) {
                self->cols[ewalgn_cn_SEQ_SPOT_ID].flags &= ~ewcol_Ignore;
            }
            if( options & ewalgn_co_TMP_KEY_ID ) {
                self->cols[ewalgn_cn_TMP_KEY_ID].flags &= ~ewcol_Ignore;
            }
            if( options & ewalgn_co_PLOIDY ) {
                self->cols[ewalgn_cn_PLOIDY].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_READ_START].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_READ_LEN].flags &= ~ewcol_Ignore;
            }
            if( options & ewalgn_co_unsorted ) {
                /* unsorted input stores REF_ID/REF_START instead of GLOBAL_REF_START */
                self->cols[ewalgn_cn_REF_ID].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_REF_START].flags &= ~ewcol_Ignore;
                self->cols[ewalgn_cn_GLOBAL_REF_START].flags |= ewcol_Ignore;
                if( type == ewalgn_tabletype_SecondaryAlignment ) {
                    self->cols[ewalgn_cn_MISMATCH].name = "TMP_MISMATCH";
                    self->cols[ewalgn_cn_MISMATCH].flags &= ~ewcol_Ignore;
                    self->cols[ewalgn_cn_HAS_MISMATCH].name = "TMP_HAS_MISMATCH";
                    self->cols[ewalgn_cn_HAS_MISMATCH].flags &= ~ewcol_Ignore;
                }
            }
            if( options & ewalgn_co_MATE_ALIGN_ID_only ) {
                self->cols[ewalgn_cn_MATE_REF_ORIENTATION].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_MATE_REF_ID].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_MATE_REF_POS].flags |= ewcol_Ignore;
                self->cols[ewalgn_cn_TEMPLATE_LEN].flags |= ewcol_Ignore;
            }

            /* do not touch the database when 'type' was rejected above */
            if( rc == 0 && (rc = TableWriter_Make(&self->base, db, tbl_nm, NULL)) == 0 ) {
                rc = TableWriter_AddCursor(self->base, self->cols, sizeof(self->cols)/sizeof(self->cols[0]), &self->cursor_id);
                self->options = options;
            }
        }
    }

    if( rc == 0 ) {
        *cself = self;
        ALIGN_DBG("table %s created", tbl_nm);
    }
    else {
        TableWriterAlgn_Whack(self, false, NULL);
        ALIGN_DBGERR(rc);
    }
    return rc;
}