/* TableWriterSeq_WriteAlignmentData
 *  Writes the two alignment-data columns (primary alignment ids and
 *  alignment counts) for the sequence row `rowid` through the separate
 *  alignment-data cursor.
 *
 *  cself                 [IN] writer; NULL yields rcParam/rcNull
 *  rowid                 [IN] row to update; 0 means "just flush the
 *                             alignment-data cursor" and nothing is written
 *  primary_alignment_id  [IN] one element per read of the row
 *  alignment_count       [IN] same number of elements as primary_alignment_id
 *
 *  Returns 0 on success. Fails with rcViolated when the writer was created
 *  with ewseq_co_AlignData (presumably because those columns are then written
 *  together with the row itself), and with rcData/rcInconsistent when the
 *  element counts disagree with each other or with the stored row.
 */
LIB_EXPORT rc_t CC TableWriterSeq_WriteAlignmentData(const TableWriterSeq* cself, int64_t rowid,
                                                     const TableWriterData* primary_alignment_id,
                                                     const TableWriterData* alignment_count)
{
    rc_t rc = 0;

    if( cself == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
    } else if( cself->options & ewseq_co_AlignData ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
    } else if( rowid == 0 ) {
        /* special request: flush only */
        rc = TableWriter_Flush(cself->base, cself->alignd_cursor_id);
    }
    else if( primary_alignment_id == NULL || alignment_count == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
    } else if( (rc = TableReader_ReadRow(cself->tmpkey_reader, rowid)) != 0 ||
        cself->cols_read_tmpkey[1].len != primary_alignment_id->elements ) {
        /* either the row could not be read back, or its stored length
           differs from the number of ids supplied */
        rc = rc ? rc : RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and primary_alignment_id length %u <> %lu",
            rc, cself->cols_read_tmpkey[1].len, primary_alignment_id->elements);
    } else if( primary_alignment_id->elements != alignment_count->elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        /* both values are `elements` counts, so both use the same conversion
           that the other messages use for this field */
        ALIGN_DBGERRP("primary_alignment_id and alignment_count length %lu <> %lu",
            rc, primary_alignment_id->elements, alignment_count->elements);
    }
    else {
        if (cself->flush) {
            /* a flush was requested earlier; the flag is cleared whether or
               not the flush succeeds */
            rc = TableWriter_Flush(cself->base, cself->alignd_cursor_id);
            ((TableWriterSeq*)cself)->flush = false;
        }
        if( rc == 0 &&
           (rc = TableWriter_OpenRowId(cself->base, rowid, cself->alignd_cursor_id)) == 0 ) {
            /* TW_COL_WRITE is defined elsewhere; it appears to report its
               result through the local `rc` */
            TW_COL_WRITE(cself->base, cself->cols_alignd[0], *primary_alignment_id);
            TW_COL_WRITE(cself->base, cself->cols_alignd[1], *alignment_count);
            if( rc == 0 ) {
                rc = TableWriter_CloseRow(cself->base);
            }
            if (rc == 0/* && (cself->options & ewseq_co_WantMateStats)*/) {
                rc = TableWriterSeq_CollectStatistics((TableWriterSeq *)cself, primary_alignment_id);
            }
        }
    }
    return rc;
}
Beispiel #2
0
/* RefSeqMgr_GetSeq
 *  Public entry point that validates its arguments and forwards the lookup
 *  to GetSeqInternal.
 *
 *  cmgr       [IN]  manager; NULL yields rcParam/rcNull
 *  result     [OUT] receives the sequence object; NULL yields rcParam/rcNull
 *  seq_id     [IN]  sequence identifier, seq_id_sz bytes long (it is logged
 *                   with an explicit length); NULL yields rcParam/rcNull
 *  seq_id_sz  [IN]  length of seq_id
 *
 *  Returns 0 on success, otherwise the error code, which is also reported
 *  through the debug log together with the identifier.
 */
LIB_EXPORT rc_t CC RefSeqMgr_GetSeq(RefSeqMgr const *const cmgr,
                                    RefSeq const **result,
                                    char const *seq_id,
                                    uint32_t seq_id_sz)
{
    rc_t rc;

    if (cmgr == NULL || result == NULL || seq_id == NULL) {
        rc = RC(rcAlign, rcIndex, rcConstructing, rcParam, rcNull);
    }
    else {
        rc = GetSeqInternal((RefSeqMgr *)cmgr, result, seq_id_sz, seq_id);
    }

    if (rc != 0) {
        /* seq_id may be the very NULL that caused the error: never hand a
           NULL pointer (with an arbitrary length) to the string conversion */
        ALIGN_DBGERRP("SEQ_ID: '%.*s'", rc,
                      seq_id != NULL ? seq_id_sz : (uint32_t)0,
                      seq_id != NULL ? seq_id : "");
    }

    return rc;
}
Beispiel #3
0
/* TableReaderRefSeq_MakePath
 *  Opens the table found at `path` for reading and builds a refseq reader
 *  on top of it via TableReaderRefSeq_MakeTable.
 *
 *  cself    [OUT] receives the reader (validated by TableReaderRefSeq_MakeTable)
 *  vmgr     [IN]  database manager; NULL yields rcParam/rcNull
 *  path     [IN]  table path; NULL yields rcParam/rcNull
 *  options  [IN]  passed through to TableReaderRefSeq_MakeTable
 *  cache    [IN]  passed through to TableReaderRefSeq_MakeTable
 *
 *  The table reference obtained here is released before returning, whether
 *  or not the reader could be created.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_MakePath(const TableReaderRefSeq** cself, const VDBManager* vmgr,
                                              const char* path, uint32_t options, size_t cache)
{
    rc_t rc = 0;
    const VTable* tbl = NULL;

    if( vmgr == NULL || path == NULL ) {
        rc = RC(rcAlign, rcType, rcConstructing, rcParam, rcNull);
    } else if( (rc = VDBManagerOpenTableRead(vmgr, &tbl, NULL, "%s", path)) == 0 ) {
        rc = TableReaderRefSeq_MakeTable(cself, vmgr, tbl, options, cache);
        VTableRelease(tbl);
    }
    if( rc == 0 ) {
        /* ALIGN_DBG("table %s opened 0x%p", path, *cself); */
    } else {
        /* path may be the NULL that triggered the error; do not pass a
           NULL pointer to the string conversion */
        ALIGN_DBGERRP("table for %s", rc, path != NULL ? path : "NULL");
    }
    return rc;
}
Beispiel #4
0
/* TableReaderRefSeq_MakeTable
 *  Creates a refseq reader on an already opened table.
 *
 *  Works in two steps:
 *   1. a temporary reader fetches row 1 of the per-table columns
 *      (MAX_SEQ_LEN, SEQ_ID, TOTAL_SEQ_LEN, CIRCULAR and the optional MD5)
 *      and their values are copied into the new object;
 *   2. the long-lived reader over self->cols is opened with the requested
 *      cache size.
 *  Step 2 is attempted only when step 1 succeeded, so that an error raised
 *  in step 1 is reported to the caller instead of being overwritten.
 *
 *  cself    [OUT] receives the new reader on success; NULL yields rcParam/rcNull
 *  vmgr     [IN]  not used by this function
 *  table    [IN]  table to read; NULL yields rcParam/rcNull
 *  options  [IN]  errefseq_4NA selects cols[1] instead of cols[0] as the
 *                 column used for reading bases
 *  cache    [IN]  passed to TableReader_Make for the long-lived reader
 *
 *  On failure the partially built object is handed to
 *  TableReaderRefSeq_Whack and *cself is left untouched.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_MakeTable(const TableReaderRefSeq** cself, const VDBManager* vmgr,
                                               const VTable* table, uint32_t options, size_t cache)
{
    rc_t rc = 0;
    TableReaderRefSeq* self = NULL;

    if( cself == NULL || table == NULL ) {
        rc = RC(rcAlign, rcType, rcConstructing, rcParam, rcNull);
    } else if( (self = calloc(1, sizeof(*self))) == NULL ) {
        rc = RC(rcAlign, rcType, rcConstructing, rcMemory, rcExhausted);
    } else {
        const TableReader* tmp;
        TableReaderColumn static_cols[] = {
            /* order important, see code below! */
            {0, "MAX_SEQ_LEN", {NULL}, 0, 0},
            {0, "SEQ_ID", {NULL}, 0, 0},
            {0, "TOTAL_SEQ_LEN", {NULL}, 0, 0},
            {0, "CIRCULAR", {NULL}, 0, 0},
            {0, "MD5", {NULL}, 0, ercol_Optional},
            {0, NULL, {NULL}, 0, 0}
        };
        if( (rc = TableReader_Make(&tmp, table, static_cols, 0)) == 0 ) {
            if( (rc = TableReader_ReadRow(tmp, 1)) == 0 ) {
                self->max_seq_len = static_cols[0].base.u32[0];
                self->total_seq_len = static_cols[2].base.u64[0];
                /* detects a stored value that does not survive the
                   assignment into self->total_seq_len */
                if( self->total_seq_len != static_cols[2].base.u64[0] ) {
                    rc = RC(rcAlign, rcType, rcConstructing, rcData, rcOutofrange);
                }
                if( static_cols[1].base.var != NULL ) {
                    if ( static_cols[ 1 ].len > sizeof( self->seq_id ) - 1 )
                    {
                        rc = RC( rcAlign, rcType, rcConstructing, rcBuffer, rcInsufficient );
                    }
                    else
                    {
                        string_copy( self->seq_id, ( sizeof self->seq_id ) -  1, static_cols[1].base.str, static_cols[1].len );
                        self->seq_id[ static_cols[ 1 ].len ] = '\0';
                    }
                }
                self->circular = static_cols[3].base.buul[0];
                if( static_cols[4].base.var != NULL ) {
                    /* NOTE(review): copies sizeof(self->md5) bytes without
                       looking at static_cols[4].len - confirm the column
                       always holds at least that many bytes */
                    memcpy(self->md5, static_cols[4].base.var, sizeof(self->md5));
                    self->has_md5 = true;
                }
            }
            /* the values were copied above, the temporary reader can go */
            TableReader_Whack(tmp);
        }

        if( rc == 0 ) {
            memcpy(self->cols, TableReaderRefSeq_cols, sizeof(TableReaderRefSeq_cols));
            if( options & errefseq_4NA) {
                self->cols[0].flags |= ercol_Skip;
                self->cols[1].flags &= ~ercol_Skip;
                self->read = &self->cols[1];
            } else {
                self->read = &self->cols[0];
            }
            self->seq_len = &self->cols[3];
            rc = TableReader_Make(&self->base, table, self->cols, cache);
        }
    }
    if( rc == 0 ) {
        *cself = self;
        /* ALIGN_DBG("table 0x%p opened 0x%p", table, self); */
    } else {
        TableReaderRefSeq_Whack(self);
        ALIGN_DBGERRP("table for 0x%p", rc, table);
    }
    return rc;
}
Beispiel #5
0
/* Offers each ':'-separated entry of `list` to `cb` as a server with no
 * volume, stopping at the first entry the callback accepts.
 * `list` is split in place: every separator visited is overwritten with NUL.
 * An empty list invokes the callback zero times.
 * Returns true when the callback accepted an entry.
 */
static bool RefSeqMgr_ForEachVolume_TryServers(char *list, RefSeqMgr_ForEachVolume_callback cb, void *data)
{
    char *next = list;

    if( list[0] == '\0' ) {
        return false;
    }
    for( ;; ) {
        char const *const entry = next;
        char *const colon = strchr(entry, ':');

        if( colon != NULL ) {
            next = colon + 1;
            *colon = 0;
        }
        if( cb(entry, NULL, data) ) {
            return true;
        }
        if( colon == NULL ) {
            return false;
        }
    }
}

/* RefSeqMgr_ForEachVolume
 *  Calls `cb(server, volume, data)` for candidate locations, in this order,
 *  until the callback returns a non-zero value:
 *   1. "." with no volume;
 *   2. every entry of the "refseq/paths" configuration value;
 *   3. every entry returned by RefSeqMgr_KfgReadRepositories;
 *   4. every server/volume combination of "refseq/servers" and
 *      "refseq/volumes".
 *  All lists are ':'-separated.
 *
 *  Returns rcParam/rcNull when cself or cb is NULL, otherwise the result of
 *  the most recent configuration read; that may be non-zero even if the
 *  callback accepted a location in step 3.
 */
rc_t RefSeqMgr_ForEachVolume(const RefSeqMgr* cself, RefSeqMgr_ForEachVolume_callback cb, void *data)
{
    rc_t rc = 0;
    char servers[4096];
    char volumes[4096];

    if( cself == NULL || cb == NULL ) {
        rc = RC(rcAlign, rcType, rcConstructing, rcParam, rcNull);
        return rc;
    }
    if( cb(".", NULL, data) ) {
        /* found in local dir */
        return rc;
    }

    rc = RefSeqMgr_KfgReadStr(cself->kfg, "refseq/paths", servers, sizeof(servers));
    if( rc != 0 ) {
        ALIGN_DBGERRP("%s", rc, "RefSeqMgr_KfgReadStr(paths)");
        return rc;
    }
    if( RefSeqMgr_ForEachVolume_TryServers(servers, cb, data) ) {
        return rc;
    }

    /* locate refseq servers/volumes in possibly multiple repositories */
    rc = RefSeqMgr_KfgReadRepositories(cself->kfg, servers, sizeof(servers));
    if( rc != 0 ) {
        ALIGN_DBGERRP("%s", rc, "RefSeqMgr_KfgReadStr(refseq/repository/*)");
    }
    /* NOTE(review): the buffer is examined even when the read above failed,
       exactly as before - confirm what it holds in that case */
    if( RefSeqMgr_ForEachVolume_TryServers(servers, cb, data) ) {
        return rc;
    }

    rc = RefSeqMgr_KfgReadStr(cself->kfg, "refseq/servers", servers, sizeof(servers));
    if( rc == 0 ) {
        rc = RefSeqMgr_KfgReadStr(cself->kfg, "refseq/volumes", volumes, sizeof(volumes));
    }
    if( rc != 0 ) {
        ALIGN_DBGERRP("%s", rc, "RefSeqMgr_KfgReadStr(servers/volumes)");
        return rc;
    }

    /* servers and volumes are deprecated and optional */
    if( servers[0] != '\0' || volumes[0] != '\0' ) {
        bool accepted = false;
        char *srv_next = servers;
        char *srv_colon;

        do {
            /* the volume list is split in place, so each server gets a
               fresh copy of it */
            char vol[ 4096 ];
            char const *const server = srv_next;
            char *vol_next = vol;
            char *vol_colon;

            string_copy ( vol, sizeof vol, volumes, string_size( volumes ) );
            srv_colon = strchr(server, ':');
            if( srv_colon != NULL ) {
                srv_next = srv_colon + 1;
                *srv_colon = 0;
            }
            do {
                char const *const volume = vol_next;

                vol_colon = strchr(volume, ':');
                if( vol_colon != NULL ) {
                    vol_next = vol_colon + 1;
                    *vol_colon = 0;
                }
                accepted = cb(server, volume, data);
            } while( !accepted && vol_colon != NULL );
        } while( !accepted && srv_colon != NULL );
    }
    return rc;
}
/* TableWriterSeq_Write
 *  Validates the data of one spot and writes it as a new row.
 *
 *  cself  [IN]  writer; NULL yields rcParam/rcNull
 *  data   [IN]  spot data; NULL yields rcParam/rcNull
 *  rowid  [OUT] passed to TableWriter_OpenRow; it is read back afterwards to
 *               remember the first unaligned / half-aligned row, so it must
 *               point to valid storage
 *
 *  Checks performed before anything is written (rcData/rcInconsistent unless
 *  noted otherwise):
 *   - quality is present (rcEmpty otherwise) and as long as the sequence;
 *   - read_start and read_len hold exactly nreads elements;
 *   - with ewseq_co_AlignData, primary_alignment_id and alignment_count hold
 *     exactly nreads elements;
 *   - an optional no_quantize_mask is as long as the quality;
 *   - unless ewseq_co_NoLabelData is set, label, label_start and label_len
 *     are supplied either all together or not at all.
 *
 *  The TW_COL_WRITE* macros are defined elsewhere; they appear to report
 *  their result through the local `rc`.
 *
 *  Returns 0 on success or the first error encountered.
 */
LIB_EXPORT rc_t CC TableWriterSeq_Write(const TableWriterSeq* cself, const TableWriterSeqData* data, int64_t* rowid)
{
    rc_t rc = 0;
    int lbl;
    
    if( cself == NULL || data == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
    }
    else if( !cself->init && (rc = TableWriteSeq_WriteDefaults(cself)) != 0 ) {
        ALIGN_DBGERR(rc);
    }
    else if( data->quality.buffer == NULL || data->sequence.elements != data->quality.elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, data->quality.buffer ? rcInconsistent : rcEmpty);
        ALIGN_DBGERRP("sequence and quality length %lu <> %lu", rc, data->sequence.elements, data->quality.elements);
    }
    else if( data->read_start.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and read_start length %u <> %lu", rc, data->nreads, data->read_start.elements);
    }
    else if( data->read_len.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent );
        ALIGN_DBGERRP("nreads and read_len length %u <> %lu", rc, data->nreads, data->read_len.elements);
    }
    else if( (cself->options & ewseq_co_AlignData) && data->primary_alignment_id.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and primary_alignment_id length %u <> %lu", rc, data->nreads, data->primary_alignment_id.elements);
    }
    else if( (cself->options & ewseq_co_AlignData) && data->alignment_count.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and alignment_count length %u <> %lu", rc, data->nreads, data->alignment_count.elements);
    }
    else if( data->no_quantize_mask.buffer && data->no_quantize_mask.elements != data->quality.elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        /* both values are `elements` counts: use the conversion the other
           messages use for this field */
        ALIGN_DBGERRP("quality and no_quantize_mask length %lu <> %lu", rc, data->quality.elements, data->no_quantize_mask.elements);
    }
    else if( !(cself->options & ewseq_co_NoLabelData) &&
               (lbl = ((data->label.buffer ? 1 : 0) + (data->label_start.buffer ? 1 : 0) +
                                                  (data->label_len.buffer ? 1 : 0))) != 0 && lbl != 3 )
    {
        /* 1 or 2 of the 3 label buffers supplied */
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("LABEL %s", rc, "incomplete");
    }
    else if( (rc = TableWriter_OpenRow(cself->base, rowid, cself->cursor_id)) == 0 ) {
        if( cself->options & ewseq_co_AlignData ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_PRIMARY_ALIGNMENT_ID], data->primary_alignment_id);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_ALIGNMENT_COUNT], data->alignment_count);
            if (rc == 0) {
                rc = TableWriterSeq_CollectStatistics((TableWriterSeq *)cself, &data->primary_alignment_id);
            }
        }
        {
            /* remember the first row with no aligned read and the first row
               with only some reads aligned */
            /* NOTE(review): alignment_count is dereferenced here even when
               ewseq_co_AlignData is off and its length was not validated -
               confirm callers always supply nreads entries */
            unsigned naligned = 0;
            unsigned i;
            const uint8_t* ac = data->alignment_count.buffer;

            for (i = 0; i < (unsigned)data->nreads; ++i) {
                if (ac[i] != 0) {
                    ++naligned;
                }
            }
            if (naligned == 0) {
                if (!cself->haveFirstUnaligned) {
                    ((TableWriterSeq *)cself)->firstUnaligned = *rowid;
                    ((TableWriterSeq *)cself)->haveFirstUnaligned = true;
                }
            }
            else if (naligned < (unsigned)data->nreads) {
                if (!cself->haveFirstHalfAligned) {
                    ((TableWriterSeq *)cself)->firstHalfAligned = *rowid;
                    ((TableWriterSeq *)cself)->haveFirstHalfAligned = true;
                }
            }
        }
        if( cself->options & ewseq_co_SaveRead ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ], data->sequence);
        }
        else {
            /* store bases only for the reads that are not aligned */
            uint32_t i;
            const char* seq = data->sequence.buffer;
            const uint8_t* ac = data->alignment_count.buffer;
            const INSDC_coord_zero* rs = data->read_start.buffer;
            const INSDC_coord_len* rl = data->read_len.buffer;

            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_READ], NULL, 0);
            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_CSREAD], NULL, 0);
            for(i = 0; i < data->nreads; i++ ) {
                if( ac[i] == 0 ) {
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_READ], &seq[rs[i]], rl[i]);
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_CSREAD], &seq[rs[i]], rl[i]);
                }
            }
        }
        if( cself->options & ewseq_co_FullQuality ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_QUALITY], data->quality);
        } else {
            uint64_t i;
            const uint8_t* b = data->quality.buffer;
            if( data->quality.elements > cself->qual_buf_sz ) {
                /* grow the scratch buffer that receives the quantized values */
                /* NOTE(review): the size is scaled by element_bits; if that
                   is a bit count the buffer is larger than needed - confirm */
                uint8_t* p = realloc(cself->qual_buf, data->quality.elements * cself->cols[ewseq_cn_QUALITY].element_bits);
                if( p == NULL ) {
                    rc = RC(rcAlign, rcType, rcWriting, rcMemory, rcExhausted);
                    ALIGN_DBGERRP("quality %s", rc, "discretization");
                } else {
                    ((TableWriterSeq*)cself)->qual_buf_sz = data->quality.elements;
                    ((TableWriterSeq*)cself)->qual_buf = p;
                }
            }
            /* touch the scratch buffer only when it can hold every value;
               after a failed realloc it is still the old, smaller one (or
               NULL) and rc already carries the error */
            if( data->quality.elements <= cself->qual_buf_sz ) {
                if (data->no_quantize_mask.buffer) {
                    bool const *mask = data->no_quantize_mask.buffer;

                    for(i = 0; i < data->quality.elements; i++ ) {
                        uint8_t const q = b[i] & 0x3F;
                        /* masked positions keep their value, the others go
                           through the quantization table */
                        cself->qual_buf[i] = mask[i] ? q : cself->discrete_qual[q];
                    }
                }
                else {
                    for(i = 0; i < data->quality.elements; i++ ) {
                        cself->qual_buf[i] = cself->discrete_qual[b[i]];
                    }
                }
                if (cself->options & ewseq_co_SaveQual) {
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], cself->qual_buf, data->quality.elements);
                }
                else {
                    /* store quality only for the reads that are not aligned;
                       the source is the quantized buffer filled above, not
                       the base sequence */
                    uint32_t r;
                    uint8_t const *const qual = cself->qual_buf;
                    uint8_t const *const aligned = data->alignment_count.buffer;
                    INSDC_coord_zero const *const rs = data->read_start.buffer;
                    INSDC_coord_len const *const rl = data->read_len.buffer;

                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], NULL, 0);
                    for (r = 0; r < data->nreads; ++r) {
                        if (!aligned[r]) {
                            INSDC_coord_zero const readStart = rs[r];
                            INSDC_coord_len const readLen = rl[r];

                            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], &qual[readStart], readLen);
                        }
                    }
                }
            }
        }
        if( !(cself->options & ewseq_co_NoLabelData) ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL], data->label);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL_START], data->label_start);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL_LEN], data->label_len);
        }
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_TYPE], data->read_type);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_START], data->read_start);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_LEN], data->read_len);
        TW_COL_WRITE_VAR(cself->base, cself->cols[ewseq_cn_TMP_KEY_ID], data->tmp_key_id);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_SPOT_GROUP], data->spot_group);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_CSKEY], data->cskey);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_FILTER], data->read_filter);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_PLATFORM], data->platform);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_TI], data->ti);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_NAME], data->spot_name);
        if( rc == 0 ) {
            rc = TableWriter_CloseRow(cself->base);
        }
    }
    return rc;
}