Exemple #1
0
static
rc_t copy_file (const KFile * fin, KFile *fout)
{
    rc_t rc;
    uint8_t	buff	[64 * 1024];
    size_t	num_read;
    uint64_t	inpos;
    uint64_t	outpos;

    assert (fin != NULL);
    assert (fout != NULL);

    inpos = 0;
    outpos = 0;

    do
    {
        rc = KFileRead (fin, inpos, buff, sizeof (buff), &num_read);
        if (rc != 0)
        {
            PLOGERR (klogErr, (klogErr, rc,
                     "Failed to read from directory structure in creating archive at $(P)",
                               PLOG_U64(P), inpos));
            break;
        }
        else if (num_read > 0)
        {
            size_t to_write;

            inpos += (uint64_t)num_read;

            STSMSG (2, ("Read %zu bytes to %lu", num_read, inpos));

/*             PLOGMSG (klogDebug10, "Read $(B) Bytes for $(T)", PLOG_2(PLOG_U64(B),PLOG_U64(T)), num_read, inpos); */
            to_write = num_read;
            while (to_write > 0)
            {
                size_t num_writ;
                rc = KFileWrite (fout, outpos, buff, num_read, &num_writ);
                if (rc != 0)
                {
                    PLOGERR (klogErr, (klogErr, rc,
                             "Failed to write to archive in creating archive at $(P)",
                                       PLOG_U64(P), outpos));
                    break;
                }
                outpos += num_writ;
/*                 PLOGMSG (klogDebug10, "Wrote $(B) Bytes for $(T)", PLOG_2(PLOG_U64(B),PLOG_U64(T)), num_writ, outpos); */
                to_write -= num_writ;
            }
        }
/*         else */
/*             PLOGMSG (klogDebug10, "Read $(B) Bytes for $(T)", PLOG_2(PLOG_U64(B),PLOG_U64(T)), num_read, inpos); */
        if (rc != 0)
            break;
    } while (num_read != 0);
    return rc;
}
Exemple #2
0
static
rc_t copy_file (const KFile * fin, KFile *fout)
{
    rc_t rc;
    uint8_t	buff	[64 * 1024];
    size_t	num_read;
    uint64_t	inpos;
    uint64_t	outpos;
    uint64_t    fsize;

    assert (fin != NULL);
    assert (fout != NULL);

    inpos = 0;
    outpos = 0;

    rc = KFileSize (fin, &fsize);
    if (rc != 0)
        return rc;

    do
    {
        rc = KFileRead (fin, inpos, buff, sizeof (buff), &num_read);
        if (rc != 0)
        {
            PLOGERR (klogErr, (klogErr, rc,
                               "Failed to read from directory structure in creating archive at $(P)",
                               PLOG_U64(P), inpos));
            break;
        }
        else if (num_read > 0)
        {
            size_t to_write;

            inpos += (uint64_t)num_read;

            to_write = num_read;
            while (to_write > 0)
            {
                size_t num_writ;
                rc = KFileWrite (fout, outpos, buff, num_read, &num_writ);
                if (rc != 0)
                {
                    PLOGERR (klogErr, (klogErr, rc,
                                       "Failed to write to archive in creating archive at $(P)",
                                       PLOG_U64(P), outpos));
                    break;
                }
                outpos += num_writ;
                to_write -= num_writ;
            }
        }
        if (rc != 0)
            break;
    } while (num_read != 0);
    return rc;
}
Exemple #3
0
static rc_t MaxNReadsValidator_GetKey( const SRASplitter* cself,
                                       const char** key, spotid_t spot, readmask_t* readmask )
{
    rc_t rc = 0;
    MaxNReadsValidator* self = ( MaxNReadsValidator* )cself;

    if ( self == NULL || key == NULL )
    {
        rc = RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }
    else
    {
        const void* nreads = NULL;
        bitsz_t o = 0, sz = 0;
        uint64_t nn = 0;

        *key = "";
        if ( self->col != NULL )
        {
            rc = SRAColumnRead( self->col, spot, &nreads, &o, &sz );
            if ( rc == 0 )
            {
                switch( sz )
                {
                case 8:
                    nn = *((const uint8_t*)nreads);
                    break;
                case 16:
                    nn = *((const uint16_t*)nreads);
                    break;
                case 32:
                    nn = *((const uint32_t*)nreads);
                    break;
                case 64:
                    nn = *((const uint64_t*)nreads);
                    break;
                default:
                    rc = RC( rcSRA, rcNode, rcExecuting, rcData, rcUnexpected );
                    break;
                }
                if ( nn > nreads_max )
                {
                    clear_readmask( readmask );
                    PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), maximum $(max) supported, skipped",
                                       PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), nn, spot, nreads_max));
                }
                else if ( nn == nreads_max - 1 )
                {
                    PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), truncated to $(max)",
                                       PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), nn + 1, spot, nreads_max));
                }
            }
        }
    }
    return rc;
}
Exemple #4
0
static
rc_t copy_file_skey_md5_kludge (const KFile * fin, KFile *fout)
{
/* size of HEX digest plus spzce plus * */
#define READ_SIZE 34
    static const uint8_t skey[] = "skey\n";
    uint8_t buff [256];
    uint64_t tot_read, tot_writ;
    size_t num_read, num_writ;
    rc_t rc;

    assert (fin);
    assert (fout);

    for (tot_read = 0 ; tot_read < READ_SIZE; tot_read += num_read)
    {
        rc = KFileRead (fin, tot_read, buff, READ_SIZE - tot_read, &num_read);
        if (rc != 0)
        {
            PLOGERR (klogErr, (klogErr, rc,
                               "Failed to read from directory structure in creating archive at $(P)",
                               PLOG_U64(P), tot_read));
            break;
        }
        if (num_read == 0)
            break;
    }
    if (rc == 0)
    {
        if (tot_read == READ_SIZE)
        {
            memcpy (buff + READ_SIZE, skey, sizeof (skey));
            tot_read += sizeof (skey) - 1;
        }

        for (tot_writ = 0; tot_writ < tot_read; tot_writ += num_writ)
        {
            rc = KFileWrite (fout, tot_writ, buff + tot_writ, 
                             (uint32_t)(tot_read - tot_writ), &num_writ);
            if (rc != 0)
            {
                PLOGERR (klogErr, (klogErr, rc,
                                   "Failed to write to archive in creating archive at $(P)",
                                   PLOG_U64(P), num_writ));
                break;
            }
        }
    }
    return rc;
}
/** Read a character from input file */
static rc_t SpotIteratorFileReadCharWithEof(SpotIterator* self,
    char* buffer)
{
    rc_t rc = 0;
    size_t num_read = 0;

    assert(self);

    /* get back the saved character */
    if (self->hasCh) {
        buffer[0] = self->ch;
        self->hasCh = false;
    }
    else {
        rc = KFileRead(self->file, self->filePos, buffer, 1, &num_read);
        if (rc == 0) {
            if (num_read == 0) {
                self->eof = true;
            }
            else { self->filePos += num_read; }
        }
        else {
            PLOGERR(klogErr, (klogErr, rc,
                "on line $(lineno) while reading file '$(path)'",
                PLOG_U64(lineno) ",path=%s", self->line, self->filename));
        }
    }

    return rc;
}
Exemple #6
0
/** Read a character from the input file */
static rc_t SpotIteratorFileReadWithEof(struct SpotIterator* self,
    void* buffer, size_t bsize)
{
    rc_t rc = 0;
    size_t num_read = 0;

    assert(self);

    rc = KFileRead(self->m_file, self->m_filePos, buffer, bsize, &num_read);
    if (rc == 0)
    {
        if (num_read == 0)
        {
            self->m_eof = true;
        }
        else
        {
            self->m_filePos += num_read;
        }
    }
    else
    {
        plogerr(klogErr, rc, "on line $(lineno) while reading file '$(path)'",
                PLOG_U64(lineno) ",path=%s", self->m_line, self->m_filename);
    }

    return rc;
}
Exemple #7
0
rc_t WriteFileMeta(SIndexObj* obj)
{
    rc_t rc = 0;
    KMDataNode* nd = NULL;

    PLOGMSG(klogInfo, (klogInfo, "Meta $(f) on index $(i): file size $(s), buffer $(b)",
        PLOG_4(PLOG_S(f),PLOG_S(i),PLOG_U64(s),PLOG_U32(b)), obj->file, obj->index, obj->file_size, obj->buffer_sz));

    if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Format")) == 0 ) {
        KMDataNode* opt = NULL;
        rc = KMDataNodeWriteCString(nd, obj->format);
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(nd, &opt, "Options")) == 0 ) {
            KMDataNode* ond = NULL;
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &ond, "accession")) == 0 ) {
                rc = KMDataNodeWriteCString(ond, g_accession);
                KMDataNodeRelease(ond);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &ond, "minSpotId")) == 0 ) {
                rc = KMDataNodeWriteB64(ond, &obj->minSpotId);
                KMDataNodeRelease(ond);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &ond, "maxSpotId")) == 0 ) {
                rc = KMDataNodeWriteB64(ond, &obj->maxSpotId);
                KMDataNodeRelease(ond);
            }
            KMDataNodeRelease(opt);
        }
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->file_size > 0 && (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Size")) == 0 ) {
        rc = KMDataNodeWriteB64(nd, &obj->file_size);
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->buffer_sz > 0 && (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Buffer")) == 0 ) {
        rc = KMDataNodeWriteB32(nd, &obj->buffer_sz);
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && strlen(obj->index) > 0 && (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Index")) == 0 ) {
        rc = KMDataNodeWriteCString(nd, obj->index);
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->file_size > 0 && (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "md5")) == 0 ) {
        char x[5];
        int i;
        for( i = 0; rc == 0 && i < sizeof(obj->md5_digest); i++ ) {
            int l = snprintf(x, 4, "%02x", obj->md5_digest[i]);
            rc = KMDataNodeAppend(nd, x, l);
        }
        KMDataNodeRelease(nd);
    }
    return rc;
}
/** Get next spot from input file */
static rc_t SpotIteratorReadSpotToRedact(SpotIterator* self)
{
    rc_t rc = 0;

    assert(self);

    while (rc == 0 && ! self->eof) {
        rc = SpotIteratorReadLine(self);

        /* skip empty lines */ 
        if ((rc == 0) && (self->inBuffer > 0)) {
            spotid_t spot = 0;

            /* make sure the line contains digits only */
            int i = 0;
            for (i = 0; i < self->inBuffer; ++i) {
                if (!isdigit(self->buffer[i])) {
                    rc = RC(rcExe, rcFile, rcReading, rcChar, rcUnexpected);
                    PLOGERR(klogErr, (klogErr, rc,
                            "character '$(char)' on line $(lineno)"
                            " while reading file '$(path)': '$(line)'",
                        "char=%c," PLOG_U64(lineno) ",path=%s,line=%s",
                        self->buffer[i], self->line,
                        self->filename, self->buffer));
                    return rc;
                }
            }

            sscanf(self->buffer, "%ld", &spot);

            if (spot == 0) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "bad spot id '0' on line $(lineno) "
                    "while reading file '$(path)': '$(line)'",
                    PLOG_U64(lineno) ",path=%s,line=%s",
                    self->line, self->filename, self->buffer));
            }
            else if (spot == self->spotToReduct) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc, "duplicated spot id '$(spot)' "
                    "on line $(lineno) while reading file '$(path)': '$(line)'",
                    PLOG_U32(spot) "," PLOG_U64(lineno) ",path=%s,line=%s",
                    spot, self->line, self->filename, self->buffer));
            }
            else if (spot < self->spotToReduct) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc, "File '$(path)' is unsorted. "
                    "$(id) < $(last). See line $(lineno): '$(line)'",
                    "path=%s," PLOG_U32(id) "," PLOG_U32(last) ","
                        PLOG_U64(lineno) ",line=%s",
                    self->filename, spot, self->spotToReduct,
                    self->line, self->buffer));
            }
            else if (spot > self->maxSpotId) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "spotId $(spot) on line $(lineno) "
                    "of file '$(path)' is bigger that the max spotId $(max): "
                        "'$(line)'",
                    PLOG_U32(spot) "," PLOG_U64(lineno) ",path=%s,"
                    PLOG_U32(max) ",line=%s",
                    spot, self->line, self->filename, self->maxSpotId,
                    self->buffer));
            }
            else {
                self->spotToReduct = spot;
                self->inBuffer = 0;
            }
            break;
        }
    }

    return rc;
}
/** Read a line from input file */
static rc_t SpotIteratorReadLine(SpotIterator* self)
{
    rc_t rc = 0;
    bool overflow = false; /* input buffer overflow */

    /* to skip leading/traling spaces */
    enum ELane {
        eBefore,
        eIn,
        eAfter
    } state = eBefore;

    if (self->eof) {
        return rc;
    }

    assert(self);

    ++self->line;
    self->inBuffer = 0;

    while (!overflow && !self->eof) /* do until in-buffer overflow or EOF*/{
        char ch = 0;
        /* get next characted */
        if ((rc = SpotIteratorFileReadCharWithEof(self, &ch)) != 0) {
            return rc;
        }

        if (!self->eof) {
            /* treat leading/trailing spaces */
            switch (state) {
                /* skip leading spaces */
                case eBefore:
                    if (isblank(ch)) {
                        continue;
                    }
                    else {
                        state = eIn;
                    }
                    break;
                case eIn:
                    if (isblank(ch)) {
                        state = eAfter;
                        continue;
                    }
                    break;
                /* skip trailing spaces */
                case eAfter:
                    if (isblank(ch)) {
                        continue;
                    }
                    else if (ch != '\n' && ch != '\r') {
                        rc = RC(rcExe, rcFile, rcReading, rcChar, rcUnexpected);
                        PLOGERR(klogErr, (klogErr, rc, "bad symbol '$(char)' "
                            "on line $(lineno) in file '$(path)': '$(line)...'",
                            "char=%c," PLOG_U64(lineno) ",path=%s,line=%s",
                            ch, self->line, self->filename, self->buffer))
                        ;
                        return rc;
                    }
                    break;
            }

            /* add next non-blank characted */
            overflow = !SpotIteratorBufferAdd(self, ch);
            if (!overflow) {
      /* all combinations as "\r", "\n", "\r\n", "\n\r" are considered as EOL */
                if (ch == '\n' || ch == '\r') {
                    char c1 = 0;
                    if ((rc = SpotIteratorFileReadCharWithEof(self, &c1)) != 0)
                    {
                        return rc;
                    }
                    if (self->eof) {
                        break;
                    }
                    else if ((c1 != '\n' && c1 != '\r') || (ch == c1)) {
       /* save the character when EOL is a single character (WINDOWS):
          will be get back in SpotIteratorFileReadCharWithEof */
                        self->ch = c1;
                        self->hasCh = true;
                        break;
                    }
                    else {
                        overflow = !SpotIteratorBufferAdd(self, c1);
                        break;
                    }
                }
            }
        }
    }

    /* remove EOL */
    if (!overflow) {
        bool done = false;

        while (self->inBuffer > 0 && !done) {
            switch (self->buffer[self->inBuffer - 1]) {
                case '\n': case '\r':
                    self->buffer[--self->inBuffer] = '\0';
                    break;
                default:
                    done = true;
                    break;
            }
        }
    }
    else {
        rc = RC(rcExe, rcFile, rcReading, rcString, rcTooLong);
        PLOGERR(klogErr, (klogErr, rc,
            "on line $(lineno) while reading file '$(path)': '$(line)...'",
            PLOG_U64(lineno) ",path=%s,line=%s",
            self->line, self->filename, self->buffer));
    }

    return rc;
}
Exemple #10
0
/*******************************************************************************
 * KMain - defined for use with kapp library
 *******************************************************************************/
rc_t CC KMain ( int argc, char* argv[] )
{
    rc_t rc = 0;
    int i;
    const char* arg;
    uint64_t total_spots = 0;

    const SRAMgr* sraMGR = NULL;
    SRADumperFmt fmt;

    bool to_stdout = false, do_gzip = false, do_bzip2 = false;
    char const* outdir = NULL;
    spotid_t minSpotId = 1;
    spotid_t maxSpotId = ~0;
    bool sub_dir = false;
    bool keep_empty = false;
    const char* table_path[10240];
    int table_path_qty = 0;

    char const* D_option = NULL;
    char const* P_option = NULL;
    char P_option_buffer[4096];
    const char* accession = NULL;
    const char* table_name = NULL;

    bool spot_group_on = false;
    int spot_groups = 0;
    char* spot_group[128] = {NULL};
    bool read_filter_on = false;
    SRAReadFilter read_filter = 0xFF;

    bool failed_to_open = false;

    /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */
    if ( fasta_dump_requested( argc, argv ) )
    {
        return fasta_dump( argc, argv );
    }

    /* Prepare for the worst: report this information after disaster */
    ReportBuildDate ( __DATE__ );

    memset( &fmt, 0, sizeof( fmt ) );
    rc = SRADumper_Init( &fmt );
    if ( rc != 0 )
    {
        LOGERR(klogErr, rc, "formatter initialization");
        return 100;
    }
    else if ( fmt.get_factory == NULL )
    {
        rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull );
        LOGERR( klogErr, rc, "formatter factory" );
        return 101;
    }
    else
    {
        rc = SRADumper_ArgsValidate( argv[0], &fmt );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "formatter args list" );
            return 102;
        }
    }

    if ( argc < 2 )
    {
        CoreUsage( argv[0], &fmt, true, EXIT_FAILURE );
        return 0;
    }

    for ( i = 1; i < argc; i++ )
    {
        arg = argv[ i ];
        if ( arg[ 0 ] != '-' )
        {
            uint32_t k;
            for ( k = 0; k < table_path_qty; k++ )
            {
                if ( strcmp( arg, table_path[ k ] ) == 0 )
                {
                    break;
                }
            }
            if ( k >= table_path_qty )
            {
                if ( ( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) )
                {
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    goto Catch;
                }
                table_path[ table_path_qty++ ] = arg;
            }
            continue;
        }
        arg = NULL;
        if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) )
        {
            rc = LogLevelSet( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
        }
        else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) )
        {
#if _DEBUGGING
            rc = KDbgSetString( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
#endif
        }
        else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) ||
                  SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) )
        {
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS );

        }
        else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) )
        {
            HelpVersion ( argv[ 0 ], KAppVersion() );
            return 0;
        }
        else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) )
        {
            KStsLevelAdjust( 1 );

        }
        else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) )
        {
            LOGMSG( klogErr, "option -D is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) )
        {
            LOGMSG( klogErr, "option -P is deprecated, see --help" );

        }
        else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) )
        {
            to_stdout = true;
        }
        else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) )
        {
            do_gzip = true;
        }
        else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) )
        {
            do_bzip2 = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
        {
            minSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
        {
            maxSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
        {
            spot_group_on = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) )
        {
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                int f = 0, t = 0;
                i++;
                while ( argv[ i ][ t ] != '\0' )
                {
                    if ( argv[ i ][ t ] == ',' )
                    {
                        if ( t - f > 0 )
                        {
                            spot_group[ spot_groups++ ] = strndup( &argv[ i ][ f ], t - f );
                        }
                        f = t + 1;
                    }
                    t++;
                }
                if ( t - f > 0 )
                {
                    spot_group[ spot_groups++ ] = strndup( &argv[ i ][ f ], t - f );
                }
                if ( spot_groups < 1 )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                spot_group[ spot_groups ] = NULL;
            }
        }
        else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) )
        {
            read_filter_on = true;
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                i++;
                if ( read_filter != 0xFF )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)",
                                        PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                if ( strcasecmp( argv[ i ], "pass" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_PASS;
                }
                else if ( strcasecmp( argv[ i ], "reject" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REJECT;
                }
                else if ( strcasecmp( argv[ i ], "criteria" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_CRITERIA;
                }
                else if ( strcasecmp( argv[ i ], "redacted" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REDACTED;
                }
                else
                {
                    /* must be accession */
                    i--;
                }
            }
        }
        else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) )
        {
            sub_dir = true;
        }
        else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) )
        {
            keep_empty = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "no-user-settings", &i, argc, argv, NULL ) )
        {
            KConfigDisableUserSettings ();
        }
        else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) )
        {
        }
        else
        {
            rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect );
            PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) );
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
        }
    }

    if ( to_stdout )
    {
        if ( outdir != NULL || sub_dir || keep_empty ||
                spot_group_on || ( read_filter_on && read_filter == 0xFF ) )
        {
            LOGMSG( klogWarn, "stdout mode is set, some options are ignored" );
            spot_group_on = false;
            if ( read_filter == 0xFF )
            {
                read_filter_on = false;
            }
        }
        KOutHandlerSetStdErr();
        KStsHandlerSetStdErr();
        KLogHandlerSetStdErr();
        ( void ) KDbgHandlerSetStdErr();
    }

    if ( do_gzip && do_bzip2 )
    {
        rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous );
        LOGERR( klogErr, rc, "output compression method" );
        CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
    }

    if ( minSpotId > maxSpotId )
    {
        spotid_t temp = maxSpotId;
        maxSpotId = minSpotId;
        minSpotId = temp;
    }

    if ( table_path_qty == 0 )
    {
        if ( D_option != NULL && D_option[ 0 ] != '\0' )
        {
            /* support deprecated '-D' option */
            table_path[ table_path_qty++ ] = D_option;
        }
        else if ( accession == NULL || accession[ 0 ] == '\0' )
        {
            /* must have accession to proceed */
            rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty );
            LOGERR( klogErr, rc, "expected accession" );
            goto Catch;
        }
        else if ( P_option != NULL && P_option[ 0 ] != '\0' )
        {
            /* support deprecated '-P' option */
            i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession );
            if ( i < 0 || i >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                goto Catch;
            }
            table_path[ table_path_qty++ ] = P_option_buffer;
        }
        else
        {
            table_path[ table_path_qty++ ] = accession;
        }
    }

    rc = SRAMgrMakeRead( &sraMGR );
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "failed to open SRA manager" );
        goto Catch;
    }
    else
    {
        rc = SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "failed to initialize files" );
            goto Catch;
        }
    }

    {
        const VDBManager* vmgr = NULL;
        rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr );
        if ( rc2 != 0 )
        {
            LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" );
        }
        rc2 = ReportSetVDBManager( vmgr );
        VDBManagerRelease( vmgr );
    }


    /* loop tables */
    for ( i = 0; i < table_path_qty; i++ )
    {
        const SRASplitterFactory* fact_head = NULL;
        spotid_t smax, smin;

        SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) );
        if ( table_name != NULL )
        {
            rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc,
                                    "failed to open '$(path):$(table)'", "path=%s,table=%s",
                                    table_path[ i ], table_name ) );
                continue;
            }
        }

        ReportResetObject( table_path[ i ] );
        if ( fmt.table == NULL )
        {
            rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] );
            if ( rc != 0 )
            {
                if ( UIError( rc, NULL, NULL ) )
                {
                    UITableLOGError( rc, NULL, true );
                }
                else
                {
                    PLOGERR( klogErr, ( klogErr, rc,
                                        "failed to open '$(path)'", "path=%s",
                                        table_path[ i ] ) );
                    if (GetRCState(rc) == rcNotFound) {
                        failed_to_open = true;
                    }
                }
                continue;
            }
        }

        /* infer accession from table_path if missing or more than one table */
        fmt.accession = table_path_qty > 1 ? NULL : accession;
        if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 )
        {
            char * basename;
            char *ext;
            size_t l;
            bool is_url = false;

            strcpy( P_option_buffer, table_path[ i ] );

            basename = strchr ( P_option_buffer, ':' );
            if ( basename )
            {
                ++basename;
                if ( basename [0] == '\0' )
                    basename = P_option_buffer;
                else
                    is_url = true;
            }
            else
                basename = P_option_buffer;

            if ( is_url )
            {
                ext = strchr ( basename, '#' );
                if ( ext )
                    ext[ 0 ] = '\0';
                ext = strchr ( basename, '?' );
                if ( ext )
                    ext[ 0 ] = '\0';
            }


            l = strlen( basename  );
            while ( strchr( "\\/", basename[ l - 1 ] ) != NULL )
            {
                basename[ --l ] = '\0';
            }
            fmt.accession = strrchr( basename, '/' );
            if ( fmt.accession++ == NULL )
            {
                fmt.accession = basename;
            }

            /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */
            ext = strrchr( fmt.accession, '.' );
            if ( ext != NULL )
            {
                if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ",ncbi_enc" ) == 0 )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                }
                if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                    if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 )
                    {
                        *ext = '\0';
                    }
                }
            }
        }

        SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) );
        rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession );

        while ( rc == 0 )
        {
            /* sort out the spot id range */
            if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 ||
                    ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 )
            {
                break;
            }

            {
                const struct VTable* tbl = NULL;
                rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                rc2 = ReportResetTable( table_path[i], tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                VTableRelease( tbl );   /* SRATableGetVTableRead adds Reference to tbl! */
            }

            /* test if we have to dump anything... */
            if ( smax < minSpotId || smin > maxSpotId )
            {
                break;
            }
            if ( smax > maxSpotId )
            {
                smax = maxSpotId;
            }
            if ( smin < minSpotId )
            {
                smin = minSpotId;
            }

            /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */
            if ( true ) /* ??? */
            {
                const SRAColumn* c = NULL;
                nreads_max = NREADS_MAX;
                rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t );
                if ( rc == 0 )
                {
                    const INSDC_SRA_platform_id *platform;
                    bitsz_t o, z;
                    rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z );
                    if ( rc == 0 && platform != NULL )
                    {
                        if ( *platform != SRA_PLATFORM_PACBIO_SMRT )
                        {
                            nreads_max = 32;
                        }
                    }
                    SRAColumnRelease( c );
                }
                else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn )
                {
                    rc = 0;
                }
            }

            /* table dependent */
            rc = fmt.get_factory( &fmt, &fact_head );
            if ( rc != 0 )
            {
                break;
            }
            if ( fact_head == NULL )
            {
                rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull );
                break;
            }

            if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) )
            {
                const SRASplitterFactory* f = NULL;
                rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 && read_filter_on )
            {
                const SRASplitterFactory* f = NULL;
                rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 )
            {
                /* this filter takes over head of chain to be first and kill off bad NREADS */
                const SRASplitterFactory* f = NULL;
                rc = MaxNReadsValidatorFactory_Make( &f, fmt.table );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            rc = SRASplitterFactory_Init( fact_head );
            if ( rc == 0 )
            {
                /* ********************************************************** */
                rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head );
                /* ********************************************************** */
                if ( rc == 0 )
                {
                    uint64_t total = 0, file = 0;
                    SRASplitterFactory_FilerReport( &total, &file );
                    OUTMSG(( "Written %lu spots for %s\n", total - total_spots, table_path[ i ] ));
                    if ( to_stdout && total > 0 )
                    {
                        PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots",
                                             PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file ));
                    }
                    total_spots = total;
                }
            }
            break;
        }

        SRASplitterFactory_Release( fact_head );
        SRATableRelease( fmt.table );
        fmt.table = NULL;
        if ( rc == 0 )
        {
            PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots",
                                 PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)),
                                 table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) );
        }
        else if ( !reportToUser( rc, argv [0 ] ) )
        {
            PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)",
                                PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)),
                                table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) );
        }
    }

Catch:
    if ( fmt.release )
    {
        rc_t rr = fmt.release( &fmt );
        if ( rr != 0 )
        {
            SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) );
        }
    }

    for ( i = 0; i < spot_groups; i++ )
    {
        free( spot_group[ i ] );
    }
    SRASplitterFiler_Release();
    SRAMgrRelease( sraMGR );
    OUTMSG(( "Written %lu spots total\n", total_spots ));


    if (failed_to_open) {
        ReportSilence();
    }
    {
        /* Report execution environment if necessary */
        rc_t rc2 = ReportFinalize( rc );
        if ( rc == 0 )
        {
            rc = rc2;
        }
    }
    return rc;
}
Exemple #11
0
rc_t txt2kdb_io()
{
    rc_t rc = 0;
    uint64_t rowid = 1;
    uint64_t tix = 0;
    KColumnBlob * blob;
    bool blobopen = false;

    while (rc == 0)
    {
        size_t num_read;
        uint8_t buffer  [4096];
        uint8_t * limit;
        uint8_t * append_start = buffer;
        uint8_t * cursor = buffer;
        bool eol = true;

        /* quit if we are already past the end of the range */
        if ( ! rowid_upper_range(rowid))
            break;

        /* read a buffer full.  It may straddle rows. */
        rc = KFileRead (G.txt, tix, buffer, sizeof buffer, &num_read);
        if (rc)
        {
            PLOGERR (klogFatal, (klogFatal, rc, "Read failed starting $(P)", PLOG_U64(P), tix));
            break;
        }
        /* break at EOF */
        if (num_read == 0)
            break;

        /* scan across the buffer looking for lines */
        for (limit = buffer + num_read; cursor < limit; append_start = cursor)
        {
            /* if we are at the beginning of a line (end of previous line or start of first */
            if (eol)
            {
                /* if we are within the pass thru range create a blob */
                if (rowid_lower_range(rowid) && rowid_upper_range(rowid))
                {
                    rc = KColumnCreateBlob (G.col, &blob);
                    if (rc)
                    {
                        PLOGERR (klogFatal, (klogFatal, rc, "Failed to create Blob for row $(R) at $(P)",
                                             PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));
                        continue;
                    }
                    blobopen = true;
                }
                /* clear the flag */
                eol = false;
            }

            /* this blob append will go until end of buffer or end of line */
            for ( ; cursor < limit; ++ cursor, ++tix)
            {
                /* if we hit a NewLine flag it and break for append */
                if (*cursor == '\n')
                {
                    eol = true;
                    ++cursor;
                    ++tix;
                    break;
                }
            }

            /* if we are within the selected range append this to the open blob 
             * ir might be the first append, a middle append, a last append or only append */
            if (blobopen)
            {
                rc = KColumnBlobAppend (blob, append_start, cursor - append_start);
                if (rc)
                {
                    PLOGERR (klogFatal, (klogFatal, rc, "Failed to append Blob for row $(R) at $(P)",
                                         PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));

                    break;
                }
            }
            /* if we hit a NewLine and are within range we will close this blob */
            if (eol)
            {
                if (blobopen)
                {
                    /* single row blobs */
                    rc = KColumnBlobAssignRange (blob, rowid, 1);
                    if (rc)
                    {
                        PLOGERR (klogFatal, (klogFatal, rc, "Failed to range assign blob for row $(R) at $(P)",
                                             PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));
                        break;
                    }
                    rc = KColumnBlobCommit (blob);
                    if (rc)
                    {
                        PLOGERR (klogFatal, (klogFatal, rc, "Failed to commit blob for row $(R) at $(P)",
                                             PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));
                        break;
                    }
                    rc = KColumnBlobRelease (blob);
                    if (rc)
                    {
                        PLOGERR (klogFatal, (klogFatal, rc, "Failed to release blob for row $(R) at $(P)",
                                             PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));
                        break;
                    }
                    blobopen = false;
                }
                ++rowid;
                if ( ! rowid_upper_range (rowid))
                    break;
            }
        }       
    }
    
    /* if not in an error state and the last line was unterminated close the blob */
    if ((rc == 0) && blobopen)
    {
        rc = KColumnBlobAssignRange (blob, rowid, 1);
        if (rc)
        {
            PLOGERR (klogFatal, (klogFatal, rc, "Failed to range assign blob for row $(R) at $(P)",
                                 PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));
        }
        else
        {
            rc = KColumnBlobCommit (blob);
            if (rc)
            {
                PLOGERR (klogFatal, (klogFatal, rc, "Failed to commit blob for row $(R) at $(P)",
                                     PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix));
            }
        }
        KColumnBlobRelease (blob);
    }
    return rc;
}