示例#1
0
/*
 * Pulls spots from every input file in turn and passes each ready spot to
 * the Helicos writer, until a full pass over the files produces no spot or
 * an error occurs.
 *
 * self            - loader instance; provides the writer and is handed to read_next_spot
 * argc            - number of entries in argv
 * argv            - input files; argv[0] is the file reference given to the writer for every spot
 * spots_bad_count - not modified by this function
 *
 * Returns 0 on success, otherwise the first non-zero rc encountered.
 */
static
rc_t HelicosLoaderFmt_WriteData(HelicosLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t n;
    bool wrote;
    HelicosFileInfo* infos = calloc(argc, sizeof(*infos));

    if( infos == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
    } else {
        /* bind one reader state to each input file */
        for(n = 0; n < argc; n++) {
            HelicosFileInfo* cur = &infos[n];

            HelicosFileInfo_init(cur);
            cur->file = argv[n];
        }
    }
    /* each pass takes at most one spot from every file */
    while( rc == 0 ) {
        wrote = false;
        for(n = 0; rc == 0 && n < argc; n++) {
            HelicosFileInfo* cur = &infos[n];

            rc = read_next_spot(self, cur);
            if( rc != 0 || !cur->ready ) {
                continue;
            }
            wrote = true;
            rc = SRAWriterHelicos_Write(self->writer, argv[0], &cur->name, &cur->sequence, &cur->quality);
            /* spot consumed regardless of the write result */
            cur->ready = false;
        }
        if( !wrote ) {
            /* no file had a spot ready in this pass */
            break;
        }
    }
    free(infos);
    return rc;
}
示例#2
0
/*
 * Loads FASTQ input files.
 *
 * First configures the quality settings of every file, then loops: asks each
 * file for its next spot data, merges the ready reads of all files into one
 * spot and writes that spot with the writer that is set on the loader
 * (wIllumina, wIonTorrent or w454).
 *
 * self            - loader instance (writers, processing defaults, bad spot counters)
 * argc            - number of entries in argv
 * argv            - input files; argv[0] is the file reference given to the writers
 * spots_bad_count - receives self->spots_bad_count before returning
 *
 * Returns 0 on success, otherwise the rc that stopped the load.
 */
static
rc_t FastqLoaderFmt_WriteData(FastqLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t n, part, last = 0;
    bool any_ready;
    FastqFileInfo* infos = calloc(argc, sizeof(*infos));
    static IlluminaSpot spot;

    if( infos == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
    }

    /* per-file setup: bind the file, reset both read slots, pick quality settings */
    for(n = 0; rc == 0 && n < argc; n++) {
        ExperimentQualityType qType;
        FastqFileInfo* cur = &infos[n];

        cur->file = argv[n];
        FileReadData_init(&cur->spot[0], false);
        FileReadData_init(&cur->spot[1], false);

        rc = SRALoaderFile_QualityScoringSystem(cur->file, &qType);
        if( rc == 0 ) {
            rc = SRALoaderFile_QualityEncoding(cur->file, &cur->qualEnc);
        }
        if( rc == 0 ) {
            rc = SRALoaderFile_AsciiOffset(cur->file, &cur->qualOffset);
        }
        if( rc != 0 ) {
            continue;
        }

        cur->qualType = ILLUMINAWRITER_COLMASK_NOTSET;

        if( qType == eExperimentQualityType_Undefined ) {
            /* nothing declared for the file: fall back to the loader-wide settings */
            qType = self->processing->quality_type;
            cur->qualOffset = self->processing->quality_offset;
        }
        if( qType == eExperimentQualityType_LogOdds || qType == eExperimentQualityType_Other ) {
            if( self->w454 != NULL || self->wIonTorrent != NULL ) {
                rc = RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcInvalid);
                LOGERR(klogInt, rc, "quality type other than Phred is not supported for this PLATFORM");
            }
            cur->qualMin = -40;
            cur->qualMax = 41;
            cur->qualType = ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS1;
        } else {
            if( qType != eExperimentQualityType_Phred ) {
                /* any other value is treated as Phred after a warning */
                SRALoaderFile_LOG(cur->file, klogWarn, rc, 
                    "quality_scoring_system attribute not set for this file, using Phred as default", NULL);
            }
            cur->qualType = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
            cur->qualMin = 0;
            cur->qualMax = (self->wIllumina) ? 61: 127;
        }
    }

    while( rc == 0 ) {
        /* ask every file for its next spot data */
        any_ready = false;
        for(n = 0; rc == 0 && n < argc; n++) {
            FastqFileInfo* cur = &infos[n];

            rc = read_next_spot(self, cur);
            if( rc == 0 && cur->spot[0].ready ) {
                any_ready = true;
#if _DEBUGGING
                /* dump the first slot and, when it is ready too, the second one */
                for(part = 0; part < 2 && cur->spot[part].ready; part++) {
                    FileReadData* ss = &cur->spot[part];
                    DEBUG_MSG(3, ("file-%u: name:'%s', bc:%s, rd:%i, flt:%hu, seq '%.*s', qual %u bytes\n",
                                  n + 1, ss->name.data, ss->barcode.data, ss->read.read_id, ss->read.filter,
                                  ss->read.seq.len, ss->read.seq.data, ss->read.qual.len));
                }
#endif
            }
        }
        if( rc != 0 || !any_ready ) {
            break;
        }

        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files
         */
        IlluminaSpot_Init(&spot);
        for(n = 0; rc == 0 && n < argc; n++) {
            /* the second slot is looked at only when the first one is ready */
            for(part = 0; rc == 0 && part < 2; part++) {
                FileReadData* piece = &infos[n].spot[part];

                if( !piece->ready ) {
                    break;
                }
                rc = IlluminaSpot_Add(&spot, &piece->name, &piece->barcode, &piece->read);
                if( rc == 0 ) {
                    last = n;
                    piece->ready = false;
                } else if( GetRCState(rc) == rcIgnored ) {
                    /* not taken into this spot: stays ready for a later iteration */
                    rc = 0;
                } else {
                    SRALoaderFile_LOG(infos[n].file, klogErr, rc, "$(msg)", "msg=adding data to spot");
                }
            }
        }
        if( rc != 0 ) {
            break;
        }

        /* hand the assembled spot to the writer that is set */
        if( self->wIllumina != NULL ) {
            rc = SRAWriterIllumina_Write(self->wIllumina, argv[0], &spot);
            if( rc != 0 && GetRCTarget(rc) == rcFormatter && GetRCContext(rc) == rcValidating ) {
                SRALoaderFile_LOG(infos[last].file, klogWarn, rc, "$(msg) $(spot_name)", "msg=bad spot,spot_name=%.*s",
                                            spot.name->len, spot.name->data);
                self->spots_bad_count++;
                /* negative spots_bad_allowed means no limit on bad spots */
                if( self->spots_bad_allowed < 0 ||
                    self->spots_bad_count <= self->spots_bad_allowed ) {
                    rc = 0;
                }
            }
        } else if( spot.nreads != 1 ) {
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnsupported);
            SRALoaderFile_LOG(infos[last].file, klogErr, rc, "$(msg)", "msg=multiple reads for this platform");
        } else if( self->wIonTorrent != NULL ) {
            rc = SRAWriterIonTorrent_WriteRead(self->wIonTorrent, argv[0], spot.name,
                                               spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
        } else {
            rc = SRAWriter454_WriteRead(self->w454, argv[0], spot.name,
                                        spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
        }
    }
    free(infos);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
示例#3
0
/*
 * Groups the Illumina native input files, validates the groups and parses them.
 *
 * Grouping: for every entry of file_types (outer loop) all input files are
 * examined. The native type of a file is detected from its name and replaced
 * by the type implied by the declared run file type when that is one of the
 * rft_IlluminaNative* values. Only files whose type equals the current
 * file_types entry are processed in a given outer pass. Such a file is
 * attached to the group located with FGroup_Find (lookup key: file name
 * prefix for _prb files, name of the first spot otherwise) or becomes the
 * first file of a new group, which is inserted into the list ordered by the
 * coordinates of its first spot.
 *
 * After grouping, FGroup_Validate is applied to every group and then
 * FGroup_Parse is run over the list.
 *
 * self            - loader instance; handed to FGroup_Parse, provides spots_bad_count
 * argc            - number of entries in argv
 * argv            - input files
 * spots_bad_count - receives self->spots_bad_count before returning
 *
 * Returns 0 on success, otherwise the rc that stopped processing.
 */
static
rc_t IlluminaLoaderFmt_WriteData(IlluminaLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t t, i, k, ftype_q = sizeof(file_types) / sizeof(file_types[0]);
    SLList files;
    /* file object not yet owned by a group; freed at the end when an error leaves it pending */
    IlluminaFileInfo* file = NULL;

    SLListInit(&files);

    /* group files using spotname, for _prb. file name prefix is used,
       files reviewed by type detected from name and ordered by file_type array */
    for(t = 0; rc == 0 && t < ftype_q; t++) {
        for(i = 0; rc == 0 && i < argc; i++) {
            const char* fname, *blk_pfx;
            int prefix_len = 0;
            ERunFileType ftype;
            EIlluminaNativeFileType type = eIlluminaNativeFileTypeNotSet;
            FGroup_Find_data data;

            if( (rc = SRALoaderFileName(argv[i], &fname)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file name", NULL);
                break;
            }
            if( (rc = SRALoaderFile_FileType(argv[i], &ftype)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file type", NULL);
                break;
            }
            if( (rc = SRALoaderFileBlockName(argv[i], &blk_pfx)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading DATA_BLOCK/@name", NULL);
                break;
            }
            if( blk_pfx == NULL ) {
                blk_pfx = "";
            }
            {{
                /* skip path if present */
                const char* p = strrchr(fname, '/');
                fname = p ? p + 1 : fname;
                p = NULL;
                /* first key of file_types found inside the file name decides the type;
                   the text before the match becomes the file prefix */
                for(k = 0; type == eIlluminaNativeFileTypeNotSet && k < ftype_q; k++) {
                    const char* const* e = file_types[k].key;
                    while( *e != NULL ) {
                        p = strstr(fname, *e++);
                        if( p != NULL ) {
                            type = file_types[k].type;
                            break;
                        } 
                    }
                }
                if( p != NULL ) {
                    prefix_len = p - fname;
                }
            }}
            /* a declared native run file type takes precedence over name detection */
            if( ftype == rft_IlluminaNativeSeq ) {
                type = eIlluminaNativeFileTypeFasta;
            } else if( ftype == rft_IlluminaNativePrb ) {
                type = eIlluminaNativeFileTypeQuality4;
            } else if( ftype == rft_IlluminaNativeInt ) {
                type = eIlluminaNativeFileTypeIntensity;
            } else if( ftype == rft_IlluminaNativeQseq ) {
                type = eIlluminaNativeFileTypeQSeq;
            }
            if( type == eIlluminaNativeFileTypeNotSet ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "detecting file type by file name", NULL);
                break;
            }
            if( type != file_types[t].type ) {
                /* one type at a time */
                continue;
            }
            DEBUG_MSG(3, ("file '%s' type set to %d\n", fname, type));
            file = calloc(1, sizeof(*file));
            if( file == NULL ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcExhausted);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "allocating file object", NULL);
                break;
            }
            IlluminaFileInfo_init(file);
            file->file = argv[i];
            file->type = type;

            if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                /* in _prb there is no spotname inside so use file prefix */
                rc = pstring_assign(&data.key, fname, prefix_len);
            } else {
                /* try to get 1st spot so group can be organized by spot name */
                if( (rc = read_next_spot(blk_pfx, file)) != 0 || !file->ready ) {
                    rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcNotFound);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "reading 1st spot", NULL);
                    break;
                }
                rc = pstring_copy(&data.key, &file->name);
            }
            if( rc != 0 ) {
                /* stop here: the lookup below must not run on a key that was not set,
                   and the new-group branch would overwrite rc and hide this failure;
                   the pending file object is released at the end of the function */
                SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group lookup key", NULL);
                break;
            }

            data.found = NULL;
            if( SLListDoUntil(&files, FGroup_Find, &data) && data.found != NULL ) {
                IlluminaFileInfo* ss = data.found->files;

                /* walk to the end of the group's file chain and append; a second file of
                   the same type is an error unless the type is QSeq */
                while( rc == 0 && file != NULL ) {
                    if( ss->type != eIlluminaNativeFileTypeQSeq && ss->type == file->type ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcDuplicate);
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "type of file for lane", NULL);
                    } else if( ss->next != NULL ) {
                        ss = ss->next;
                    } else {
                        ss->next = file;
                        file->prev = ss;
                        data.found->mask |= file->type;
                        /* ownership passed to the group */
                        file = NULL;
                    }
                }
            } else {
                data.found = calloc(1, sizeof(*data.found));
                if( data.found == NULL ) {
                    rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group", NULL);
                    break;
                } else {
                    if( (rc = pstring_assign(&data.found->key, fname, prefix_len)) != 0 ) {
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                        FGroup_Whack(&data.found->dad, NULL);
                        break;
                    } else {
                        FGroup* curr = (FGroup*)SLListHead(&files), *prev = NULL;
                        data.found->blk_pfx = blk_pfx;
                        data.found->files = file;
                        data.found->mask = file->type;
                        /* group inserted into list by coords in 1st spot */
                        while( curr != NULL ) {
                            if( curr->files[0].coord[0] > file->coord[0] ||
                                (curr->files[0].coord[0] == file->coord[0] &&
                                 curr->files[0].coord[1] > file->coord[1]) ) {
                                data.found->dad.next = &curr->dad;
                                if( prev == NULL ) {
                                    files.head = &data.found->dad;
                                } else {
                                    prev->dad.next = &data.found->dad;
                                }
                                break;
                            }
                            prev = curr;
                            curr = (FGroup*)curr->dad.next;
                        }
                        if( curr == NULL ) {
                            /* no group with larger coordinates: goes to the end */
                            SLListPushTail(&files, &data.found->dad);
                        }
                        /* ownership passed to the new group */
                        file = NULL;
                    }
                }
            }
        }
    }
    if( rc == 0 ) {
        SLListForEach(&files, FGroup_Validate, &rc);
    }
    if( rc == 0 ) {
        FGroup_Parse_data data;
        data.self = self;
        /* rc is taken from data.rc only when SLListDoUntil returns true */
        if( SLListDoUntil(&files, FGroup_Parse, &data) ) {
            rc = data.rc;
        }
    } else {
        /* releases a file object that was allocated but never attached to a group */
        free(file);
    }
    SLListWhack(&files, FGroup_Whack, NULL);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
示例#4
0
/*
 * List callback that processes one file group: repeatedly reads the next
 * spot from every file of the group, merges the ready reads into
 * data->spot and writes the spot through data->self->writer.
 *
 * n - list node, used as an FGroup
 * d - FGroup_Parse_data; its rc and spot members are working state, self
 *     provides the skip flags, the bad spot counters and the writer
 *
 * Returns true when data->rc is non-zero on exit, false when the group was
 * processed to the end without error.
 */
bool FGroup_Parse( SLNode *n, void *d )
{
    FGroup_Parse_data* ctx = (FGroup_Parse_data*)d;
    FGroup* grp = (FGroup*)n;
    const SRALoaderFile* data_block_ref = NULL;

    ctx->rc = 0;
    while( ctx->rc == 0 ) {
        IlluminaFileInfo* fi;
        IlluminaFileInfo* drop;
        bool pending = false;

        /* ask every file of the group for its next spot */
        for(fi = grp->files; ctx->rc == 0 && fi != NULL; fi = fi->next) {
            ctx->rc = read_next_spot(grp->blk_pfx, fi);
            if( ctx->rc == 0 && fi->ready ) {
                pending = true;
            }
        }
        if( ctx->rc != 0 || !pending ) {
            break;
        }

        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files
         */
        IlluminaSpot_Init(&ctx->spot);
        for(fi = grp->files; ctx->rc == 0 && fi != NULL; fi = fi->next) {
            if( !fi->ready ) {
                continue;
            }
            if( (fi->type == eIlluminaNativeFileTypeNoise && ctx->self->skip_noise) ||
                (fi->type == eIlluminaNativeFileTypeIntensity && ctx->self->skip_intensity) ||
                (fi->type == eIlluminaNativeFileTypeSignal && ctx->self->skip_signal) ) {
                /* data of this kind is switched off: discard what was read */
                fi->ready = false;
                continue;
            }
            data_block_ref = fi->file;
            if( fi->type == eIlluminaNativeFileTypeQSeq && (grp->mask & eIlluminaNativeFileTypeQuality4) ) {
                /* drop quality1 from qseq data */
                pstring_clear(&fi->read.qual);
            } else if( fi->type == eIlluminaNativeFileTypeQuality4 ) {
                /* NOTE(review): neib is NULL when the prb file has neither next nor prev;
                   presumably excluded by group validation - confirm */
                IlluminaFileInfo* neib = fi->next ? fi->next : fi->prev;
                /* need to fix spotname to be same cause prb do not have any name in it */
                if( (ctx->rc = pstring_copy(&fi->name, &neib->name)) != 0 ) {
                    SRALoaderFile_LOG(fi->file, klogErr, ctx->rc, "$(msg) '$(n)'", "msg=syncing prb spot name,n=%s", neib->name.data);
                }
            }
            if( ctx->rc != 0 ) {
                continue;
            }
            ctx->rc = IlluminaSpot_Add(&ctx->spot, &fi->name, &fi->barcode, &fi->read);
            if( ctx->rc == 0 ) {
                fi->ready = false;
                continue;
            }
            if( GetRCState(ctx->rc) != rcIgnored ) {
                /* hard failure: the loop condition ends the walk */
                continue;
            }
            SRALoaderFile_LOG(fi->file, klogWarn, ctx->rc, "$(msg) '$(s1)' <> '$(s2)'",
                            "msg=spot name mismatch,s1=%.*s,s2=%.*s",
                            ctx->spot.name->len, ctx->spot.name->data, fi->name.len, fi->name.data);
            ctx->self->spots_bad_count++;
            /* skip spot for all files in a group */
            for(drop = grp->files; drop != NULL; drop = drop->next) {
                drop->ready = false;
                SRALoaderFile_LOG(drop->file, klogWarn, ctx->rc,
                                  "$(msg) '$(n)'", "msg=skipped spot,n=%s", drop->name.data);
            }
            /* a negative spots_bad_allowed disables the limit */
            if( ctx->self->spots_bad_allowed >= 0 &&
                ctx->self->spots_bad_count > ctx->self->spots_bad_allowed ) {
                ctx->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            }
            break;
        }
        if( GetRCState(ctx->rc) == rcIgnored ) {
            /* mismatched spot was dropped and the limit is not exceeded: go on with the next one */
            ctx->rc = 0;
            continue;
        }
        if( ctx->rc == 0 ) {
            ctx->rc = SRAWriterIllumina_Write(ctx->self->writer, data_block_ref, &ctx->spot);
        }
    }
    return ctx->rc != 0;
}