예제 #1
0
/*
 * Groups the input files, validates the groups and parses them.
 *
 * self            - loader format object; handed to FGroup_Parse and the source
 *                   of the bad-spot counter reported back to the caller
 * argc, argv      - number of input files and the input files themselves
 * spots_bad_count - [OUT] receives self->spots_bad_count on return
 *
 * Files are visited once per entry of file_types[], so that files end up
 * attached to their group in file_types[] order. The lookup key is the
 * file name prefix for _prb files (no spot name inside) and the name of
 * the 1st spot for every other type.
 *
 * Returns 0 on success, otherwise the rc of the first failure.
 */
static
rc_t IlluminaLoaderFmt_WriteData(IlluminaLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t t, i, k, ftype_q = sizeof(file_types) / sizeof(file_types[0]);
    SLList files;
    /* file object not yet owned by a group; released at the end on failure */
    IlluminaFileInfo* file = NULL;

    SLListInit(&files);

    /* group files using spotname, for _prb. file name prefix is used,
       files reviewed by type detected from name and ordered by file_type array */
    for(t = 0; rc == 0 && t < ftype_q; t++) {
        for(i = 0; rc == 0 && i < argc; i++) {
            const char* fname, *blk_pfx;
            int prefix_len = 0;
            ERunFileType ftype;
            EIlluminaNativeFileType type = eIlluminaNativeFileTypeNotSet;
            FGroup_Find_data data;

            if( (rc = SRALoaderFileName(argv[i], &fname)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file name", NULL);
                break;
            }
            if( (rc = SRALoaderFile_FileType(argv[i], &ftype)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file type", NULL);
                break;
            }
            if( (rc = SRALoaderFileBlockName(argv[i], &blk_pfx)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading DATA_BLOCK/@name", NULL);
                break;
            }
            if( blk_pfx == NULL ) {
                blk_pfx = "";
            }
            {
                /* skip path if present */
                const char* p = strrchr(fname, '/');
                fname = p ? p + 1 : fname;
                p = NULL;
                /* first matching key decides the type; text before the key is the prefix */
                for(k = 0; type == eIlluminaNativeFileTypeNotSet && k < ftype_q; k++) {
                    const char* const* e = file_types[k].key;
                    while( *e != NULL ) {
                        p = strstr(fname, *e++);
                        if( p != NULL ) {
                            type = file_types[k].type;
                            break;
                        }
                    }
                }
                if( p != NULL ) {
                    prefix_len = p - fname;
                }
            }
            /* an explicitly declared file type overrides the type guessed from the name */
            if( ftype == rft_IlluminaNativeSeq ) {
                type = eIlluminaNativeFileTypeFasta;
            } else if( ftype == rft_IlluminaNativePrb ) {
                type = eIlluminaNativeFileTypeQuality4;
            } else if( ftype == rft_IlluminaNativeInt ) {
                type = eIlluminaNativeFileTypeIntensity;
            } else if( ftype == rft_IlluminaNativeQseq ) {
                type = eIlluminaNativeFileTypeQSeq;
            }
            if( type == eIlluminaNativeFileTypeNotSet ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "detecting file type by file name", NULL);
                break;
            }
            if( type != file_types[t].type ) {
                /* one type at a time */
                continue;
            }
            DEBUG_MSG(3, ("file '%s' type set to %d\n", fname, type));
            file = calloc(1, sizeof(*file));
            if( file == NULL ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcExhausted);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "allocating file object", NULL);
                break;
            }
            IlluminaFileInfo_init(file);
            file->file = argv[i];
            file->type = type;

            if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                /* in _prb there is no spotname inside so use file prefix */
                rc = pstring_assign(&data.key, fname, prefix_len);
            } else {
                /* try to get 1st spot so group can be organized by spot name */
                if( (rc = read_next_spot(blk_pfx, file)) != 0 || !file->ready ) {
                    rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcNotFound);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "reading 1st spot", NULL);
                    break;
                }
                rc = pstring_copy(&data.key, &file->name);
            }
            if( rc != 0 ) {
                /* do not search with an unusable key and do not let the failure
                   be overwritten by the group-creation path below */
                SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group lookup key", NULL);
                break;
            }

            data.found = NULL;
            if( SLListDoUntil(&files, FGroup_Find, &data) && data.found != NULL ) {
                IlluminaFileInfo* ss = data.found->files;

                /* walk to the tail of the group's file chain, rejecting a second
                   file of the same type unless it is QSeq */
                while( rc == 0 && file != NULL ) {
                    if( ss->type != eIlluminaNativeFileTypeQSeq && ss->type == file->type ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcDuplicate);
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "type of file for lane", NULL);
                    } else if( ss->next != NULL ) {
                        ss = ss->next;
                    } else {
                        ss->next = file;
                        file->prev = ss;
                        data.found->mask |= file->type;
                        /* the file is now linked into the group's chain */
                        file = NULL;
                    }
                }
            } else {
                data.found = calloc(1, sizeof(*data.found));
                if( data.found == NULL ) {
                    rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group", NULL);
                    break;
                } else {
                    if( (rc = pstring_assign(&data.found->key, fname, prefix_len)) != 0 ) {
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                        /* group has no files attached yet; 'file' is released at function end */
                        FGroup_Whack(&data.found->dad, NULL);
                        break;
                    } else {
                        FGroup* curr = (FGroup*)SLListHead(&files), *prev = NULL;
                        data.found->blk_pfx = blk_pfx;
                        data.found->files = file;
                        data.found->mask = file->type;
                        /* group inserted into list by coords in 1st spot */
                        while( curr != NULL ) {
                            if( curr->files[0].coord[0] > file->coord[0] ||
                                (curr->files[0].coord[0] == file->coord[0] &&
                                 curr->files[0].coord[1] > file->coord[1]) ) {
                                data.found->dad.next = &curr->dad;
                                if( prev == NULL ) {
                                    files.head = &data.found->dad;
                                } else {
                                    prev->dad.next = &data.found->dad;
                                }
                                break;
                            }
                            prev = curr;
                            curr = (FGroup*)curr->dad.next;
                        }
                        if( curr == NULL ) {
                            /* no group with larger coords: append */
                            SLListPushTail(&files, &data.found->dad);
                        }
                        file = NULL;
                    }
                }
            }
        }
    }
    if( rc == 0 ) {
        SLListForEach(&files, FGroup_Validate, &rc);
    }
    if( rc == 0 ) {
        FGroup_Parse_data data;
        data.self = self;
        if( SLListDoUntil(&files, FGroup_Parse, &data) ) {
            rc = data.rc;
        }
    } else {
        /* release a file object that never made it into a group (NULL otherwise) */
        free(file);
    }
    SLListWhack(&files, FGroup_Whack, NULL);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
예제 #2
0
/*
 * Builds or drops the fuse-* indexes listed in idx[] and maintains the
 * matching child nodes under the "/FUSE" metadata node.
 *
 * stbl - table handed to each index builder function
 * ktbl - table the indexes are committed to / dropped from
 * meta - metadata in which the "/FUSE" node is opened for update
 *
 * An index is built when g_ungzip is set or its file name ends in ".gz";
 * otherwise the index and its metadata node are dropped. Metadata is first
 * written to a "<file>.tmp" node and renamed to "<file>" on success.
 * A builder reporting rcUnsupported is logged as a warning and not treated
 * as an error.
 *
 * Returns 0 on success, otherwise the rc of the first failure.
 */
static
rc_t MakeIndexes(const SRATable* stbl, KTable* ktbl, KMetadata* meta)
{
    rc_t rc = 0;
    size_t i;
    char* buffer = NULL;
    size_t buffer_sz = g_file_block_sz * 100;

    SIndexObj idx[] = {
     /*  meta, file,        format,         index,          func,    file_size, buffer_sz, minSpotId, maxSpotId */
        {NULL, "fastq",    "fastq",      "fuse-fastq",    Fastq_Idx,     0, 0, 0, 0},
        {NULL, "sff",      "SFF",        "fuse-sff",      SFF_Idx,       0, 0, 0, 0},
        {NULL, "fastq.gz", "fastq-gzip", "fuse-fastq-gz", FastqGzip_Idx, 0, 0, 0, 0},
        {NULL, "sff.gz",   "SFF-gzip",   "fuse-sff-gz",   SFFGzip_Idx,   0, 0, 0, 0}
    };
    const size_t idx_q = sizeof(idx) / sizeof(idx[0]);

    for(i = 0; rc == 0 && i < idx_q; i++) {
        KMDataNode* parent = NULL;
        if( (rc = KMetadataOpenNodeUpdate(meta, &parent, "/FUSE")) == 0 ) {
            KMDataNodeDropChild(parent, "root"); /* drop old stuff */
            if( g_ungzip || strcmp(&idx[i].file[strlen(idx[i].file) - 3], ".gz") == 0 ) {
                STSMSG(0, ("Preparing index %s", idx[i].index));
                MD5StateInit(&idx[i].md5);
                SLListInit(&idx[i].li);
                /* start from a clean temporary node */
                KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
                if( (rc = KMDataNodeOpenNodeUpdate(parent, &idx[i].meta, "%s.tmp", idx[i].file)) == 0 ) {
                    if( idx[i].func != NULL ) {
                        /* work buffer is allocated once and shared by all builders */
                        if( buffer == NULL && (buffer = malloc(buffer_sz)) == NULL ) {
                            /* no early exit here: fall through so that the opened
                               nodes are released; the loop stops on rc != 0 */
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        } else {
                            rc = idx[i].func(stbl, &idx[i], buffer, buffer_sz);
                            if( rc == 0 ) {
                                MD5StateFinish(&idx[i].md5, idx[i].md5_digest);
                                rc = CommitIndex(ktbl, idx[i].index, &idx[i].li);
                            }
                        }
                    }
                    if( rc == 0 ) {
                        rc = WriteFileMeta(&idx[i]);
                    }
                    KMDataNodeRelease(idx[i].meta);
                }
                if( GetRCState(rc) == rcUnsupported ) {
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    PLOGERR(klogWarn, (klogWarn, rc, "Index $(i) is not supported for this table", PLOG_S(i), idx[i].index));
                    rc = 0;
                } else if( rc == 0 ) {
                    /* replace the previous node with the freshly written temporary one */
                    char f[4096];
                    strcpy(f, idx[i].file);
                    strcat(f, ".tmp");
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    rc = KMDataNodeRenameChild(parent, f, idx[i].file);
                }
            } else if( !g_ungzip ) {
                KTableDropIndex(ktbl, idx[i].index);
                KMDataNodeDropChild(parent, "%s", idx[i].file);
            }
            /* remove the temporary node if it is still there */
            KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
            KMDataNodeRelease(parent);
        }
        SLListWhack(&idx[i].li, WhackIndexData, NULL);
    }
    free(buffer);
    return rc;
}