/* IlluminaLoaderFmt_WriteData
 *  Sorts the input files into groups, then validates and parses the groups.
 *
 *  Files are handled one type at a time, in the order of the file_types
 *  array. A file's type is first guessed from a file_types key found in its
 *  name (path stripped) and then overridden by an explicit Illumina native
 *  run file type, if one is set on the file.
 *
 *  Each file is looked up among the existing groups with FGroup_Find: by the
 *  file name prefix for Quality4 (_prb) files, by the name of the first spot
 *  for all other types. A matching group gets the file appended to its file
 *  chain; otherwise a new group is created and inserted into the list ordered
 *  by the coordinates of the file's first spot.
 *
 *  self             [IN]  - loader object passed on to FGroup_Parse
 *  argc, argv       [IN]  - input files
 *  spots_bad_count  [OUT] - receives self->spots_bad_count on return
 *
 *  Returns 0 on success or the first non-zero rc encountered.
 */
static rc_t IlluminaLoaderFmt_WriteData(IlluminaLoaderFmt* self, uint32_t argc,
                                        const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t t, i, k, ftype_q = sizeof(file_types) / sizeof(file_types[0]);
    SLList files;
    /* file object not yet owned by any group; freed at the end on error */
    IlluminaFileInfo* file = NULL;

    SLListInit(&files);

    /* outer loop selects the type being processed, inner loop walks all input files */
    for(t = 0; rc == 0 && t < ftype_q; t++) {
        for(i = 0; rc == 0 && i < argc; i++) {
            const char* fname, *blk_pfx;
            int prefix_len = 0;
            ERunFileType ftype;
            EIlluminaNativeFileType type = eIlluminaNativeFileTypeNotSet;
            FGroup_Find_data data;

            if( (rc = SRALoaderFileName(argv[i], &fname)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file name", NULL);
                break;
            }
            if( (rc = SRALoaderFile_FileType(argv[i], &ftype)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file type", NULL);
                break;
            }
            if( (rc = SRALoaderFileBlockName(argv[i], &blk_pfx)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading DATA_BLOCK/@name", NULL);
                break;
            }
            if( blk_pfx == NULL ) {
                blk_pfx = "";
            }
            {
                /* skip path if present */
                const char* p = strrchr(fname, '/');
                fname = p ? p + 1 : fname;
                p = NULL;
                /* first key found in the name decides the type; p marks where the key starts */
                for(k = 0; type == eIlluminaNativeFileTypeNotSet && k < ftype_q; k++) {
                    const char* const* e = file_types[k].key;
                    while( *e != NULL ) {
                        p = strstr(fname, *e++);
                        if( p != NULL ) {
                            type = file_types[k].type;
                            break;
                        }
                    }
                }
                if( p != NULL ) {
                    /* everything before the matched key is the file name prefix */
                    prefix_len = p - fname;
                }
            }
            /* explicitly declared file type wins over the name-based guess */
            if( ftype == rft_IlluminaNativeSeq ) {
                type = eIlluminaNativeFileTypeFasta;
            } else if( ftype == rft_IlluminaNativePrb ) {
                type = eIlluminaNativeFileTypeQuality4;
            } else if( ftype == rft_IlluminaNativeInt ) {
                type = eIlluminaNativeFileTypeIntensity;
            } else if( ftype == rft_IlluminaNativeQseq ) {
                type = eIlluminaNativeFileTypeQSeq;
            }
            if( type == eIlluminaNativeFileTypeNotSet ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "detecting file type by file name", NULL);
                break;
            }
            if( type != file_types[t].type ) {
                /* one type at a time */
                continue;
            }
            DEBUG_MSG(3, ("file '%s' type set to %d\n", fname, type));

            file = calloc(1, sizeof(*file));
            if( file == NULL ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcExhausted);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "allocating file object", NULL);
                break;
            }
            IlluminaFileInfo_init(file);
            file->file = argv[i];
            file->type = type;

            if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                /* in _prb there is no spotname inside so use file prefix */
                rc = pstring_assign(&data.key, fname, prefix_len);
            } else {
                /* try to get 1st spot so group can be organized by spot name */
                if( (rc = read_next_spot(blk_pfx, file)) != 0 || !file->ready ) {
                    rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcNotFound);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "reading 1st spot", NULL);
                    break;
                }
                rc = pstring_copy(&data.key, &file->name);
            }
            if( rc != 0 ) {
                /* do not search with an unset key and do not let a later
                   assignment to rc hide this failure; 'file' is freed below */
                SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group lookup key", NULL);
                break;
            }

            data.found = NULL;
            if( SLListDoUntil(&files, FGroup_Find, &data) && data.found != NULL ) {
                /* existing group: walk its file chain and append at the end */
                IlluminaFileInfo* ss = data.found->files;
                while( rc == 0 && file != NULL ) {
                    if( ss->type != eIlluminaNativeFileTypeQSeq && ss->type == file->type ) {
                        /* only QSeq files may repeat within a group */
                        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcDuplicate);
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "type of file for lane", NULL);
                    } else if( ss->next != NULL ) {
                        ss = ss->next;
                    } else {
                        ss->next = file;
                        file->prev = ss;
                        data.found->mask |= file->type;
                        file = NULL; /* now owned by the group */
                    }
                }
            } else {
                data.found = calloc(1, sizeof(*data.found));
                if( data.found == NULL ) {
                    rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group", NULL);
                    break;
                } else {
                    if( (rc = pstring_assign(&data.found->key, fname, prefix_len)) != 0 ) {
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                        FGroup_Whack(&data.found->dad, NULL);
                        break;
                    } else {
                        FGroup* curr = (FGroup*)SLListHead(&files), *prev = NULL;
                        data.found->blk_pfx = blk_pfx;
                        data.found->files = file;
                        data.found->mask = file->type;
                        /* group inserted into list by coords in 1st spot */
                        while( curr != NULL ) {
                            if( curr->files[0].coord[0] > file->coord[0] ||
                                (curr->files[0].coord[0] == file->coord[0] &&
                                 curr->files[0].coord[1] > file->coord[1]) ) {
                                /* link the new group in front of the first group with larger coords */
                                data.found->dad.next = &curr->dad;
                                if( prev == NULL ) {
                                    files.head = &data.found->dad;
                                } else {
                                    prev->dad.next = &data.found->dad;
                                }
                                break;
                            }
                            prev = curr;
                            curr = (FGroup*)curr->dad.next;
                        }
                        if( curr == NULL ) {
                            /* no group with larger coords: goes to the end */
                            SLListPushTail(&files, &data.found->dad);
                        }
                        file = NULL; /* now owned by the group */
                    }
                }
            }
        }
    }
    if( rc == 0 ) {
        SLListForEach(&files, FGroup_Validate, &rc);
    }
    if( rc == 0 ) {
        FGroup_Parse_data data;
        data.self = self;
        if( SLListDoUntil(&files, FGroup_Parse, &data) ) {
            rc = data.rc;
        }
    } else {
        /* release a file object that never made it into a group */
        free(file);
    }
    SLListWhack(&files, FGroup_Whack, NULL);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
/* MakeIndexes
 *  Builds (or drops) the FUSE file indexes of a table and records them under
 *  the "/FUSE" metadata node.
 *
 *  For every entry in the local index table:
 *   - if g_ungzip is set or the file name ends in ".gz", the index is built
 *     by the entry's func into a "<file>.tmp" metadata child, committed with
 *     CommitIndex, described with WriteFileMeta and finally renamed to
 *     "<file>";
 *   - otherwise the index and its "<file>" metadata child are dropped.
 *  An rc with state rcUnsupported coming out of the build is logged as a
 *  warning and does not fail the whole operation.
 *
 *  stbl  [IN] - table handed to the index building functions
 *  ktbl  [IN] - table where indexes are committed or dropped
 *  meta  [IN] - metadata in which "/FUSE" is opened for update
 *
 *  Returns 0 on success or the first non-zero rc that stopped processing.
 */
static rc_t MakeIndexes(const SRATable* stbl, KTable* ktbl, KMetadata* meta)
{
    rc_t rc = 0;
    size_t i;
    /* scratch buffer shared by all index builders, allocated on first use */
    char* buffer = NULL;
    size_t buffer_sz = g_file_block_sz * 100;

    /* members without an initializer (md5, li, ...) are zero-initialized */
    SIndexObj idx[] = {
        /* meta, file, format, index, func, file_size, buffer_sz, minSpotId, maxSpotId */
        {NULL, "fastq", "fastq", "fuse-fastq", Fastq_Idx, 0, 0, 0, 0},
        {NULL, "sff", "SFF", "fuse-sff", SFF_Idx, 0, 0, 0, 0},
        {NULL, "fastq.gz", "fastq-gzip", "fuse-fastq-gz", FastqGzip_Idx, 0, 0, 0, 0},
        {NULL, "sff.gz", "SFF-gzip", "fuse-sff-gz", SFFGzip_Idx, 0, 0, 0, 0}
    };

    for(i = 0; rc == 0 && i < sizeof(idx) / sizeof(idx[0]); i++) {
        KMDataNode* parent = NULL;

        if( (rc = KMetadataOpenNodeUpdate(meta, &parent, "/FUSE")) == 0 ) {
            KMDataNodeDropChild(parent, "root"); /* drop old stuff */
            if( g_ungzip || strcmp(&idx[i].file[strlen(idx[i].file) - 3], ".gz") == 0 ) {
                STSMSG(0, ("Preparing index %s", idx[i].index));
                MD5StateInit(&idx[i].md5);
                SLListInit(&idx[i].li);
                /* start from a clean temporary node */
                KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
                if( (rc = KMDataNodeOpenNodeUpdate(parent, &idx[i].meta, "%s.tmp", idx[i].file)) == 0 ) {
                    if( idx[i].func != NULL ) {
                        if( buffer == NULL && (buffer = malloc(buffer_sz)) == NULL ) {
                            /* no early exit here: fall through so that the nodes
                               opened above are released and the .tmp child is dropped */
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        } else {
                            rc = idx[i].func(stbl, &idx[i], buffer, buffer_sz);
                        }
                        if( rc == 0 ) {
                            MD5StateFinish(&idx[i].md5, idx[i].md5_digest);
                            rc = CommitIndex(ktbl, idx[i].index, &idx[i].li);
                        }
                    }
                    if( rc == 0 ) {
                        rc = WriteFileMeta(&idx[i]);
                    }
                    KMDataNodeRelease(idx[i].meta);
                }
                if( GetRCState(rc) == rcUnsupported ) {
                    /* not an error: this table simply cannot provide this index */
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    PLOGERR(klogWarn, (klogWarn, rc, "Index $(i) is not supported for this table", PLOG_S(i), idx[i].index));
                    rc = 0;
                } else if( rc == 0 ) {
                    /* replace the previous node with the freshly built one */
                    char f[4096];
                    strcpy(f, idx[i].file);
                    strcat(f, ".tmp");
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    rc = KMDataNodeRenameChild(parent, f, idx[i].file);
                }
            } else if( !g_ungzip ) {
                KTableDropIndex(ktbl, idx[i].index);
                KMDataNodeDropChild(parent, "%s", idx[i].file);
            }
            /* the temporary node must not survive, whatever happened above */
            KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
            KMDataNodeRelease(parent);
        }
        /* NOTE(review): also reached when SLListInit was skipped above; the list
           is then still in its zero-initialized state - presumably safe for
           SLListWhack, confirm */
        SLListWhack(&idx[i].li, WhackIndexData, NULL);
    }
    free(buffer);
    return rc;
}