void stopMALdataflow(void) { int i; ATOMIC_SET(exiting, 1, exitingLock, "q_dequeue"); if (todo) { for (i = 0; i < THREADS; i++) MT_sema_up(&todo->s, "stopMALdataflow"); MT_lock_set(&dataflowLock, "stopMALdataflow"); for (i = 0; i < THREADS; i++) { if (workers[i].flag != IDLE && workers[i].flag != JOINING) { workers[i].flag = JOINING; MT_lock_unset(&dataflowLock, "stopMALdataflow"); MT_join_thread(workers[i].id); MT_lock_set(&dataflowLock, "stopMALdataflow"); } workers[i].flag = IDLE; } MT_lock_unset(&dataflowLock, "stopMALdataflow"); } }
/* coverity[+kill] */ void GDKexit(int status) { if (ATOMIC_CAS_int(GDKstopped, 0, 1, GDKstoppedLock, "GDKexit") == 0) { MT_lock_set(&GDKthreadLock, "GDKexit"); GDKnrofthreads = 0; MT_lock_unset(&GDKthreadLock, "GDKexit"); if (GDKvmtrim_id) MT_join_thread(GDKvmtrim_id); MT_sleep_ms(CATNAP); /* Kill all threads except myself */ if (status == 0) { MT_Id pid = MT_getpid(); Thread t, s; for (t = GDKthreads, s = t + THREADS; t < s; t++) { if (t->pid) { MT_Id victim = t->pid; if (t->pid != pid) MT_kill_thread(victim); } } } (void) GDKgetHome(); #if 0 /* we can't clean up after killing threads */ BBPexit(); #endif GDKlog(GDKLOGOFF); GDKunlockHome(); #if !defined(ATOMIC_LOCK) && !defined(NDEBUG) TEMDEBUG GDKlockstatistics(1); #endif MT_global_exit(status); } }
/** * Generic function that loads a given list of BAM files according to * the values of the other parameters. It starts with creating the * bam schema and the header tables if these do not exist yet. Then * it initializes bam_wrapper structs for all bam files and reads the * headers for all BAM files. If the pairwise storage schema has to * be used, It then creates a thread for every file that reads all * alignments for this file. * */ static str bam_loader(Client cntxt, MalBlkPtr mb, str * filenames, int nr_files, sht dbschema, sht nr_threads) { bam_wrapper *bws = NULL; MT_Id *reader_threads = NULL; reader_thread_data *r_thread_data = NULL; mvc *m = NULL; sql_schema *s = NULL; sql_table *files_table = NULL; lng cur_file_id; char buf_threads_msg[4096] = "There were reader threads that contained errors:\n"; int threads_msg_len = strlen(buf_threads_msg); int i, errnr; str msg = MAL_SUCCEED; TO_LOG("<bam_loader>: Loader started for %d BAM file%s...\n", nr_files, (nr_files != 1 ? "s" : "")); /* Check sanity of input */ if (dbschema != 0 && dbschema != 1) { msg = createException(MAL, "bam_loader", "Wrong value for dbschema: '%d' (0=straightforward storage schema, 1=pairwise storage schema)", dbschema); goto cleanup; } if (nr_threads <= 0) { nr_threads = 1; } else if(nr_threads > 4) { nr_threads = 4; } /* Get SQL context */ if ((msg = getSQLContext(cntxt, mb, &m, NULL)) != MAL_SUCCEED) { /* Here, and in multiple other locations in this code, * new message is stored in tmp var, since the old msg * needs to be freed after construction of the new * msg */ REUSE_EXCEPTION(msg, MAL, "bam_loader", "Could not retrieve SQLContext: %s", msg); goto cleanup; } /* Start with binding bam schema and the files table */ if ((msg = bind_bam_schema(m, &s)) != MAL_SUCCEED) goto cleanup; if((msg = bind_table(m, s, "files", &files_table)) != MAL_SUCCEED) goto cleanup; /* Get next file id from files table */ TO_LOG("<bam_loader> Retrieving next file id...\n"); if ((msg = next_file_id(m, files_table, &cur_file_id)) != MAL_SUCCEED) { goto cleanup; } /* Init bam_wrapper structs */ if ((bws = (bam_wrapper *) GDKmalloc(nr_files * sizeof(bam_wrapper))) == NULL) { msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL); goto cleanup; } /* Enables cleanup to check which bam_wrappers to clear */ memset(bws, 0, nr_files * sizeof(bam_wrapper)); for (i = 0; i < nr_files; ++i) { int fln = strlen(filenames[i]); TO_LOG("<bam_loader> Initializing BAM wrapper for file '%s'...\n", filenames[i]); if ((msg = init_bam_wrapper(bws + i, (IS_BAM(filenames[i], fln) ? BAM : SAM), filenames[i], cur_file_id++, dbschema)) != MAL_SUCCEED) { goto cleanup; } } /* Parse all headers */ for (i = 0; i < nr_files; ++i) { TO_LOG("<bam_loader> Parsing header for file '%s'...\n", filenames[i]); if ((msg = process_header(bws + i)) != MAL_SUCCEED) { goto cleanup; } } /* If we have to load the BAM data into the pairwise storage * schema, make sure that all input BAM files are sorted on * QNAME */ if (dbschema == 1) { for (i = 0; i < nr_files; ++i) { TO_LOG("<bam_loader> Checking sortedness for BAM file '%s'...\n", filenames[i]); if (bws[i].ord != ORDERING_QUERYNAME) { msg = createException(MAL, "bam_loader", "Only BAM files that are sorted on queryname can be inserted into the pairwise storage schema; " "BAM file '%s' has ordering '%s'", bws[i].file_location, ordering_str(bws[i]. ord)); goto cleanup; } } } /* Create alignment storage */ for (i = 0; i < nr_files; ++i) { TO_LOG("<bam_loader> Creating alignment tables for file '%s'...\n", filenames[i]); if ((dbschema == 0 && (msg = create_alignment_storage_0(cntxt, "bam.create_storage_0", bws + i)) != MAL_SUCCEED) || (dbschema == 1 && (msg = create_alignment_storage_1(cntxt, "bam.create_storage_1", bws + i)) != MAL_SUCCEED)) { goto cleanup; } } /* Now create threads to read alignment data of different files */ TO_LOG("<bam_loader> Creating reader threads...\n"); if ((reader_threads = (MT_Id *) GDKmalloc(nr_threads * sizeof(MT_Id))) == NULL) { msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL); goto cleanup; } if ((r_thread_data = create_reader_thread_data(bws, nr_files, nr_threads)) == NULL) { msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL); goto cleanup; } for (i = 0; i < nr_threads; ++i) { if ((errnr = MT_create_thread(&reader_threads[i], run_process_bam_alignments, &r_thread_data[i], MT_THR_JOINABLE)) != 0) { msg = createException(MAL, "bam_loader", "Could not create thread to process alignments (errnr %d)", errnr); goto cleanup; } } TO_LOG("<bam_loader> Waiting for reader threads to finish...\n"); /* Wait until all threads finish and collect their * messages. Though it is not very likely, it could be the * case that more than 1 thread generates an error message (not * likely because threads exit once they notice that another * thread has failed). Therefore, we collect all error * messages in one big error string */ for (i = 0; i < nr_threads; ++i) { if ((errnr = MT_join_thread(reader_threads[i])) != 0) { msg = createException(MAL, "bam_loader", "Could not join alignment processing thread (errnr %d)", errnr); goto cleanup; } /* Thread finished ok, append its error message, if any */ if (r_thread_data[i].msg != MAL_SUCCEED) { int step; if (msg == MAL_SUCCEED) { /* First encountered thread error, * indicate this by pointing to error * buf */ msg = buf_threads_msg; } /* snprintf returns -1 on failure; since we * don't want to fail when snprintf fails, we * use MAX to make sure we don't add a * negative amount to threads_msg_len */ step = snprintf(msg + threads_msg_len, 4096 - threads_msg_len, "* %s\n", r_thread_data[i].msg); threads_msg_len += MAX(0, step); GDKfree(r_thread_data[i].msg); } } /* Fail if any thread has failed */ if (msg != MAL_SUCCEED) { /* Do not use REUSE_EXCEPTION here, since msg was not * malloced. Instead, just copy buffer contents to * malloced buffer */ msg = GDKstrdup(msg); goto cleanup; } TO_LOG("<bam_loader> Copying data into DB...\n"); /* All threads finished succesfully, copy all data into DB */ for (i = 0; i < nr_files; ++i) { if ((msg = copy_into_db(cntxt, bws + i)) != MAL_SUCCEED) { goto cleanup; } } cleanup: if (bws) { for (i = 0; i < nr_files; ++i) { if (bws + i) clear_bam_wrapper(bws + i); } GDKfree(bws); } if (reader_threads) GDKfree(reader_threads); if (r_thread_data) destroy_reader_thread_data(r_thread_data); if (msg != MAL_SUCCEED) { TO_LOG("<bam_loader> Error on processing BAM files: %s\n", msg); } TO_LOG("<bam_loader>: Loader finished processing %d BAM file%s...\n", nr_files, (nr_files != 1 ? "s" : "")); return msg; }
/* We create a pool of GDKnr_threads-1 generic workers, that is, * workers that will take on jobs from any clients. In addition, we * create a single specific worker per client (i.e. each time we enter * here). This specific worker will only do work for the client for * which it was started. In this way we can guarantee that there will * always be progress for the client, even if all other workers are * doing something big. * * When all jobs for a client have been done (there are no more * entries for the client in the queue), the specific worker turns * itself into a generic worker. At the same time, we signal that one * generic worker should exit and this function returns. In this way * we make sure that there are once again GDKnr_threads-1 generic * workers. */ str runMALdataflow(Client cntxt, MalBlkPtr mb, int startpc, int stoppc, MalStkPtr stk) { DataFlow flow = NULL; str msg = MAL_SUCCEED; int size; int *ret; int i; #ifdef DEBUG_FLOW fprintf(stderr, "#runMALdataflow for block %d - %d\n", startpc, stoppc); printFunction(GDKstdout, mb, 0, LIST_MAL_STMT | LIST_MAPI); #endif /* in debugging mode we should not start multiple threads */ if (stk == NULL) throw(MAL, "dataflow", "runMALdataflow(): Called with stk == NULL"); ret = (int*) getArgReference(stk,getInstrPtr(mb,startpc),0); *ret = FALSE; if (stk->cmd) { *ret = TRUE; return MAL_SUCCEED; } assert(stoppc > startpc); /* check existence of workers */ if (todo == NULL) { /* create thread pool */ if (GDKnr_threads <= 1 || DFLOWinitialize() < 0) { /* no threads created, run serially */ *ret = TRUE; return MAL_SUCCEED; } i = THREADS; /* we didn't create an extra thread */ } assert(todo); /* in addition, create one more worker that will only execute * tasks for the current client to compensate for our waiting * until all work is done */ MT_lock_set(&dataflowLock, "runMALdataflow"); /* join with already exited threads */ { int joined; do { joined = 0; for (i = 0; i < THREADS; i++) { if (workers[i].flag == EXITED) { workers[i].flag = JOINING; workers[i].cntxt = NULL; joined = 1; MT_lock_unset(&dataflowLock, "runMALdataflow"); MT_join_thread(workers[i].id); MT_lock_set(&dataflowLock, "runMALdataflow"); workers[i].flag = IDLE; } } } while (joined); } for (i = 0; i < THREADS; i++) { if (workers[i].flag == IDLE) { /* only create specific worker if we are not doing a * recursive call */ if (stk->calldepth > 1) { int j; MT_Id pid = MT_getpid(); /* doing a recursive call: copy specificity from * current worker to new worker */ workers[i].cntxt = NULL; for (j = 0; j < THREADS; j++) { if (workers[j].flag == RUNNING && workers[j].id == pid) { workers[i].cntxt = workers[j].cntxt; break; } } } else { /* not doing a recursive call: create specific worker */ workers[i].cntxt = cntxt; } workers[i].flag = RUNNING; if (MT_create_thread(&workers[i].id, DFLOWworker, (void *) &workers[i], MT_THR_JOINABLE) < 0) { /* cannot start new thread, run serially */ *ret = TRUE; workers[i].flag = IDLE; MT_lock_unset(&dataflowLock, "runMALdataflow"); return MAL_SUCCEED; } break; } } MT_lock_unset(&dataflowLock, "runMALdataflow"); if (i == THREADS) { /* no empty thread slots found, run serially */ *ret = TRUE; return MAL_SUCCEED; } flow = (DataFlow)GDKzalloc(sizeof(DataFlowRec)); if (flow == NULL) throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow"); flow->cntxt = cntxt; flow->mb = mb; flow->stk = stk; flow->error = 0; /* keep real block count, exclude brackets */ flow->start = startpc + 1; flow->stop = stoppc; MT_lock_init(&flow->flowlock, "flow->flowlock"); flow->done = q_create(stoppc- startpc+1, "flow->done"); if (flow->done == NULL) { MT_lock_destroy(&flow->flowlock); GDKfree(flow); throw(MAL, "dataflow", "runMALdataflow(): Failed to create flow->done queue"); } flow->status = (FlowEvent)GDKzalloc((stoppc - startpc + 1) * sizeof(FlowEventRec)); if (flow->status == NULL) { q_destroy(flow->done); MT_lock_destroy(&flow->flowlock); GDKfree(flow); throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow->status"); } size = DFLOWgraphSize(mb, startpc, stoppc); size += stoppc - startpc; flow->nodes = (int*)GDKzalloc(sizeof(int) * size); if (flow->nodes == NULL) { GDKfree(flow->status); q_destroy(flow->done); MT_lock_destroy(&flow->flowlock); GDKfree(flow); throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow->nodes"); } flow->edges = (int*)GDKzalloc(sizeof(int) * size); if (flow->edges == NULL) { GDKfree(flow->nodes); GDKfree(flow->status); q_destroy(flow->done); MT_lock_destroy(&flow->flowlock); GDKfree(flow); throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow->edges"); } msg = DFLOWinitBlk(flow, mb, size); if (msg == MAL_SUCCEED) msg = DFLOWscheduler(flow, &workers[i]); GDKfree(flow->status); GDKfree(flow->edges); GDKfree(flow->nodes); q_destroy(flow->done); MT_lock_destroy(&flow->flowlock); GDKfree(flow); if (i != THREADS) { /* we created one worker, now tell one worker to exit again */ MT_lock_set(&todo->l, "runMALdataflow"); todo->exitcount++; MT_lock_unset(&todo->l, "runMALdataflow"); MT_sema_up(&todo->s, "runMALdataflow"); } return msg; }