Пример #1
0
/* Shut down the dataflow worker pool.
 *
 * Raises the global `exiting` flag, then (if the `todo` queue exists)
 * ups the queue semaphore once per worker slot and joins every worker
 * that is neither IDLE nor already being joined by someone else.
 * Afterwards all slots are marked IDLE. */
void
stopMALdataflow(void)
{
	int slot;

	ATOMIC_SET(exiting, 1, exitingLock, "q_dequeue");
	if (!todo)
		return;				/* no queue was ever created */

	/* one semaphore up per slot */
	for (slot = 0; slot < THREADS; slot++)
		MT_sema_up(&todo->s, "stopMALdataflow");

	MT_lock_set(&dataflowLock, "stopMALdataflow");
	for (slot = 0; slot < THREADS; slot++) {
		if (!(workers[slot].flag == IDLE || workers[slot].flag == JOINING)) {
			/* claim the join while holding the lock, but drop the
			 * lock for the duration of the join itself */
			workers[slot].flag = JOINING;
			MT_lock_unset(&dataflowLock, "stopMALdataflow");
			MT_join_thread(workers[slot].id);
			MT_lock_set(&dataflowLock, "stopMALdataflow");
		}
		workers[slot].flag = IDLE;
	}
	MT_lock_unset(&dataflowLock, "stopMALdataflow");
}
Пример #2
0
/* Terminate the process with the given exit status.
 *
 * Only the first caller (the one that flips GDKstopped from 0 to 1)
 * performs the shutdown; for every other caller the function simply
 * returns.  The shutdown resets the thread count, joins the vmtrim
 * thread if there is one, naps briefly, kills all other registered
 * threads when status is 0, switches logging off, releases the home
 * lock and finally calls MT_global_exit(status). */
/* coverity[+kill] */
void
GDKexit(int status)
{
	if (ATOMIC_CAS_int(GDKstopped, 0, 1, GDKstoppedLock, "GDKexit") != 0)
		return;				/* somebody else is already shutting down */

	MT_lock_set(&GDKthreadLock, "GDKexit");
	GDKnrofthreads = 0;
	MT_lock_unset(&GDKthreadLock, "GDKexit");
	if (GDKvmtrim_id)
		MT_join_thread(GDKvmtrim_id);
	MT_sleep_ms(CATNAP);

	/* Kill all threads except myself */
	if (status == 0) {
		MT_Id self = MT_getpid();
		Thread cur = GDKthreads;
		Thread end = cur + THREADS;

		while (cur < end) {
			if (cur->pid) {
				MT_Id victim = cur->pid;

				if (cur->pid != self)
					MT_kill_thread(victim);
			}
			cur++;
		}
	}
	(void) GDKgetHome();
#if 0
	/* we can't clean up after killing threads */
	BBPexit();
#endif
	GDKlog(GDKLOGOFF);
	GDKunlockHome();
#if !defined(ATOMIC_LOCK) && !defined(NDEBUG)
	TEMDEBUG GDKlockstatistics(1);
#endif
	MT_global_exit(status);
}
Пример #3
0
/**
 * Generic function that loads a given list of BAM files according to
 * the values of the other parameters.  It binds the bam schema and
 * its files table, initializes a bam_wrapper struct for every file
 * and parses all headers.  For the pairwise storage schema it checks
 * that every file is ordered on queryname.  It then creates the
 * alignment storage, starts the reader threads that process the
 * alignments and, when all of them finished without error, copies the
 * data into the database.
 *
 * @param cntxt      client context, passed on to the storage helpers
 * @param mb         MAL block, used to retrieve the SQL context
 * @param filenames  array of nr_files file names
 * @param nr_files   number of entries in filenames
 * @param dbschema   0 = straightforward storage schema,
 *                   1 = pairwise storage schema
 * @param nr_threads requested number of reader threads; values
 *                   outside [1, 4] are clamped to that range
 * @return MAL_SUCCEED on success, an error message otherwise
 */
static str
bam_loader(Client cntxt, MalBlkPtr mb, str * filenames, int nr_files,
	   sht dbschema, sht nr_threads)
{
	bam_wrapper *bws = NULL;
	MT_Id *reader_threads = NULL;
	reader_thread_data *r_thread_data = NULL;
	mvc *m = NULL;
	sql_schema *s = NULL;
	sql_table *files_table = NULL;
	lng cur_file_id;
	char buf_threads_msg[4096] = "There were reader threads that contained errors:\n";
	int threads_msg_len = (int) strlen(buf_threads_msg);
	int nr_started = 0;		/* number of successfully created threads */
	int thread_failed = 0;	/* did any reader thread report an error? */
	int i, errnr;
	str msg = MAL_SUCCEED;

	TO_LOG("<bam_loader>: Loader started for %d BAM file%s...\n",
		   nr_files, (nr_files != 1 ? "s" : ""));

	/* Check sanity of input */
	if (dbschema != 0 && dbschema != 1) {
		msg = createException(MAL, "bam_loader",
					  "Wrong value for dbschema: '%d' (0=straightforward storage schema, 1=pairwise storage schema)",
					  dbschema);
		goto cleanup;
	}
	if (nr_threads <= 0) {
		nr_threads = 1;
	} else if(nr_threads > 4) {
		nr_threads = 4;
	}

	/* Get SQL context */
	if ((msg = getSQLContext(cntxt, mb, &m, NULL)) != MAL_SUCCEED) {
		/* Here, and in multiple other locations in this code,
		 * new message is stored in tmp var, since the old msg
		 * needs to be freed after construction of the new
		 * msg */
		REUSE_EXCEPTION(msg, MAL, "bam_loader",
				"Could not retrieve SQLContext: %s", msg);
		goto cleanup;
	}

	/* Start with binding bam schema and the files table */
	if ((msg = bind_bam_schema(m, &s)) != MAL_SUCCEED)
		goto cleanup;
	if ((msg = bind_table(m, s, "files", &files_table)) != MAL_SUCCEED)
		goto cleanup;

	/* Get next file id from files table */
	TO_LOG("<bam_loader> Retrieving next file id...\n");
	if ((msg = next_file_id(m, files_table, &cur_file_id)) != MAL_SUCCEED) {
		goto cleanup;
	}

	/* Init bam_wrapper structs */
	if ((bws =
		 (bam_wrapper *) GDKmalloc(nr_files * sizeof(bam_wrapper))) ==
		NULL) {
		msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL);
		goto cleanup;
	}

	/* Enables cleanup to check which bam_wrappers to clear */
	memset(bws, 0, nr_files * sizeof(bam_wrapper));

	for (i = 0; i < nr_files; ++i) {
		int fln = strlen(filenames[i]);
		TO_LOG("<bam_loader> Initializing BAM wrapper for file '%s'...\n", filenames[i]);
		if ((msg =
			 init_bam_wrapper(bws + i, (IS_BAM(filenames[i], fln) ? BAM : SAM),
					  filenames[i], cur_file_id++, dbschema)) != MAL_SUCCEED) {
			goto cleanup;
		}
	}

	/* Parse all headers */
	for (i = 0; i < nr_files; ++i) {
		TO_LOG("<bam_loader> Parsing header for file '%s'...\n",
			   filenames[i]);
		if ((msg = process_header(bws + i)) != MAL_SUCCEED) {
			goto cleanup;
		}
	}

	/* If we have to load the BAM data into the pairwise storage
	 * schema, make sure that all input BAM files are sorted on
	 * QNAME */
	if (dbschema == 1) {
		for (i = 0; i < nr_files; ++i) {
			TO_LOG("<bam_loader> Checking sortedness for BAM file '%s'...\n", filenames[i]);
			if (bws[i].ord != ORDERING_QUERYNAME) {
				msg = createException(MAL, "bam_loader",
							  "Only BAM files that are sorted on queryname can be inserted into the pairwise storage schema; "
							  "BAM file '%s' has ordering '%s'",
							  bws[i].file_location,
							  ordering_str(bws[i].ord));
				goto cleanup;
			}
		}
	}

	/* Create alignment storage */
	for (i = 0; i < nr_files; ++i) {
		TO_LOG("<bam_loader> Creating alignment tables for file '%s'...\n", filenames[i]);
		if ((dbschema == 0
			 && (msg = create_alignment_storage_0(cntxt,
								  "bam.create_storage_0",
								  bws + i)) != MAL_SUCCEED)
			|| (dbschema == 1
				&& (msg = create_alignment_storage_1(cntxt,
								  "bam.create_storage_1",
								  bws + i)) != MAL_SUCCEED)) {
			goto cleanup;
		}
	}


	/* Now create threads to read alignment data of different files */
	TO_LOG("<bam_loader> Creating reader threads...\n");
	if ((reader_threads =
		 (MT_Id *) GDKmalloc(nr_threads * sizeof(MT_Id))) == NULL) {
		msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL);
		goto cleanup;
	}

	if ((r_thread_data =
		 create_reader_thread_data(bws, nr_files, nr_threads)) == NULL) {
		msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL);
		goto cleanup;
	}

	for (i = 0; i < nr_threads; ++i) {
		if ((errnr =
			 MT_create_thread(&reader_threads[i],
					  run_process_bam_alignments,
					  &r_thread_data[i],
					  MT_THR_JOINABLE)) != 0) {
			msg = createException(MAL, "bam_loader",
						  "Could not create thread to process alignments (errnr %d)",
						  errnr);
			/* Do not jump to cleanup yet: the threads that were
			 * already started still use bws and r_thread_data, so
			 * they have to be joined before those are freed */
			break;
		}
		++nr_started;
	}

	TO_LOG("<bam_loader> Waiting for reader threads to finish...\n");
	/* Wait until all started threads finish and collect their
	 * messages. Though it is not very likely, it could be the
	 * case that more than 1 thread generates an error message (not
	 * likely because threads exit once they notice that another
	 * thread has failed).  Therefore, we collect all error
	 * messages in one big error string.  A failure to create or
	 * join a thread takes precedence over the collected messages;
	 * we nevertheless keep joining the remaining threads, since
	 * cleanup frees the data they are working on.
	 */
	for (i = 0; i < nr_started; ++i) {
		if ((errnr = MT_join_thread(reader_threads[i])) != 0) {
			if (msg == MAL_SUCCEED) {
				msg = createException(MAL, "bam_loader",
							  "Could not join alignment processing thread (errnr %d)",
							  errnr);
			}
			continue;
		}
		/* Thread finished ok, append its error message, if any */
		if (r_thread_data[i].msg != MAL_SUCCEED) {
			int room = (int) sizeof(buf_threads_msg) - threads_msg_len;

			thread_failed = 1;
			if (room > 1) {
				/* snprintf returns the length the output would
				 * have had without truncation (or a negative
				 * value on failure), so clamp the step to what
				 * was really written; otherwise the next size
				 * argument would become negative */
				int step = snprintf(buf_threads_msg + threads_msg_len,
						(size_t) room, "* %s\n",
						r_thread_data[i].msg);

				if (step >= room)
					step = room - 1;
				if (step > 0)
					threads_msg_len += step;
			}
			GDKfree(r_thread_data[i].msg);
			/* do not leave a dangling pointer behind */
			r_thread_data[i].msg = MAL_SUCCEED;
		}
	}

	/* Fail if a thread could not be created or joined */
	if (msg != MAL_SUCCEED)
		goto cleanup;

	/* Fail if any thread has failed */
	if (thread_failed) {
		/* The collected messages live in a stack buffer, so copy
		 * them to a malloced buffer.  GDKstrdup returning NULL
		 * must not be mistaken for MAL_SUCCEED */
		msg = GDKstrdup(buf_threads_msg);
		if (msg == NULL)
			msg = createException(MAL, "bam_loader", MAL_MALLOC_FAIL);
		goto cleanup;
	}

	TO_LOG("<bam_loader> Copying data into DB...\n");
	/* All threads finished succesfully, copy all data into DB */
	for (i = 0; i < nr_files; ++i) {
		if ((msg = copy_into_db(cntxt, bws + i)) != MAL_SUCCEED) {
			goto cleanup;
		}
	}

	  cleanup:
	if (bws) {
		/* wrappers that were never initialized are still all-zero */
		for (i = 0; i < nr_files; ++i)
			clear_bam_wrapper(bws + i);
		GDKfree(bws);
	}
	if (reader_threads)
		GDKfree(reader_threads);
	if (r_thread_data)
		destroy_reader_thread_data(r_thread_data);

	if (msg != MAL_SUCCEED) {
		TO_LOG("<bam_loader> Error on processing BAM files: %s\n",
			   msg);
	}

	TO_LOG("<bam_loader>: Loader finished processing %d BAM file%s...\n",
		   nr_files, (nr_files != 1 ? "s" : ""));
	return msg;
}
Пример #4
0
/* Build the dataflow administration for the block startpc..stoppc and
 * run it through the scheduler on behalf of worker slot widx.
 *
 * *scheduled is set to 1 as soon as DFLOWscheduler has been called
 * and stays 0 when we bail out earlier (allocation failure or a
 * failing DFLOWinitBlk).  Everything allocated here is released again
 * before returning.  Returns MAL_SUCCEED or an error message. */
static str
runMALdataflowBlock(Client cntxt, MalBlkPtr mb, int startpc, int stoppc,
		    MalStkPtr stk, int widx, int *scheduled)
{
	DataFlow flow;
	str msg;
	int size;

	*scheduled = 0;

	flow = (DataFlow)GDKzalloc(sizeof(DataFlowRec));
	if (flow == NULL)
		throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow");

	flow->cntxt = cntxt;
	flow->mb = mb;
	flow->stk = stk;
	flow->error = 0;

	/* keep real block count, exclude brackets */
	flow->start = startpc + 1;
	flow->stop = stoppc;

	MT_lock_init(&flow->flowlock, "flow->flowlock");
	flow->done = q_create(stoppc- startpc+1, "flow->done");
	if (flow->done == NULL) {
		MT_lock_destroy(&flow->flowlock);
		GDKfree(flow);
		throw(MAL, "dataflow", "runMALdataflow(): Failed to create flow->done queue");
	}

	flow->status = (FlowEvent)GDKzalloc((stoppc - startpc + 1) * sizeof(FlowEventRec));
	if (flow->status == NULL) {
		q_destroy(flow->done);
		MT_lock_destroy(&flow->flowlock);
		GDKfree(flow);
		throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow->status");
	}
	size = DFLOWgraphSize(mb, startpc, stoppc);
	size += stoppc - startpc;
	flow->nodes = (int*)GDKzalloc(sizeof(int) * size);
	if (flow->nodes == NULL) {
		GDKfree(flow->status);
		q_destroy(flow->done);
		MT_lock_destroy(&flow->flowlock);
		GDKfree(flow);
		throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow->nodes");
	}
	flow->edges = (int*)GDKzalloc(sizeof(int) * size);
	if (flow->edges == NULL) {
		GDKfree(flow->nodes);
		GDKfree(flow->status);
		q_destroy(flow->done);
		MT_lock_destroy(&flow->flowlock);
		GDKfree(flow);
		throw(MAL, "dataflow", "runMALdataflow(): Failed to allocate flow->edges");
	}
	msg = DFLOWinitBlk(flow, mb, size);

	if (msg == MAL_SUCCEED) {
		*scheduled = 1;
		msg = DFLOWscheduler(flow, &workers[widx]);
	}

	GDKfree(flow->status);
	GDKfree(flow->edges);
	GDKfree(flow->nodes);
	q_destroy(flow->done);
	MT_lock_destroy(&flow->flowlock);
	GDKfree(flow);
	return msg;
}

/* We create a pool of GDKnr_threads-1 generic workers, that is,
 * workers that will take on jobs from any clients.  In addition, we
 * create a single specific worker per client (i.e. each time we enter
 * here).  This specific worker will only do work for the client for
 * which it was started.  In this way we can guarantee that there will
 * always be progress for the client, even if all other workers are
 * doing something big.
 *
 * When all jobs for a client have been done (there are no more
 * entries for the client in the queue), the specific worker turns
 * itself into a generic worker.  At the same time, we signal that one
 * generic worker should exit and this function returns.  In this way
 * we make sure that there are once again GDKnr_threads-1 generic
 * workers.
 *
 * The first return argument of the instruction at startpc is set to
 * TRUE when the block was NOT executed here and the caller has to run
 * it serially (stk->cmd set, no thread pool, no free worker slot, or
 * the extra worker could not be started); otherwise it is FALSE.
 * Returns MAL_SUCCEED or an error message. */
str
runMALdataflow(Client cntxt, MalBlkPtr mb, int startpc, int stoppc, MalStkPtr stk)
{
	str msg = MAL_SUCCEED;
	int *ret;
	int i;
	int scheduled = 0;

#ifdef DEBUG_FLOW
	fprintf(stderr, "#runMALdataflow for block %d - %d\n", startpc, stoppc);
	printFunction(GDKstdout, mb, 0, LIST_MAL_STMT | LIST_MAPI);
#endif

	/* in debugging mode we should not start multiple threads */
	if (stk == NULL)
		throw(MAL, "dataflow", "runMALdataflow(): Called with stk == NULL");
	ret = (int*) getArgReference(stk,getInstrPtr(mb,startpc),0);
	*ret = FALSE;
	if (stk->cmd) {
		*ret = TRUE;
		return MAL_SUCCEED;
	}

	assert(stoppc > startpc);

	/* check existence of workers */
	if (todo == NULL) {
		/* create thread pool */
		if (GDKnr_threads <= 1 || DFLOWinitialize() < 0) {
			/* no threads created, run serially */
			*ret = TRUE;
			return MAL_SUCCEED;
		}
	}
	assert(todo);
	/* in addition, create one more worker that will only execute
	 * tasks for the current client to compensate for our waiting
	 * until all work is done */
	MT_lock_set(&dataflowLock, "runMALdataflow");
	/* join with already exited threads */
	{
		int joined;
		do {
			joined = 0;
			for (i = 0; i < THREADS; i++) {
				if (workers[i].flag == EXITED) {
					workers[i].flag = JOINING;
					workers[i].cntxt = NULL;
					joined = 1;
					/* the lock is dropped while joining, so the
					 * scan is repeated until nothing was joined */
					MT_lock_unset(&dataflowLock, "runMALdataflow");
					MT_join_thread(workers[i].id);
					MT_lock_set(&dataflowLock, "runMALdataflow");
					workers[i].flag = IDLE;
				}
			}
		} while (joined);
	}
	for (i = 0; i < THREADS; i++) {
		if (workers[i].flag == IDLE) {
			/* only create specific worker if we are not doing a
			 * recursive call */
			if (stk->calldepth > 1) {
				int j;
				MT_Id pid = MT_getpid();

				/* doing a recursive call: copy specificity from
				 * current worker to new worker */
				workers[i].cntxt = NULL;
				for (j = 0; j < THREADS; j++) {
					if (workers[j].flag == RUNNING && workers[j].id == pid) {
						workers[i].cntxt = workers[j].cntxt;
						break;
					}
				}
			} else {
				/* not doing a recursive call: create specific worker */
				workers[i].cntxt = cntxt;
			}
			workers[i].flag = RUNNING;
			if (MT_create_thread(&workers[i].id, DFLOWworker, (void *) &workers[i], MT_THR_JOINABLE) < 0) {
				/* cannot start new thread, run serially */
				*ret = TRUE;
				workers[i].flag = IDLE;
				MT_lock_unset(&dataflowLock, "runMALdataflow");
				return MAL_SUCCEED;
			}
			break;
		}
	}
	MT_lock_unset(&dataflowLock, "runMALdataflow");
	if (i == THREADS) {
		/* no empty thread slots found, run serially */
		*ret = TRUE;
		return MAL_SUCCEED;
	}

	/* from here on an extra worker exists in slot i, so every path
	 * below, including the failure paths, must give one worker back */
	msg = runMALdataflowBlock(cntxt, mb, startpc, stoppc, stk, i, &scheduled);

	if (!scheduled) {
		/* The scheduler never got hold of the worker, so make sure
		 * it is not left bound to this client.
		 * NOTE(review): assumes DFLOWscheduler is what normally
		 * releases the worker from its client -- confirm. */
		MT_lock_set(&dataflowLock, "runMALdataflow");
		workers[i].cntxt = NULL;
		MT_lock_unset(&dataflowLock, "runMALdataflow");
	}

	/* we created one worker, now tell one worker to exit again */
	MT_lock_set(&todo->l, "runMALdataflow");
	todo->exitcount++;
	MT_lock_unset(&todo->l, "runMALdataflow");
	MT_sema_up(&todo->s, "runMALdataflow");
	return msg;
}