Example #1
// Recursively serializes the YAJL tree node `v` through the generator `g`.
//
// Parameters:
//   g - YAJL generator handle that receives the output.
//   v - tree node to emit; objects and arrays are walked recursively.
//
// Returns yajl_gen_status_ok on success. The first non-ok status reported by
// any yajl_gen_* call is returned immediately. yajl_gen_in_error_state is
// returned for a null node or for a node type that cannot be emitted
// (yajl_t_any).
yajl_gen_status GenVal(yajl_gen g, yajl_val v) {
	// A null node cannot be dispatched on; report it instead of dereferencing.
	if (v == NULL)
		return yajl_gen_in_error_state;

	yajl_gen_status status;
	switch (v->type) {
	case yajl_t_string:
		return yajl_gen_string(g, reinterpret_cast<const unsigned char *>(v->u.string), strlen(v->u.string));

	case yajl_t_number: {
		char buffer[100];
		const char *num = buffer;
		size_t len;
		// The flags are tested directly: the original author marked the
		// YAJL_IS_INTEGER / YAJL_IS_DOUBLE macros as buggy.
		if (v->u.number.flags & YAJL_NUMBER_INT_VALID) {
			len = static_cast<size_t>(snprintf(buffer, sizeof(buffer), "%lld", YAJL_GET_INTEGER(v)));
		}
		else if (v->u.number.flags & YAJL_NUMBER_DOUBLE_VALID) {
			// NOTE(review): "%g" prints 6 significant digits, so doubles may
			// not round-trip exactly - confirm this is acceptable to callers.
			len = static_cast<size_t>(snprintf(buffer, sizeof(buffer), "%g", YAJL_GET_DOUBLE(v)));
		}
		else {
			// Neither numeric form is valid: emit the raw number text.
			// The length must be taken from `num`; `buffer` is never written
			// on this path, so measuring it would read uninitialized memory.
			num = YAJL_GET_NUMBER(v);
			len = strlen(num);
		}
		return yajl_gen_number(g, num, len);
	}

	case yajl_t_object:
		status = yajl_gen_map_open(g);
		if (status != yajl_gen_status_ok)
			return status;
		for (size_t i = 0; i < v->u.object.len; i++) {
			// Each member is emitted as its key string followed by its value.
			const char *key = v->u.object.keys[i];
			status = yajl_gen_string(g, reinterpret_cast<const unsigned char *>(key), strlen(key));
			if (status != yajl_gen_status_ok)
				return status;
			status = GenVal(g, v->u.object.values[i]);
			if (status != yajl_gen_status_ok)
				return status;
		}
		return yajl_gen_map_close(g);

	case yajl_t_array:
		status = yajl_gen_array_open(g);
		if (status != yajl_gen_status_ok)
			return status;
		for (size_t i = 0; i < v->u.array.len; i++) {
			status = GenVal(g, v->u.array.values[i]);
			if (status != yajl_gen_status_ok)
				return status;
		}
		return yajl_gen_array_close(g);

	case yajl_t_true: return yajl_gen_bool(g, 1);
	case yajl_t_false: return yajl_gen_bool(g, 0);
	case yajl_t_null: return yajl_gen_null(g);
	case yajl_t_any: break;
	}
	// yajl_t_any (or an unknown type value) has no serialized form.
	return yajl_gen_in_error_state;
}
Example #2
// Serializes the fixture's tree (root_) with GenVal kTrialCount times.
// Every iteration uses its own generator handle and releases it on every
// path, so repeated trials do not accumulate allocations.
TEST_F(Yajl, yajl_gen) {
	for (size_t i = 0; i < kTrialCount; i++) {
		yajl_gen g = yajl_gen_alloc(NULL);
		ASSERT_TRUE(g != NULL);

		yajl_gen_status status = GenVal(g, root_);
		if (status != yajl_gen_status_ok) {
			std::cout << "gen error: " << status << std::endl;
			// Free before failing: FAIL() returns from the test body.
			yajl_gen_free(g);
			FAIL();
		}

		const unsigned char * buf;
		size_t len;
		status = yajl_gen_get_buf(g, &buf, &len);
		//if (i == 0)
		//	std::cout << len << std::endl;

		// `buf` is not used past this point, so the handle is released before
		// the assertion; a failing ASSERT_EQ would otherwise return without
		// freeing it.
		yajl_gen_free(g);
		ASSERT_EQ(yajl_gen_status_ok, status);
	}
}
Example #3
int main(int argc, char *argv[]) {
	if(argc != 5) {
		printf("ERROR: Not enough arguments.\n");
		return EXIT_FAILURE;

	// Example MPI startup and using CLCG4 RNG
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &mpi_commsize);
	MPI_Comm_rank(MPI_COMM_WORLD, &mpi_myrank);

	// assign cli args to globals
	g_matrix_size = atoi(argv[1]);
	g_threads = atoi(argv[2]);
	g_file_ranks = atoi(argv[3]);
	g_out = argv[4];
	g_row_size = g_matrix_size / mpi_commsize;

	// Init 16,384 RNG streams - each rank has an independent stream


	// my_matrix stores a local slice of the 128gb matrix
	// my_transpose stores the transpose of that slice
	my_matrix = calloc(g_row_size * g_matrix_size, sizeof(unsigned int));
	my_transpose = calloc(g_row_size * g_matrix_size, sizeof(unsigned int));

	// initialize and randomize matrix thru mpi ranks
	// each rank holds some number of rows
	// held in a 1d array to make mpi sending easier
	unsigned int i, j, k, l;
	for(i = 0; i < g_row_size; ++i) {
		for(j = 0; j < g_matrix_size; ++j) {
			my_matrix[i * g_matrix_size + j] = (unsigned int)(GenVal(mpi_myrank) * 100.0) + 1;

	// populate transpose with own values
	unsigned int start_idx = mpi_myrank * g_row_size;
	for(i = 0; i < g_row_size; ++i) {
		for(j = start_idx; j < g_row_size * g_matrix_size; j = j + g_matrix_size) {
			// calculation for the matrix
			k = (j - start_idx) / g_matrix_size + start_idx;
			my_transpose[i + j] = my_matrix[i * g_matrix_size + k];

	// initialize and allocate buffers
	unsigned int bufsize = g_row_size * g_row_size;
	unsigned int *sendbuf = calloc(bufsize, sizeof(unsigned int *));
	unsigned int **recvbuf = calloc(mpi_commsize-1, sizeof(unsigned int *));
	for(i = 0; i < mpi_commsize-1; ++i) {
		recvbuf[i] = calloc(bufsize, sizeof(unsigned int));

	// mpi stuff
	unsigned int num_reqs = 2 * (mpi_commsize - 1);
	unsigned int cur_req = 0;
	MPI_Request *requests = (MPI_Request *)malloc(num_reqs * sizeof(MPI_Request));
	MPI_Status *statuses = (MPI_Status *)malloc(num_reqs * sizeof(MPI_Status));

	// send to all other ranks
	for(i = 0; i < mpi_commsize; ++i) {
		if(i != mpi_myrank) {
			// store relevant data for the receiving rank
			int cnt = 0;
			int tx = 0;
			start_idx = i * g_row_size;
			for(j = 0; j < g_row_size; ++j) {
				for(k = start_idx; k < g_row_size * g_matrix_size; k = k + g_matrix_size) {
					// calculation for the matrix (a little messy, could be optimized)
					l = (k - start_idx) / g_matrix_size + start_idx;
					if(cnt >= bufsize) {
						// handles the overflow, after which we offset it (new column)
						cnt = ++tx;
					sendbuf[cnt] = my_matrix[j * g_matrix_size + l];
					cnt += g_row_size;
			MPI_Isend(sendbuf, bufsize, MPI_UNSIGNED, i, 0, MPI_COMM_WORLD, &requests[cur_req++]);

	// recv from all other rows
	// handling a little messy since irecv is nonblocking
	int cnt = 0;
	for(i = 0; i < mpi_commsize; ++i) {
		if(i != mpi_myrank) {
			MPI_Irecv(recvbuf[cnt++], bufsize, MPI_UNSIGNED, i, 0, MPI_COMM_WORLD, &requests[cur_req++]);

	// wait on MPI messages
	MPI_Waitall(num_reqs, requests, statuses);

	// store relevant values
	k = 0;
	l = 0;
	for(i = 0; i < g_row_size; ++i) {
		for(j = 0; j < g_matrix_size; ++j) {
			if(my_transpose[i * g_matrix_size + j] == 0) {
				my_transpose[i * g_matrix_size + j] = recvbuf[k][l++];
				if(l >= bufsize) {
					l = 0;

	unsigned long long t1 = 0, t2 = 0;
	if(mpi_myrank == 0) {
		t1 = GetTimeBase();

	// split into pthreads
	pthread_t *call_thd;
	call_thd = (pthread_t *)malloc(g_threads * sizeof(pthread_t));
	void *status;
	pthread_attr_t attr;
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
	pthread_mutex_init(&mutexsum, NULL);

	long x;
	for(x = 0; x < g_threads; ++x) {
		pthread_create(&call_thd[i], &attr, mtx_sum, (void *)x);

	// wait on threads
	for(x = 0; x < g_threads; ++x) {
		pthread_join(call_thd[i], &status);


	if(mpi_myrank == 0) {
		t2 = GetTimeBase();
		float tmp = (t2-t1) / 1600000;
		printf("Elapsed compute time: %f\n", tmp);

	// I/O
	if(mpi_myrank == 0) {
		t1 = GetTimeBase();
	MPI_Offset offset = (mpi_myrank % g_file_ranks) * g_row_size * g_matrix_size * sizeof(unsigned int);
	MPI_File file;
	MPI_Status iostatus;
	MPI_Datatype localarray;

	/* create a type describing our piece of the array */
	int globalsizes[2] = {g_matrix_size, g_matrix_size};
	int localsizes [2] = {g_row_size, g_matrix_size};
	int starts[2]      = {mpi_myrank * g_row_size, 0};
	int order          = MPI_ORDER_C;
	MPI_Type_create_subarray(2, globalsizes, localsizes, starts, order, MPI_UNSIGNED, &localarray);

	// open the file, and set the view
	MPI_File_open(MPI_COMM_WORLD, g_out,
			MPI_INFO_NULL, &file);
	MPI_File_set_view(file, 0,  MPI_UNSIGNED, localarray, "native", MPI_INFO_NULL);

	// write to file at specified offset
	MPI_File_write_at(file, offset * mpi_myrank, my_matrix, g_row_size * g_matrix_size, MPI_UNSIGNED, &iostatus);
	if(mpi_myrank == 0) {
		t2 = GetTimeBase();
		float tmp = (t2-t1) / 1600000;
		printf("Elapsed IO time: %f\n", tmp);

	// cleanup routine




	for(i = 0; i < mpi_commsize-1; ++i) free(recvbuf[i]);

	// END -Perform a barrier and then leave MPI