Ejemplo n.º 1
0
/*
 * meta_add - queue one more message onto the shared "metamsg" buffer.
 *
 * buf: the message to append; it is passed unchanged to addtostrbuffer().
 *
 * When a non-zero maxmsgspercombo limit is configured and the number of
 * messages already queued (xymonmetaqueued) has reached it, meta_flush()
 * is called before appending. Otherwise, if at least one message is
 * already queued, a "\n\n" separator is written ahead of the new one.
 * In every case the message is then appended and the counter incremented.
 *
 * NOTE(review): presumably meta_flush() empties metamsg and resets
 * xymonmetaqueued - confirm against its definition.
 */
static void meta_add(strbuffer_t *buf)
{
	/* Non-zero only when a limit is set AND that many messages are queued */
	int limit_reached = (maxmsgspercombo && (xymonmetaqueued >= maxmsgspercombo));

	if (limit_reached) {
		/* Queue is full: flush before adding the new message */
		meta_flush();
	}
	else if (xymonmetaqueued) {
		/* Not the first queued message: separate it from the previous one */
		addtobuffer(metamsg, "\n\n");
	}

	addtostrbuffer(metamsg, buf);
	xymonmetaqueued++;
}
Ejemplo n.º 2
0
int main(int argc, char **argv) {
	meta_mpi_init(&argc, &argv);

#ifdef __DEBUG__
	int breakMe = 0;
	while (breakMe);
#endif

	/*{
	 int i = 0;
	 char hostname[256];
	 gethostname(hostname, sizeof(hostname));
	 printf("PID %d on %s ready for attach\n", getpid(), hostname);
	 fflush(stdout);
	 while (0 == i)
	 sleep(5);
	 }*/
	int comm_sz;
	int rank;
	MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	printf("Hello from rank %d!\n", rank);
	int i = argc;
	int ni, nj, nk, tx, ty, tz, face_id, l_type;
	a_bool async, autoconfig;
	meta_face * face_spec;

	a_dim3 dimgrid_red, dimblock_red, dimgrid_tr_red, dimarray_3d, arr_start,
			arr_end, dim_array2d, start_2d, end_2d, trans_dim, rtrans_dim;
	if (i < 11) {
		printf(
				"<ni> <nj> <nk> <tblockx> <tblocky> <tblockz> <face> <data_type> <async> <autoconfig>\n");
		return (1);
	}
	ni = atoi(argv[1]);
	nj = atoi(argv[2]);
	nk = atoi(argv[3]);

	tx = atoi(argv[4]);
	ty = atoi(argv[5]);
	tz = atoi(argv[6]);

	face_id = atoi(argv[7]);

	l_type = atoi(argv[8]);
	set_type((meta_type_id) l_type);

	async = (a_bool) atoi(argv[9]);
	autoconfig = (a_bool) atoi(argv[10]);

	dimblock_red[0] = tx, dimblock_red[1] = ty, dimblock_red[2] = tz;
	dimgrid_red[0] = ni / tx + ((ni % tx) ? 1 : 0);
	dimgrid_red[1] = nj / ty + ((nj % ty) ? 1 : 0);
	dimgrid_red[2] = nk / tz + ((nk % tz) ? 1 : 0);
	dimarray_3d[0] = ni, dimarray_3d[1] = nj, dimarray_3d[2] = nk;

	//These are for the library reduction, which we use for sums
	void * sum_gpu, *zero;
	sum_gpu = malloc(g_typesize);
	zero = malloc(g_typesize);
	switch (g_type) {
	case a_db:
		*(double*) zero = 0;
		break;

	case a_fl:
		*(float*) zero = 0;
		break;

	case a_ul:
		*(unsigned long*) zero = 0;
		break;

	case a_in:
		*(int *) zero = 0;
		break;

	case a_ui:
		*(unsigned int *) zero = 0;
		break;
	}

#ifdef WITH_TIMERS
	metaTimersInit();
#endif

	gpu_initialize(rank);
	data_allocate(ni, nj, nk);
	data_initialize(ni, nj, nk);

	MPI_Request request;
	MPI_Status status;
	for (i = 0; i < 1; i++) {

		if (rank == 0) {
			//Only process 0 actually needs to initialize data
			// proc1 is just a relay that tests things on the receiving
			// end and mirror's the data back
			//copy the unmodified prism to device
			meta_copy_h2d(dev_data3, data3, ni * nj * nk * g_typesize, async);
			//check_buffer(data3, dev_data3, ni*nj*nk);
			//Validate grid and block sizes (if too big, shrink the z-dim and add iterations)
			for (;
					meta_validate_worksize(&dimgrid_red, &dimblock_red) != 0
							&& dimblock_red[2] > 1;
					dimgrid_red[2] <<= 1, dimblock_red[2] >>= 1)
				;
			//zero out the reduction sum
			meta_copy_h2d(reduction, zero, g_typesize, async);
			//reduce the face to check that the transfer was correct
			//accurately sets start and end indices to sum each face
			arr_start[0] = ((face_id == 3) ? ni - 1 : 0);
			arr_end[0] = ((face_id == 2) ? 0 : ni - 1);
			arr_start[1] = ((face_id == 5) ? nj - 1 : 0);
			arr_end[1] = ((face_id == 4) ? 0 : nj - 1);
			arr_start[2] = ((face_id == 1) ? nk - 1 : 0);
			arr_end[2] = ((face_id == 0) ? 0 : nk - 1);
			//check_dims(dimarray_3d, arr_start, arr_end);
//		printf("Integrity check dim(%d, %d, %d) start(%d, %d, %d) end(%d, %d, %d)\n", dimarray_3d[0], dimarray_3d[1], dimarray_3d[2], arr_start[0], arr_start[1], arr_start[2], arr_end[0], arr_end[1], arr_end[2]);
			a_err ret = meta_reduce(autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red, dev_data3, &dimarray_3d,
					&arr_start, &arr_end, reduction, g_type, async);
			//a_dim3 testgrid, testblock;
			//testgrid[0] = testgrid[1] = testgrid[2] = 1;
			//testblock[0] = 16;
			//testblock[1] = 8;
			//testblock[2] = 1;
			//a_err ret = meta_reduce(&testgrid, &testblock, dev_data3, &dimarray_3d, &arr_start, &arr_end, reduction, g_type, async);
			printf("Reduce Error: %d\n", ret);
			//pull the sum back
			meta_copy_d2h(sum_gpu, reduction, g_typesize, async);
			//The 4 ternaries ensure the right args are passed to match the face
			// so this one call will work for any face
			printf("Initial Face Integrity Check: %s\n",
					check_face_sum(sum_gpu, (face_id < 4 ? nj : nk),
							(face_id < 2 || face_id > 3 ? ni : nk),
							(face_id & 1 ?
									(face_id < 2 ?
											nk - 1 :
											(face_id < 4 ? ni - 1 : nj - 1)) :
									0)) ? "FAILED" : "PASSED");

			//pack the face
			//TODO set a_dim3 structs once the internal implementation respects them
			face_spec = make_face(face_id, ni, nj, nk);
			ret = meta_pack_face(NULL, NULL, dev_face[face_id], dev_data3,
					face_spec, g_type, async);
			printf("Pack Return Val: %d\n", ret);
			//check_buffer(face[face_id], dev_face[face_id], face_spec->size[0]*face_spec->size[1]*face_spec->size[2]);

			//reduce the packed face to check that packing was correct
			meta_copy_h2d(reduction, zero, g_typesize, async);
			dim_array2d[0] = face_spec->size[2], dim_array2d[1] =
					face_spec->size[1], dim_array2d[2] = face_spec->size[0];
			start_2d[0] = start_2d[1] = start_2d[2] = 0;
			end_2d[0] = (dim_array2d[0] == 1 ? 0 : ni - 1);
			end_2d[1] = (dim_array2d[1] == 1 ? 0 : nj - 1);
			end_2d[2] = (dim_array2d[2] == 1 ? 0 : nk - 1);

			//check_dims(dim_array2d, start_2d, end_2d);
			ret = meta_reduce(autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red, dev_face[face_id],
					&dim_array2d, &start_2d, &end_2d, reduction, g_type, async);
			meta_copy_d2h(sum_gpu, reduction, g_typesize, async);
			//The 4 ternaries ensure the right args are passed to match the face
			// so this one call will work for any face
			printf("Packed Face Integrity Check: %s\n",
					check_face_sum(sum_gpu, (face_id < 4 ? nj : nk),
							(face_id < 2 || face_id > 3 ? ni : nk),
							(face_id & 1 ?
									(face_id < 2 ?
											nk - 1 :
											(face_id < 4 ? ni - 1 : nj - 1)) :
									0)) ? "FAILED" : "PASSED");

			//transpose the packed face (into the companion face's unoccupied buffer)
			trans_dim[0] = (
					face_spec->size[2] == 1 ?
							face_spec->size[1] : face_spec->size[2]);
			trans_dim[1] = (
					face_spec->size[0] == 1 ?
							face_spec->size[1] : face_spec->size[0]);
			trans_dim[2] = 1;
			rtrans_dim[0] = trans_dim[1];
			rtrans_dim[1] = trans_dim[0];
			rtrans_dim[2] = 1;

			void * stuff = calloc(
					face_spec->size[0] * face_spec->size[1]
							* face_spec->size[2], g_typesize);
			meta_copy_h2d(dev_face[(face_id & 1) ? face_id - 1 : face_id + 1],
					stuff,
					g_typesize * face_spec->size[0] * face_spec->size[1]
							* face_spec->size[2], async);
			//printf("**BEFORE**\n");
			//check_buffer(face[face_id], dev_face[(face_id & 1) ? face_id-1 : face_id+1], face_spec->size[0]*face_spec->size[1]*face_spec->size[2]);
			//printf("**********\n");
			//check_dims(dimgrid_red, dimblock_red, trans_dim);
			//TODO Figure out what's wrong with transpose and re-enable
			ret = meta_transpose_face(NULL, NULL, dev_face[face_id],
					dev_face[(face_id & 1) ? face_id - 1 : face_id + 1],
					&trans_dim, &trans_dim, g_type, async);
			printf("Transpose error: %d\n", ret);
			//printf("**AFTER***\n");
			//check_buffer(face[face_id], dev_face[(face_id & 1) ? face_id-1 : face_id+1], face_spec->size[0]*face_spec->size[1]*face_spec->size[2]);
			//printf("**********\n");

			//reduce the specific sums needed to check that transpose was correct
			meta_copy_h2d(reduction, zero, g_typesize, async);
			//shuffle the (local) X/Y dimension
			rtrans_dim[0] = trans_dim[1];
			rtrans_dim[1] = trans_dim[0];
			rtrans_dim[2] = 1;
			start_2d[0] = start_2d[1] = start_2d[2] = 0;
			end_2d[0] = trans_dim[0] - 1, end_2d[1] = trans_dim[1] - 1, end_2d[2] =
					0;
			//check_dims(rtrans_dim, start_2d, end_2d);
			ret = meta_reduce(autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red,
					dev_face[(face_id & 1) ? face_id - 1 : face_id + 1],
					&trans_dim, &start_2d, &end_2d, reduction, g_type, async);
			//ret = meta_reduce(NULL, NULL, dev_face[(face_id & 1)? face_id-1 : face_id+1], &trans_dim, &start_2d, &end_2d, reduction, g_type, async);
			meta_copy_d2h(sum_gpu, reduction, g_typesize, async);
			//The 4 ternaries ensure the right args are passed to match the face
			// so this one call will work for any face
			printf("Transposed Face Integrity Check: %s\n",
					check_face_sum(sum_gpu, (face_id < 4 ? nj : nk),
							(face_id < 2 || face_id > 3 ? ni : nk),
							(face_id & 1 ?
									(face_id < 2 ?
											nk - 1 :
											(face_id < 4 ? ni - 1 : nj - 1)) :
									0)) ? "FAILED" : "PASSED");

			//transpose the face back
			//TODO figure out what's wrong with transpose and re-enable
			ret = meta_transpose_face(autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red,
					dev_face[(face_id & 1) ? face_id - 1 : face_id + 1],
					dev_face[face_id], &rtrans_dim, &rtrans_dim, g_type, async);
			//check_buffer(face[face_id], dev_face[face_id], face_spec->size[0]*face_spec->size[1]*face_spec->size[2]);
			//reduce the specified sums to ensure the reverse transpose worked too
			meta_copy_h2d(reduction, zero, g_typesize, async);
			start_2d[0] = start_2d[1] = start_2d[2] = 0;
			end_2d[0] = rtrans_dim[0] - 1, end_2d[1] = rtrans_dim[1] - 1, end_2d[2] =
					0;
			dimgrid_tr_red[0] = dimgrid_red[1];
			dimgrid_tr_red[1] = dimgrid_red[0];
			dimgrid_tr_red[2] = dimgrid_red[2];
			ret = meta_reduce(autoconfig ? NULL : &dimgrid_tr_red,
					autoconfig ? NULL : &dimblock_red,
					dev_face[(face_id & 1) ? face_id - 1 : face_id + 1],
					&rtrans_dim, &start_2d, &end_2d, reduction, g_type, async);
			meta_copy_d2h(sum_gpu, reduction, g_typesize, async);
			//The 4 ternaries ensure the right args are passed to match the face
			// so this one call will work for any face
			printf("Retransposed Face Integrity Check: %s\n",
					check_face_sum(sum_gpu, (face_id < 4 ? nj : nk),
							(face_id < 2 || face_id > 3 ? ni : nk),
							(face_id & 1 ?
									(face_id < 2 ?
											nk - 1 :
											(face_id < 4 ? ni - 1 : nj - 1)) :
									0)) ? "FAILED" : "PASSED");
			;

			//send the packed face to proc1
			ret = meta_mpi_packed_face_send(1, dev_face[face_id],
					trans_dim[0] * trans_dim[1], i, &request, g_type, async);

//Force the recv and unpack to finish
			meta_flush();
//At this point the send should be forced to complete
// which means there's either a failure in the SP helper or the RP helper

			//receive and unpack the face
			//TODO set a_dim3 structs - i believe these are fine
			//TODO set the face_spec - believe these are fine
			ret = meta_mpi_recv_and_unpack_face(
					autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red, 1, face_spec, dev_data3,
					dev_face[face_id], i, &request, g_type, async);

//Force the recv and unpack to finish
			meta_flush();
			meta_copy_h2d(reduction, zero, g_typesize, async);
			arr_start[0] = ((face_id == 3) ? ni - 1 : 0);
			arr_end[0] = ((face_id == 2) ? 0 : ni - 1);
			arr_start[1] = ((face_id == 5) ? nj - 1 : 0);
			arr_end[1] = ((face_id == 4) ? 0 : nj - 1);
			arr_start[2] = ((face_id == 1) ? nk - 1 : 0);
			arr_end[2] = ((face_id == 0) ? 0 : nk - 1);
			//check_dims(dimarray_3d, arr_start, arr_end);
			ret = meta_reduce(autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red, dev_data3, &dimarray_3d,
					&arr_start, &arr_end, reduction, g_type, async);
			meta_copy_d2h(sum_gpu, reduction, g_typesize, async);
			//The 4 ternaries ensure the right args are passed to match the face
			// so this one call will work for any face
			printf("RecvAndUnpacked ZeroFace Integrity Check: %s\n",
					(check_fp(0.0, *((double*) sum_gpu), 0.000001)) ?
							"PASSED" : "FAILED");
			if (!check_fp(0.0, *((double*) sum_gpu), 0.000001))
				printf("\tExpected [0.0], returned [%f]!\n", sum_gpu);

			ret = meta_mpi_recv_and_unpack_face(
					autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red, 1, face_spec, dev_data3,
					dev_face[face_id], i, &request, g_type, async);

//Force the recv and unpack to finish
			meta_flush();
			meta_copy_h2d(reduction, zero, g_typesize, async);
			arr_start[0] = ((face_id == 3) ? ni - 1 : 0);
			arr_end[0] = ((face_id == 2) ? 0 : ni - 1);
			arr_start[1] = ((face_id == 5) ? nj - 1 : 0);
			arr_end[1] = ((face_id == 4) ? 0 : nj - 1);
			arr_start[2] = ((face_id == 1) ? nk - 1 : 0);
			arr_end[2] = ((face_id == 0) ? 0 : nk - 1);
			//check_dims(dimarray_3d, arr_start, arr_end);
			ret = meta_reduce(autoconfig ? NULL : &dimgrid_red,
					autoconfig ? NULL : &dimblock_red, dev_data3, &dimarray_3d,
					&arr_start, &arr_end, reduction, g_type, async);
			meta_copy_d2h(sum_gpu, reduction, g_typesize, async);
			//The 4 ternaries ensure the right args are passed to match the face
			// so this one call will work for any face
			printf("RecvAndUnpacked Face Integrity Check: %s\n",
					check_face_sum(sum_gpu, (face_id < 4 ? nj : nk),
							(face_id < 2 || face_id > 3 ? ni : nk),
							(face_id & 1 ?
									(face_id < 2 ?
											nk - 1 :
											(face_id < 4 ? ni - 1 : nj - 1)) :
									0)) ? "FAILED" : "PASSED");
			//TODO reduce the specified sub-sums on the face to further check accuracy of placement
		} else {
Ejemplo n.º 3
0
/*
 * meta_end - takes no arguments and returns nothing; its entire body is a
 * single call to meta_flush().
 *
 * NOTE(review): presumably this is the public "finish" call that pushes out
 * any meta messages still queued - confirm against meta_flush()'s definition.
 */
void meta_end(void)
{
	meta_flush();
}