예제 #1
0
파일: mcbsp.c 프로젝트: pirp/parallel_sieve
//Returns the wall-clock time, in seconds, elapsed since this BSP thread's
//start time (recorded in the thread-local data when the SPMD section began).
double bsp_time() {

	//get init data
	//(thread-local MulticoreBSP data; data->start holds the spawn-time timestamp)
	const struct mcbsp_thread_data * const data = mcbsp_internal_const_prefunction();

	//get stop time

#ifdef __MACH__
	//get rights for accessing Mach's timers
	//NOTE(review): data is const-qualified, yet &(data->clock) is passed as an
	//output parameter here -- presumably the clock member is (re)writable;
	//verify against the struct definition
	const kern_return_t rc1 = host_get_clock_service( mach_host_self(), SYSTEM_CLOCK, &(data->clock) );
	if( rc1 != KERN_SUCCESS ) {
		fprintf( stderr, "Could not access the Mach system timer (%s)\n", mach_error_string( rc1 ) );
		mcbsp_util_fatal();
	}

	mach_timespec_t stop;
	const kern_return_t rc2 = clock_get_time( data->clock, &stop );
	if( rc2 != KERN_SUCCESS ) {
		fprintf( stderr, "Could not get time at call to bsp_time (%s)\n", mach_error_string( rc2 ) );
		mcbsp_util_fatal();
	}
#else
	struct timespec stop;
	//NOTE(review): unlike the Mach branch, the return value of clock_gettime
	//is not checked here
	clock_gettime( CLOCK_MONOTONIC, &stop);
#endif

	//return time
	//seconds difference first; a negative nanosecond difference is compensated
	//correctly by the floating-point addition below
	double time = (stop.tv_sec-data->start.tv_sec);
	time += (stop.tv_nsec-data->start.tv_nsec)/1000000000.0;
	return time;
}
예제 #2
0
//Lazily creates the pthread thread-local-storage keys used by MulticoreBSP
//(one for per-program init data, one for per-thread data) exactly once.
//May be called concurrently from several threads.
void mcbsp_internal_check_keys_allocated() {
	//if already allocated, we are done
	//NOTE(review): this unsynchronised fast-path read makes this a
	//double-checked-locking pattern; it is formally a data race on a
	//non-atomic flag -- confirm the flag's type and required ordering
	if( mcbsp_internal_keys_allocated ) return;

	//lock mutex against data race
	pthread_mutex_lock( &mcbsp_internal_keys_mutex );

	//if still not allocated, allocate
	//(re-check under the lock: another thread may have won the race)
	if( !mcbsp_internal_keys_allocated ) {
		//free() is registered as key destructor: thread-specific values are
		//heap-allocated structs released automatically at thread exit
		if( pthread_key_create( &mcbsp_internal_init_data, free ) != 0 ) {
			fprintf( stderr, "Could not allocate mcbsp_internal_init_data key!\n" );
			mcbsp_util_fatal();
		}
		if( pthread_key_create( &mcbsp_internal_thread_data, free ) != 0 ) {
			fprintf( stderr, "Could not allocate mcbsp_internal_thread_data key!\n" );
			mcbsp_util_fatal();
		}
		//initialise both keys to NULL for the calling thread
		if( pthread_setspecific( mcbsp_internal_init_data, NULL ) != 0 ) {
			fprintf( stderr, "Could not initialise mcbsp_internal_init_data to NULL!\n" );
			mcbsp_util_fatal();
		}
		if( pthread_setspecific( mcbsp_internal_thread_data, NULL ) != 0 ) {
			fprintf( stderr, "Could not initialise mcbsp_internal_thread_data to NULL!\n" );
			mcbsp_util_fatal();
		}
		mcbsp_internal_keys_allocated = true;
	}

	//unlock mutex and exit
	pthread_mutex_unlock( &mcbsp_internal_keys_mutex );
}
예제 #3
0
파일: mcbsp.c 프로젝트: pirp/parallel_sieve
//Aborts the current SPMD computation: prints the formatted error message,
//signals all sibling threads to abort, wakes any threads blocked in
//bsp_sync, and terminates the calling thread.
void bsp_vabort( char *error_message, va_list args ) {

	//report the error on the standard error stream
	vfprintf( stderr, error_message, args );

	//fetch thread-local data; the prefunction validates it in debug builds only
	const struct mcbsp_thread_data * const thread_data = mcbsp_internal_const_prefunction();

	//always check for failure of getting thread data, even in high-performance mode;
	//this NDEBUG-guarded check complements the prefunction's debug-only check
#ifdef NDEBUG
	if( thread_data == NULL ) {
		fprintf( stderr, "Error: could not get thread-local data in call to bsp_abort( error_message )!\n" );
		mcbsp_util_fatal();
	}
#endif

	//raise the abort flag so every sibling thread will notice
	thread_data->init->abort = true;

	//wake up any threads currently blocked in a synchronisation; the lock is
	//taken first so no thread can enter a sync between our check and broadcast
	pthread_mutex_lock( &(thread_data->init->mutex) );
	if( thread_data->init->sync_entry_counter > 0 ) {
		pthread_cond_broadcast( &(thread_data->init->condition) );
	}
	pthread_mutex_unlock( &(thread_data->init->mutex) );

	//terminate this thread
	pthread_exit( NULL );
}
예제 #4
0
//Terminates the calling thread if a sibling thread has requested an abort
//(via the shared init->abort flag). No-op otherwise.
void mcbsp_internal_check_aborted() {
	//fetch this thread's local data
	const struct mcbsp_thread_data * const local = pthread_getspecific( mcbsp_internal_thread_data );
#ifndef NDEBUG
	//debug builds verify the thread-local data actually exists
	if( local == NULL ) {
		assert( false );
		fprintf( stderr, "Error: could not get thread-local data in call to mcbsp_check_aborted()!\n" );
		mcbsp_util_fatal();
	}
#endif
	//exit quietly when an abort was signalled
	if( local->init->abort ) {
		pthread_exit( NULL );
	}
}
예제 #5
0
//Checks the context in which bsp_begin was called.
//Returns the init struct when this thread must spawn the SPMD run (either
//from an explicit bsp_init, or from an implicit initialisation constructed
//here when called straight from main). Returns NULL when the caller is an
//already-spawned SPMD thread revisiting bsp_begin.
struct mcbsp_init_data * bsp_begin_check() {
	//check if keys are allocated
	mcbsp_internal_check_keys_allocated();
	//get necessary data
	struct mcbsp_init_data *init = pthread_getspecific( mcbsp_internal_init_data );
	if( init == NULL ) {
		//maybe we are SPMD threads revisiting the bsp_begin?
		const struct mcbsp_thread_data * const thread_data = pthread_getspecific( mcbsp_internal_thread_data );
		if( thread_data != NULL ) {
			//yes, so continue execution
			return NULL;
		} else {
			//no. We are not the ones spawning an SPMD program,
			//neither are we spawned from a corresponding SPMD...
			//
			//two possibilities: either fail hard

			/*fprintf( stderr, "Could not get initialisation data! Was the call to bsp_begin preceded by a call to bsp_init?\n" );
			mcbsp_util_fatal();*/

			//or assume we were called from main() and we construct an implied init
			init = malloc( sizeof( struct mcbsp_init_data ) );
			if( init == NULL ) {
				fprintf( stderr, "Could not perform an implicit initialisation!\n" );
				mcbsp_util_fatal();
			}
			init->spmd = NULL; //we want to call main, but (*void)(void) does not match its profile
			init->bsp_program = NULL; //keep the implicit init consistent with bsp_init
			init->argc = 0;
			init->argv = NULL;
			//initialise the ended flag: bsp_init_internal reads it on a
			//subsequent run, so it must not be left indeterminate
			init->ended = false;
			//(the redundant second call to mcbsp_internal_check_keys_allocated
			//was removed: keys are guaranteed allocated at function entry)

			if( pthread_setspecific( mcbsp_internal_init_data, init ) != 0 ) {
				fprintf( stderr, "Error: could not set BSP program key in implicit initialisation!\n" );
				mcbsp_util_fatal();
			}
		}
	}

	return init;
}
예제 #6
0
//POSIX-threads entry point for each spawned BSP process.
//p points to this thread's mcbsp_thread_data; the function installs it as
//thread-specific data, records the start time, then runs the SPMD program.
void* mcbsp_internal_spmd( void *p ) {
	//get thread-local data
	struct mcbsp_thread_data *data = (struct mcbsp_thread_data *) p;

	//store thread-local data
	//(must happen before any BSP primitive is called from the SPMD code)
	const int rc = pthread_setspecific( mcbsp_internal_thread_data, data );
	if( rc != 0 ) {
		fprintf( stderr, "Could not store thread local data!\n" );
		fprintf( stderr, "(%s)\n", strerror( rc ) );
		mcbsp_util_fatal();
	}

#ifdef __MACH__
	//get rights for accessing Mach's timers
	const kern_return_t rc1 = host_get_clock_service( mach_host_self(), SYSTEM_CLOCK, &(data->clock) );
	if( rc1 != KERN_SUCCESS ) {
		fprintf( stderr, "Could not access the Mach system timer (%s)\n", mach_error_string( rc1 ) );
		mcbsp_util_fatal();
	}

	//record start time
	const kern_return_t rc2 = clock_get_time( data->clock, &(data->start) );
	if( rc2 != KERN_SUCCESS ) {
		fprintf( stderr, "Could not get start time (%s)\n", mach_error_string( rc2 ) );
		mcbsp_util_fatal();
	}
#else
	//record start time
	//NOTE(review): return value of clock_gettime is not checked, unlike the
	//Mach branch above
	clock_gettime( CLOCK_MONOTONIC, &(data->start) );
#endif

	//continue with SPMD part
	//NOTE(review): the implicit-init path calls main with argc=0/argv=NULL
	//rather than forwarding the original arguments -- confirm intended
	if( data->init->spmd == NULL )
		main( 0, NULL ); //we had an implicit bsp_init
	else
		data->init->spmd(); //call user-defined SPMD program

	//exit cleanly
	return NULL;
}
예제 #7
0
파일: mcbsp.c 프로젝트: pirp/parallel_sieve
//Registers the SPMD entry point and the command-line arguments for the
//upcoming BSP run, then stores the record via the common internal path.
void bsp_init( void (*spmd)(void), int argc, char **argv ) {
	//allocate the per-program initialisation record
	struct mcbsp_init_data * const init = malloc( sizeof *init );
	if( init == NULL ) {
		fprintf( stderr, "Error: could not allocate MulticoreBSP initialisation struct!\n" );
		mcbsp_util_fatal();
	}

	//record the user-supplied SPMD function and arguments
	init->spmd        = spmd;
	init->bsp_program = NULL;
	init->argc        = argc;
	init->argv        = argv;

	//hand over to the shared initialisation path
	bsp_init_internal( init );
}
예제 #8
0
//Stores the given initialisation data using pthreads setspecific. Note this
//is per BSP program, not per thread active within this BSP program!
//Warns when data from a previous, non-terminated run is still installed.
void bsp_init_internal( struct mcbsp_init_data * const initialisationData ) {
	//make sure the TLS keys exist before touching them
	mcbsp_internal_check_keys_allocated();

	//fetch any previously installed record once; warn if that run never ended
	const struct mcbsp_init_data * const oldData = pthread_getspecific( mcbsp_internal_init_data );
	if( oldData != NULL && !oldData->ended ) {
		fprintf( stderr, "Warning: initialisation data corresponding to another BSP run found;\n" );
		fprintf( stderr, "         and this other run did not terminate (gracefully).\n" );
	}

	//install the new record
	if( pthread_setspecific( mcbsp_internal_init_data, initialisationData ) != 0 ) {
		fprintf( stderr, "Error: could not set BSP program key!\n" );
		mcbsp_util_fatal();
	}
}
예제 #9
0
//Common entry code for BSP primitives: checks for a pending abort, then
//returns this thread's mcbsp_thread_data (validated in debug builds).
struct mcbsp_thread_data * mcbsp_internal_prefunction() {
	//check if the BSP execution was aborted
	mcbsp_internal_check_aborted();

	//get thread-local data
	struct mcbsp_thread_data * const data = pthread_getspecific( mcbsp_internal_thread_data );

	//check for errors if not in high-performance mode
#ifndef NDEBUG
	if( data == NULL ) {
		//fixed copy-pasted diagnostic: this is the prefunction, not bsp_abort
		fprintf( stderr, "Error: could not get thread-local data in call to mcbsp_internal_prefunction()!\n" );
		mcbsp_util_fatal();
	}
#endif

	//return data
	return data;
}
예제 #10
0
파일: bsp.c 프로젝트: jong42/git
//Test SPMD program: exercises registration (push_reg/pop_reg), communication
//(put/get/direct_get), BSMP messaging (send/move/hpmove), tag handling, and
//barrier synchronisation across three BSP processes, inspecting internal
//MulticoreBSP data structures after each primitive to verify behaviour.
void spmd( void ) {
	//parallel over three processes
	bsp_begin( 3 );

	//test bsp_push_reg (results in next superstep)
	size_t localInt;
	bsp_push_reg( &localInt, sizeof( size_t ) );
	checkLocalIntAddress[ bsp_pid() ] = &localInt;

	//check pid/nprocs, both using primitives as well as manually
	checkPcount[ bsp_pid() ] = (size_t)(bsp_nprocs());
	pthread_mutex_lock( &test_mutex );
	check++;
	checkP[ bsp_pid() ] = true;
	pthread_mutex_unlock( &test_mutex );

	//nobody should be at superstep 0
	if( superstep == 1 )
		superstepOK = false;

	//test barrier synchronisation
	bsp_sync();

	//note someone is at superstep 1
	superstep = 1;

	//check bsp_time
	if( bsp_time() <= 0 )
		bsp_abort( "FAILURE \t bsp_time returned 0 or less!\n" );

	//set up a pop_reg, but should only take effect after the next sync
	//(testing the push_reg after this statement thus provides a free test)
	bsp_pop_reg( &localInt );
	struct mcbsp_thread_data * const data = pthread_getspecific( mcbsp_internal_thread_data );
	//the pop must be queued on the to-remove stack (cap 16 presumably the
	//initial stack capacity -- verify against mcbsp_util_stack_initialise)
	if( data->localsToRemove.top != 1 || data->localsToRemove.cap != 16 ||
		*((void**)(data->localsToRemove.array)) != (void*)&localInt ) {
		fprintf( stderr, "FAILURE \t bsp_pop_reg did not push entry on the to-remove stack (%p != %p)!\n",
			*((void**)(data->localsToRemove.array)), (void*)&localInt );
		mcbsp_util_fatal();
	}

	//check push_reg
	for( unsigned char i=0; i<3; ++i ) {
		if( checkLocalIntAddress[ i ] != mcbsp_util_address_table_get( &(data->init->global2local), 0, i )->address ) {
			fprintf( stderr, "FAILURE \t bsp_push_reg did not register correct address!\n" );
			mcbsp_util_fatal();
		}
	}

	bsp_sync();

	//check pop_reg
	for( unsigned char i=0; i<3; ++i ) {
		//NOTE(review): if the first operand of || is true the fprintf below
		//dereferences the NULL return of address_table_get -- hazard in the
		//failure path only
		if( mcbsp_util_address_table_get( &(data->init->global2local), 0, i ) != NULL ||
			data->localC != 0 ) {
			fprintf( stderr, "FAILURE \t bsp_pop_reg did not de-register correctly (entry=%p)!\n",
				mcbsp_util_address_table_get( &(data->init->global2local), 0, i )->address );
			mcbsp_util_fatal();
		}
		//localInt = *(size_t*)mcbsp_util_stack_pop( &(data->removedGlobals) );
	}

	bsp_sync();

	//going to test communication primitives on the following area
	size_t commTest[ 3 ];
	commTest[ 0 ] = commTest[ 1 ] = ((size_t)bsp_pid());
	commTest[ 2 ] = (size_t)(bsp_nprocs());
	bsp_push_reg( &commTest, 3 * sizeof( size_t ) );

	//make push valid
	bsp_sync();

	//after this put, commTest[ 0 ] should equal bsp_pid, commTest[ 1, 2 ] should equal bsp_pid-1 mod bsp_nprocs
	bsp_put( (bsp_pid() + 1) % bsp_nprocs(), &commTest, &commTest, sizeof( size_t ), 2*sizeof( size_t) );
	commTest[ 2 ] = ULONG_MAX; //this should not influence the result after sync.

	//test behind-the-scenes
	//(queue layout assumption: payload bytes first, then the message header at top)
	const struct mcbsp_util_stack queue = data->queues[ (bsp_pid() + 1) % bsp_nprocs() ];
	size_t predicted_cap = predictCap( sizeof( struct mcbsp_message ) + 2 * sizeof( size_t) );
	if( queue.cap != predicted_cap || queue.top != sizeof( struct mcbsp_message ) + 2 * sizeof( size_t) || queue.size != sizeof( struct mcbsp_message ) ) {
		fprintf( stderr, "FAILURE \t bsp_put did not adapt the communication queue as expected!\n(cap = %ld, top = %ld, size = %ld)\n",
			(size_t)queue.cap, (size_t)queue.top, (size_t)queue.size );
		mcbsp_util_fatal();
	}
	const struct mcbsp_message request = *((struct mcbsp_message*) ((char*)queue.array + queue.top - sizeof( struct mcbsp_message )) );
	if( request.length != 2 * sizeof( size_t) ) {
		fprintf( stderr, "FAILURE \t bsp_put did not push a request of the expected length!\n(length = %ld)\n", (size_t)request.length );
		mcbsp_util_fatal();
	}
	const size_t * const chk_array = (size_t*) ((char*)queue.array + queue.top - sizeof( struct mcbsp_message ) - 2 * sizeof( size_t ));
	if( chk_array[ 0 ] != ((size_t)bsp_pid()) || chk_array[ 1 ] != ((size_t)bsp_pid()) ) {
		fprintf( stderr, "FAILURE \t bsp_put did not push an expected communication request!\n" );
		mcbsp_util_fatal();
	}
	//note there is no easy way to check request.destination; the top-level BSP test will handle that one

	bsp_sync();

	//test for the above expectation after bsp_put, namely
	//commTest[ 0 ] should equal bsp_pid, commTest[ 1, 2 ] should equal bsp_pid-1 mod bsp_nprocs
	if( commTest[ 0 ] != ((size_t)bsp_pid()) || 
		commTest[ 1 ] != (size_t)((bsp_pid()+bsp_nprocs()-1)%bsp_nprocs()) ||
		commTest[ 2 ] != (size_t)((bsp_pid()+bsp_nprocs()-1)%bsp_nprocs())
	) {
		fprintf( stderr, "FAILURE \t array after bsp_put is not as expected! (%d: %ld %ld %ld))\n", bsp_pid(), commTest[ 0 ], commTest[ 1 ], commTest[ 2 ] );
		mcbsp_util_fatal();
	}
	
	//do a get on the next processor on the last element of commTest
	bsp_get( (bsp_pid() + 1) % bsp_nprocs(), &commTest, 2 * sizeof( size_t ), &(commTest[ 2 ]), sizeof( size_t ) );

	//fill the expected value after the get to test non-buffering
	commTest[ 2 ] = ((size_t)bsp_pid());

	//communicate
	bsp_sync();

	//commTest[ 0 ] should equal bsp_pid, commTest[ 1 ] should equal bsp_pid-1, commTest[ 2 ] should be bsp_pid+1
	if( commTest[ 0 ] != ((size_t)bsp_pid()) || 
		commTest[ 1 ] != (size_t)((bsp_pid()+bsp_nprocs() - 1)%bsp_nprocs())
	) {
		fprintf( stderr, "FAILURE \t start of array after bsp_get changed! (%d: %ld %ld %ld))\n", bsp_pid(), commTest[ 0 ], commTest[ 1 ], commTest[ 2 ] );
		mcbsp_util_fatal();
	}
	if( commTest[ 2 ] != (size_t)((bsp_pid()+bsp_nprocs() + 1)%bsp_nprocs()) ) {
		fprintf( stderr, "FAILURE \t last element of array after bsp_get erroneous! (%d: %ld %ld %ld))\n", bsp_pid(), commTest[ 0 ], commTest[ 1 ], commTest[ 2 ] );
		mcbsp_util_fatal();
	}

	bsp_sync();

	//test direct_get functionality
	size_t commTest2[ 3 ];
	commTest2[ 0 ] = commTest[ 0 ];

	//get commTest[1] from right neighbour
	bsp_direct_get( (bsp_pid() + 1) % bsp_nprocs(), &commTest, sizeof( size_t ), &(commTest2[ 1 ]), sizeof( size_t ) );

	//get commTest[2] from left neighbour
	bsp_direct_get( (bsp_pid() + bsp_nprocs() - 1) % bsp_nprocs(), &commTest, 2 * sizeof( size_t ), &(commTest2[ 2 ]), sizeof( size_t ) );

	//now everything should equal bsp_pid
	if( commTest2[ 0 ] != ((size_t)bsp_pid()) || 
		commTest2[ 1 ] != ((size_t)bsp_pid()) || 
		commTest2[ 2 ] != ((size_t)bsp_pid())
	) {
		fprintf( stderr, "FAILURE \t direct_get does not function properly! (%d: [%ld %ld %ld])\n", bsp_pid(), commTest2[ 0 ], commTest2[ 1 ], commTest2[ 2 ] );
		mcbsp_util_fatal();
	}

	//now test single BSMP message
	bsp_send( (bsp_pid() + 1) % bsp_nprocs(), NULL, &commTest, sizeof( size_t ) );
	
	//check messages
	const struct mcbsp_util_stack queue1 = data->queues[ (bsp_pid() + 1) % bsp_nprocs() ];
	const size_t new_predicted_cap = predictCap( sizeof( struct mcbsp_message ) + sizeof( size_t ) );
	predicted_cap = predicted_cap > new_predicted_cap ? predicted_cap : new_predicted_cap;
	if( queue1.cap != predicted_cap || queue1.size != sizeof( struct mcbsp_message ) || queue1.top != sizeof( struct mcbsp_message ) + sizeof( size_t ) ) {
		fprintf( stderr, "FAILURE \t bsp_send did not adapt the communication queue as expected!\n(cap = %ld, size = %ld, top = %ld; prediction was %ld, %ld, %ld)\n",
			(size_t)queue1.cap, (size_t)queue1.size, (size_t)queue1.top,
			(size_t)predicted_cap, (size_t)(sizeof( struct mcbsp_message )), (size_t)(sizeof( struct mcbsp_message ) + sizeof( size_t )) );
		mcbsp_util_fatal();
	}
	const struct mcbsp_message request2 = *(struct mcbsp_message*) ((char*)queue1.array + queue1.top - sizeof( struct mcbsp_message ));
	if( request2.destination != NULL ||
		request2.length != sizeof( size_t ) || // assumes tagSize = 0
		*(size_t *)queue1.array != ((size_t)bsp_pid()) ) {
		fprintf( stderr, "FAILURE \t bsp_send did not push the expected communication request!\n(top = %ld, destination = %p, length = %ld, payload = %ld\n",
			(size_t)queue1.top, request2.destination, (size_t)request2.length, *(size_t *)queue1.array );
		mcbsp_util_fatal();
	}

	bsp_sync();

	//inspect incoming BSMP queue (assuming tagSize = 0)
	predicted_cap = predictCap( sizeof( size_t ) + sizeof( size_t ) );
	if( data->bsmp.cap != predicted_cap || data->bsmp.top != sizeof( size_t ) + sizeof( size_t ) || data->bsmp.size != sizeof( size_t ) ) {
		fprintf( stderr, "FAILURE \t BSMP queue after superstep with sends is not as expected!\n(cap = %ld, top = %ld, size = %ld; prediction was %ld, %ld, %ld)\n",
			(size_t)data->bsmp.cap, (size_t)data->bsmp.top, (size_t)data->bsmp.size,
			(size_t)predicted_cap, (size_t)(8 + sizeof( size_t )), (size_t)(data->bsmp.size) );
		mcbsp_util_fatal();
	}
	if( *(size_t*)(data->bsmp.array) != (size_t)((bsp_pid() + bsp_nprocs() - 1) % bsp_nprocs()) ) {
		fprintf( stderr, "FAILURE \t Value in BSMP queue is not correct!\n" );
		mcbsp_util_fatal();
	}
	
	//inspect using primitives
	MCBSP_NUMMSG_TYPE   packets;
	MCBSP_BYTESIZE_TYPE packetSize;
	bsp_qsize( &packets, &packetSize );
	if( packets != 1 || packetSize != sizeof( size_t ) ) {
		fprintf( stderr, "FAILURE \t bsp_qsize does not function correctly!\n" );
		mcbsp_util_fatal();
	}
	bsp_move( &commTest, sizeof( size_t ) );
	if( commTest[ 0 ] != (size_t)(( bsp_pid() + bsp_nprocs() - 1 ) % bsp_nprocs()) ) {
		fprintf( stderr, "FAILURE \t bsp_move does not function correctly!\n" );
		mcbsp_util_fatal();
	}
	
	//check set_tagsize
	//(returns the previous tag size, which must still be 0 here)
	MCBSP_BYTESIZE_TYPE tsz = sizeof( size_t );
	bsp_set_tagsize( &tsz );
	if( tsz != 0 ) {
		fprintf( stderr, "FAILURE \t return value of bsp_set_tagsize is incorrect!\n" );
		mcbsp_util_fatal();
	}

	bsp_sync();

	//check set_tagsize
	//(the new tag size only takes effect after the synchronisation)
	if( data->init->tagSize != sizeof( size_t ) ) {
		fprintf( stderr, "FAILURE \t bsp_set_tagsize failed!\n" );
		mcbsp_util_fatal();
	}
	
	commTest[ 0 ] = ((size_t)bsp_pid());
	commTest[ 1 ] = 3;
	commTest[ 2 ] = 8 + ((size_t)bsp_pid());
	for( unsigned char i = 0; i < bsp_nprocs(); ++i ) {
		bsp_send( i, commTest, &(commTest[1]), 2 * sizeof( size_t ) );
		char * const test = (char*)(data->queues[ (size_t)i ].array) + data->queues[ (size_t)i ].top - sizeof( struct mcbsp_message ) - sizeof( size_t );
		if( *(size_t*)test != *commTest ) {
			fprintf( stderr, "FAILURE \t BSMP tag did not get pushed correctly (reads %ld instead of %ld)!\n", *(size_t*)test, *commTest );
			mcbsp_util_fatal();
		}
	}

	bsp_sync();

	MCBSP_BYTESIZE_TYPE status;
	size_t tag;
	for( unsigned char i = 0; i < bsp_nprocs(); ++i ) {
		bsp_get_tag( &status, &tag );
		if( tag >= ((size_t)bsp_nprocs()) || status != 2 * sizeof( size_t ) ) {
			fprintf( stderr, "FAILURE \t error in BSMP tag handling! (tag=%ld, status=%ld)\n", tag, (size_t)status );
			mcbsp_util_fatal();
		}
		size_t *p_tag, *msg;
		if( bsp_hpmove( (void**)&p_tag, (void**)&msg ) != 2 * sizeof( size_t ) ) {
			fprintf( stderr, "FAILURE \t bsp_hpmove does not return correct payload length." );
		}
		if( msg[ 0 ] != 3 || *p_tag != tag ) {
			fprintf( stderr, "FAILURE \t bsp_hpmove does not contain correct message (tag=%ld, payload = %ld) which should be (%ld, 3).\n", *p_tag, msg[ 0 ], tag );
			mcbsp_util_fatal();
		}
		commTest[ tag ] = msg[ 1 ];
	}
	for( unsigned short int i = 0; i < bsp_nprocs(); ++i ) {
		if( commTest[ i ] != (unsigned int)(8 + i) ) {
			fprintf( stderr, "FAILURE \t error in bsp_tag / bsp_(hp)move combination!\n" );
			mcbsp_util_fatal();
		}
	}

	bsp_sync();

#ifdef MCBSP_ALLOW_MULTIPLE_REGS
	//test multiple regs
	double mreg[17];
	bsp_push_reg( &(mreg[0]), 7*sizeof( double ) );

	bsp_sync();

	double mregs = 1.3;
	bsp_put( (bsp_pid() + 1) % bsp_nprocs(), &mregs, &mreg, 6 * sizeof( double ), sizeof( double ) );
	bsp_push_reg( &(mreg[0]), 17*sizeof( double ) );

	bsp_sync();

	bsp_push_reg( &(mreg[0]), 13*sizeof( double ) );
	bsp_put( (bsp_pid() + 1) % bsp_nprocs(), &mregs, &mreg, 16 * sizeof( double ), sizeof( double ) );

	bsp_sync();

	if( mreg[ 6 ] != mreg[ 16 ] ||  mreg[ 6 ] != mregs ) {
		fprintf( stderr, "FAILURE \t error in bsp_put + multiple bsp_push_reg calls (%f,%f,%f,...,%f,%f)\n", mreg[ 5 ], mreg[ 6 ], mreg[ 7 ], mreg[ 15 ], mreg[ 16 ] );
		mcbsp_util_fatal();
	}
	bsp_pop_reg( &(mreg[0]) );
	bsp_pop_reg( &(mreg[0]) );

	bsp_sync();

	bsp_put( (bsp_pid() + 1) % bsp_nprocs(), &mregs, &mreg, 2 * sizeof( double ), sizeof( double ) );

	bsp_sync();

	if( mreg[ 2 ] != mregs ) {
		fprintf( stderr, "FAILURE \t error in bsp_put + multiple bsp_push_reg + multiple bsp_pop_reg calls\n" );
		mcbsp_util_fatal();
	}
#endif

	bsp_end();
}
예제 #11
0
파일: bsp.c 프로젝트: jong42/git
//Test driver: verifies bsp_init stores the correct program data, then runs
//the spmd() test program with a manually-pinned 3-process configuration and
//checks the global flags it sets.
int main(int argc, char **argv) {
	//test bsp_init
	bsp_init( spmd, argc, argv );
	if( !mcbsp_internal_keys_allocated ) {
		fprintf( stderr, "FAILURE \t bsp_init did not initialise internal keys!\n" );
		mcbsp_util_fatal();
	}
	struct mcbsp_init_data *initialisationData = pthread_getspecific( mcbsp_internal_init_data );
	if( initialisationData == NULL ) {
		fprintf( stderr, "FAILURE \t did not retrieve correct program initialisation data!\n" );
		mcbsp_util_fatal();
	}
	if( initialisationData->spmd != spmd ) {
		fprintf( stderr, "FAILURE \t did not retrieve correct user-defined SPMD entry point!\n" );
		mcbsp_util_fatal();
	}
	if( initialisationData->argc != argc ) {
		fprintf( stderr, "FAILURE \t did not retrieve correct argument count!\n" );
		mcbsp_util_fatal();
	}
	if( initialisationData->argv != argv ) {
		fprintf( stderr, "FAILURE \t did not retrieve correct arguments!\n" );
		mcbsp_util_fatal();
	}
	//bsp_init OK
	
	//test bsp_begin and bsp_end, init test
	//NOTE(review): threads is set to 7 while the manual affinity array below
	//holds only 3 entries (spmd runs with bsp_begin(3)) -- confirm that only
	//the first 3 entries are ever read under MANUAL affinity
	struct mcbsp_util_machine_info * MCBSP_MACHINE_INFO = mcbsp_internal_getMachineInfo();
	MCBSP_MACHINE_INFO->threads = 7;
	if( MCBSP_MACHINE_INFO->manual_affinity != NULL ) free( MCBSP_MACHINE_INFO->manual_affinity );
	MCBSP_MACHINE_INFO->manual_affinity = malloc( 3 * sizeof( size_t ) );
	for( unsigned char i=0; i<3; ++i )
		MCBSP_MACHINE_INFO->manual_affinity[ i ] = 0;
	MCBSP_MACHINE_INFO->affinity = MANUAL;
	checkP[ 0 ] = checkP[ 1 ] = checkP[ 2 ] = false;

	//actual test
	//(calling spmd() directly: its internal bsp_begin spawns the siblings)
	spmd();
	if( check != 3 ) {
		fprintf( stderr, "FAILURE \t bsp_begin(3) did not correctly start three processes!\n" );
		mcbsp_util_fatal();
	}
	if( !( checkP[ 0 ] && checkP[ 1 ] && checkP[ 2 ] ) ) {
		fprintf( stderr, "FAILURE \t bsp_pid does not function correctly!\n" );
		mcbsp_util_fatal();
	}
	for( unsigned char i=0; i<3; ++i ) {
		if( checkPcount[ i ] != 3 ) {
			fprintf( stderr, "FAILURE \t bsp_nprocs does not function correctly!\n" );
			mcbsp_util_fatal();
		}
	}
	if( !superstepOK ) {
		fprintf( stderr, "FAILURE \t bsp_sync allowed one or more threads past a synchronisation point before at least one other thread reached it!\n" );
		mcbsp_util_fatal();
	}
	
	//cleanup
	free( MCBSP_MACHINE_INFO->manual_affinity );
	MCBSP_MACHINE_INFO->manual_affinity = NULL;
	MCBSP_MACHINE_INFO->affinity = MCBSP_DEFAULT_AFFINITY;
	mcbsp_util_destroyMachineInfo( MCBSP_MACHINE_INFO );
	//bsp_begin & bsp_end OK
	
	fprintf( stdout, "SUCCESS\n" );
	exit( EXIT_SUCCESS );
}
예제 #12
0
파일: mcbsp.c 프로젝트: pirp/parallel_sieve
//Barrier synchronisation: blocks until all P threads arrive, then processes
//the queued BSP requests (tag-size update, get caching, pop_reg, push_reg,
//put/BSMP delivery) in three lock-coordinated phases before releasing all
//threads into the next superstep.
void bsp_sync() {
	//get local data
	struct mcbsp_thread_data * const data = pthread_getspecific( mcbsp_internal_thread_data );

	//get lock
	pthread_mutex_lock( &(data->init->mutex) );

	//see if synchronisation is complete
	//(last thread to arrive resets the counter and wakes everyone)
	if( data->init->sync_entry_counter++ == data->init->P - 1 ) {
		data->init->sync_entry_counter = 0;
		pthread_cond_broadcast( &(data->init->condition) );
	} else
		pthread_cond_wait( &(data->init->condition), &(data->init->mutex) );

	//unlock mutex
	pthread_mutex_unlock( &(data->init->mutex) );

	//before continuing execution, check if we woke up due to an abort
	//and now exit if so (we could not exit earlier as not unlocking the
	//sync mutex will cause a deadlock).
	mcbsp_internal_check_aborted();

	//check for mismatched sync/end
	if( data->init->ended ) {
		fprintf( stderr, "Mismatched bsp_sync and bsp_end detected!\n" );
		mcbsp_util_fatal();
	}

	//handle the various BSP requests
	
	//update tagSize, phase 1
	//(only thread 0 writes the shared value; all threads verify in phase 2)
	if( data->bsp_id == 0 && data->newTagSize != data->init->tagSize )
		data->init->tagSize = data->newTagSize;

	//look for requests with destination us, first cache get-requests
	for( MCBSP_PROCESSOR_INDEX_DATATYPE s = 0; s < data->init->P; ++s ) {
		struct mcbsp_util_stack * const queue = &(data->init->threadData[ s ]->queues[ data->bsp_id ]);
		//each request in queue is directed to us. Handle all of them.
		//NOTE(review): r iterates with stride queue->size against queue->top;
		//confirm top counts items here (elsewhere top appears to be a byte
		//offset) -- verify against mcbsp_util_stack's conventions
		for( size_t r = 0; r < queue->top; ++r ) {
			struct mcbsp_communication_request * const request = (struct mcbsp_communication_request *) (((char*)(queue->array)) + r * queue->size);
			if( request->payload == NULL ) {
				//allocate payload
				//NOTE(review): malloc result is not checked before memcpy
				request->payload = malloc( request->length );
				//no data race here since we are the only ones allowed to write here
				memcpy( request->payload, request->source, request->length );
				//nullify payload (effectively turning the request into a put-request)
				request->source = NULL;
			}
		}
	}

	//handle bsp_pop_reg
	while( !mcbsp_util_stack_empty( &(data->localsToRemove ) ) ) {
		//get local memory address to remove registration of
		void * const toRemove = *((void**)(mcbsp_util_stack_pop( &(data->localsToRemove) )));

		//get corresponding global key
		const unsigned long int globalIndex = mcbsp_util_address_map_get( &(data->local2global), toRemove );
		if( globalIndex == ULONG_MAX ) {
			fprintf( stderr, "Error: bsp_pop_reg requested on non-registered pointer!\n" );
			mcbsp_util_fatal();
		}

		//delete from table
		if( mcbsp_util_address_table_delete( &(data->init->global2local), globalIndex, data->bsp_id ) ) {
			//NOTE: this is safe, since it is guaranteed that this address table entry
			//	will not change during synchronisation.

			//delete from map
			mcbsp_util_address_map_remove( &(data->local2global), toRemove );
		}

		//register globalIndex now is free
		//(shrink the counter when the freed index was the last one; otherwise
		//remember it for reuse on a later push_reg)
		if( data->localC == globalIndex + 1 )
			--(data->localC);
		else
			mcbsp_util_stack_push( &(data->removedGlobals), (void*)(&globalIndex) );
	}

	//handle push_reg
	while( !mcbsp_util_stack_empty( &(data->localsToPush) ) ) {
		//get address
		const struct mcbsp_push_request request =
			*((struct mcbsp_push_request*)mcbsp_util_stack_pop( &(data->localsToPush) ));
		void * const address = request.address;
		const MCBSP_BYTESIZE_TYPE size = request.size;

		//get global index of this registration. First check map if the key already existed
		const unsigned long int mapSearch     = mcbsp_util_address_map_get( &(data->local2global), address);
		//if the key was not found, create a new global entry
		//(reuse a previously freed index when available, else extend localC)
		const unsigned long int global_number = mapSearch != ULONG_MAX ? mapSearch :
								mcbsp_util_stack_empty( &(data->removedGlobals) ) ?
								data->localC++ :
								*(unsigned long int*)mcbsp_util_stack_pop( &(data->removedGlobals) );

		//insert value, local2global map (if this is a new global entry)
		if( mapSearch == ULONG_MAX )
			mcbsp_util_address_map_insert( &(data->local2global), address, global_number );

		//insert value, global2local map (false sharing is possible here, but effects should be negligable)
		mcbsp_util_address_table_set( &(data->init->global2local), global_number, data->bsp_id, address, size );
	}

	//coordinate exit using the same mutex (but not same condition!)
	pthread_mutex_lock( &(data->init->mutex) );
	if( data->init->sync_exit_counter++ == data->init->P - 1 ) {
		data->init->sync_exit_counter = 0;
		pthread_cond_broadcast( &(data->init->mid_condition) );
	} else
		pthread_cond_wait( &(data->init->mid_condition), &(data->init->mutex) );
	pthread_mutex_unlock( &(data->init->mutex) );

	//update tagsize, phase 2 (check)
	if( data->newTagSize != data->init->tagSize ) {
		fprintf( stderr, "Different tag sizes requested from different processes (%ld requested while process 0 requested %ld)!\n", data->newTagSize, data->init->tagSize );
		mcbsp_util_fatal();
	}
	
	//now process put requests to local destination
	for( MCBSP_PROCESSOR_INDEX_DATATYPE s = 0; s < data->init->P; ++s ) {
		struct mcbsp_util_stack * const queue = &(data->init->threadData[ s ]->queues[ data->bsp_id ]);
		//each request in queue is directed to us. Handle all of them.
		while( !mcbsp_util_stack_empty( queue ) ) {
			struct mcbsp_communication_request * const request = (struct mcbsp_communication_request *) mcbsp_util_stack_pop( queue );
			if( request->source == NULL && request->destination == NULL && request->payload != NULL ) {
				//this is a BSMP message
				//construct message
				void * message = malloc( request->length );
				memcpy( message, request->payload, request->length );
				//record message
				mcbsp_util_stack_push( &(data->bsmp), &message );
				//free payload
				free( request->payload );
			} else if( request->source == NULL && request->payload != NULL ) {
				//no data race here since we are the only ones allowed to write here
				memcpy( request->destination, request->payload, request->length );
				//free payload
				free( request->payload );
			} else {
				fprintf( stderr, "Unknown BSP communication request encountered!\n" );
				mcbsp_util_fatal();
			}
		}
	}

	//final sync
	//(ensures all deliveries completed before any thread resumes user code)
	pthread_mutex_lock( &(data->init->mutex) );
	if( data->init->sync_entry_counter++ == data->init->P - 1 ) {
		data->init->sync_entry_counter = 0;
		pthread_cond_broadcast( &(data->init->condition) );
	} else
		pthread_cond_wait( &(data->init->condition), &(data->init->mutex) );
	pthread_mutex_unlock( &(data->init->mutex) );
}
예제 #13
0
파일: mcbsp.c 프로젝트: pirp/parallel_sieve
void bsp_begin( const MCBSP_PROCESSOR_INDEX_DATATYPE P ) {
	struct mcbsp_init_data * const init = bsp_begin_check();

	//if the check did not return an init struct, we are a
	//spawned thread and should just continue the SPMD
	//code.
	if( init == NULL )
		return;

	//otherwise we need to start the SPMD code 
	int *pinning = mcbsp_util_pinning( MCBSP_AFFINITY, P );
	if( pinning == NULL ) {
		fprintf( stderr, "Could not get a valid pinning!\n" );
		mcbsp_util_fatal();
	}

	init->threads = malloc( P * sizeof( pthread_t ) );
	if( init->threads == NULL ) {
		fprintf( stderr, "Could not allocate new threads!\n" );
		mcbsp_util_fatal();
	}

	pthread_attr_t attr;

#ifndef __MACH__
	cpu_set_t mask;
#endif

	//further initialise init object
	init->P     = P;
	init->abort = false;
	init->ended = false;
	init->sync_entry_counter = 0;
	init->sync_exit_counter  = 0;
	pthread_mutex_init( &(init->mutex), NULL );
	pthread_cond_init ( &(init->condition), NULL );
	pthread_cond_init ( &(init->mid_condition), NULL );
	mcbsp_util_address_table_initialise( &(init->global2local), P );
	init->threadData = malloc( P * sizeof( struct mcbsp_thread_data * ) );
	init->tagSize = 0;

	//spawn P-1 threads. The condition checks for both signed and unsigned types
	//since user may set MCBSP_PROCESSOR_INDEX_DATATYPE to a signed type.
	for( MCBSP_PROCESSOR_INDEX_DATATYPE s = P - 1; s < P && s >= 0; --s ) {
		//allocate new thread-local data
		struct mcbsp_thread_data *thread_data = malloc( sizeof( struct mcbsp_thread_data ) );
		if( thread_data == NULL ) {
			fprintf( stderr, "Could not allocate local thread data!\n" );
			mcbsp_util_fatal();
		}
		//provide a link to the SPMD program init struct
		thread_data->init   = init;
		//set local ID
		thread_data->bsp_id = s;
		//set the maximum number of registered globals at any time (0, since SPMD not started yet)
		thread_data->localC = 0;
		//initialise local to global map
		mcbsp_util_address_map_initialise( &(thread_data->local2global ) );
		//initialise stack used for efficient registration of globals after de-registrations
		mcbsp_util_stack_initialise( &(thread_data->removedGlobals), sizeof( unsigned long int ) );
		//initialise stack used for de-registration of globals
		mcbsp_util_stack_initialise( &(thread_data->localsToRemove), sizeof( void * ) );
		//initialise stacks used for communication
		thread_data->queues = malloc( P * sizeof( struct mcbsp_util_stack ) );
		for( MCBSP_PROCESSOR_INDEX_DATATYPE i = 0; i < P; ++i )
			mcbsp_util_stack_initialise( &(thread_data->queues[ i ]), sizeof( struct mcbsp_communication_request) );
		//initialise default tag size
		thread_data->newTagSize = 0;
		//initialise BSMP queue
		mcbsp_util_stack_initialise( &(thread_data->bsmp), sizeof( void * ) );
		//initialise push request queue
		mcbsp_util_stack_initialise( &(thread_data->localsToPush), sizeof( struct mcbsp_push_request ) );
		//provide a link back to this thread-local data struct
		init->threadData[ s ] = thread_data;

		//spawn new threads if s>0
		if( s > 0 ) {
			//create POSIX threads attributes (for pinning)
			pthread_attr_init( &attr );
#ifndef __MACH__
			CPU_ZERO( &mask );
			CPU_SET ( pinning[ s ], &mask );
			pthread_attr_setaffinity_np( &attr, sizeof( cpu_set_t ), &mask );
#endif

			//spawn the actual thread
			if( pthread_create( &(init->threads[ s ]), &attr, mcbsp_internal_spmd, thread_data ) != 0 ) {
				fprintf( stderr, "Could not spawn new thread!\n" );
				mcbsp_util_fatal();
			}

#ifdef __MACH__
			thread_port_t osx_thread = pthread_mach_thread_np( init->threads[ s ] );
			struct thread_affinity_policy ap;
			if( MCBSP_AFFINITY == SCATTER ) {
				//Affinity API release notes do not specify whether 0 is a valid tag, or in fact equal to NULL; so 1-based to be sure
				ap.affinity_tag = s + 1;
			} else if( MCBSP_AFFINITY == COMPACT ) {
				ap.affinity_tag = 1;
			} else if( MCBSP_AFFINITY == MANUAL ) {
				ap.affinity_tag = MCBSP_MANUAL_AFFINITY[ s ];
			} else {
				fprintf( stderr, "Unhandled affinity type for Mac OS X!\n" );
				mcbsp_util_fatal();
			}
			thread_policy_set( osx_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&ap, THREAD_AFFINITY_POLICY_COUNT );
#endif

			//destroy attributes object
			pthread_attr_destroy( &attr );
		} else {
			//continue ourselves as bsp_id 0. Do pinning
#ifdef __MACH__
			thread_port_t osx_thread = pthread_mach_thread_np( pthread_self() );
			struct thread_affinity_policy ap;
			if( MCBSP_AFFINITY == SCATTER || MCBSP_AFFINITY == COMPACT )
				ap.affinity_tag = 1;
			else if( MCBSP_AFFINITY == MANUAL )
				ap.affinity_tag = MCBSP_MANUAL_AFFINITY[ s ];
			else {
				fprintf( stderr, "Unhandled affinity type for Mac OS X!\n" );
				mcbsp_util_fatal();
			}
			thread_policy_set( osx_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&ap, THREAD_AFFINITY_POLICY_COUNT );
#else
			CPU_ZERO( &mask );
			CPU_SET ( pinning[ s ], &mask );
			if( pthread_setaffinity_np( pthread_self(), sizeof( cpu_set_t ), &mask ) != 0 ) {
				fprintf( stderr, "Could not pin master thread to requested hardware thread (%d)!\n", pinning[ s ] );
				mcbsp_util_fatal();
			}
#endif
			//record our own descriptor
			init->threads[ 0 ] = pthread_self();
			//copy part of mcbsp_internal_spmd.
			const int rc = pthread_setspecific( mcbsp_internal_thread_data, thread_data );
			if( rc != 0 ) {
				fprintf( stderr, "Could not store thread-local data in continuator thread!\n" );
				fprintf( stderr, "(%s)\n", strerror( rc ) );
				mcbsp_util_fatal();
			}
#ifdef __MACH__
			//get rights for accessing Mach's timers
			const kern_return_t rc1 = host_get_clock_service( mach_host_self(), SYSTEM_CLOCK, &(thread_data->clock) );
			if( rc1 != KERN_SUCCESS ) {
				fprintf( stderr, "Could not access the Mach system timer (%s)\n", mach_error_string( rc1 ) );
				mcbsp_util_fatal();
			}
			const kern_return_t rc2 = clock_get_time( thread_data->clock, &(thread_data->start) );
			if( rc2 != KERN_SUCCESS ) {
				fprintf( stderr, "Could not get starting time (%s)\n", mach_error_string( rc2 ) );
				mcbsp_util_fatal();
			}
#else
			clock_gettime( CLOCK_MONOTONIC, &(thread_data->start) );
#endif
			//this one is extra, enables possible BSP-within-BSP execution.
			if( pthread_setspecific( mcbsp_internal_init_data, NULL ) != 0 ) {
				fprintf( stderr, "Could not reset initialisation data to NULL on SPMD start!\n" );
				mcbsp_util_fatal();
			}
		}
	}
	//free pinning only if it was not manually defined
	if( MCBSP_AFFINITY != MANUAL )
		free( pinning );
}
예제 #14
0
파일: mcbsp.c 프로젝트: pirp/parallel_sieve
void bsp_end() {
	//Ends an SPMD section: barrier-synchronises all P threads, releases all
	//thread-local resources, and has the master thread (bsp_id 0) join the
	//workers and tear down the shared init struct. Non-master threads do not
	//return from this call; they exit via pthread_exit.

	//get thread-local data
	struct mcbsp_thread_data * const data = pthread_getspecific( mcbsp_internal_thread_data );
	if( data == NULL ) {
		//fixed: the previous message was copy-pasted from bsp_abort and
		//misreported the failing call site
		fprintf( stderr, "Error: could not get thread-local data in call to bsp_end()!\n" );
		mcbsp_util_fatal();
	}

	//record end
	data->init->ended = true;

	//get lock
	pthread_mutex_lock( &(data->init->mutex) );

	//see if synchronisation is complete
	if( data->init->sync_entry_counter++ == data->init->P - 1 ) {
		//last thread to arrive resets the counter and wakes everyone
		data->init->sync_entry_counter = 0;
		pthread_cond_broadcast( &(data->init->condition) );
	} else
		//NOTE(review): bare pthread_cond_wait without a predicate re-check is
		//vulnerable to spurious wakeups per POSIX; the library-wide barrier
		//protocol appears to rely on the counter reset — confirm before changing
		pthread_cond_wait( &(data->init->condition), &(data->init->mutex) );

	//unlock mutex
	pthread_mutex_unlock( &(data->init->mutex) );

	//set thread-local data to NULL
	if( pthread_setspecific( mcbsp_internal_thread_data, NULL ) != 0 ) {
		fprintf( stderr, "Could not set thread-local data to NULL on thread exit.\n" );
		mcbsp_util_fatal();
	}

	//free data and exit gracefully,
#ifdef __MACH__
	//release the Mach clock service port acquired at SPMD start
	mach_port_deallocate( mach_task_self(), data->clock );
#endif
	mcbsp_util_address_map_destroy( &(data->local2global) );
	mcbsp_util_stack_destroy( &(data->removedGlobals) );
	mcbsp_util_stack_destroy( &(data->localsToRemove) );
	for( MCBSP_PROCESSOR_INDEX_DATATYPE s = 0; s < data->init->P; ++s ) {
		mcbsp_util_stack_destroy( &(data->queues[ s ]) );
	}
	free( data->queues );
	mcbsp_util_stack_destroy( &(data->bsmp) );
	mcbsp_util_stack_destroy( &(data->localsToPush) );

	//exit if not master thread
	if( data->bsp_id != 0 ) {
		//free thread-local data
		free( data );
		pthread_exit( NULL );
	}

	//master thread cleans up init struct
	struct mcbsp_init_data *init = data->init;

	//that's everything we needed from the thread-local data struct
	free( data );

	//wait for other threads; safe because the barrier above guarantees all
	//workers have finished their own cleanup before the master joins them
	for( MCBSP_PROCESSOR_INDEX_DATATYPE s = 1; s < init->P; ++s )
		pthread_join( init->threads[ s ], NULL );

	//destroy mutex and condition 
	pthread_mutex_destroy( &(init->mutex) );
	pthread_cond_destroy(  &(init->condition) );
	pthread_cond_destroy(  &(init->mid_condition) );
	
	//destroy global address table
	mcbsp_util_address_table_destroy( &(init->global2local) );

	//destroy pointers to thread-local data structs
	free( init->threadData );

	//exit gracefully, free threads array
	free( init->threads );

	//exit gracefully, free BSP program init data
	free( init );
}