예제 #1
0
/** =========================================================================
 *  Soft reset
 *
 *  Reads switch register 6 through the id returned by get_core_id() and
 *  writes the value back unchanged, first through id 0 and then through the
 *  id returned by get_core_id().
 **/
void chip_soft_reset(void)
{
    enum { SSWITCH_REG = 6 };
    unsigned saved;

    read_sswitch_reg(get_core_id(), SSWITCH_REG, &saved);

    write_sswitch_reg(0, SSWITCH_REG, saved);
    write_sswitch_reg(get_core_id(), SSWITCH_REG, saved);
}
예제 #2
0
/////////////////////////////////////////////////////////
// Test driver: time the mlButter kernel, then verify it
/////////////////////////////////////////////////////////
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  float filt[200];    // kernel output
  float stats[2];     // [0] = sum(filt), [1] = var(filt)
  boolean_T ok;
  int iter;

  // Index of the data slice this core works on.
  int slice = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,slice);

  // Ids above 3 are shifted down by 4 before being used as an index.
  if (slice > 3) {
    slice -= 4;
  }

  // ---- timed section: each core loops over a kernel instance ----
  synch_barrier();
  perf_begin();

  iter = 0;
  while (iter < getKernelIts()) {
    // matlab kernel, 200 input samples per slice
    mlButter(fv1, *(float (*)[200])&fv0[200 * slice], filt);
    iter++;
  }

  synch_barrier();
  perf_end();

  // ---- check results ----
  synch_barrier();

  stats[0] = sum(filt);
  stats[1] = var(filt);
  // Four reference values per slice are handed to the checker.
  ok = checkRes(stats, *(float (*)[4])&fv2[slice << 2]);

  flagPassFail(ok, get_core_id());

  // Exit status is 0 on pass, 1 on fail.
  return !ok;
}
예제 #3
0
/**
 * Prepare one shrink image IPC message.
 *
 * Fills the message queued for core CoreNo with the shrink parameters.
 * When _NO_IPC_TEST_ is defined no message is written; the shrink is run
 * directly through shrinkImage_on_core() and CoreNo is not used.
 */
void register_shrink_on_core(const uint8_T *Img, const uint32_T SubArea[4], const uint32_t yStart, const uint32_t yEnd,
                             const uint32_t yEvenOdd, const uint32_t yHeight, const uint32_t xWidth, const uint32_t xWidthSmall, uint8_T *ImgSmall, const uint32_T CoreNo)
{
#ifdef _NO_IPC_TEST_
    shrinkImage_on_core(Img, SubArea, yStart, yEnd, yEvenOdd, yHeight, xWidth, xWidthSmall, ImgSmall);
#else
    /* Which Shr_* entry belongs to which parameter is defined by the union
     * declarations in struct processing_info. The stores below are kept in
     * their established order for that reason. */
    process_message_t * const msg = p_queue_msg[CoreNo];

    msg->core_id = get_core_id(CoreNo);
    msg->info.processing_type = pt_shrink;

    memcpy(msg->info.Shr_SubArea, SubArea, 4 * sizeof(uint32_T));

    msg->info.Shr_Img = Img;
    msg->info.Shr_ImgSmall = ImgSmall;

    msg->info.Shr_yStart = yStart;
    msg->info.Shr_yEnd = yEnd;
    msg->info.Shr_yEvenOdd = yEvenOdd;
    msg->info.Shr_yHeight = yHeight;

    msg->info.Shr_xWidth = xWidth;
    msg->info.Shr_xWidthSmall = xWidthSmall;
#endif
}
예제 #4
0
/**
 * Tries to determine the physical package a cpu belongs to.
 *
 * cpu: cpu number, or -1 to use the value returned by get_cpu().
 * Returns the package id, or -1 when no strategy below yields one.
 */
int get_pkg(int cpu)
{
    char buffer[10];
    int pkg = -1;

    if (cpu == -1) {
        cpu = get_cpu();
    }
    if (cpu == -1) {
        return pkg;
    }

    /* first choice: the topology entry in sysfs */
    sprintf(path, "/sys/devices/system/cpu/cpu%i/topology/physical_package_id", cpu);
    if (read_file(path, buffer, sizeof(buffer))) {
        pkg = atoi(buffer);
    }
    if (pkg != -1) {
        return pkg;
    }

    /* fallbacks if sysfs is not working, tried in order */

    /* assume 0 if there is only one CPU or only one package */
    if ((num_cpus() == 1) || (num_packages() == 1)) {
        return 0;
    }

    /* get the physical package id from /proc/cpuinfo */
    if (!get_proc_cpuinfo_data("physical id", buffer, cpu)) {
        return atoi(buffer);
    }

    /* if the number of cpus equals the number of packages assume pkg_id = cpu_id */
    if (num_cpus() == num_packages()) {
        return cpu;
    }

    /* if there is only one core per package assume pkg_id = core_id */
    if (num_cores_per_package() == 1) {
        return get_core_id(cpu);
    }

    /* if the number of packages equals the number of numa nodes assume pkg_id = numa node */
    if (num_numa_nodes() == num_packages()) {
        return get_numa_node(cpu);
    }

    /* NOTE pkg_id in UMA systems with multiple sockets and more than 1 core
       per socket can't be determined without correct topology information
       in sysfs */
    return pkg;
}
예제 #5
0
파일: barrier.c 프로젝트: libsmelt/libsmelt
/*
 * Barrier built from a reduction followed by a broadcast.
 *
 * Every caller first calls mp_reduce(_num_barrier). Afterwards the thread
 * whose id equals get_sequentializer() calls mp_send_ab(), while all other
 * threads call mp_receive_forward().
 *
 * measurement: if non-NULL, receives bench_tsc() sampled between the two
 *              phases. NOTE: that store is compiled in only when
 *              QRM_DBG_ENABLED is defined; otherwise the pointer is untouched.
 *
 * With QRM_DBG_ENABLED the barrier counter is incremented on entry and both
 * the reduction result and the broadcast value are asserted against it.
 */
void mp_barrier(cycles_t *measurement)
{
    coreid_t tid = get_core_id();

#ifdef QRM_DBG_ENABLED
    // Debug builds count barriers; the received value defaults to the local
    // counter so the final check also holds on the sending thread.
    ++_num_barrier;
    uint32_t _num_barrier_recv = _num_barrier;
#endif

    debug_printfff(DBG__REDUCE, "barrier enter #%d\n", _num_barrier);

    // Reduction
    // --------------------------------------------------
    // The result of mp_reduce() is captured only in debug builds; the two
    // preprocessor branches together form a single statement.
#ifdef QRM_DBG_ENABLED
    uint32_t _tmp =
#endif
    mp_reduce(_num_barrier);

#ifdef QRM_DBG_ENABLED
    // Sanity check: on the sequentializer the reduced value must equal
    // (number of threads) * (barrier counter).
    if (tid==get_sequentializer()) {
        assert (_tmp == get_num_threads()*_num_barrier);
    }
    if (measurement)
        *measurement = bench_tsc();

#endif

    // Broadcast
    // --------------------------------------------------
    if (tid == get_sequentializer()) {
        mp_send_ab(_num_barrier);

    } else {
        // Debug builds keep the received value for the check below.
#ifdef QRM_DBG_ENABLED
        _num_barrier_recv =
#endif
            mp_receive_forward(0);
    }

#ifdef QRM_DBG_ENABLED
    // The value that came back must match the local barrier counter.
    if (_num_barrier_recv != _num_barrier) {
    debug_printf("ASSERTION fail %d != %d\n", _num_barrier_recv, _num_barrier);
    }
    assert (_num_barrier_recv == _num_barrier);

    // Add a shared memory barrier to absolutely make sure that
    // everybody finished the barrier before leaving - this simplifies
    // debugging, as the program will get stuck if barriers are
    // broken, rather than some threads (wrongly) continuing and
    // causing problems somewhere else
#if 0 // Enable separately
    debug_printfff(DBG_REDUCE, "finished barrier .. waiting for others\n");
    shl_barrier_shm(get_num_threads());
#endif
#endif

    debug_printfff(DBG__REDUCE, "barrier complete #%d\n", _num_barrier);
}
예제 #6
0
파일: testCSR.c 프로젝트: 8l/pulpino
/* Entry point: only core 0 runs the test suite; every core returns 0. */
int main()
{
  if(get_core_id() != 0) {
    return 0;
  }

  run_suite(testcases);
  return 0;
}
예제 #7
0
파일: smp.c 프로젝트: 24hours/linux
/*
 * smp_init_cpus - register the cores reported by the hardware as possible CPUs.
 *
 * Logs the core count and the id of the calling core, then marks CPUs
 * 0..ncpus-1 as possible. The count is capped at NR_CPUS so that
 * set_cpu_possible() is never called with an index the kernel was not
 * configured for.
 */
void __init smp_init_cpus(void)
{
	unsigned i;
	unsigned int ncpus = get_core_count();
	unsigned int core_id = get_core_id();

	pr_info("%s: Core Count = %d\n", __func__, ncpus);
	pr_info("%s: Core Id = %d\n", __func__, core_id);

	/* Hardware may report more cores than this kernel build supports. */
	if (ncpus > NR_CPUS) {
		ncpus = NR_CPUS;
		pr_info("%s: limiting core count by %d\n", __func__, ncpus);
	}

	for (i = 0; i < ncpus; ++i)
		set_cpu_possible(i, true);
}
예제 #8
0
파일: aesxam.c 프로젝트: 8l/pulpino
/*
 * AES self-test.
 *
 * Core 0 runs one encrypt+decrypt pass, counts every byte of the 16-byte
 * encrypt and decrypt outputs that differs from its reference buffer, and
 * stores the verdict at address 0x10003ffc: 1 when nothing differed,
 * otherwise the number of mismatches. Every core then calls synch_barrier()
 * and eoc(0).
 */
int main()
{
  int error = 0;
  int coreid = get_core_id();

  // set start value of jrand function
  next = 1;

  if (coreid == 0) {
    initialize_aes();

    // 1 iteration of enc+dec
    for (int round = 0; round < 1; round++) {
      compute_aes();

      // check output byte by byte against the expected buffers
      for (int i = 0; i < 16; i++) {
        if (encoutbuf[i] != check_encoutbuf[i]) {
          error += 1;
        }
        if (decoutbuf[i] != check_decoutbuf[i]) {
          error += 1;
        }
      }
    }

    // Publish the result word: 1 on success, mismatch count otherwise.
    int *DEFAULT_RESULT = (int*)0x10003ffc;
    *DEFAULT_RESULT = (error == 0) ? 1 : error;
  }

  synch_barrier();
  eoc(0);

}
예제 #9
0
파일: smp.c 프로젝트: AlexShiLucky/linux
/*
 * Log the reported core count and the id of the calling core, cap the count
 * at NR_CPUS, and mark CPUs 0..count-1 as possible.
 */
void __init smp_init_cpus(void)
{
	unsigned int cpu;
	unsigned int core_total = get_core_count();
	unsigned int this_core = get_core_id();

	pr_info("%s: Core Count = %d\n", __func__, core_total);
	pr_info("%s: Core Id = %d\n", __func__, this_core);

	if (core_total > NR_CPUS) {
		core_total = NR_CPUS;
		pr_info("%s: limiting core count by %d\n", __func__, core_total);
	}

	for (cpu = 0; cpu < core_total; cpu++) {
		set_cpu_possible(cpu, true);
	}
}
예제 #10
0
        /**
         * Brief: Hands a task to a chosen core; if that core is not running,
         *        a fresh detached thread executing run_core is started for it.
         * Param: task - the task to be executed.
         * Return: the id assigned to the task.
         */
        std::size_t add_task(TASK task)
        {
            const std::size_t core_id = get_core_id(); // Get a suitable core.
            const std::size_t task_id = get_task_id(); // Find the smallest id.

            task_pack<TASK> pack;
            pack.id = task_id;
            pack.task = std::move(task);

            // Create a record for this task before it is queued.
            set_result(task_id, T(), false);

            auto &core = cores_[core_id];
            core.add_task(std::move(pack));

            // Nothing more to do when the core thread is already running.
            if(core.running)
            {
                return task_id;
            }

            // Re-start the core thread.
            core.running = true;
            auto &worker = core_threads_[core_id];
            worker.reset(
                    new std::thread(&Scheduler<T, TASK>::run_core, this, core_id)
                    );
            worker->detach();

            return task_id;
        }
예제 #11
0
        /**
         * Brief: Queues a task on a chosen core and, when that core is not
         *        running, starts a detached run_core thread for it.
         * Param: task - the task to be executed.
         * Return: the id assigned to the task.
         */
        std::size_t add_task(TASK task)
        {
            const std::size_t core_id = get_core_id(); // Get a suitable core.
            const std::size_t task_id = get_task_id(); // Find the smallest id.

            task_pack<TASK> pack;
            pack.id = task_id;
            pack.task = std::move(task);

            // Create a record for this task.
            set_result(task_id, 0, false);

            auto &core = cores_[core_id];
            core.add_task(std::move(pack));

            if(core.running)
            {
                return task_id;
            }

            // The core is available: execute the task immediately.
            // The mutex will be unlocked at the end of the run_core method.
            core.running = true;
            auto &worker = core_threads_[core_id];
            worker.reset(
                    new std::thread(&Scheduler<void, TASK>::run_core, this, core_id)
                    );
            worker->detach();

            return task_id;
        }
예제 #12
0
/**
 * Cache invalidate zeroized memory on the cores (happens between the
 * calculations / after a calculation result and at boot time).
 *
 * Fills the queued message of the number_of_cores highest-numbered cores
 * (CORE_AMOUNT-1 downwards) with a pt_cacheinval request for TBuf/TSize and
 * then calls send_to_cores(). With _NO_IPC_TEST_ defined this function does
 * nothing.
 */
void cacheinval_on_core(const uint8_T *TBuf, const uint32_T TSize, const uint32_T number_of_cores)
{
#ifndef _NO_IPC_TEST_

    /* Which entry belongs to which parameter is defined by the union
     * declarations in struct processing_info. */
    const int lowest_core = (int)(CORE_AMOUNT-number_of_cores);
    int32_t core;

    for (core = CORE_AMOUNT-1; core >= lowest_core; --core)
    {
        process_message_t * const msg = p_queue_msg[core];

        msg->core_id = get_core_id(core);
        msg->info.processing_type = pt_cacheinval;

        msg->info.Tvec = TBuf;
        msg->info.Tsize = TSize;
    }

    send_to_cores(pt_cacheinval, number_of_cores, NULL, NULL, NULL);
#endif
}
예제 #13
0
/**
 * Prepare one ssd() and/or jacobian() IPC message.
 *
 * Copies the request parameters into the message queued for core CoreNo.
 * The small vectors (w, BoundBox, MarginAddon, DSPRange) are copied by
 * value; Tvec and Rvec are passed on as data pointer plus allocatedSize.
 * The current g_NewImageDataArrived value is recorded in the message too.
 */
void prepare_ipc_message(const processing_type_e ProcessingType, const real32_T w[3], const uint32_T BoundBox[4], const uint32_T MarginAddon[3], const real32_T
                         DSPRange[4], const emxArray_uint8_T *Tvec, const uint32_T TOffset, const emxArray_uint8_T *Rvec, const uint32_T ROffset,
                         const uint32_T d, const uint32_T CoreNo, const uint32_T i_from, const uint32_T i_to)
{
    process_message_t * const msg = p_queue_msg[CoreNo];

    msg->core_id = get_core_id(CoreNo);
    msg->info.processing_type = ProcessingType;

    /* fixed-size parameter vectors */
    memcpy(msg->info.w, w, 3 * sizeof(real32_T));
    memcpy(msg->info.BoundBox, BoundBox, 4 * sizeof(uint32_T));
    memcpy(msg->info.MarginAddon, MarginAddon, 3 * sizeof(uint32_T));
    memcpy(msg->info.DSPRange, DSPRange, 4 * sizeof(real32_T));

    /* T image */
    msg->info.Tvec = &Tvec->data[0];
    msg->info.Tsize = Tvec->allocatedSize;
    msg->info.TOffset = TOffset;

    /* R image */
    msg->info.Rvec = &Rvec->data[0];
    msg->info.Rsize = Rvec->allocatedSize;
    msg->info.ROffset = ROffset;

    /* remaining scalars */
    msg->info.d = d;
    msg->info.i_from = i_from;
    msg->info.i_to = i_to;
    msg->info.NewImageDataArrived = g_NewImageDataArrived;
}
예제 #14
0
/////////////////////////////////////////////////////////
// main testing function 
//
// Runs the fLog kernel KERNEL_ITS times on this core's
// 100-element slice of fv0, then checks the sum and the
// variance of the output against bounds stored in fv1.
// Returns 0 on pass, 1 on fail.
/////////////////////////////////////////////////////////
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  int coreid;
  int it;

  int k;
  boolean_T pass, flag;
  float y[100];      // kernel output
  int ix;
  float b_y;         // sum of y[]
  float xbar;        // mean of y[]
  float r;           // deviation of one element from the mean
  float c_y;         // variance of y[] (divisor 99)
  float tmp[2];      // measured values: [0] = sum, [1] = variance
  float golden[4];   // bounds for both values, see the check loop below


  /////////////////////////////////////////////////////////
  // main test loop 
  // each core loops over a kernel instance
  /////////////////////////////////////////////////////////

  coreid = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid);

  // Ids above 3 are shifted down by 4 before being used as a data index.
  if (coreid>3)
    coreid=coreid-4;

  synch_barrier();

  perf_begin();

  for(it = 0; it < KERNEL_ITS; it++)
  {
    // matlab kernel
    for (ix = 0; ix < 100; ix++) {
      y[ix] = (real32_T)fLog(fv0[ix + 100 * coreid]);
    }
  }

  synch_barrier();

  perf_end();

  synch_barrier();

  /////////////////////////////////////////////////////////
  // check results
  /////////////////////////////////////////////////////////

  pass = true;
  // b_y and xbar accumulate the same elements in the same order; b_y is kept
  // as the sum while xbar is scaled to the mean afterwards.
  b_y = y[0];
  ix = 0;
  xbar = y[0];
  for (k = 0; k < 99; k++) {
    b_y += y[k + 1];
    ix++;
    xbar += y[ix];
  }

  xbar *= 1.0F/100.0F;
  // Sum of squared deviations from the mean.
  ix = 0;
  r = y[0] - xbar;
  c_y = r * r;
  for (k = 0; k < 99; k++) {
    ix++;
    r = y[ix] - xbar;
    c_y += r * r;
  }

  // Divide by n-1 = 99.
  c_y *= 1.0F/99.0F;
  tmp[0] = b_y;
  tmp[1] = c_y;
  pass  = true;   // repeats the initialisation above

  // For value ix, golden[2*ix] is the upper bound and golden[2*ix + 1] the
  // lower bound; each core slot owns four consecutive entries of fv1.
  for (ix = 0; ix < 2; ix++) {
    for (k = 0; k < 2; k++) {
      golden[k + (ix << 1)] = fv1[(k + (ix << 1)) + (coreid << 2)];
    }
    flag = true;
    flag = flag && (tmp[ix] <= golden[ix << 1]);
    flag = flag && (tmp[ix] >= golden[1 + (ix << 1)]);
    printErrors(!flag, ix, tmp[ix] ,golden[(ix << 1)] ,golden[1 + (ix << 1)]);
    pass = pass && flag;
  }

  // Report under the unmodified core id, not the folded data index.
  flagPassFail(pass, get_core_id());
  
  synch_barrier();

/////////////////////////////////////////////////////////
// synchronize and exit
/////////////////////////////////////////////////////////

  return !pass;
}
예제 #15
0
#include "memTester.h"

// Thread function memTester, declared through the sl_* macro layer.
// It takes one shared parameter, syscall_gateway (an sl_place_t), prints a
// banner with its thread index, core id and place id, and then calls run(0).
sl_def(memTester,, sl_shparm(sl_place_t, syscall_gateway)) {
	// Read the shared parameter and hand its address to syscall_target().
	sl_place_t syscall_gateway = sl_getp(syscall_gateway);
	syscall_target(&syscall_gateway);

    sl_index(i);
    unsigned pid = get_current_place();
    unsigned core_id = get_core_id();

    // Banner; the second argument 2 is passed to every output_* call
    // (presumably an output stream selector -- TODO confirm).
    output_string("MemTester (thread ", 2);
    output_uint((unsigned int)i, 2);
    output_string(") now running on core ", 2);
    output_uint(core_id, 2);
    output_string(", place_id ", 2);
    output_hex(pid, 2);
    output_char('\n', 2);
    output_char('\n', 2);
    output_char('\n', 2);

    run(0);

    //Let the compiler know we care
    // (writes the shared parameter back with the value just read from it)
	sl_setp(syscall_gateway, sl_getp(syscall_gateway));
}
sl_enddef

// Space reserved for testing small pages (4KiB):
//	0x440000 - 0x550000 inclusive
// Smallest page = 4KB = 0x440000 : 0x440FFF
//
예제 #16
0
/////////////////////////////////////////////////////////
// main testing function 
//
// Generates a 200-sample sine signal per core (frequency
// step from fv1, amplitude from fv2), then checks the sum
// and the variance of the signal against bounds in fv0.
// Returns 0 on pass, 1 on fail.
/////////////////////////////////////////////////////////
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  boolean_T pass, flag;
  int coreid;
  
  float omega, ampl, runningPhase;
  float sig[200];    // kernel output
  int k;
  int i;
  float y;           // sum of sig[]
  int ix;
  float xbar;        // mean of sig[]
  float r;           // deviation of one sample from the mean
  float b_y;         // variance of sig[] (divisor 199)
  float tmp[2];      // measured values: [0] = sum, [1] = variance

  float golden[4];   // bounds for both values, see the check loop below
  
  /////////////////////////////////////////////////////////
  // main test loop 
  // each core loops over a kernel instance
  /////////////////////////////////////////////////////////
  
  coreid = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid);
  
  // Ids above 3 are shifted down by 4 before being used as a data index.
  if (coreid>3)
    coreid=coreid-4;

  synch_barrier();
  
  perf_begin();

  omega =  fv1[coreid];
  ampl  =  fv2[coreid];
  
  for(k = 0; k < getKernelIts(); k++)
  {
    runningPhase = omega;
  
    // matlab kernel
    for (i = 0; i < 200; i++) {
      sig[i] = ampl * fSin(runningPhase);
      runningPhase += omega;

      // Wrap the phase once it exceeds pi2[0].
      if(runningPhase > pi2[0])
      {
        runningPhase -= pi2[0];
      }
    }
  }

  synch_barrier();

  perf_end();

  /////////////////////////////////////////////////////////
  // check results
  /////////////////////////////////////////////////////////

  synch_barrier();


  // y and xbar accumulate the same samples in the same order; y is kept as
  // the sum while xbar is scaled to the mean afterwards.
  y = sig[0];
  ix = 0;
  xbar = sig[0];
  for (k = 0; k < 199; k++) {
    y += sig[k + 1];
    ix++;
    xbar += sig[ix];
  }

  xbar = fDiv(xbar,200.0F);
  // Sum of squared deviations from the mean.
  ix = 0;
  r = sig[0] - xbar;
  b_y = r * r;
  for (k = 0; k < 199; k++) {
    ix++;
    r = sig[ix] - xbar;
    b_y += r * r;
  }

  // Divide by n-1 = 199.
  b_y = fDiv(b_y,199.0F);
  tmp[0] = y;
  tmp[1] = b_y;

  // For value k, golden[2*k] is the upper bound and golden[2*k + 1] the
  // lower bound; each core slot owns four consecutive entries of fv0.
  pass = true;
  for (k = 0; k < 2; k++) {
    for (ix = 0; ix < 2; ix++) {
      golden[ix + (k << 1)] = fv0[(ix + (k << 1)) + (coreid << 2)];
    }
    flag = true;
    flag = flag && (tmp[k] <= golden[k << 1]);
    flag = flag && (tmp[k] >= golden[1 + (k << 1)]);
    printErrors(!flag, k, tmp[k] ,golden[k << 1], golden[1 + (k << 1)]);
    pass = pass && flag;
  }

  // Report under the unmodified core id, not the folded data index.
  flagPassFail(pass, get_core_id());

/////////////////////////////////////////////////////////
// synchronize and exit
/////////////////////////////////////////////////////////

  return !pass;
}
예제 #17
0
/*
 * Blob-detection benchmark driver.
 *
 * Only core 0 does any work. For each of the NFRAME frames it copies the
 * frame's pixel data from pixelL2 into pixel, runs the four processing
 * stages with a timer around each one, prints the resulting blob list and
 * compares every blob field with the reference table `results`. It calls
 * eoc(0) after the last frame.
 */
int main() {

	/* 	Variable Definition	*/
	int coreid;
	int i,j;
	int start_frame;   /* offset of the current frame inside pixelL2 */
	int index;         /* write position inside pixel */

	coreid = get_core_id();
	if (coreid == 0) {
		//	initialization
		NB_BLOB = 0;
		start_frame = 0;

		for(i=0;i<NFRAME;i++){	// for each frame

			//%%%%%%%%%%%%%%%	* DATA TRANSFER FROM L2 TO L1 *	%%%%%%%%%%%%%%%//

			index = 0;
			N_pixel = N_pixelL2[i];

			/* Timed copy: 2 entries per pixel, N_pixel pixels. */
			reset_timer();
			start_timer();
			for(j=start_frame; j<start_frame + N_pixel*2; j++){
				pixel[index++]=pixelL2[j];
			}
			stop_timer();
			printf("FRAME: %d (%d-%d) Transfer Time: %d\n",i,start_frame,j,get_time());

			/* The next frame starts where this one ended. */
			start_frame = j;

			//%%%%%%%%%%%%%%%	* PROCESSING *	%%%%%%%%%%%%%%%//

			init_data();

			reset_timer();
			start_timer();
			blob_formation();
			stop_timer();
			printf("Blob Formation Time: %d\n",get_time(),0,0,0);

			reset_timer();
			start_timer();
			prevBlob_filter();
			stop_timer();
			printf("Filtering prev Blob List Time: %d\n",get_time(),0,0,0);

			reset_timer();
			start_timer();
			newBlob_filter();
			stop_timer();
			printf("Filtering new Blob List Time: %d\n",get_time(),0,0,0);

			reset_timer();
			start_timer();
			blob_merge();
			stop_timer();
			printf("Blob Merging Time: %d\n",get_time(),0,0,0);

			//%%%%%%%%%%%%%%%	* CHECKSUM *	%%%%%%%%%%%%%%%//

			printf("FRAME = %d\n",i,0,0,0);
			printf("NB_BLOB = %d\n",NB_BLOB,0,0,0);

			/* results holds 6 values per blob and B_MAX blob slots per frame,
			   in the order xc, yc, xmax, xmin, ymax, ymin. */
			for(j=0;j<NB_BLOB;j++){
				printf("Blob %d: centroid = (%d,%d), weight = %d, ",j,BLOB_LIST[j].xc,BLOB_LIST[j].yc,BLOB_LIST[j].W);
				printf("xmax = %d, xmin = %d, ymax = %d, ymin = %d\n",BLOB_LIST[j].xmax, BLOB_LIST[j].xmin, BLOB_LIST[j].ymax, BLOB_LIST[j].ymin);
				if(BLOB_LIST[j].xc == results[(i*B_MAX+j)*6 ]) 		printf("OK xc!\t",0,0,0,0);	else	printf("FAIL xc!\t",0,0,0,0);
				if(BLOB_LIST[j].yc == results[(i*B_MAX+j)*6+1 ]) 	printf("OK yc!\t",0,0,0,0);	else	printf("FAIL yc!\t",0,0,0,0);
				if(BLOB_LIST[j].xmax == results[(i*B_MAX+j)*6+2 ]) 	printf("OK xmax!\t",0,0,0,0);	else	printf("FAIL xmax!\t",0,0,0,0);
				if(BLOB_LIST[j].xmin == results[(i*B_MAX+j)*6+3 ]) 	printf("OK xmin!\t",0,0,0,0);	else	printf("FAIL xmin!\t",0,0,0,0);
				if(BLOB_LIST[j].ymax == results[(i*B_MAX+j)*6+4 ]) 	printf("OK ymax!\t",0,0,0,0);	else	printf("FAIL ymax!\t",0,0,0,0);
				if(BLOB_LIST[j].ymin == results[(i*B_MAX+j)*6+5 ]) 	printf("OK ymin!\n",0,0,0,0);	else	printf("FAIL ymin!\n",0,0,0,0);
			}
			printf("\n\n",0,0,0,0);
		}

		eoc(0);
	}


}
예제 #18
0
파일: montepi.c 프로젝트: cdkersey/iqyax
/*
 * Advance the generator state at *p by calling rand_r() once for every core
 * id below the caller's own; the generated values are discarded. The core
 * with id 0 leaves *p untouched.
 */
void init_rng(unsigned *p) {
  unsigned step = 0;

  while (step < get_core_id()) {
    rand_r(p);
    step++;
  }
}
예제 #19
0
/*
 * Duplicate the mappings of src_ctx into dst_ctx.
 *
 * src_ctx's lock is write-held for the whole walk. For every mapping in
 * src_ctx a new mapping with the same start address, length, protection and
 * fd is appended to dst_ctx:
 *   - read-only mappings share the existing object (its refcnt is bumped);
 *   - writable mappings get two new private objects chained to the original
 *     one: the source mapping is re-pointed at the first, the new mapping
 *     uses the second.
 * A global TLB flush is issued before the lock is dropped.
 *
 * Returns 0 on success, -1 if creating a mapping or an object fails.
 */
int mm_ctx_dup(mm_context *dst_ctx, mm_context *src_ctx)
{
    /* Copy the src ctx to the dst creating mappings as we go
     *
     * Return -1 if there is a problem
     */

    struct mm_mapping *pmap;
    int               err;   /* not referenced anywhere in this function */

    kdebug("MM_CTX_DUP core = %d\n", get_core_id());
    mm_context_dump(src_ctx);
    mm_context_dump(dst_ctx);

    /* NOTE(review): only src_ctx is locked here; dst_ctx is modified below
     * without taking its lock -- confirm callers guarantee exclusivity. */
    rwlock_wrlock(&src_ctx->lock);

    pmap = first_mapping(src_ctx);

    while (pmap)
    {
        struct mm_object  *new_obj1, *new_obj2;
        struct mm_mapping *new_map;

        if (!(pmap->prot & PROT_WRITE))
        {
            /* This map is read-only - share the existing object */

            new_obj1 = pmap->object;
            ++new_obj1->refcnt;

            /* Create a new mapping */
            if ((new_map = mapping_new(pmap->start_addr, pmap->length, pmap->prot,
                        pmap->fd, new_obj1)) == NULL)
            {
                /* NOTE(review): the refcnt bump above is not undone on this
                 * path -- confirm whether that is intended. */
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
        }
        else
        {
            /* This map is writable - make a new object */
            /* Create an mm_object */

            /* TODO: COW */

            /*
             * Create 2 new objects
             * Point them at the existing obj
             * Create new map
             * Point each map at an obj
             */

            if ((new_obj1 = object_new()) == NULL)
            {
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
            if ((new_obj2 = object_new()) == NULL)
            {
                /* NOTE(review): new_obj1 is not released on this path --
                 * looks like a leak; confirm against the object API. */
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
            new_obj1->chain = pmap->object;
            new_obj1->share_type = share_private;
            new_obj2->chain = pmap->object;
            new_obj2->share_type = share_private;
            //pmap->object->share_type = cow;

            /* Point the original map at new_obj1 */
            pmap->object = new_obj1;

            /* Create a new mapping to point at new_obj2 */
            if ((new_map = mapping_new(pmap->start_addr, pmap->length, pmap->prot,
                        pmap->fd, new_obj2)) == NULL)
            {
                /* NOTE(review): the source mapping has already been switched
                 * to new_obj1 and new_obj2 is not released here -- confirm. */
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }

            mm_change_commit(src_ctx, pmap, PM_USER);
        }

        /* Hand the new mapping to the destination context. Mappings added in
         * earlier iterations stay in dst_ctx if a later iteration fails. */
        dst_ctx->mapping_list.add_tail(new_map);
        mm_check_uncommit(dst_ctx, new_map);

        /* Go to the next parent mapping */
        pmap = next_mapping(src_ctx, pmap);
    }

    cpu_tlb_flush_global();

    rwlock_unlock(&src_ctx->lock);
//    kdebug("MM_CTX_DUP done\n");
//    mm_context_dump(src_ctx);
//    mm_context_dump(dst_ctx);
//    pagestruct_audit();

    return 0;
}
예제 #20
0
/////////////////////////////////////////////////////////
// Test driver: time the eml_xgesvd kernel on this core's
// 25-element slice of fv0, then compare three checksums
// of the result with bounds taken from fv1.
// Returns 0 on pass, 1 on fail.
/////////////////////////////////////////////////////////
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  float U[25];
  float s[5];
  float V[25];

  float mag[25];     // absolute values of U, later of V
  float sums[3];     // [0] = sum|U|, [1] = sum(s), [2] = sum|V|
  float sum_abs_u;
  float sum_s;
  float sum_abs_v;
  boolean_T ok;
  int slot, iter, n;

  init_fp_regs();

  // ---- timed section: each core loops over a kernel instance ----
  slot = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,slot);

  // Ids above 3 are shifted down by 4 before being used as a data index.
  if (slot > 3) {
    slot -= 4;
  }

  synch_barrier();
  perf_begin();

  iter = 0;
  while (iter < getKernelIts()) {
    // call matlab kernel
    eml_xgesvd(*(float (*)[25])&fv0[25 * slot], U, s, V);
    iter++;
  }

  synch_barrier();
  perf_end();

  // ---- check results ----
  synch_barrier();

  for (n = 0; n < 25; n++) {
    mag[n] = fAbs(U[n]);
  }

  sum_abs_u = mag[0];
  sum_s = s[0];

  for (n = 1; n < 5; n++) {
    sum_s += s[n];
  }

  for (n = 1; n < 25; n++) {
    sum_abs_u += mag[n];
  }

  for (n = 0; n < 25; n++) {
    mag[n] = fAbs(V[n]);
  }

  sum_abs_v = mag[0];
  for (n = 1; n < 25; n++) {
    sum_abs_v += mag[n];
  }

  sums[0] = sum_abs_u;
  sums[1] = sum_s;
  sums[2] = sum_abs_v;

  // Six entries of fv1 per slot: (upper, lower) bound for each checksum.
  ok = true;
  for (n = 0; n < 3; n++) {
    ok = ok && (sums[n] <= fv1[(0 + (n << 1)) + 6 * slot]);
    ok = ok && (sums[n] >= fv1[(1 + (n << 1)) + 6 * slot]);
  }

  flagPassFail(ok, get_core_id());

  // ---- synchronize and exit ----
  synch_barrier();

  return !ok;
}
예제 #21
0
파일: montepi.c 프로젝트: cdkersey/iqyax
/*
 * Monte Carlo estimate of pi, run by every core.
 *
 * Each core performs TRIALS / get_num_cores() trials and counts hits in
 * count[own id]. The per-core counts are then summed pairwise, halving the
 * number of active cores each round, until count[0] holds the total, which
 * core 0 prints as a two-decimal approximation of pi.
 */
int main() {
  /* Initialize the RNG: the state starts at 0 and init_rng() advances it */
  unsigned rng, i, trials = TRIALS / get_num_cores(), x, y, active;
  rng = 0;
  init_rng(&rng);

  barrier(&b0);

  /* Do the simulation: a point counts as a hit when x*x + y*y does not
     exceed 100000000, i.e. 10000 squared */
  for (i = 0; i < trials; ++i) {
    x = rand_mc(&rng) / ((1u<<31)/10000);
    y = rand_mc(&rng) / ((1u<<31)/10000);
    if (x * x + y * y <= 100000000) count[get_core_id()]++;
  }

  printf("core %u: %u\n", get_core_id(), count[get_core_id()]);

  /* Three barriers are used in rotation: after all cores pass one of them,
     core 0 re-initializes a different one so it can be used again. */
  barrier(&b1); if (get_core_id() == 0) barrier_init(&b0);
  barrier(&b2); if (get_core_id() == 0) barrier_init(&b1);
  /* Do the final reduction: in each round the lower `active` cores add the
     count of the core `active` positions above them */

  for (active = get_num_cores()/2; active > 0; active /= 2) {
    if (get_core_id() < active) {
      unsigned idx0 = get_core_id(), idx1 = get_core_id() + active;
      count[idx0] = count[idx0] + count[idx1];
      printf("%u active cores, sum = %u\n", active, count[idx0]);
    }

    /* Every core, active or not, goes through the barrier rotation. */
    barrier(&b0); if (get_core_id() == 0) barrier_init(&b2);
    barrier(&b1); if (get_core_id() == 0) barrier_init(&b0);
    barrier(&b2); if (get_core_id() == 0) barrier_init(&b1);
  }

  /* pi ~= 4 * hits / TRIALS, printed as whole part and two decimals using
     integer arithmetic only */
  if (get_core_id() == 0) {
    unsigned pi_whole = count[0]*4/TRIALS,
             pi_frac = count[0]*4%TRIALS/(TRIALS/100);
    printf("pi is approximately %u.%02u\n", pi_whole, pi_frac);
  }

  return 0;
}
예제 #22
0
/////////////////////////////////////////////////////////
// main testing function 
//
// Runs a 10x10 matrix-vector kernel on this core's slice
// of the input tables, then checks the sum and the
// variance of the output against bounds stored in fv4.
// Returns 0 on pass, 1 on fail.
/////////////////////////////////////////////////////////
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  int coreid;
  int it;

  boolean_T pass;
  boolean_T flag;
  float y[10];       // kernel output
  int ix;
  float b_y;         // accumulator in the kernel, later the sum of y[]
  int b_k;
  float xbar;        // mean of y[]
  float r;           // deviation of one element from the mean
  float c_y;         // variance of y[] (divisor 9)
  float check[2];    // measured values: [0] = sum, [1] = variance
  float golden[4];   // bounds for both values, see the check loop below



  /////////////////////////////////////////////////////////
  // main test loop 
  // each core loops over a kernel instance
  /////////////////////////////////////////////////////////
  coreid = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid);

  // Ids above 3 are shifted down by 4 before being used as a data index.
  if (coreid>3)
    coreid=coreid-4;

  synch_barrier();

  perf_begin();

  for(it = 0; it < getKernelIts(); it++)
  {
    // matlab kernel: y = fv1 * fv0 + fv3 * fv2 on this core's slices
    for (ix = 0; ix < 10; ix++) {
      b_y = 0.0F;
      for (b_k = 0; b_k < 10; b_k++) {
        b_y += fv1[(ix + 10 * b_k) + 100 * coreid] * fv0[b_k + 10 * coreid];
      }
      y[ix] = b_y + fv3[coreid] * fv2[ix + 10 * coreid];
    }
  }

  synch_barrier();

  perf_end();

  synch_barrier();

  /////////////////////////////////////////////////////////
  // check results
  /////////////////////////////////////////////////////////

  // b_y and xbar accumulate the same elements in the same order; b_y is kept
  // as the sum while xbar is scaled to the mean afterwards.
  b_y = y[0];
  ix = 0;
  xbar = y[0];
  for (b_k = 0; b_k < 9; b_k++) {
    b_y += y[b_k + 1];
    ix++;
    xbar += y[ix];
  }

  xbar *= 1.0F/10.0F;
  // Sum of squared deviations from the mean.
  ix = 0;
  r = y[0] - xbar;
  c_y = r * r;
  for (b_k = 0; b_k < 9; b_k++) {
    ix++;
    r = y[ix] - xbar;
    c_y += r * r;
  }

  // Divide by n-1 = 9.
  c_y *= 1.0F/9.0F;
  check[0] = b_y;
  check[1] = c_y;

  // For value ix, golden[2*ix] is the upper bound and golden[2*ix + 1] the
  // lower bound; each core slot owns four consecutive entries of fv4.
  pass = true;
  for (ix = 0; ix < 2; ix++) {
    for (b_k = 0; b_k < 2; b_k++) {
      golden[b_k + (ix << 1)] = fv4[(b_k + (ix << 1)) + (coreid << 2)];
    }
    // Both bounds must hold. Each comparison is folded into flag so the
    // upper-bound result is not overwritten by the lower-bound one.
    flag = true;
    flag = flag && (check[ix] <= golden[ix << 1]);
    flag = flag && (check[ix] >= golden[1 + (ix << 1)]);
    printErrors(!flag, ix, check[ix], golden[ix<<1], golden[1+(ix<<1)]);
    pass = pass && flag;
  }
  // Report under the unmodified core id, not the folded data index.
  flagPassFail(pass, get_core_id());


/////////////////////////////////////////////////////////
// synchronize and exit
/////////////////////////////////////////////////////////

  return !pass;
}