예제 #1
0
파일: time.c 프로젝트: astachurski/ling
/*
 * Record the wall-clock base published in shared_info and the wall-clock
 * reading taken at initialisation.
 *
 * wc_sec / wc_nsec are sampled between two reads of wc_version. The sample
 * is discarded and retried while the first version read is odd or differs
 * from the version read after the sample.
 */
void time_init(void)		// UTC
{
	uint32_t seq_before;
	uint32_t seq_after;
	uint32_t secs;
	uint32_t nsecs;

	get_time_values_from_xen();

	for (;;) {
		seq_before = shared_info.wc_version;
		rmb();
		secs = shared_info.wc_sec;
		nsecs = shared_info.wc_nsec;
		rmb();
		seq_after = shared_info.wc_version;

		/* accept only an even version that did not move during the sample */
		if ((seq_before & 1) == 0 && seq_before == seq_after)
			break;
	}

	/* widen the seconds to 64 bits before scaling them to nanoseconds */
	wall_clock_base = (uint64_t)secs * 1000000000ULL + nsecs;
	start_of_day_wall_clock = wall_clock();
}
예제 #2
0
파일: mtwrk.c 프로젝트: jys673/Synergy
main()
{
	int G, tsd, res, i, j, k, status;
	int ix, ia, ib, tplength;
	double t0, t1;
	//sleep(20);

	tsd = cnf_open("problem",0);

	res = cnf_open("result",0);

	strcpy(tpname,"B*");
	status = cnf_tsread(tsd, tpname, (double *)ituple_B, 0); 
    //printf(" mtwrk. received B (%s) \n", tpname);
	tplength = (1+N*N)*sizeof(double);
	if ((ituple_A = (double *)malloc(tplength)) == NULL) 
		exit(-1);
	while (1)  		/* loop forever */
	{
		printf("Worker waiting for a tuple \n");
		strcpy(tpname,"A*");
		tplength = cnf_tsget(tsd, tpname, ituple_A, 0);
		t0 = wall_clock();

		//printf(" mtwrk got (%s) \n",tpname);

		ix = atoi(&tpname[1]);
		if (tplength > 0) {		/* normal receive */
			G = (int) ituple_A[0];

			printf(" mtwrk got ix (%d) G(%d) \n",ix, G);

		/* check for the application termination signal */
			if (G == -1) {
				status = cnf_tsput(tsd, tpname, ituple_A, tplength);
				cnf_term();
				return;
			}
			for (i = 0; i < G; i++)
				for (j = 0; j < N; j++)
				{
					ita[i][j] = ituple_A[i*N+j+1];
					ott[i][j] = 0;
				}
			if ((otuple = (double *)malloc(tplength)) == NULL)
				exit(-1);
			otuple[0] = ituple_A[0];

			for (i =0; i < G; i++)
			    for (k =0; k < N; k++)
			    	for (j =0; j < N; j++)
			    		ott[i][j] = ott[i][j] + ita[i][k] * ituple_B[k][j];


			for (i = 0; i < G; i++)
				for (j = 0; j < N; j++)
					otuple[i*N+j+1] = ott[i][j];


			 sprintf(tpname,"%d\0",ix);
			 //printf(" mtwrk. put in (%s) \n",tpname);
			 status = cnf_tsput(res, tpname, otuple, tplength);
			 t1 = wall_clock() - t0;
			 //printf(" Worker MFLOPS = (%f) \n", N*N*G/t1);
			 free(otuple);
		} else {
			printf("Worker Terminated \n"); 
			cnf_term();
			return;
		}
	}

//#endif
}
예제 #3
0
/*
 * Compute pi by binary splitting and print timing statistics to stderr.
 *
 * Command line: <digits> [<option> [<threads>]]
 *   digits  - digits of pi to compute
 *   option  - bit 0 set: write the digits to stdout
 *   threads - team size for the OpenMP regions; values below 1 are
 *             treated as 1
 *
 * Exits with status 1 when run without arguments, otherwise with 0.
 */
int
main(int argc,char *argv[])
{
  mpf_t  pi,qi,ci;
  mpz_t   pstack,qstack,gstack;
  long d=100,out=0,threads=1,depth,psize,qsize;
  double begin, mid0, mid3, mid4, end;
  double wbegin, wmid0, wmid3, wmid4, wend;

  prog_name = argv[0];

  if (argc==1) {
    fprintf(stderr,"\nSyntax: %s <digits> <option> <threads>\n",prog_name);
    fprintf(stderr,"      <digits> digits of pi to output\n");
    fprintf(stderr,"      <option> 0 - just run (default)\n");
    fprintf(stderr,"               1 - output digits\n");
    fprintf(stderr,"      <threads> number of threads (default 1)\n");
    exit(1);
  }
  if (argc>1)
    d = strtoul(argv[1],0,0);
  if (argc>2)
    out = atoi(argv[2]);
  if (argc>3)
    threads = atoi(argv[3]);

  /* num_threads() requires a positive value; atoi yields 0 for text that
     is not a number and passes negative input through unchanged */
  if (threads < 1)
    threads = 1;

  /* depth: smallest exponent with 2^depth >= terms, plus one */
  terms = d/DIGITS_PER_ITER;
  depth = 0;
  while ((1L<<depth)<terms)
    depth++;
  depth++;

  fprintf(stderr,"#terms=%ld, depth=%ld, threads=%ld cores=%d\n", terms, depth, threads, get_nprocs());

  begin = cpu_time();
  wbegin = wall_clock();

  mpz_init(pstack);
  mpz_init(qstack);
  mpz_init(gstack);

  /* begin binary splitting process */

  if (terms<=0) {
    /* nothing to split: p = 1, q = 0, g = 1 */
    mpz_set_ui(pstack,1);
    mpz_set_ui(qstack,0);
    mpz_set_ui(gstack,1);
  } else {
#ifdef _OPENMP
    /* one thread starts bs(); the rest of the team is there for whatever
       tasks bs() creates (presumably -- bs() is defined elsewhere) */
    #pragma omp parallel num_threads(threads)
      #pragma omp single nowait
      {
         bs(0,terms,1,pstack,qstack,gstack);
      }
#else
      bs(0,terms,1,pstack,qstack,gstack);
#endif

  }

  mid0 = cpu_time();
  wmid0 = wall_clock();
  fprintf(stderr,"bs       cputime = %6.2f  wallclock = %6.2f   factor = %6.1f\n",
    mid0-begin,wmid0-wbegin,(mid0-begin)/(wmid0-wbegin));
  fflush(stderr);

  mpz_clear(gstack);

  /* prepare to convert integers to floats */

  mpf_set_default_prec((long)(d*BITS_PER_DIGIT+16));

  /*
	  p*(C/D)*sqrt(C)
    pi = -----------------
	     (q+A*p)
  */

  /* sizes are taken before p and q are modified below */
  psize = mpz_sizeinbase(pstack,10);
  qsize = mpz_sizeinbase(qstack,10);

  mpz_addmul_ui(qstack,pstack,A);     /* q += A*p   */
  mpz_mul_ui(pstack,pstack,C/D);      /* p *= C/D   */

  mpf_init(pi);
  mpf_set_z(pi,pstack);
  mpz_clear(pstack);

  mpf_init(qi);
  mpf_set_z(qi,qstack);
  mpz_clear(qstack);

  /* final step */

  mid3 = cpu_time();
  wmid3 = wall_clock();

#ifdef _OPENMP
  /* the division and the square root are independent, so each gets a task */
  #pragma omp parallel num_threads(threads)
    #pragma omp single nowait
    {
      #pragma omp task shared(qi,pi)
      {
        mpf_div(qi,pi,qi);
        mpf_clear(pi);
      }
      #pragma omp task shared(ci)
      {
        mpf_init(ci);
        mpf_sqrt_ui(ci,C);
      }
      #pragma omp taskwait 
    }
#else
      mpf_div(qi, pi, qi);
      mpf_clear(pi);
      mpf_init(ci);
      mpf_sqrt_ui(ci, C);
#endif

  mid4 = cpu_time();
  wmid4 = wall_clock();
  fprintf(stderr,"div/sqrt cputime = %6.2f  wallclock = %6.2f   factor = %6.1f\n",
    mid4-mid3,wmid4-wmid3,(mid4-mid3)/(wmid4-wmid3));

  /* qi now holds p*(C/D)/(q+A*p); multiplying by sqrt(C) completes pi */
  mpf_mul(qi,qi,ci);
  mpf_clear(ci);

  end = cpu_time();
  wend = wall_clock();
  fprintf(stderr,"mul      cputime = %6.2f  wallclock = %6.2f   factor = %6.1f\n",
    end-mid4,wend-wmid4,(end-mid4)/(wend-wmid4));

  fprintf(stderr,"total    cputime = %6.2f  wallclock = %6.2f   factor = %6.1f\n",
    end-begin,wend-wbegin,(end-begin)/(wend-wbegin));
  fflush(stderr);

  fprintf(stderr,"   P size=%ld digits (%f)\n"
	 "   Q size=%ld digits (%f)\n",
	 psize, (double)psize/d, qsize, (double)qsize/d);

  /* output Pi and timing statistics */

  if (out&1)  {
    fprintf(stdout,"pi(0,%ld)=\n", terms);
    mpf_out_str(stdout,10,d,qi);
    fprintf(stdout,"\n");
  }

  /* free float resources */

  mpf_clear(qi);

  exit (0);
}
예제 #4
0
/*
 * Master side of a blocked LU factorisation of the N x N matrix outMat.
 *
 * outMat is initialised with N on the diagonal and 1 elsewhere. The matrix
 * is then processed in diagonal blocks of at most M x M:
 *   1. the diagonal block is factored with LUFactor();
 *   2. the blocks to the right of and below it are passed through
 *      TriangleSolver() (modes 1 and 2);
 *   3. i, j and the trailing part of outMat are sent to the "distributor"
 *      space and the updated trailing submatrix is read back from the
 *      "constructor" space.
 *
 * Prints an MFLOPS figure and the elapsed time. Returns 0; calls exit(-1)
 * when a space operation or an allocation fails.
 */
int main(int argc, char **argv)
{
 
/* <reference> */
 
    float inSubMat[M][M], outSubMat[M][M], LU[M][M], L[M][M], U[M][M];
    int subdist, rowdist, coldist;
    int i, j, dist, k1, k2, p1, p2, q1, q2, q3;
 
/* </reference> */
 

    double t0, t1;

    t0 = wall_clock();

    /* test matrix: N on the diagonal, 1 everywhere else */
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            if (i == j)
            {
                outMat[i][j] = N;
            }
            else
            {
                outMat[i][j] = 1;
            }
        }
    }

     
/* <master id="234567"> */
_distributor = _open_space("distributor", 0, "234567");
_constructor = _open_space("constructor", 0, "234567");
 

    i = 0;
    while (i < N)
    {
        /* [i, j] is the index range of the current diagonal block; the
           last block is clipped to the matrix edge */
        j = i + M - 1;
        dist = M;
        if (j > N-1) 
        {
            j = N-1;
            dist = N - i;
        }
        /* LU factors for submatrix */
        for (k1 = 0; k1 < dist; k1++)
        {
            for (k2 = 0; k2 < dist; k2++)
            {
                inSubMat[k1][k2] = outMat[i+k1][i+k2];
            }
        }
        LUFactor(inSubMat, outSubMat, dist);
        /* update */
        for (k1 = 0; k1 < dist; k1++)
        {
            for (k2 = 0; k2 < dist; k2++)
            {
                outMat[i+k1][i+k2] = outSubMat[k1][k2];
                LU[k1][k2] = outSubMat[k1][k2];
            }
        }

        /* Solve triangles, LZ and WU */

        /* This loop only runs when j < N-1, i.e. when the diagonal block
           was not clipped, so the i+M bounds below stay inside the matrix. */
        for (k1 = j+1; k1 < N; k1 = k1+M)
        {
            k2 = k1 + M - 1;
            subdist = M;
            if (k2 > N-1)
            {
                k2 = N - 1;
                subdist = N - k1;
            }
            /* Solve LZ */
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    inSubMat[p1-i][p2-k1] = outMat[p1][p2];
                }
            }
            TriangleSolver(LU, inSubMat, outSubMat, subdist, 1);
            /* update */
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    outMat[p1][p2] = outSubMat[p1-i][p2-k1]; 
                }
            }
            /* Solve WU */
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    inSubMat[p2-k1][p1-i] = outMat[p2][p1];
                }
            }
            TriangleSolver(LU, inSubMat, outSubMat, subdist, 2);
            /* update */
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    outMat[p2][p1] = outSubMat[p2-k1][p1-i]; 
                }
            }
        }

             
_cleanup_space(_distributor, "234567");
_cleanup_space(_constructor, "234567");
    /* <token action="SET" idxset="(k1)"/> */
    sprintf(_tp_name, "token#%s", "234567");
    sprintf(_tp_token, "=(k1:%d~%d,%d:#%d)", j+1, N, 1, M);
    _tp_size = sizeof(_tp_token);
    _tokens  = _set_token(_distributor, _tp_name, (char *)_tp_token, _tp_size);
    if (_tokens < 0) exit(-1);


    /* <send var="i" type="int"/> */
    sprintf(_tp_name, "int:i#%s", "234567");
    _tp_size = sizeof(int);
    _tp_i_234567 = &i;
    _status  = _send_data(_distributor, _tp_name, (char *)_tp_i_234567, _tp_size);
    if (_status < 0) exit(-1);

 
             
    /* <send var="j" type="int"/> */
    sprintf(_tp_name, "int:j#%s", "234567");
    _tp_size = sizeof(int);
    _tp_j_234567 = &j;
    _status  = _send_data(_distributor, _tp_name, (char *)_tp_j_234567, _tp_size);
    if (_status < 0) exit(-1);

 
             
    /* <send var="outMat" type="float[N(i~N)][N(i~N)]"/> */
    /* sizeof yields size_t, so it is cast to match the %d conversion */
    sprintf(_tp_name, "float(%d)(%d):outMat#%s[%d~%d,%d][%d~%d,%d]@%d", (N), (N), "234567", (i), (N), 1, (i), (N), 1, (int)sizeof(float));
    _tp_size = (((N) - (i)) * ((N) - (i))) * sizeof(float);
    _tp_outMat_234567 = (float *)malloc(_tp_size);
    if (_tp_outMat_234567 == NULL) exit(-1);
    /* pack rows/columns i..N-1 of outMat into a dense buffer */
    for (_x0_234567 = (i), _y0_234567 =0; _x0_234567 < (N); _x0_234567 +=1, _y0_234567 ++) {
        for (_x1_234567 = (i), _y1_234567 =0; _x1_234567 < (N); _x1_234567 +=1, _y1_234567 ++) {

            _tp_outMat_234567[_y0_234567 * ((N) - (i)) + _y1_234567] = outMat[_x0_234567][_x1_234567];
        }
    }

    _status  = _send_data(_distributor, _tp_name, (char *)_tp_outMat_234567, _tp_size);
    if (_status < 0) exit(-1);
    free(_tp_outMat_234567);

 

          

             
    /* <read var="outMat" type="float[N(j+1~N)][N(j+1~N)]"/> */
    sprintf(_tp_name, "float(%d)(%d):outMat#%s[%d~%d,%d][%d~%d,%d]@%d", (N), (N), "234567", (j+1), (N), 1, (j+1), (N), 1, (int)sizeof(float));
    _tp_size = (((N) - (j+1)) * ((N) - (j+1))) * sizeof(float);
    _tp_outMat_234567 = (float *)malloc(_tp_size);
    /* for the last block the request is zero bytes, where a NULL result
       from malloc is not a failure */
    if (_tp_outMat_234567 == NULL && _tp_size > 0) exit(-1);
    _tp_size = _read_data(_constructor, _tp_name, (char *)_tp_outMat_234567, _tp_size);
    if (_tp_size < 0) exit(-1);
    /* unpack the dense buffer into rows/columns j+1..N-1 of outMat */
    for (_x0_234567 = (j+1), _y0_234567 =0; _x0_234567 < (N); _x0_234567 +=1, _y0_234567 ++) {
        for (_x1_234567 = (j+1), _y1_234567 =0; _x1_234567 < (N); _x1_234567 +=1, _y1_234567 ++) {

            outMat[_x0_234567][_x1_234567] = _tp_outMat_234567[_y0_234567 * ((N) - (j+1)) + _y1_234567];
        }
    }

    free(_tp_outMat_234567);

 

        i = i + M;
    }

     
_close_space(_constructor, "234567", 1);
_close_space(_distributor, "234567", 1);
/* </master> */
 

    t1 = wall_clock() - t0;
    if (t1>0) printf(" (%f) MFLOPS.\n", (float) 2*N*N*N/3/t1);
    else printf(" MFLOPS: Not measured.\n");
    /* presumably wall_clock() counts microseconds -- TODO confirm */
    printf("elapse time = %10.6f\n", t1/1000000);

    return 0;
}
예제 #5
0
/*
 * Bicriteria driver built on OsiSymSolverInterface.
 *
 * Steps:
 *   1. Solve with (gamma, tau) = (1, 0) and then (0, 1); the two objective
 *      values obtained form the utopia point.
 *   2. Keep a circular list of solution pairs. For every pair taken off the
 *      list a new (gamma, tau) is derived (binary search, non-dominated
 *      formula or slope, depending on the compile-time flags) and the
 *      problem is solved again. A solution that differs from both members
 *      of the pair is inserted between them and two new pairs are queued.
 *   3. Print statistics and the gamma range of every solution found.
 *
 * Returns 0.
 *
 * NOTE(review): this block reads the problem through three names -- `env`
 * (declared here), `p` and `cnrp` (not declared in this block) -- and mixes
 * `best_sol` with `bestsol`. Which spelling is intended cannot be decided
 * from this block, so those references are left as they were; confirm
 * against the enclosing file.
 */
int main(int argc, char **argv)
{
   int i;
   problem *env;
   double gamma, gamma0, gamma1, tau, slope;
   double start_time;

   solution_data solutions[MAX_NUM_PAIRS];
   int numsolutions = 0, numprobs = 0, numinfeasible = 0;
   solution_pairs pairs[MAX_NUM_PAIRS];
   int numpairs = 0, cur_position = 0, first = 0, last = 0, previous = 0;
   int *indices;      /* receives (int *) calloc results, copied in ISIZE units */
   double *values;
   int length;
   int solution1, solution2;
   double utopia[2];
   double compare_sol_tol, ub = 0.0;
   
   start_time = wall_clock(NULL);

   /* Initialize the SYMPHONY environment */
   OsiSymSolverInterface si;
   
   /* Get pointer to the SYMPHONY environment */
   env = si.getSymphonyEnvironment();

   /* Parse the command line */
   si.parseCommandLine(argc, argv);
   
   /* Read in the problem */
   si.loadProblem();

   /* Find a priori problem bounds */
   si.findInitialBounds();
   
   /* Set some parameters */
   compare_sol_tol = p->par.compare_solution_tolerance;
   si.setSymParam(OsiSymGranularity,-MAX(p->lp_par.rho, compare_sol_tol));

#ifdef BINARY_SEARCH
   printf("Using binary search with tolerance = %f...\n",
          p->par.binary_search_tolerance);
#endif
#ifdef LIFO
   printf("Using LIFO search order...\n");
#endif
   if (p->lp_par.rho > 0){
      printf("Using secondary objective weight %.8f\n", cnrp->lp_par.rho);
   }
   printf("\n");

#ifdef SAVE_CUT_POOL
   printf("Saving the global cut pool between iterations...\n");
   si.createPermanentCutPools();
   si.setSymParam(OsiSymUsePermanentCutPools, TRUE);
#endif
   
   /* First, calculate the utopia point */
   p->lp_par.gamma = 1.0;
   p->lp_par.tau = 0.0;
      
   printf("***************************************************\n");
   printf("***************************************************\n");
   /* the message has no conversions, so no arguments are passed */
   printf("Now solving with gamma = 1.0 tau = 0.0 \n");  
   printf("***************************************************\n");
   printf("***************************************************\n\n");

   /* Solve */
   si.branchAndBound();
   numprobs++;
   
   /* Store the solution */
   length = solutions[numsolutions].length = p->best_sol.xlength;
   indices = solutions[numsolutions].indices = (int *) calloc(length, ISIZE);
   values = solutions[numsolutions].values = (double *) calloc(length, DSIZE);
   memcpy((char *) indices, p->bestsol.xind, length * ISIZE);
   memcpy((char *) values, p->bestsol.xval, length * DSIZE);
   solutions[numsolutions].gamma = 1.0;
   solutions[numsolutions].tau = 0.0;
   solutions[numsolutions].obj[0] = p->obj[0];
   solutions[numsolutions++].obj[1] = p->obj[1];
   utopia[0] = p->obj[0];
      
   cnrp->lp_par.gamma = 0.0;
   cnrp->cg_par.tau = cnrp->lp_par.tau = 1.0;
      
   printf("***************************************************\n");
   printf("***************************************************\n");
   printf("Now solving with gamma = 0.0 tau = 1.0 \n");  
   printf("***************************************************\n");
   printf("***************************************************\n\n");

   /* Solve */
   si.branchAndBound();
   numprobs++;
   
   /* Store the solution */
   length = solutions[numsolutions].length = p->best_sol.xlength;
   indices = solutions[numsolutions].indices = (int *) calloc(length, ISIZE);
   values = solutions[numsolutions].values = (double *) calloc(length, DSIZE);
   memcpy((char *) indices, p->bestsol.xind, length * ISIZE);
   memcpy((char *) values, p->bestsol.xval, length * DSIZE);
   solutions[numsolutions].gamma = 0.0;
   solutions[numsolutions].tau = 1.0;
   solutions[numsolutions].obj[0] = p->obj[0];
   solutions[numsolutions++].obj[1] = p->obj[1];
   utopia[1] = p->obj[1];
   
   p->utopia[1] = utopia[1];
   p->utopia[0] = utopia[0];
   
   printf("***************************************************\n");
   printf("***************************************************\n");
   printf("Utopia point has fixed cost %.3f and variable cost %.3f \n",
          utopia[0], utopia[1]);
   printf("***************************************************\n");
   printf("***************************************************\n\n");
   
   /* Add the first pair to the list */
#ifdef BINARY_SEARCH
   pairs[first].gamma1 = 1.0;
   pairs[first].gamma2 = 0.0;
#endif
   pairs[first].solution1 = 0;
   pairs[first].solution2 = 1;

   first = last = 0;
   numpairs = 1;

   /* Keep taking pairs off the list and processing them until there are none
      left */
   while (numpairs > 0 && numpairs < MAX_NUM_PAIRS &&
          numsolutions < MAX_NUM_SOLUTIONS &&
          numinfeasible < MAX_NUM_INFEASIBLE){

      /* pairs[] is used as a circular buffer: LIFO pops at `last`,
         the default order pops at `first` */
#ifdef LIFO
      solution1 = pairs[last].solution1;
      solution2 = pairs[last].solution2;
      cur_position = last;
      if (--last < 0){
         last = MAX_NUM_PAIRS - 1;
      }
      numpairs--;
#else
      solution1 = pairs[first].solution1;
      solution2 = pairs[first].solution2;
      cur_position = first;
      if (++first > MAX_NUM_PAIRS-1)
         first = 0;
      numpairs--;
#endif

#ifdef BINARY_SEARCH
      gamma = (pairs[cur_position].gamma1 + pairs[cur_position].gamma2)/2;
#elif defined(FIND_NONDOMINATED_SOLUTIONS)
      gamma = (utopia[1] - solutions[solution1].obj[1])/
         (utopia[0] - solutions[solution2].obj[0] +
          utopia[1] - solutions[solution1].obj[1]);
#else
      slope = (solutions[solution1].obj[1] -
               solutions[solution2].obj[1])/
              (solutions[solution2].obj[0] -
               solutions[solution1].obj[0]);
      gamma = slope/(1+slope);
#endif
      tau = 1 - gamma;
      
      p->lp_par.gamma = gamma;
      p->lp_par.tau = tau;

      /* Find upper bound */

      env->has_ub = FALSE;
      env->ub = MAXDOUBLE;
#ifndef BINARY_SEARCH
      for (i = 0; i < numsolutions; i++){
#ifdef FIND_NONDOMINATED_SOLUTIONS
         ub = MAX(gamma*(solutions[i].obj[0] - utopia[0]),
                  tau*(solutions[i].obj[1] - utopia[1]));
#else
         ub = gamma*solutions[i].obj[0] + tau*solutions[i].obj[1];
#endif 
         if (ub < env->ub){
            env->has_ub = TRUE;
            env->ub = ub - compare_sol_tol;
         }
      }
#endif
      
      printf("***************************************************\n");
      printf("***************************************************\n");
      printf("Now solving with gamma = %.6f tau = %.6f \n", gamma, tau);  
      printf("***************************************************\n");
      printf("***************************************************\n\n");
      
      /* objectives are zeroed so that "still zero" after the solve can be
         recognised below */
      p->obj[0] = p->obj[1] = 0.0;
      
      si.branchAndBound();
      numprobs++;
      
#ifdef BINARY_SEARCH
      /* same as solution1: continue searching in [gamma, gamma2] */
      if (p->obj[0] - solutions[solution1].obj[0] <
          compare_sol_tol &&
          solutions[solution1].obj[1] - p->obj[1] <
          compare_sol_tol){
         if (pairs[cur_position].gamma1 - gamma >
             cnrp->par.binary_search_tolerance){
            if (++last > MAX_NUM_PAIRS - 1)
               last = 0;
            pairs[last].solution1 = solution1;
            pairs[last].solution2 = solution2;
            pairs[last].gamma1 = gamma;
            pairs[last].gamma2 = pairs[cur_position].gamma2;
            numpairs++;
         }
         continue;
      }
      /* same as solution2: continue searching in [gamma1, gamma] */
      if (solutions[solution2].obj[0] - p->obj[0] < compare_sol_tol
          && p->obj[1] - solutions[solution2].obj[1] <
          compare_sol_tol){
         if (gamma - pairs[cur_position].gamma2 >
             cnrp->par.binary_search_tolerance){
            if (++last > MAX_NUM_PAIRS - 1)
               last = 0;
            pairs[last].solution1 = solution1;
            pairs[last].solution2 = solution2;
            pairs[last].gamma1 = pairs[cur_position].gamma1;
            pairs[last].gamma2 = gamma;
            numpairs++;
         }
         continue;
      }
#else
      if (p->obj[0] == 0.0 && p->obj[1] == 0.0){
         numinfeasible++;
         continue;
      }else if (p->obj[0] - solutions[solution1].obj[0] <
                compare_sol_tol &&
                solutions[solution1].obj[1] - p->obj[1] <
                compare_sol_tol){
         numinfeasible++;
         continue;
      }else if (solutions[solution2].obj[0] - p->obj[0] <
                compare_sol_tol &&
                p->obj[1] - solutions[solution2].obj[1] <
                compare_sol_tol){
         numinfeasible++;
         continue;
      }
#endif
      
      /* Insert new solution */
      numinfeasible = 0;
      /* advance `last` by two slots with wrap-around; `previous` is the
         slot just before it */
      if (last + 2 == MAX_NUM_PAIRS){
         last = 0;
         previous = MAX_NUM_PAIRS - 1;
      }else if (last + 2 == MAX_NUM_PAIRS + 1){
         last = 1;
         previous = 0;
      }else{
         last += 2;
         previous = last - 1;
      }
#ifdef BINARY_SEARCH
      pairs[previous].gamma1 = pairs[cur_position].gamma1;
      pairs[previous].gamma2 = gamma;
      pairs[last].gamma1 = gamma;
      pairs[last].gamma2 = pairs[cur_position].gamma2;
#endif
      pairs[previous].solution1 = solution1;
      pairs[previous].solution2 = solution2;
      pairs[last].solution1 = solution2;
      pairs[last].solution2 = solution2+1;
      numpairs += 2;
      /* open a gap at index solution2 for the new solution */
      for (i = numsolutions; i > solution2; i--){
         solutions[i] = solutions[i-1];
      }
      numsolutions++;
#ifndef LIFO
      /* pairs queued earlier refer to solution indices that have just
         shifted up by one */
      if (first < last){
         for (i = first; i < last - 1; i++){
            if (pairs[i].solution1 >= solution2){
               pairs[i].solution1++;
            }
            if (pairs[i].solution2 >= solution2){
               pairs[i].solution2++;
            }
         }
      }else{
         for (i = first; i < MAX_NUM_PAIRS - (last == 0 ? 1 : 0); i++){
            if (pairs[i].solution1 >= solution2){
               pairs[i].solution1++;
            }
            if (pairs[i].solution2 >= solution2){
               pairs[i].solution2++;
            }
         }
         for (i = 0; i < last - 1; i++){
            if (pairs[i].solution1 >= solution2){
               pairs[i].solution1++;
            }
            if (pairs[i].solution2 >= solution2){
               pairs[i].solution2++;
            }
         }
      }
         
#endif
      /* store the new solution in the gap (index solution2) */
      length = solutions[solution2].length = p->best_sol.xlength;
      indices = solutions[solution2].indices = (int *) calloc(length, ISIZE);
      values = solutions[solution2].values = (double *) calloc(length, DSIZE);
      memcpy((char *) indices, p->bestsol.xind, length * ISIZE);
      memcpy((char *) values, p->bestsol.xval, length * DSIZE);
      solutions[solution2].gamma = gamma;
      solutions[solution2].tau = tau;
      solutions[solution2].obj[0] = p->obj[0];
      solutions[solution2].obj[1] = p->obj[1];
   }

   printf("\n********************************************************\n");

   if (numsolutions >= MAX_NUM_SOLUTIONS){
      printf("Maximum number of solutions (%i) reached\n\n",
             MAX_NUM_SOLUTIONS);
   }

   if (numinfeasible >= MAX_NUM_INFEASIBLE){
      printf("Maximum number of infeasible subproblems (%i) reached\n\n",
             MAX_NUM_INFEASIBLE);
   }
   
   if (numpairs >= MAX_NUM_PAIRS){
      printf("Maximum number of solution pairs (%i) reached\n\n",
             MAX_NUM_PAIRS);
      printf("\n********************************************************\n");
#ifdef FIND_NONDOMINATED_SOLUTIONS
      printf(  "* Found set of non-dominated solutions!!!!!!! *\n");
#else
      printf(  "* Found set of supported solutions!!!!!!!     *\n");
#endif
   }else{
      printf("\n********************************************************\n");
#ifdef FIND_NONDOMINATED_SOLUTIONS
      printf(  "* Found complete set of non-dominated solutions!!!!!!! *\n");
#else
      printf(  "* Found complete set of supported solutions!!!!!!!     *\n");
#endif
   }
   printf(  "* Now displaying stats...                              *\n");
   printf(  "********************************************************\n\n");

#ifdef SAVE_CUT_POOL
   for (i = 0; i < env->par.tm_par.max_cp_num; i++){
      env->comp_times.bc_time.cut_pool += env->cp[i]->cut_pool_time;
      env->warm_start->stat.cuts_in_pool += env->cp[i]->cut_num;
   }
#endif
   
   print_statistics(&(env->comp_times.bc_time), &(env->warm_start->stat), 0.0,
                    0.0, 0, start_time);

   printf("\nNumber of subproblems solved: %i\n", numprobs);
   printf("Number of solutions found: %i\n\n", numsolutions);
   
   printf("***************************************************\n");
   printf("***************************************************\n");
#ifdef FIND_NONDOMINATED_SOLUTIONS
   printf("Displaying non-dominated solution values and breakpoints\n");  
#else
   printf("Displaying supported solution values and breakpoints\n");  
#endif
   printf("***************************************************\n");
   printf("***************************************************\n\n");

   /* each solution is listed with the gamma interval between its two
      breakpoints; the first interval ends at 1.0 */
   gamma0 = 1.0;
   for (i = 0; i < numsolutions - 1; i++){
#ifdef FIND_NONDOMINATED_SOLUTIONS
      gamma1 = (utopia[1] - solutions[i].obj[1])/
         (utopia[0] - solutions[i+1].obj[0] +
          utopia[1] - solutions[i].obj[1]);
#else
      slope = (solutions[i].obj[1] -
               solutions[i+1].obj[1])/
              (solutions[i+1].obj[0] -
               solutions[i].obj[0]);
      gamma1 = slope/(1+slope);
#endif
      printf("First Objective: %.3f Second Objective: %.3f ",
             solutions[i].obj[0], solutions[i].obj[1]);
      printf("Range: %.6f - %.6f\n", gamma1, gamma0);
      gamma0 = gamma1;
   }
   /* i is now the index of the last solution, whose interval starts at 0 */
   printf("First Objective: %.3f Second Objective: %.3f ",
          solutions[i].obj[0], solutions[i].obj[1]);
   printf("Range: %.6f - %.6f\n", 0.0, gamma0);
   
   for (i = 0 ; i < numsolutions; i++){
      FREE(solutions[i].values);
      FREE(solutions[i].indices);
   }
   
   return(0);
}   
예제 #6
0
node_desc *create_explicit_node_desc(lp_prob *p)
{
   LPdata *lp_data = p->lp_data;
   int m = lp_data->m, n = lp_data->n;

   int bvarnum = p->base.varnum;
   var_desc **extravars = lp_data->vars + bvarnum;
   int extravarnum = n - bvarnum;

   int bcutnum = p->base.cutnum;
   row_data *rows = lp_data->rows;
   int extrarownum = m - bcutnum;
   int cutindsize;

   node_desc *desc = (node_desc *) calloc(1, sizeof(node_desc));

   /* Will need these anyway for basis */
   int *rstat = (int *) malloc(m * ISIZE);
   int *cstat = (int *) malloc(n * ISIZE);
   int *erstat = (extrarownum == 0) ? NULL : (int *) malloc(extrarownum*ISIZE);
   int *ecstat = (extravarnum == 0) ? NULL : (int *) malloc(extravarnum*ISIZE);

   int *ulist, *clist; /* this later uses tmp.i1 */
   int cutcnt, i, j;
#ifndef COMPILE_IN_LP
   int s_bufid, r_bufid;
#endif

   get_basis(lp_data, cstat, rstat);
   if (extrarownum > 0)
      memcpy(erstat, rstat + bcutnum, extrarownum * ISIZE);
   if (extravarnum > 0)
      memcpy(ecstat, cstat + bvarnum, extravarnum * ISIZE);

   /* To start with, send the non-indexed cuts (only those which will be
      saved) to the treemanager and ask for names */
   for (cutcnt = cutindsize = 0, i = bcutnum; i < m; i++){
      if ((rows[i].cut->branch & CUT_BRANCHED_ON) ||
	  !rows[i].free || (rows[i].free && rstat[i] != SLACK_BASIC)){
	 cutindsize++;
	 if (rows[i].cut->name < 0)
	    cutcnt++;
      }
   }
   if (cutcnt > 0){
#ifdef COMPILE_IN_LP
      row_data *tmp_rows = (row_data *) malloc(cutcnt*sizeof(row_data));
      
      for (j = 0, i = bcutnum; j < cutcnt; i++){
	 if (rows[i].cut->name < 0 &&
	     (!rows[i].free || (rows[i].free && rstat[i] != SLACK_BASIC)))
	    tmp_rows[j++] = rows[i];
      }
      unpack_cut_set(p->tm, 0, cutcnt, tmp_rows);
      FREE(tmp_rows);
#else
      s_bufid = init_send(DataInPlace);
      send_int_array(&cutcnt, 1);
      for (i = bcutnum; i < m; i++){
	 if (rows[i].cut->name < 0 &&
	     (!rows[i].free || (rows[i].free && rstat[i] != SLACK_BASIC)))
	    pack_cut(rows[i].cut);
      }
      send_msg(p->tree_manager, LP__CUT_NAMES_REQUESTED);
      freebuf(s_bufid);
#endif
   }

   /* create the uind list and the extravars basis description */
   desc->uind.type = EXPLICIT_LIST;
   desc->uind.added = 0;
   desc->uind.size = extravarnum;
   desc->basis.extravars.type = EXPLICIT_LIST;
   desc->basis.extravars.size = extravarnum;
   desc->basis.extravars.list = NULL;
   if (extravarnum > 0){
      desc->uind.list = ulist = (int *) malloc(extravarnum * ISIZE);
      desc->basis.extravars.stat = ecstat;
      for (i = extravarnum - 1; i >= 0; i--)
	 ulist[i] = extravars[i]->userind;
      if (lp_data->ordering == COLIND_ORDERED)
	 qsortucb_ii(ulist, ecstat, extravarnum);
   }else{
      desc->uind.list = NULL;
      desc->basis.extravars.stat = NULL;
   }
   /* create the basevars basis description */
   desc->basis.basevars.type = EXPLICIT_LIST;
   desc->basis.basevars.size = bvarnum;
   desc->basis.basevars.list = NULL;
   if (bvarnum)
      desc->basis.basevars.stat = cstat;
   else
      FREE(cstat);

   /* create the not_fixed list */
   desc->nf_status = lp_data->nf_status;
   if (desc->nf_status == NF_CHECK_AFTER_LAST ||
       desc->nf_status == NF_CHECK_UNTIL_LAST){
      desc->not_fixed.type = EXPLICIT_LIST;
      desc->not_fixed.added = 0;
      if ((desc->not_fixed.size = lp_data->not_fixed_num) > 0){
	 desc->not_fixed.list = (int *) malloc(desc->not_fixed.size * ISIZE);
	 memcpy(desc->not_fixed.list, lp_data->not_fixed,
		lp_data->not_fixed_num * ISIZE);
      }else{
	 desc->not_fixed.list = NULL;
      }
   }

#ifndef COMPILE_IN_LP
   /* At this point we will need the missing names */
   if (cutcnt > 0){
      static struct timeval tout = {15, 0};
      int *names = lp_data->tmp.i1; /* m */
      double start = wall_clock(NULL);
      do{
	 r_bufid = treceive_msg(p->tree_manager, LP__CUT_NAMES_SERVED, &tout);
	 if (! r_bufid){
	    if (pstat(p->tree_manager) != PROCESS_OK){
	       printf("TM has died -- LP exiting\n\n");
	       exit(-301);
	    }
	 }
      }while (! r_bufid);
      p->comp_times.idle_names += wall_clock(NULL) - start;
      receive_int_array(names, cutcnt);
      for (j = 0, i = bcutnum; j < cutcnt; i++){
	 if (rows[i].cut->name < 0 &&
	     (!rows[i].free || (rows[i].free && rstat[i] != SLACK_BASIC)))
	    rows[i].cut->name = names[j++];
      }
   }
#endif

   /* create the cutind list and the extrarows basis description */
   desc->cutind.type = EXPLICIT_LIST;
   desc->cutind.added = 0;
   desc->cutind.size = cutindsize;
   desc->basis.extrarows.type = EXPLICIT_LIST;
   desc->basis.extrarows.list = NULL;
   desc->basis.extrarows.size = cutindsize;
   if (cutindsize > 0){
      desc->cutind.list = clist = (int *) malloc(cutindsize * ISIZE);
      desc->basis.extrarows.stat = erstat;
      for (cutindsize = 0, i = bcutnum; i < m; i++){
	 if ((rows[i].cut->branch & CUT_BRANCHED_ON) ||
	     !rows[i].free || (rows[i].free && rstat[i] != SLACK_BASIC)){
	    clist[cutindsize] = rows[i].cut->name;
	    erstat[cutindsize++] = rstat[i];
	 }
      }
      qsortucb_ii(clist, erstat, cutindsize);
   }else{
      desc->cutind.list = NULL;
      desc->basis.extrarows.stat = NULL;
   }
   /* create the baserows basis description */
   desc->basis.baserows.type = EXPLICIT_LIST;
   desc->basis.baserows.size = bcutnum;
   desc->basis.baserows.list = NULL;
   if (bcutnum)
      desc->basis.baserows.stat = rstat;
   else
      FREE(rstat);

   /* Mark that there is a basis */
   desc->basis.basis_exists = TRUE;

   /* Add user description */
   add_to_desc_u(p, desc);

   return(desc);
}
예제 #7
0
파일: mtclnt.c 프로젝트: jys673/Synergy
/*
 * Matrix-multiply client (master).
 *
 * Fills A and B with i*j, puts B into the "problem" space as tuple "B0",
 * then cuts A into strips of G rows and puts each strip as tuple
 * "A<first row>" (element 0 of a strip holds its row count). Afterwards it
 * collects result tuples from the "result" space until N rows of C have
 * arrived, puts a tuple whose first element is -1 as the termination
 * signal, and prints timing figures.
 *
 * Returns 0 on success and 1 when the chunk size reported by cnf_getf() is
 * not positive. Calls exit(1) when an allocation fails.
 */
int main(void)
{
	char host[128];
	int i, j, received;
	int ix, iy, tplength, status;
	int G, R, P, res, tsd, x;
	double t0, t1;

	gethostname(host, sizeof(host));
	t0 = wall_clock();
	ix = 0;

	printf("Before cnf_open... \n");
	tsd = cnf_open("problem",0);
	res = cnf_open("result",0);


	G = cnf_getf(); // Get chunk size
	P = cnf_getP(); // Get number of processors
	printf(" mtclnt.  Chunk size (%d) \n",G);

	/* with G < 1 the distribution loop below would never bring R to 0 */
	if (G < 1) {
		fprintf(stderr, " mtclnt.  invalid chunk size (%d)\n", G);
		cnf_term();
		return 1;
	}
	R = N;

	tplength = (1+N*N)*sizeof(double);
	// Building Matrix A and B
	for (i = 0; i < N; i++)
		for (j = 0; j < N; j++)
		{
			ituple_B[i][j] = (double) i * j;
			A[i][j] = (double) i * j;
		}

	sprintf(tpname,"B%d",0);

	status = cnf_tsput(tsd, tpname, (double *)ituple_B, tplength);

	/* one header element plus G rows of N values */
	tplength = (1+ G*N) * sizeof(double);
	if ((ituple_A = (double *) malloc(tplength)) == NULL) exit(1);

	/* R counts the rows still to be handed out; the last strip may be
	   shorter than the chunk size */
	while (R > 0) {
		if (R < G) G = R;
		R = R - G ;
		ituple_A[0] = G;
		for (x = 0; x < G; x++)
			for (j = 0; j < N; j++) 
				ituple_A[x*N+j+1] = A[ix+x][j];
		sprintf(tpname,"A%d",ix);
		status = cnf_tsput(tsd, tpname, ituple_A, tplength);
		ix += G;
	}
	free(ituple_A);

	/* now receive the result  */
	received = i = 0;
	tplength = (1+N*N)*sizeof(double);
	if ((otuple = (double *)malloc(tplength)) == NULL)
		exit(1);
	while (received < N) {
		strcpy(tpname,"*");
		printf(" mtclnt.  waiting for a tuple) \n");
		tplength = cnf_tsget(res, tpname, otuple, 0);

		/* element 0 is the row count; the tuple name is parsed as the
		   index of the first row */
		G = (int) otuple[0];
		ix = atoi(tpname);
		iy = 1;
		printf(" mtclnt.  tuple %d received %d) \n", ix, received);
					/* reassemble the result matrix */
		for (i= 0; i < G; i++) {
				received ++;
				for (j=0; j < N; j++) {
						C[ix][j] = otuple[iy];
						iy++;
				}
				ix ++;
		}
	}
	free(otuple);

	printf(" mtclnt.  received everything\n");
	/* insert zero size tuple as termination signal */
	tplength = sizeof(double);
	if ((ituple_A = (double *)malloc(tplength)) == NULL)
		exit(1);

	ituple_A[0]  = -1;
	sprintf(tpname, "A%d",N*N);
	status = cnf_tsput(tsd, tpname, ituple_A, tplength);
	free(ituple_A);
	t1 = wall_clock() - t0;

	/* NOTE(review): G printed as f(...) is the row count of the last tuple
	   received, not the value returned by cnf_getf() -- confirm intent */
	printf("Performance: (%s) (%f)sec. P(%d) f(%d) n(%d)\n",
			host, t1/1000000, P, G,  N*1);
	if (t1>0) printf(" (%f) MFLOPS.\n", (float) (N*N/t1)*N);
	else printf(" MFLOPS: Not measured.\n");
	cnf_term();

	return 0;
}
예제 #8
0
파일: lp.c 프로젝트: coin-or/SYMPHONY
/*
 * Entry point of the LP process.
 *
 * Allocates a zero-filled lp_prob, records the start time, initializes the
 * problem with lp_initialize() and then loops: receive messages until
 * process_message() returns nonzero, drain any further pending messages,
 * and call process_chain().  The time spent in the first receive loop is
 * charged to comp_times.ramp_up_lp on the first pass and to
 * comp_times.idle_node on every later pass.
 *
 * Returns 1 if the lp_prob structure cannot be allocated.  The loop itself
 * contains no break, so the statements after it are not reached as written.
 * NOTE(review): lp_exit() is defined elsewhere and presumably terminates
 * the process -- confirm.
 */
int main(void)
{
   lp_prob *p;
   int r_bufid;
   double wait_start, diff;
   struct timeval timeout = {10, 0};
   char first_node_rec = FALSE;
   int termcode;

   p = (lp_prob *) calloc(1, sizeof(lp_prob));
   if (p == NULL){
      /* Without the problem structure nothing below can run, and the
       * start_time assignment would dereference a null pointer. */
      printf("Unable to allocate memory for the LP process. Exiting now.\n\n");
      return(1);
   }

   p->start_time = wall_clock(NULL);

   if ((termcode = lp_initialize(p, 0)) < 0){
      printf("LP initialization failed with error code %i\n\n", termcode);
      lp_exit(p);
   }

   /*------------------------------------------------------------------------*\
    * Continue receiving node data and fathoming branches until this
    * process is killed
   \*------------------------------------------------------------------------*/

   p->phase = 0;
   while (TRUE){
      p->lp_data->col_set_changed = TRUE;
      /*---------------------------------------------------------------------*\
       * waits for an active node message but if there's anything left after
       * receiving that, those messages are processed, before going to
       * process_chain().
      \*---------------------------------------------------------------------*/
      wait_start = wall_clock(NULL);
      do{
         r_bufid = treceive_msg(ANYONE, ANYTHING, &timeout);
      }while (! process_message(p, r_bufid, NULL, NULL) );
      diff = wall_clock(NULL) - wait_start;
      if (first_node_rec){
         p->comp_times.idle_node += diff;
      }else{
         /* The very first wait counts as ramp-up rather than idle time. */
         first_node_rec = TRUE;
         p->comp_times.ramp_up_lp += diff;
      }

      /* Drain whatever else is already pending; a zero buffer id ends the
       * loop. */
      do{
         r_bufid = nreceive_msg(ANYONE, ANYTHING);
         if (r_bufid)
            process_message(p, r_bufid, NULL, NULL);
      }while (r_bufid);

      p->comp_times.communication += used_time(&p->tt);

      if (process_chain(p) < 0){
         printf("\nThere was an error in the LP process. Exiting now.\n\n");
         /* There was an error in the LP. Abandon node. */
         lp_exit(p);
      }
   }

   p->comp_times.wall_clock_lp = wall_clock(NULL) - p->start_time;

   lp_exit(p);

   return(0);
}
예제 #9
0
/*
 * Print a timing breakdown followed by search-tree statistics to stdout.
 *
 * tim          - per-category timings; each field is printed as-is.
 * stat         - tree statistics (node counts, depth, cut pool size, ...).
 * ub, lb       - bounds; printed, together with the gap percentage, only
 *                when lb > 0.  Otherwise ub alone is printed if has_ub is
 *                nonzero.
 * initial_time - starting value for "Total User Time"; the communication,
 *                lp, separation, fixing, pricing, strong_branching and
 *                cut_pool timings are added to it.
 * start_time   - subtracted from wall_clock(NULL) for "Total Real Time".
 * obj_offset   - added to every bound that is printed.
 * obj_sense    - when equal to SYM_MAXIMIZE, bounds are negated and the
 *                upper/lower labels are swapped.
 *
 * The CPU-timing lines are compiled out when WIN32 is defined, and the
 * ramp-up/ramp-down lines when COMPILE_IN_LP is defined.
 */
void print_statistics(node_times *tim, problem_stat *stat, double ub,
		      double lb, double initial_time, double start_time,
		      double obj_offset, char obj_sense, char has_ub)
{
   /* Printable names, looked up directly by the value of stat->nf_status. */
   static str_int nf_status_names[4] = {
      {"NF_CHECK_ALL", NF_CHECK_ALL},
      {"NF_CHECK_AFTER_LAST", NF_CHECK_AFTER_LAST},
      {"NF_CHECK_UNTIL_LAST", NF_CHECK_UNTIL_LAST},
      {"NF_CHECK_NOTHING", NF_CHECK_NOTHING}
   };
   double real_time;

   /* Accumulate the total user time on top of the caller's initial value. */
   initial_time += tim->communication;
   initial_time += tim->lp;
   initial_time += tim->separation;
   initial_time += tim->fixing;
   initial_time += tim->pricing;
   initial_time += tim->strong_branching;
   initial_time += tim->cut_pool;

#ifndef WIN32  /* FIXME: CPU timing doesn't work in Windows */
   printf("====================== CP Timing =========================\n");
   printf("  Cut Pool                  %.3f\n", tim->cut_pool);
#endif
   printf("====================== LP/CG Timing =========================\n");
#ifndef WIN32  /* FIXME: CPU timing doesn't work in Windows */
   printf("  LP: Solution Time         %.3f\n", tim->lp);
   printf("      Variable Fixing       %.3f\n", tim->fixing);
   printf("      Pricing               %.3f\n", tim->pricing);
   printf("      Strong Branching      %.3f\n", tim->strong_branching);
   printf("      Communication         %.3f\n", tim->communication);
#ifndef COMPILE_IN_LP
   printf("      Ramp Up Time (TM)     %.3f\n", tim->ramp_up_tm);
   printf("      Ramp Up Time (LP)     %.3f\n", tim->ramp_up_lp);
   printf("      Ramp Down Time        %.3f\n", tim->ramp_down_time);
#endif
   printf("      Idle Time (Node Pack) %.3f\n", tim->start_node);
   printf("      Idle Time (Nodes)     %.3f\n", tim->idle_node);
   printf("      Idle Time (Names)     %.3f\n", tim->idle_names);
   printf("      Idle Time (Diving)    %.3f\n", tim->idle_diving);
   printf("      Idle Time (Cuts)      %.3f\n", tim->idle_cuts);
   printf("  Separation                %.3f\n", tim->separation); 
   printf("  Total User Time              %.3f\n", initial_time);
#endif
   real_time = wall_clock(NULL) - start_time;
   printf("  Total Real Time              %.3f\n\n", real_time);

   printf("====================== Statistics =========================\n");
   printf("Number of created nodes :       %i\n", stat->created);
   printf("Number of analyzed nodes:       %i\n", stat->analyzed);
   printf("Depth of tree:                  %i\n", stat->max_depth);
   printf("Size of the tree:               %i\n", stat->tree_size);
   printf("Leaves before trimming:         %i\n",
          stat->leaves_before_trimming);
   printf("Leaves after trimming:          %i\n", stat->leaves_after_trimming);
   printf("Repriced root's nf_status:      %s\n",
          nf_status_names[(int)stat->nf_status].str);
   printf("Not fixed variable num:         %i\n", stat->vars_not_priced);
   printf("Number of Chains:               %i\n", stat->chains);
   printf("Number of Diving Halts:         %i\n", stat->diving_halts);
   printf("Number of cuts in cut pool:     %i\n", stat->cuts_in_pool);

   /* The root bound is reported only when it is above -MAXDOUBLE. */
   if (stat->root_lb > -MAXDOUBLE){
      if (obj_sense != SYM_MAXIMIZE){
         printf("Lower Bound in Root:            %.3f\n",
                stat->root_lb + obj_offset);
      }else{
         printf("Upper Bound in Root:            %.3f\n",
                -stat->root_lb + obj_offset);
      }
   }

   /* Without a positive lower bound, at most the upper bound is shown. */
   if (!(lb > 0)){
      if (has_ub){
         printf("\nUpper Bound:        %.3f\n", ub + obj_offset);
      }
      return;
   }

   if (obj_sense != SYM_MAXIMIZE){
      printf("\nCurrent Upper Bound:         %.3f", ub + obj_offset);
      printf("\nCurrent Lower Bound:         %.3f", lb + obj_offset);
      printf("\nGap Percentage:              %.2f\n", 100*(ub-lb)/ub);
   }else{
      printf("\nCurrent Lower Bound:         %.3f", -ub + obj_offset);
      printf("\nCurrent Upper Bound:         %.3f", -lb + obj_offset);
      printf("\nGap Percentage:              %.2f\n", -100*(ub-lb)/ub);
   }
}
예제 #10
0
파일: blu_seq.c 프로젝트: jys673/Synergy
/*
 * Sequential driver for a blocked LU-style factorization of outMat.
 *
 * Fills the N x N matrix outMat with N on the diagonal and 1 elsewhere,
 * then walks the diagonal in steps of M.  For each diagonal block it:
 *   1. factors the block with LUFactor(),
 *   2. runs TriangleSolver() on the blocks to the right of and below the
 *      diagonal block,
 *   3. subtracts the product of those two panels from the trailing
 *      submatrix.
 * Finally it prints an MFLOPS figure based on 2*N*N*N/3 operations and the
 * elapsed time, and returns 0.
 *
 * outMat, N, M, LUFactor(), TriangleSolver() and wall_clock() are defined
 * elsewhere.
 *
 * NOTE(review): the XML-like comments (<reference>, <master>, <worker>,
 * <target>, <send>, <read>) look like markup consumed by an external
 * parallelization tool; they are left verbatim -- confirm before editing
 * or moving them.
 * NOTE(review): L and U are declared but never referenced in this function,
 * and rowdist/coldist are assigned but never read.
 */
int main()
{
/* <reference> */
    int i, j, dist, k1, k2, p1, p2, q1, q2, q3;
    int subdist, rowdist, coldist;
  
    float inSubMat[M][M], outSubMat[M][M], LU[M][M], L[M][M], U[M][M];
/* </reference> */

    double t0, t1;

    t0 = wall_clock();

    /* Build the test matrix: N on the diagonal, 1 everywhere else. */
    for (i=0; i<N; i++)
    {
        for (j=0; j<N; j++)
        {
            if (i==j)
            {
                outMat[i][j] = N;
            }
            else
            {
                outMat[i][j] = 1;
            }
        }
    }
  
    /* <master id="123456"> */

    /* i is the first row/column of the current diagonal block, j its last. */
    for (i = 0; i < N; i = i + M)
    {
        j = i + M - 1;
        dist = M;
        /* The last diagonal block is clipped when M does not divide N. */
        if (j > N-1) 
        {
            j = N-1;
            dist = N - i;
        }
    
        // LU factors for submatrix
    
        for (k1 = 0; k1 < dist; k1++)
        {
            for (k2 = 0; k2 < dist; k2++)
            {
                inSubMat[k1][k2] = outMat[i+k1][i+k2];
            }
        }
    
        LUFactor(inSubMat, outSubMat, dist);
    
        // Write the factored block back and keep a copy in LU for the solves
        for (k1 = 0; k1 < dist; k1++)
        {
            for (k2 = 0; k2 < dist; k2++)
            {
                outMat[i+k1][i+k2] = outSubMat[k1][k2];
                LU[k1][k2] = outSubMat[k1][k2];
            }
        }
   
        /*
         * Walk the off-diagonal blocks k1..k2.  This loop only runs when
         * j + 1 < N, i.e. when the diagonal block was not clipped, so the
         * p1 < i+M bounds below stay inside the matrix.
         */
        for (k1 = j + 1; k1 < N; k1 = k1 + M)
        {
            k2 = k1 + M - 1;
            subdist = M;
            if (k2 > N-1)
            {
                k2 = N - 1;
                subdist = N - k1;
            }
      
            //Solve LZ: block to the right of the diagonal block
            //(rows i..i+M-1, columns k1..k2)
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    inSubMat[p1-i][p2-k1] = outMat[p1][p2];
                }
            }
            TriangleSolver(LU, inSubMat, outSubMat, subdist, 1);
      
            //update
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    outMat[p1][p2] = outSubMat[p1-i][p2-k1]; 
                }
            }    
      
            //Solve WU: block below the diagonal block
            //(rows k1..k2, columns i..i+M-1)
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    inSubMat[p2-k1][p1-i] = outMat[p2][p1];
                }
            }
            TriangleSolver(LU, inSubMat, outSubMat, subdist, 2);
      
            //update
            for (p1 = i; p1 < i+M; p1++)
            {
                for (p2 = k1; p2 <= k2; p2++)
                {
                    outMat[p2][p1] = outSubMat[p2-k1][p1-i]; 
                }
            }          
        }
    
            /* <send var="i"  type="int"/> */
            /* <send var="outMat" type="float[N(i~N)  ][N(i~N)  ]"/> */

        /* <worker> */
            /* <read var="i"  type="int"/> */

            /* <read var="outMat" type="float[N(i~i+M)][N(i~N)  ]"/> */
            /* <read var="outMat" type="float[N(k1)   ][N(i~N)  ]"/> */

        //A = A - WZ
        /* <target index="k1" limits="(i+M,N,M)" chunk="M" order="1"> */
        for (k1 = i + M; k1 < N; k1 = k1 + M)
        /* </target> */
        {
            k2 = k1 + M - 1;
            rowdist = M;
            if (k2 > N-1)
            {
                k2 = N - 1;
                rowdist = N - k1;
            }
      
            for (p1 = i + M; p1 < N; p1 = p1 + M)
            {
                p2 = p1 + M - 1;
                coldist = M;
                if (p2 > N-1)
                {
                    p2 = N - 1;
                    coldist = N - p1;
                }

                /*
                 * matrix multiplication
                 * outMat[k1:k2][p1:p2] = outMat[k1:k2][p1:p2] - 
                                outMat[k1:k2][i:i+M-1] * outMat[i:i+M-1][p1:p2];
                 */
                for (q1 = k1; q1 <= k2; q1++)
                {
                    for (q2 = p1; q2 <= p2; q2++)
                    {
                        for (q3 = 0; q3 < M; q3++)
                        {
                            outMat[q1][q2] = outMat[q1][q2] - 
                                      outMat[q1][i+q3]*outMat[i+q3][q2];
                        }
                    }
                }
            }
        }

            /* <send var="outMat" type="float[N(k1)     ][N(i+M~N)]"/> */
        /* </worker> */

            /* <read var="outMat" type="float[N(i+M~N)  ][N(i+M~N)]"/> */
    }

    /* </master> */
/*  
    for (i=0; i<N; i++)
    {
      for (j=0; j<N; j++)
      {
        printf("%6.3f ", outMat[i][j]);
      }
      printf("\n");
    }
*/
    /* NOTE(review): the division by 1000000 below suggests wall_clock()
     * reports microseconds -- confirm. */
    t1 = wall_clock() - t0;
    if (t1>0) printf(" (%f) MFLOPS.\n", (float) 2*N*N*N/3/t1);
    else printf(" MFLOPS: Not measured.\n");
    printf("elapse time = %10.6f\n", t1/1000000);

    return 0;
}