void 
ParMETISGraphPartitionerImpl::p_partition(void)
{
  int me(this->processor_rank());
  std::vector<idx_t> vtxdist;
  std::vector<idx_t> xadj;
  std::vector<idx_t> adjncy;

  ParMETISGraphWrapper wrap(p_adjacency_list);

  wrap.get_csr_local(vtxdist, xadj, adjncy);

  int nnodes(vtxdist[me+1] - vtxdist[me]);

#if 0
  for (int p = 0; p < this->processor_size(); ++p) {
    if (this->processor_rank() == p) {
      std::cout << "Processor " << p << ":     nodes: ";
      for (Index n = 0; n < nnodes; ++n) {
        std::cout << p_adjacency_list.node_index(n) << ",";
      }
      std::cout << std::endl;
      std::cout << "Processor " << p << ":   vtxdist: ";
      std::copy(vtxdist.begin(), vtxdist.end(),
                std::ostream_iterator<idx_t>(std::cout, ","));
      std::cout << std::endl;
      std::cout << "Processor " << p << ":      xadj: ";
      std::copy(xadj.begin(), xadj.end(),
                std::ostream_iterator<idx_t>(std::cout, ","));
      std::cout << std::endl;
      std::cout << "Processor " << p << ":    adjncy: ";
      std::copy(adjncy.begin(), adjncy.end(),
                std::ostream_iterator<idx_t>(std::cout, ","));
      std::cout << std::endl;
    }
    this->communicator().barrier();
  }
#endif

  // Call the partitioner (try to use variable names that match the documentation)

  int status;

  idx_t ncon(1);
  idx_t wgtflag(3), numflag(0);
  idx_t nparts(this->processor_size());
  std::vector<idx_t> vwgt(nnodes, 1);
  std::vector<idx_t> adjwgt(adjncy.size(), 2);
  std::vector<real_t> tpwgts(nparts*ncon, 1.0/static_cast<real_t>(nparts));
  real_t ubvec(1.05);
  std::vector<idx_t> options(3);
  options[0] = 1;
  options[1] = 127;
  options[2] = 14;
  MPI_Comm comm(this->communicator());

  idx_t edgecut;
  std::vector<idx_t> part(nnodes);
  status = ParMETIS_V3_PartKway(&vtxdist[0], 
                                &xadj[0], 
                                &adjncy[0],
                                &vwgt[0],
                                &adjwgt[0],
                                &wgtflag,
                                &numflag,
                                &ncon,
                                &nparts,
                                &tpwgts[0],
                                &ubvec,
                                &options[0],
                                &edgecut, &part[0],
                                &comm);
  if (status != 0) {
    // FIXME: throw an exception
  }

  // "part" contains the destination processors; transfer this to the
  // local array

  wrap.set_partition(vtxdist, part);
  wrap.get_partition(p_node_destinations);

}
示例#2
0
文件: cgen64.c 项目: 8l/go-learn
/*
 * attempt to generate 64-bit
 *	res = n
 * return 1 on success, 0 if op not handled.
 */
void
cgen64(Node *n, Node *res)
{
	Node t1, t2, *l, *r;
	Node lo1, lo2, hi1, hi2;
	Node al, ah, bl, bh, cl, ch, s, n1, creg;
	Prog *p1, *p2, *p3, *p4, *p5, *p6;

	uint64 v;

	if(res->op != OINDREG && res->op != ONAME) {
		dump("n", n);
		dump("res", res);
		fatal("cgen64 %O of %O", n->op, res->op);
	}

	l = n->left;
	if(!l->addable) {
		tempname(&t1, l->type);
		cgen(l, &t1);
		l = &t1;
	}

	split64(l, &lo1, &hi1);
	switch(n->op) {
	default:
		fatal("cgen64 %O", n->op);

	case OMINUS:
		split64(res, &lo2, &hi2);

		regalloc(&t1, lo1.type, N);
		regalloc(&al, lo1.type, N);
		regalloc(&ah, hi1.type, N);

		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi1, &ah);

		gmove(ncon(0), &t1);
		p1 = gins(ASUB, &al, &t1);
		p1->scond |= C_SBIT;
		gins(AMOVW, &t1, &lo2);

		gmove(ncon(0), &t1);
		gins(ASBC, &ah, &t1);
		gins(AMOVW, &t1, &hi2);

		regfree(&t1);
		regfree(&al);
		regfree(&ah);
		splitclean();
		splitclean();
		return;

	case OCOM:
		split64(res, &lo2, &hi2);
		regalloc(&n1, lo1.type, N);

		gins(AMOVW, &lo1, &n1);
		gins(AMVN, &n1, &n1);
		gins(AMOVW, &n1, &lo2);

		gins(AMOVW, &hi1, &n1);
		gins(AMVN, &n1, &n1);
		gins(AMOVW, &n1, &hi2);

		regfree(&n1);
		splitclean();
		splitclean();
		return;

	case OADD:
	case OSUB:
	case OMUL:
	case OLSH:
	case ORSH:
	case OAND:
	case OOR:
	case OXOR:
		// binary operators.
		// common setup below.
		break;
	}

	// setup for binary operators
	r = n->right;
	if(r != N && !r->addable) {
		tempname(&t2, r->type);
		cgen(r, &t2);
		r = &t2;
	}
	if(is64(r->type))
		split64(r, &lo2, &hi2);

	regalloc(&al, lo1.type, N);
	regalloc(&ah, hi1.type, N);

	// Do op.  Leave result in ah:al.
	switch(n->op) {
	default:
		fatal("cgen64: not implemented: %N\n", n);

	case OADD:
		// TODO: Constants
		regalloc(&bl, types[TPTR32], N);
		regalloc(&bh, types[TPTR32], N);
		gins(AMOVW, &hi1, &ah);
		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi2, &bh);
		gins(AMOVW, &lo2, &bl);
		p1 = gins(AADD, &bl, &al);
		p1->scond |= C_SBIT;
		gins(AADC, &bh, &ah);
		regfree(&bl);
		regfree(&bh);
		break;

	case OSUB:
		// TODO: Constants.
		regalloc(&bl, types[TPTR32], N);
		regalloc(&bh, types[TPTR32], N);
		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi1, &ah);
		gins(AMOVW, &lo2, &bl);
		gins(AMOVW, &hi2, &bh);
		p1 = gins(ASUB, &bl, &al);
		p1->scond |= C_SBIT;
		gins(ASBC, &bh, &ah);
		regfree(&bl);
		regfree(&bh);
		break;

	case OMUL:
		// TODO(kaib): this can be done with 4 regs and does not need 6
		regalloc(&bl, types[TPTR32], N);
		regalloc(&bh, types[TPTR32], N);
		regalloc(&cl, types[TPTR32], N);
		regalloc(&ch, types[TPTR32], N);

		// load args into bh:bl and bh:bl.
		gins(AMOVW, &hi1, &bh);
		gins(AMOVW, &lo1, &bl);
		gins(AMOVW, &hi2, &ch);
		gins(AMOVW, &lo2, &cl);

		// bl * cl
		p1 = gins(AMULLU, N, N);
		p1->from.type = D_REG;
		p1->from.reg = bl.val.u.reg;
		p1->reg = cl.val.u.reg;
		p1->to.type = D_REGREG;
		p1->to.reg = ah.val.u.reg;
		p1->to.offset = al.val.u.reg;
//print("%P\n", p1);

		// bl * ch
		p1 = gins(AMULA, N, N);
		p1->from.type = D_REG;
		p1->from.reg = bl.val.u.reg;
		p1->reg = ch.val.u.reg;
		p1->to.type = D_REGREG;
		p1->to.reg = ah.val.u.reg;
		p1->to.offset = ah.val.u.reg;
//print("%P\n", p1);

		// bh * cl
		p1 = gins(AMULA, N, N);
		p1->from.type = D_REG;
		p1->from.reg = bh.val.u.reg;
		p1->reg = cl.val.u.reg;
		p1->to.type = D_REGREG;
		p1->to.reg = ah.val.u.reg;
		p1->to.offset = ah.val.u.reg;
//print("%P\n", p1);

		regfree(&bh);
		regfree(&bl);
		regfree(&ch);
		regfree(&cl);

		break;

	case OLSH:
		regalloc(&bl, lo1.type, N);
		regalloc(&bh, hi1.type, N);
		gins(AMOVW, &hi1, &bh);
		gins(AMOVW, &lo1, &bl);

		if(r->op == OLITERAL) {
			v = mpgetfix(r->val.u.xval);
			if(v >= 64) {
				// TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al)
				// here and below (verify it optimizes to EOR)
				gins(AEOR, &al, &al);
				gins(AEOR, &ah, &ah);
			} else if(v > 32) {
				gins(AEOR, &al, &al);
				//	MOVW	bl<<(v-32), ah
				gshift(AMOVW, &bl, SHIFT_LL, (v-32), &ah);
			} else if(v == 32) {
				gins(AEOR, &al, &al);
				gins(AMOVW, &bl, &ah);
			} else if(v > 0) {
				//	MOVW	bl<<v, al
				gshift(AMOVW, &bl, SHIFT_LL, v, &al);

				//	MOVW	bh<<v, ah
				gshift(AMOVW, &bh, SHIFT_LL, v, &ah);

				//	OR		bl>>(32-v), ah
				gshift(AORR, &bl, SHIFT_LR, 32-v, &ah);
			} else {
				gins(AMOVW, &bl, &al);
				gins(AMOVW, &bh, &ah);
			}
			goto olsh_break;
		}

		regalloc(&s, types[TUINT32], N);
		regalloc(&creg, types[TUINT32], N);
		if (is64(r->type)) {
			// shift is >= 1<<32
			split64(r, &cl, &ch);
			gmove(&ch, &s);
			p1 = gins(AMOVW, &s, &s);
			p1->scond |= C_SBIT;
			p6 = gbranch(ABNE, T);
			gmove(&cl, &s);
			splitclean();
		} else {
			gmove(r, &s);
			p6 = P;
		}
		p1 = gins(AMOVW, &s, &s);
		p1->scond |= C_SBIT;

		// shift == 0
		p1 = gins(AMOVW, &bl, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bh, &ah);
		p1->scond = C_SCOND_EQ;
		p2 = gbranch(ABEQ, T);

		// shift is < 32
		nodconst(&n1, types[TUINT32], 32);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	MOVW.LO		bl<<s, al
		p1 = gregshift(AMOVW, &bl, SHIFT_LL, &s, &al);
		p1->scond = C_SCOND_LO;

		//	MOVW.LO		bh<<s, ah
		p1 = gregshift(AMOVW, &bh, SHIFT_LL, &s, &ah);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		s, creg
		p1 = gins(ASUB, &s, &creg);
		p1->scond = C_SCOND_LO;

		//	OR.LO		bl>>creg, ah
		p1 = gregshift(AORR, &bl, SHIFT_LR, &creg, &ah);
		p1->scond = C_SCOND_LO;

		//	BLO	end
		p3 = gbranch(ABLO, T);

		// shift == 32
		p1 = gins(AEOR, &al, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bl, &ah);
		p1->scond = C_SCOND_EQ;
		p4 = gbranch(ABEQ, T);

		// shift is < 64
		nodconst(&n1, types[TUINT32], 64);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	EOR.LO	al, al
		p1 = gins(AEOR, &al, &al);
		p1->scond = C_SCOND_LO;

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		creg, s
		p1 = gins(ASUB, &creg, &s);
		p1->scond = C_SCOND_LO;

		//	MOVW	bl<<s, ah
		p1 = gregshift(AMOVW, &bl, SHIFT_LL, &s, &ah);
		p1->scond = C_SCOND_LO;

		p5 = gbranch(ABLO, T);

		// shift >= 64
		if (p6 != P) patch(p6, pc);
		gins(AEOR, &al, &al);
		gins(AEOR, &ah, &ah);

		patch(p2, pc);
		patch(p3, pc);
		patch(p4, pc);
		patch(p5, pc);
		regfree(&s);
		regfree(&creg);

olsh_break:
		regfree(&bl);
		regfree(&bh);
		break;


	case ORSH:
		regalloc(&bl, lo1.type, N);
		regalloc(&bh, hi1.type, N);
		gins(AMOVW, &hi1, &bh);
		gins(AMOVW, &lo1, &bl);

		if(r->op == OLITERAL) {
			v = mpgetfix(r->val.u.xval);
			if(v >= 64) {
				if(bh.type->etype == TINT32) {
					//	MOVW	bh->31, al
					gshift(AMOVW, &bh, SHIFT_AR, 31, &al);

					//	MOVW	bh->31, ah
					gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
				} else {
					gins(AEOR, &al, &al);
					gins(AEOR, &ah, &ah);
				}
			} else if(v > 32) {
				if(bh.type->etype == TINT32) {
					//	MOVW	bh->(v-32), al
					gshift(AMOVW, &bh, SHIFT_AR, v-32, &al);

					//	MOVW	bh->31, ah
					gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
				} else {
					//	MOVW	bh>>(v-32), al
					gshift(AMOVW, &bh, SHIFT_LR, v-32, &al);
					gins(AEOR, &ah, &ah);
				}
			} else if(v == 32) {
				gins(AMOVW, &bh, &al);
				if(bh.type->etype == TINT32) {
					//	MOVW	bh->31, ah
					gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
				} else {
					gins(AEOR, &ah, &ah);
				}
			} else if( v > 0) {
				//	MOVW	bl>>v, al
				gshift(AMOVW, &bl, SHIFT_LR, v, &al);
	
				//	OR		bh<<(32-v), al
				gshift(AORR, &bh, SHIFT_LL, 32-v, &al);

				if(bh.type->etype == TINT32) {
					//	MOVW	bh->v, ah
					gshift(AMOVW, &bh, SHIFT_AR, v, &ah);
				} else {
					//	MOVW	bh>>v, ah
					gshift(AMOVW, &bh, SHIFT_LR, v, &ah);
				}
			} else {
				gins(AMOVW, &bl, &al);
				gins(AMOVW, &bh, &ah);
			}
			goto orsh_break;
		}

		regalloc(&s, types[TUINT32], N);
		regalloc(&creg, types[TUINT32], N);
		if (is64(r->type)) {
			// shift is >= 1<<32
			split64(r, &cl, &ch);
			gmove(&ch, &s);
			p1 = gins(AMOVW, &s, &s);
			p1->scond |= C_SBIT;
			p6 = gbranch(ABNE, T);
			gmove(&cl, &s);
			splitclean();
		} else {
			gmove(r, &s);
			p6 = P;
		}
		p1 = gins(AMOVW, &s, &s);
		p1->scond |= C_SBIT;

		// shift == 0
		p1 = gins(AMOVW, &bl, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bh, &ah);
		p1->scond = C_SCOND_EQ;
		p2 = gbranch(ABEQ, T);

		// check if shift is < 32
		nodconst(&n1, types[TUINT32], 32);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	MOVW.LO		bl>>s, al
		p1 = gregshift(AMOVW, &bl, SHIFT_LR, &s, &al);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		s,creg
		p1 = gins(ASUB, &s, &creg);
		p1->scond = C_SCOND_LO;

		//	OR.LO		bh<<(32-s), al
		p1 = gregshift(AORR, &bh, SHIFT_LL, &creg, &al);
		p1->scond = C_SCOND_LO;

		if(bh.type->etype == TINT32) {
			//	MOVW	bh->s, ah
			p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &ah);
		} else {
			//	MOVW	bh>>s, ah
			p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &ah);
		}
		p1->scond = C_SCOND_LO;

		//	BLO	end
		p3 = gbranch(ABLO, T);

		// shift == 32
		if(bh.type->etype == TINT32)
			p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
		else
			p1 = gins(AEOR, &al, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bh, &al);
		p1->scond = C_SCOND_EQ;
		p4 = gbranch(ABEQ, T);

		// check if shift is < 64
		nodconst(&n1, types[TUINT32], 64);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		creg, s
		p1 = gins(ASUB, &creg, &s);
		p1->scond = C_SCOND_LO;

		if(bh.type->etype == TINT32) {
			//	MOVW	bh->(s-32), al
			p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &al);
			p1->scond = C_SCOND_LO;

			//	MOVW	bh->31, ah
			p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
			p1->scond = C_SCOND_LO;
		} else {
			//	MOVW	bh>>(v-32), al
			p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &al);
			p1->scond = C_SCOND_LO;

			p1 = gins(AEOR, &ah, &ah);
			p1->scond = C_SCOND_LO;
		}

		//	BLO	end
		p5 = gbranch(ABLO, T);

		// s >= 64
		if (p6 != P) patch(p6, pc);
		if(bh.type->etype == TINT32) {
			//	MOVW	bh->31, al
			gshift(AMOVW, &bh, SHIFT_AR, 31, &al);

			//	MOVW	bh->31, ah
			gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
		} else {
			gins(AEOR, &al, &al);
			gins(AEOR, &ah, &ah);
		}

		patch(p2, pc);
		patch(p3, pc);
		patch(p4, pc);
		patch(p5, pc);
		regfree(&s);
		regfree(&creg);


orsh_break:
		regfree(&bl);
		regfree(&bh);
		break;

	case OXOR:
	case OAND:
	case OOR:
		// TODO(kaib): literal optimizations
		// make constant the right side (it usually is anyway).
//		if(lo1.op == OLITERAL) {
//			nswap(&lo1, &lo2);
//			nswap(&hi1, &hi2);
//		}
//		if(lo2.op == OLITERAL) {
//			// special cases for constants.
//			lv = mpgetfix(lo2.val.u.xval);
//			hv = mpgetfix(hi2.val.u.xval);
//			splitclean();	// right side
//			split64(res, &lo2, &hi2);
//			switch(n->op) {
//			case OXOR:
//				gmove(&lo1, &lo2);
//				gmove(&hi1, &hi2);
//				switch(lv) {
//				case 0:
//					break;
//				case 0xffffffffu:
//					gins(ANOTL, N, &lo2);
//					break;
//				default:
//					gins(AXORL, ncon(lv), &lo2);
//					break;
//				}
//				switch(hv) {
//				case 0:
//					break;
//				case 0xffffffffu:
//					gins(ANOTL, N, &hi2);
//					break;
//				default:
//					gins(AXORL, ncon(hv), &hi2);
//					break;
//				}
//				break;

//			case OAND:
//				switch(lv) {
//				case 0:
//					gins(AMOVL, ncon(0), &lo2);
//					break;
//				default:
//					gmove(&lo1, &lo2);
//					if(lv != 0xffffffffu)
//						gins(AANDL, ncon(lv), &lo2);
//					break;
//				}
//				switch(hv) {
//				case 0:
//					gins(AMOVL, ncon(0), &hi2);
//					break;
//				default:
//					gmove(&hi1, &hi2);
//					if(hv != 0xffffffffu)
//						gins(AANDL, ncon(hv), &hi2);
//					break;
//				}
//				break;

//			case OOR:
//				switch(lv) {
//				case 0:
//					gmove(&lo1, &lo2);
//					break;
//				case 0xffffffffu:
//					gins(AMOVL, ncon(0xffffffffu), &lo2);
//					break;
//				default:
//					gmove(&lo1, &lo2);
//					gins(AORL, ncon(lv), &lo2);
//					break;
//				}
//				switch(hv) {
//				case 0:
//					gmove(&hi1, &hi2);
//					break;
//				case 0xffffffffu:
//					gins(AMOVL, ncon(0xffffffffu), &hi2);
//					break;
//				default:
//					gmove(&hi1, &hi2);
//					gins(AORL, ncon(hv), &hi2);
//					break;
//				}
//				break;
//			}
//			splitclean();
//			splitclean();
//			goto out;
//		}
		regalloc(&n1, lo1.type, N);
		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi1, &ah);
		gins(AMOVW, &lo2, &n1);
		gins(optoas(n->op, lo1.type), &n1, &al);
		gins(AMOVW, &hi2, &n1);
		gins(optoas(n->op, lo1.type), &n1, &ah);
		regfree(&n1);
		break;
	}
	if(is64(r->type))
		splitclean();
	splitclean();

	split64(res, &lo1, &hi1);
	gins(AMOVW, &al, &lo1);
	gins(AMOVW, &ah, &hi1);
	splitclean();

//out:
	regfree(&al);
	regfree(&ah);
}