/**
 * Convenience overload of isSource().
 *
 * Looks up the edge the detector is placed on and forwards to the
 * five-argument isSource() overload, handing it a fresh, empty edge
 * vector (presumably the list of already-visited edges -- confirm
 * against that overload).
 *
 * @param det        detector to classify
 * @param detectors  detector container forwarded unchanged
 * @param strict     strictness flag forwarded unchanged
 * @return whatever the five-argument overload returns
 */
bool
RODFNet::isSource(const RODFDetector& det, const RODFDetectorCon& detectors, bool strict) const {
    std::vector<ROEdge*> visitedEdges;
    return isSource(det, getDetectorEdge(det), visitedEdges, detectors, strict);
}
/*
 * Apply the eight rows of the table T0 to the expansions in X1 and X2.
 *
 * Each row of T0 holds nine integers, used as follows:
 *   [0] child index of `box` (also the s_exp-sized slot read from X1/X2)
 *   [1] slot in FMMV->D_X2X used as factor for the X1 product
 *   [2] non-zero: use the conjugating multiply for the X1 product
 *   [3] slot in X receiving the X1 product
 *   [4] slot in X into which slot [3] is accumulated
 *   [5]..[8] the same four roles for the X2 product
 * Rows whose child is not a source are skipped. After the loop the XU_D and
 * XD_D slots of X are accumulated into XU_all and XD_all respectively.
 */
static void handle_T0(FmmvHandle *FMMV, Box *box, int T0[][9], _FLOAT_ *X, _FLOAT_ *X1, _FLOAT_ *X2)
{
    _FLOAT_ *D_X2X = FMMV->D_X2X;
    int s_exp = FMMV->s_exp;
    int s_exp2 = FMMV->s_exp/2;
    int rowNo;

    for (rowNo = 0; rowNo < 8; rowNo++) {
        const int *t0row = T0[rowNo];
        _FLOAT_ *prod1;
        _FLOAT_ *accu1;
        _FLOAT_ *prod2;
        _FLOAT_ *accu2;

        if (!isSource(box->child[t0row[0]])) {
            continue;
        }

        /* contribution coming from X1 */
        prod1 = X + s_exp*t0row[3];
        accu1 = X + s_exp*t0row[4];
        if (t0row[2]) { /* conjugate */
            VEC_MUL_CJ(s_exp2, D_X2X + s_exp*t0row[1], X1 + s_exp*t0row[0], prod1);
        }
        else {
            VEC_MUL_C(s_exp2, D_X2X + s_exp*t0row[1], X1 + s_exp*t0row[0], prod1);
        }
        VEC_ADD(s_exp, prod1, accu1, accu1);

        /* contribution coming from X2 */
        prod2 = X + s_exp*t0row[7];
        accu2 = X + s_exp*t0row[8];
        if (t0row[6]) { /* conjugate */
            VEC_MUL_CJ(s_exp2, D_X2X + s_exp*t0row[5], X2 + s_exp*t0row[0], prod2);
        }
        else {
            VEC_MUL_C(s_exp2, D_X2X + s_exp*t0row[5], X2 + s_exp*t0row[0], prod2);
        }
        VEC_ADD(s_exp, prod2, accu2, accu2);
    }

    VEC_ADD(s_exp, X + s_exp*XU_D, X + s_exp*XU_all, X + s_exp*XU_all);
    VEC_ADD(s_exp, X + s_exp*XD_D, X + s_exp*XD_all, X + s_exp*XD_all);
}
/**
 * Tells whether this path refers to a source file.
 *
 * @return false when the path does not exist or has no extension;
 *         otherwise the result of isSource(extension).
 */
bool Path::isSource() const
{
    if (!exists())
        return false;

    const char *ext = extension();
    if (!ext)
        return false;

    return isSource(ext);
}
void QueueItem::addSource(const UserPtr& aUser) { dcassert(!isSource(aUser)); SourceIter i = getBadSource(aUser); if(i != badSources.end()) { sources.push_back(*i); badSources.erase(i); } else { sources.push_back(Source(aUser)); } }
/*
 * Count those of the 26 neighbors of `box` that are sources and
 * have source children.
 */
static int noOfSourceNeighborsWithSourceChilds(Box *box)
{
    int count = 0;
    int idx;

    for (idx = 0; idx < 26; idx++) {
        Box *nb = box->neighbor[idx];

        if (!isSource(nb)) {
            continue;
        }
        if (hasSourceChilds(nb)) {
            count++;
        }
    }
    return count;
}
/*
 * Count the entries of neighbor2_list with index in [i0, i1) that are
 * sources and have source children.
 */
static int noOfSourceNeighborsWithSourceChilds2(Box **neighbor2_list, int i0, int i1)
{
    int count = 0;
    int idx;

    for (idx = i0; idx < i1; idx++) {
        Box *nb = neighbor2_list[idx];

        if (!isSource(nb)) {
            continue;
        }
        if (hasSourceChilds(nb)) {
            count++;
        }
    }
    return count;
}
/**
 * Collects the source files named on the command line.
 *
 * filenames is cleared first. Every argument from index 1 onwards that is
 * not a destination and is a source is appended to it.
 *
 * @param filenames  receives the source file names
 * @return always true
 */
bool CompilerInterface::sources(FileName *filenames) const
{
  FUNCTION_TRACE;
  filenames->clear();

  for (int argNo = 1; argNo < args.argc(); argNo++)
  {
    const char *candidate = args[argNo];
    if (isDestination(candidate))
      continue;
    if (isSource(candidate))
      filenames->append(candidate);
  }
  return true;
}
/**
 * Connects this processor to the upstream node sn.
 *
 * - A processor that is itself a source keeps no source node; a non-null sn
 *   is only told that this processor is its destination.
 * - A null sn, or an sn that is a sink, clears sourceNode.
 * - Otherwise, if sn differs from the current source node, it becomes the
 *   new source (via setMergerSourceNode() for mergers) and is told that this
 *   processor is its destination. If sn is already the source node nothing
 *   happens.
 */
void GenericProcessor::setSourceNode(GenericProcessor* sn)
{
    // I am a source: I can't have a source node.
    if (isSource())
    {
        if (sn != 0)
            sn->setDestNode(this);
        return;
    }

    // The candidate is blank or is a sink: drop any existing link.
    if (sn == 0 || sn->isSink())
    {
        sourceNode = 0;
        return;
    }

    // The candidate is not new: nothing to update.
    if (sourceNode == sn)
        return;

    if (this->isMerger())
        setMergerSourceNode(sn);
    else
        sourceNode = sn;

    sn->setDestNode(this);
}
bool CompilerInterface::restrictArgs(const char* selected_source,Argv &backup_args ) { FUNCTION_TRACE; int i; backup_args=param_args(); Argv new_args; for (i=0;i<backup_args.argc();i++) { const char *file=backup_args[i]; if (!isSource(file) || filename_strcmp(selected_source,file)==0) new_args.append(file); } setArgs(new_args); return true; }
void Pathfinder::pqAddto(t_tmp& actualNode, priority_queue<t_tmp>& pq, const int& targetNet, list<int>& targetNodes, rt_dir direction){ t_tmp tmp; int lCost, nextNode; list<int>::iterator target_it; nextNode=getDir(actualNode.node, direction); if(!(graph[nextNode].isVisited(trace_id) || (isSource(nextNode) && graph[nextNode].net!=-targetNet))){ tmp.node=nextNode; tmp.father=actualNode.node; tmp.costAccumulated=actualNode.costAccumulated+calcCost(nextNode, direction); lCost=calcDistance(nextNode, targetNodes.front()); for(target_it=++targetNodes.begin(); target_it!=targetNodes.end(); ++target_it) lCost=min(lCost,calcDistance(nextNode, *target_it)); tmp.aStarCost=tmp.costAccumulated+lCost; // cout << "(" << getPosX(actualNode.node) << " " <<getPosY(actualNode.node)<< " " <<getPosZ(actualNode.node)<< ") --(" << tmp.aStarCost <<")--> (" << getPosX(tempNode.node) << " " <<getPosY(tempNode.node)<< " " <<getPosZ(tempNode.node)<< ") " << endl; pq.push(tmp); } }
/**
 * Assigns a type to every detector in detcont and reports the counts.
 *
 * First pass: a detector for which isSource() holds is marked
 * SOURCE_DETECTOR, one for which isDestination() holds is marked
 * SINK_DETECTOR (the sink check runs second, so it overrides the type set by
 * the source check while both counters are incremented); a detector whose
 * type is still TYPE_NOT_DEFINED afterwards becomes BETWEEN_DETECTOR.
 * Second pass: source detectors for which isFalseSource() holds are
 * re-marked DISCARDED_DETECTOR and moved from the source to the invalid count.
 *
 * @param detcont        container holding the detectors to classify
 * @param sourcesStrict  forwarded to isSource()
 */
void
RODFNet::computeTypes(RODFDetectorCon& detcont, bool sourcesStrict) const {
    typedef std::vector<RODFDetector*>::const_iterator DetectorIt;

    PROGRESS_BEGIN_MESSAGE("Computing detector types");
    const std::vector<RODFDetector*>& dets = detcont.getDetectors();

    // the edge dependencies must exist before any classification is done
    buildDetectorEdgeDependencies(detcont);

    // first pass: classify each detector
    for (DetectorIt it = dets.begin(); it != dets.end(); ++it) {
        RODFDetector* const det = *it;
        if (isSource(*det, detcont, sourcesStrict)) {
            det->setType(SOURCE_DETECTOR);
            mySourceNumber++;
        }
        if (isDestination(*det, detcont)) {
            det->setType(SINK_DETECTOR);
            mySinkNumber++;
        }
        if (det->getType() == TYPE_NOT_DEFINED) {
            det->setType(BETWEEN_DETECTOR);
            myInBetweenNumber++;
        }
    }

    // second pass: discard sources that turn out to be false sources
    for (DetectorIt it = dets.begin(); it != dets.end(); ++it) {
        RODFDetector* const det = *it;
        if (det->getType() != SOURCE_DETECTOR) {
            continue;
        }
        if (isFalseSource(*det, detcont)) {
            det->setType(DISCARDED_DETECTOR);
            myInvalidNumber++;
            mySourceNumber--;
        }
    }

    // report
    PROGRESS_DONE_MESSAGE();
    WRITE_MESSAGE("Computed detector types:");
    WRITE_MESSAGE(" " + toString(mySourceNumber) + " source detectors");
    WRITE_MESSAGE(" " + toString(mySinkNumber) + " sink detectors");
    WRITE_MESSAGE(" " + toString(myInBetweenNumber) + " in-between detectors");
    WRITE_MESSAGE(" " + toString(myInvalidNumber) + " invalid detectors");
}
/*
 * M2X: process the multipole expansions M of the children of `box` for one
 * direction and deliver the results through the per-child slots of X1 and X2.
 *
 *   FMMV  handle providing the expansion order pM, the lengths s_eps/s_exp/
 *         len_F/len_neg_F and the permutation/rotation tables used below
 *   dir   0 = UD, 1 = NS, 2 = EW (selects the rotation applied first)
 *   box   parent box; only children that are sources and have a non-null M
 *         are processed
 *   X1,X2 output buffers; child i owns the s_exp-sized slot at offset i*s_exp
 *         in each of them (the slot is zeroed here before use)
 *
 * Children are handled two at a time by the *_simd2 routines. If the number
 * of participating children is odd, the last pair is completed with a
 * non-participating child, whose M entry is `zeros` and whose output pointers
 * in X1_p/X2_p are 0.
 * NOTE(review): presumably P_X_riri2rrii2_simd2 skips a null output pointer
 * -- confirm against its implementation.
 */
void M2X(FmmvHandle *FMMV, int dir, Box *box, _FLOAT_ *X1, _FLOAT_ *X2)
{
	int p = FMMV->pM;
	int len0 = (p+1)*(p+1);
	int len0m = len0/2;
	int len0p = len0 - len0m;
	int len2 = (2*p+1)*FMMV->s_eps;
	int s_exp = FMMV->s_exp;
	int s_exp2 = s_exp/2;
	/* scratch buffers for the intermediate stages of the pipeline */
	SIMD_ALIGN _FLOAT_ x1[2*(FMM_P_MAX+1)*(FMM_P_MAX+2)];
	SIMD_ALIGN _FLOAT_ x2[2*(FMM_P_MAX+1)*(FMM_P_MAX+2)];
	SIMD_ALIGN _FLOAT_ y1[2*2*(FMM_P_MAX+1)*FMM_S_EPS_MAX];
	SIMD_ALIGN _FLOAT_ y2[2*4*(FMM_P_MAX+1)*FMM_S_EPS_MAX];
	SIMD_ALIGN _FLOAT_ y3[2*2*(FMM_P_MAX+1)*FMM_S_EPS_MAX];
	SIMD_ALIGN _FLOAT_ z1[2*FMM_S_EXP_MAX];
	/* XX1[i]/XX2[i]: start of child i's slot in X1/X2 */
	_FLOAT_ *XX1[8];
	_FLOAT_ *XX2[8];
	/* X1_p[i]/X2_p[i]: address of XX1[i]/XX2[i], or 0 for skipped children */
	_FLOAT_ **X1_p[8] = {0,0,0,0,0,0,0,0};
	_FLOAT_ **X2_p[8] = {0,0,0,0,0,0,0,0};
	/* M[i]: multipole expansion of child i, `zeros` for skipped children */
	_FLOAT_ *M[8] = {zeros, zeros, zeros, zeros, zeros, zeros, zeros, zeros};
	int i, k, missing;
	int q[8]; /* q[0..k-1]: indices of the participating children */

	y2[2*len2] = 0.0;
	y2[2*len2+1] = 0.0;

	/* collect participating children and clear their output slots */
	k = 0;
	for (i=0; i<8; i++) {
		if (isSource(box->child[i])&&box->child[i]->M) {
			M[i] = box->child[i]->M;
			XX1[i] = X1 + i*s_exp;
			XX2[i] = X2 + i*s_exp;
			X1_p[i] = XX1+i;
			X2_p[i] = XX2+i;
			memset(X1 + i*s_exp, 0, s_exp*sizeof(_FLOAT_));
			memset(X2 + i*s_exp, 0, s_exp*sizeof(_FLOAT_));
			q[k]=i;
			k++;
		}
		else missing=i; /* remember one skipped child for padding */
	}
	if (k==0) return;

	/* pad q with a skipped child; `missing` is only read when k<8, in which
	   case it has been assigned above */
	for (i=k; i<8; i++) q[i] = missing;

	/* one iteration per pair of children: ceil(k/2) iterations */
	for (i=0; i<(k&1?(k>>1)+1:k>>1); i++) {
		/* direction-dependent first stage; the result ends up in x1 */
		switch (dir) {
		case 0: /* UD */
			P_2riri2rrii_simd2(FMMV->pM, M[q[2*i]], M[q[2*i+1]], x1);
			break;
		case 1: /* NS */
			P_2riri2rrii_simd2(FMMV->pM, M[q[2*i]], M[q[2*i+1]], x1);
			Rz_minus_pi2_rrii_simd2(p, x1, x2);
			Ry_simd2(p, FMMV->Ry_pi2, x2, x1);
			break;
		case 2: /* EW */
			P_2riri2rrii_simd2(FMMV->pM, M[q[2*i]], M[q[2*i+1]], x2);
			Ry_simd2(p, FMMV->Ry_minus_pi2, x2, x1);
			break;
		}

		/* "plus" and "minus" parts are processed separately into y1 and y2 */
		perm_simd2(len0p, FMMV->P_MRT_plus, x1, x2);
		CMX_plus_simd2(FMMV, x2, y1);
		perm_simd2(len0m, FMMV->P_MRT_minus, x1, x2);
		CMX_minus_simd2(FMMV, x2, y2);

		/* y1 <- plus + minus (used for X1), y3 <- plus - minus (used for X2) */
		VEC_COPY(2*len2, y1, y3);
		VEC_ADD(2*len2, y2, y1, y1);
		VEC_SUB(2*len2, y3, y2, y3);

		/* final stage for X1 */
		y1[2*len2]=0;
		y1[2*len2+1]=0;
		perm_simd2(2*FMMV->len_F, FMMV->P_VF, y1, y2);
		neg_simd2(FMMV->len_neg_F, FMMV->neg_F, y2);
		F_M2X_simd2(FMMV, y2, z1);
		P_X_riri2rrii2_simd2(s_exp2, z1, X1_p[q[2*i]], X1_p[q[2*i+1]]);

		/* final stage for X2 */
		y3[2*len2]=0;
		y3[2*len2+1]=0;
		perm_simd2(2*FMMV->len_F, FMMV->P_VF, y3, y2);
		neg_simd2(FMMV->len_neg_F, FMMV->neg_F, y2);
		F_M2X_simd2(FMMV, y2, z1);
		P_X_riri2rrii2_simd2(s_exp2, z1, X2_p[q[2*i]], X2_p[q[2*i+1]]);
	}
}
/*
 * M2M: accumulate into box->M the contributions of the multipole expansions
 * of the children of `box`.
 *
 * Nothing is done if `box` is not a source. The eight children are handled
 * as four pairs: (SWD, NEU), (NWD, SEU), (SED, NWU), (NED, SWU). A child
 * takes part only if it is a source and has a non-null M.
 *
 * For each pair:
 *  - if the first child takes part, it is rotated (Rz, Ry), passed through
 *    Tz_M2M and stored in xx; if the second child takes part as well it is
 *    processed the same way (with Ry_minus_theta), turned by Ry_pi and added
 *    to xx; the sum is rotated back once and added to box->M;
 *  - otherwise, if only the second child takes part, it is processed alone
 *    and rotated back with Ry_theta before being added to box->M.
 *
 * box->M is replaced by the return value of VEC_ADD2.
 * NOTE(review): presumably VEC_ADD2 allocates the destination when box->M is
 * still null -- confirm.
 */
void M2M(FmmvHandle *FMMV, Box *box)
{
	int p = FMMV->pM;
	int len0 = (p+1)*(p+1);
	int len = (p+1)*(p+2);
	int *P_RT = FMMV->P_MRT;
	int *P_riri2rrii = FMMV->P_Mriri2rrii;
	/* x1/x2: ping-pong scratch buffers; xx: accumulator for a child pair */
	_FLOAT_ x1[(FMM_P_MAX+1)*(FMM_P_MAX+2)];
	_FLOAT_ x2[(FMM_P_MAX+1)*(FMM_P_MAX+2)];
	_FLOAT_ xx[(FMM_P_MAX+1)*(FMM_P_MAX+2)];

	if (!isSource(box)) return;

	/* pair SWD / NEU: Rz_pi4 on the way in, Rz_minus_pi4 on the way out */
	if (isSource(box->child[SWD])&&box->child[SWD]->M) {
		Rz_pi4(p, box->child[SWD]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_pi_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, xx);
		if (isSource(box->child[NEU])&&box->child[NEU]->M) {
			Rz_pi4(p, box->child[NEU]->M, x2);
			perm(len0, P_riri2rrii, x2, x1);
			Ry(p, FMMV->Ry_minus_theta, x1, x2);
			perm(len0, P_RT, x2, x1);
			Tz_M2M(FMMV, x1);
			perm_inv(len0, P_RT, x1, x2);
			Ry_pi(p, x2);
			VEC_ADD(len, x2, xx, xx);
		}
		Ry(p, FMMV->Ry_minus_pi_minus_theta, xx, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_minus_pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}
	else if (isSource(box->child[NEU])&&box->child[NEU]->M) {
		Rz_pi4(p, box->child[NEU]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_minus_theta,x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, x2);
		Ry(p, FMMV->Ry_theta, x2, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_minus_pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}

	/* pair NWD / SEU: Rz_minus_pi4 on the way in, Rz_pi4 on the way out */
	if (isSource(box->child[NWD])&&box->child[NWD]->M) {
		Rz_minus_pi4(p, box->child[NWD]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_pi_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, xx);
		if (isSource(box->child[SEU])&&box->child[SEU]->M) {
			Rz_minus_pi4(p, box->child[SEU]->M, x2);
			perm(len0, P_riri2rrii, x2, x1);
			Ry(p, FMMV->Ry_minus_theta, x1, x2);
			perm(len0, P_RT, x2, x1);
			Tz_M2M(FMMV, x1);
			perm_inv(len0, P_RT, x1, x2);
			Ry_pi(p, x2);
			VEC_ADD(len, x2, xx, xx);
		}
		Ry(p, FMMV->Ry_minus_pi_minus_theta, xx, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}
	else if (isSource(box->child[SEU])&&box->child[SEU]->M) {
		Rz_minus_pi4(p, box->child[SEU]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, x2);
		Ry(p, FMMV->Ry_theta, x2, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}

	/* pair SED / NWU: Rz_3pi4 on the way in, Rz_minus_3pi4 on the way out */
	if (isSource(box->child[SED])&&box->child[SED]->M) {
		Rz_3pi4(p, box->child[SED]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_pi_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, xx);
		if (isSource(box->child[NWU])&&box->child[NWU]->M) {
			Rz_3pi4(p, box->child[NWU]->M, x2);
			perm(len0, P_riri2rrii, x2, x1);
			Ry(p, FMMV->Ry_minus_theta, x1, x2);
			perm(len0, P_RT, x2, x1);
			Tz_M2M(FMMV, x1);
			perm_inv(len0, P_RT, x1, x2);
			Ry_pi(p, x2);
			VEC_ADD(len, x2, xx, xx);
		}
		Ry(p, FMMV->Ry_minus_pi_minus_theta, xx, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_minus_3pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}
	else if (isSource(box->child[NWU])&&box->child[NWU]->M) {
		Rz_3pi4(p, box->child[NWU]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, x2);
		Ry(p, FMMV->Ry_theta, x2, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_minus_3pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}

	/* pair NED / SWU: Rz_minus_3pi4 on the way in, Rz_3pi4 on the way out */
	if (isSource(box->child[NED])&&box->child[NED]->M) {
		Rz_minus_3pi4(p, box->child[NED]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_pi_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, xx);
		if (isSource(box->child[SWU])&&box->child[SWU]->M) {
			Rz_minus_3pi4(p, box->child[SWU]->M, x2);
			perm(len0, P_riri2rrii, x2, x1);
			Ry(p, FMMV->Ry_minus_theta, x1, x2);
			perm(len0, P_RT, x2, x1);
			Tz_M2M(FMMV, x1);
			perm_inv(len0, P_RT, x1, x2);
			Ry_pi(p, x2);
			VEC_ADD(len, x2, xx, xx);
		}
		Ry(p, FMMV->Ry_minus_pi_minus_theta, xx, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_3pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}
	else if (isSource(box->child[SWU])&&box->child[SWU]->M) {
		Rz_minus_3pi4(p, box->child[SWU]->M, x2);
		perm(len0, P_riri2rrii, x2, x1);
		Ry(p, FMMV->Ry_minus_theta, x1, x2);
		perm(len0, P_RT, x2, x1);
		Tz_M2M(FMMV, x1);
		perm_inv(len0, P_RT, x1, x2);
		Ry(p, FMMV->Ry_theta, x2, x1);
		perm_inv(len0, P_riri2rrii, x1, x2);
		Rz_3pi4(p, x2, x1);
		box->M = VEC_ADD2(FMMV, len, x1, box->M);
	}
}
void Pathfinder::routeNets(int nrAttempts){ list<int> targetNodes; map<int,t_nets>::iterator nets_it; list<int>::iterator netTree_it; vector<int>::iterator netTree_it2; vector<int>::iterator nodes_it; //Initializations clock_t start=clock(); actualAttempt=0; trace_id=0; conflicts=0; visited=0; netlist.erase(blockageNet); for(nets_it=netlist.begin(); nets_it!=netlist.end(); ++nets_it){ //cout << (nets_it->second).nodes[0] << endl; if(rand()%2==0) getCenter((nets_it->second).nodes); //cout << (nets_it->second).nodes[0] << endl; } //Pathfinder Routing cout << "-> Routing graph..." << endl; do{ // Loop over all nets for(nets_it=netlist.begin(); nets_it!=netlist.end(); ++nets_it){ if(nets_it->second.conflict){ //If conflict exists, check it out and do rip-upand re-route. Otherwise jump net. --conflicts; nets_it->second.conflict=false; for(netTree_it=nets_it->second.netTree.begin();netTree_it!=nets_it->second.netTree.end(); ++netTree_it){ if(graph[*netTree_it].nrNets>1){ nets_it->second.conflict=true; break; } } } if(nets_it->second.conflict | !actualAttempt){ //Rip-up net for(netTree_it=(nets_it->second).netTree.begin(); netTree_it!=(nets_it->second).netTree.end(); ++netTree_it){ if(!isSource(*netTree_it)){ --graph[*netTree_it].nrNets; if(getNet(*netTree_it)==nets_it->first) graph[*netTree_it].net=0; } } //Clear netTree of the net (nets_it->second).netTree.clear(); (nets_it->second).routeResult.clear(); //Create list of target nodes targetNodes.clear(); for(nodes_it=(nets_it->second).nodes.begin();nodes_it!=(nets_it->second).nodes.end(); ++nodes_it){ targetNodes.push_back(*nodes_it); //cout << *nodes_it << endl; } //Insert the first node to the tree (nets_it->second).netTree.push_back(*targetNodes.begin()); targetNodes.erase(targetNodes.begin()); //Loop until all sinks have been found while(!targetNodes.empty() && aStar(nets_it, targetNodes)){} //Route the tree to the closest node in the graph if(!targetNodes.empty()) break; if(nets_it->second.conflict) ++conflicts; } } 
//update H cost ++trace_id; for(nets_it=netlist.begin(); nets_it!=netlist.end(); ++nets_it){ if(nets_it->second.conflict){ for(netTree_it=nets_it->second.netTree.begin();netTree_it!=nets_it->second.netTree.end(); ++netTree_it){ if(graph[*netTree_it].nrNets>1 && graph[*netTree_it].idNr!=trace_id){ ++graph[*netTree_it].history; graph[*netTree_it].idNr=trace_id; } } } } if(!(++actualAttempt%15)) cerr << "." << conflicts << "."; // cout << conflicts << endl; }while(targetNodes.empty() && conflicts && actualAttempt<nrAttempts); // cout << visited << endl; cout << endl << "-> Runtime = " << float((clock()-start)/(CLOCKS_PER_SEC/1000))/1000 << "s" << endl; if(!targetNodes.empty()) cout << "-> Impossible to route net: " << nets_it->first << endl; if(conflicts || !targetNodes.empty()){ cout <<"-> Unable to route the circuit after "; cout << actualAttempt << " attempts."<< endl; }else{ cout <<"-> Routing finished in "; cout << actualAttempt << " attempts."<< endl; showResult(); } if(!targetNodes.empty() || conflicts) throw AstranError("Could not finish routing"); }
/*
 * M2L_ws2_reduced: reduced-scheme variant of M2L_ws2, called with a box
 * box0 and working on each of its children.
 *
 * For every child of box0 that is a source with source children the three
 * direction passes (U/D, N/S, E/W) are run: M2X fills X1/X2, and the
 * handle_T0/handle_T1/handle_T2 routines distribute the data to the boxes
 * of the interaction list IL. Entries 26..123 of neighbor2_list that are
 * targets with target children form the list ILR; they are served by
 * handle_T1_reduced and by the six explicit shifts into their X2[] arrays.
 *
 * Bookkeeping: nb->noOfXin and nb->noOfXin2 are counters of pending
 * contributions; when one reaches zero, X2L is called for the three
 * directions and the corresponding X[] or X2[] arrays of nb's children
 * are freed.
 *
 * Special case: with periodic boundary conditions and box0->level == -1
 * (box0 is an artificial parent of the root) only child 0 is processed
 * and the final clean-up is done directly on the root.
 */
void M2L_ws2_reduced(FmmvHandle *FMMV, Box *box0)
{
	int s_exp = FMMV->s_exp;
	SIMD_ALIGN _FLOAT_ X1[8*FMM_S_EXP_MAX];
	SIMD_ALIGN _FLOAT_ X2[8*FMM_S_EXP_MAX];
	SIMD_ALIGN _FLOAT_ X[13*FMM_S_EXP_MAX];
	SIMD_ALIGN _FLOAT_ z1[FMM_S_EXP_MAX];
	Box *nb;
	int i, j, k, kk;
	Box *IL[208];
	Box *box;
	Box *ILR[98]; /* 98 == 124-26, see the loop filling it below */
	Box *neighbor2_list[124];

	if (!((FMMV->periodicBoundaryConditions)&&(box0->level==-1))) {
		if (!isSource(box0) || !hasSourceChilds(box0)) return;

		/* initialise the counter noOfXin2 of each target neighbor of box0
		   that has not been initialised yet (still -1) */
		for (i=0; i<26; i++) {
			nb = box0->neighbor[i];
			if (isTarget(nb) && hasTargetChilds(nb) && (nb->noOfXin2 == -1)) {
				nb->noOfXin2 = noOfSourceNeighborsWithSourceChilds(nb);
				++(FMMV->noOfStoredXin);
				/* keep track of the maximum of noOfStoredXin */
				if (FMMV->noOfStoredXin>FMMV->maxNoOfStoredXin) {
					FMMV->maxNoOfStoredXin = FMMV->noOfStoredXin;
				}
			}
		}
	}

	for (k=0; k<8; k++) {
		/* artificial parent of the root: only child 0 is processed */
		if ((FMMV->periodicBoundaryConditions)&&(box0->level==-1)&&(k==1)) {
			break;
		}
		box = box0->child[k];
		if (!isSource(box) || !hasSourceChilds(box)) continue;

		gen_M2L_interaction_list2(FMMV, box, IL, neighbor2_list);

		/* ILR: entries 26..123 of neighbor2_list, 0 where not usable */
		for (i=0; i<124-26; i++) {
			nb = neighbor2_list[i+26];
			if (isTarget(nb) && hasTargetChilds(nb)) {
				ILR[i] = nb;
			}
			else {
				ILR[i] = 0;
			}
		}

		/*** U/D lists ***/
		memset(X + s_exp*XU_all, 0, s_exp*sizeof(_FLOAT_));
		memset(X + s_exp*XD_all, 0, s_exp*sizeof(_FLOAT_));
		memset(X + s_exp*XU_D, 0, s_exp*sizeof(_FLOAT_));
		memset(X + s_exp*XD_D, 0, s_exp*sizeof(_FLOAT_));
		M2X(FMMV, 0, box, X1, X2);
		handle_T0(FMMV, box, T0_UD, X, X1, X2);
		handle_T1(FMMV, T1_U_ws2, 18, X, XU_D, XU, IL);
		handle_T1(FMMV, T1_D_ws2, 18, X, XD_D, XD, IL);
		handle_T1_reduced(FMMV, T1_U_ws2_reduced, 12, X, XU_all, XU, ILR);
		handle_T1_reduced(FMMV, T1_D_ws2_reduced, 12, X, XD_all, XD, ILR);
		if (ILR[ir_0_0_8]) {
			VEC_MUL(s_exp, FMMV->D_X2X + s_exp*dr_0_0_8, X + s_exp*XU_all, z1);
			ILR[ir_0_0_8]->X2[XU] = VEC_ADD2(FMMV, s_exp, z1, ILR[ir_0_0_8]->X2[XU]);
		}
		if (ILR[ir_0_0_m8]) {
			VEC_MUL(s_exp, FMMV->D_X2X + s_exp*dr_0_0_8, X + s_exp*XD_all, z1);
			ILR[ir_0_0_m8]->X2[XD] = VEC_ADD2(FMMV, s_exp, z1, ILR[ir_0_0_m8]->X2[XD]);
		}

		/*** N/S lists ***/
		memset(X, 0, 12*s_exp*sizeof(_FLOAT_));
		M2X(FMMV, 1, box, X1, X2);
		handle_T0(FMMV, box, T0_NS, X, X1, X2);
		handle_T1(FMMV, T1_N_ws2, 12, X, XU_D, XN, IL);
		handle_T1(FMMV, T1_S_ws2, 12, X, XD_D, XS, IL);
		handle_T1_reduced(FMMV, T1_N_ws2_reduced, 7, X, XU_all, XN, ILR);
		handle_T1_reduced(FMMV, T1_S_ws2_reduced, 7, X, XD_all, XS, ILR);
		if (ILR[ir_0_8_0]) {
			VEC_MUL(s_exp, FMMV->D_X2X + s_exp*dr_0_0_8, X + s_exp*XU_all, z1);
			ILR[ir_0_8_0]->X2[XN] = VEC_ADD2(FMMV, s_exp, z1, ILR[ir_0_8_0]->X2[XN]);
		}
		if (ILR[ir_0_m8_0]) {
			VEC_MUL(s_exp, FMMV->D_X2X + s_exp*dr_0_0_8, X + s_exp*XD_all, z1);
			ILR[ir_0_m8_0]->X2[XS] = VEC_ADD2(FMMV, s_exp, z1, ILR[ir_0_m8_0]->X2[XS]);
		}
		handle_T2(FMMV, T2_N_ws2, 24, X, XN, IL);
		handle_T2(FMMV, T2_S_ws2, 24, X, XS, IL);

		/*** E/W lists ***/
		memset(X, 0, 12*s_exp*sizeof(_FLOAT_));
		M2X(FMMV, 2, box, X1, X2);
		handle_T0(FMMV, box, T0_EW, X, X1, X2);
		handle_T1(FMMV, T1_E_ws2, 8, X, XU_D, XE, IL);
		handle_T1(FMMV, T1_W_ws2, 8, X, XD_D, XW, IL);
		handle_T1_reduced(FMMV, T1_E_ws2_reduced, 4, X, XU_all, XE, ILR);
		handle_T1_reduced(FMMV, T1_W_ws2_reduced, 4, X, XD_all, XW, ILR);
		if (ILR[ir_8_0_0]) {
			VEC_MUL(s_exp, FMMV->D_X2X + s_exp*dr_0_0_8, X + s_exp*XU_all, z1);
			ILR[ir_8_0_0]->X2[XE] = VEC_ADD2(FMMV, s_exp, z1, ILR[ir_8_0_0]->X2[XE]);
		}
		if (ILR[ir_m8_0_0]) {
			VEC_MUL(s_exp, FMMV->D_X2X + s_exp*dr_0_0_8, X + s_exp*XD_all, z1);
			ILR[ir_m8_0_0]->X2[XW] = VEC_ADD2(FMMV, s_exp, z1, ILR[ir_m8_0_0]->X2[XW]);
		}
		handle_T2(FMMV, T2_E_ws2, 36, X, XE, IL);
		handle_T2(FMMV, T2_W_ws2, 36, X, XW, IL);

		if (!((FMMV->periodicBoundaryConditions)&&(box0->level==-1))) {
			/* this child has delivered its data to the first 26 entries of
			   neighbor2_list: decrement their counters and convert/free
			   where the counter reaches zero */
			for (i=0; i<26; i++) { //TODO: check 26!
				nb = neighbor2_list[i];
				if (isTarget(nb) && hasTargetChilds(nb)) {
					--(nb->noOfXin);
					--(FMMV->noOfStoredXin);
					if (nb->noOfXin == 0) {
						X2L(FMMV, 0, nb, 0);
						X2L(FMMV, 1, nb, 0);
						X2L(FMMV, 2, nb, 0);
						for (j=0; j<8; j++) if (nb->child[j]) {
							/* kk instead of k: k is the index of the enclosing loop */
							for (kk=0; kk<6; kk++) {
								FREE_X(FMMV, nb->child[j]->X[kk]);
								nb->child[j]->X[kk] = 0;
							}
						}
					}
				}
			}
		}
	}

	if ((FMMV->periodicBoundaryConditions)&&(box0->level==-1)) {
		box = box0->child[0]; /* box == root */
		X2L(FMMV, 0, box, 0);
		X2L(FMMV, 1, box, 0);
		X2L(FMMV, 2, box, 0);
		for (j=0; j<8; j++) if (box->child[j]) {
			for (k=0; k<6; k++) {
				FREE_X(FMMV, box->child[j]->X[k]);
				box->child[j]->X[k] = 0;
			}
		}
		/* keep only child 0 of the artificial parent before the second
		   round of X2L calls on box0 */
		for (j=1; j<8; j++) {
			box0->child[j]= 0;
		}
		X2L(FMMV, 0, box0, 1);
		X2L(FMMV, 1, box0, 1);
		X2L(FMMV, 2, box0, 1);
		for (k=0; k<6; k++) {
			FREE_X(FMMV, box->X2[k]);
			box->X2[k] = 0;
		}
	}
	else {
		/* box0 has delivered its data to its 26 neighbors: decrement their
		   noOfXin2 counters and convert/free where the counter reaches zero */
		for (i=0; i<26; i++) {
			nb = box0->neighbor[i];
			if (isTarget(nb) && hasTargetChilds(nb)) {
				--(nb->noOfXin2);
				--(FMMV->noOfStoredXin);
				if (nb->noOfXin2 == 0) {
					X2L(FMMV, 0, nb, 1);
					X2L(FMMV, 1, nb, 1);
					X2L(FMMV, 2, nb, 1);
					for (j=0; j<8; j++) if (nb->child[j]) {
						for (k=0; k<6; k++) {
							FREE_X(FMMV, nb->child[j]->X2[k]);
							nb->child[j]->X2[k] = 0;
						}
					}
				}
			}
		}
	}
}
//Astar Search bool Pathfinder::aStar(map<int,t_nets>::iterator& net, list<int>& targetNodes){ static t_tmp actualNode; int node; ++trace_id; //Initialize priority queue PQ with the sourceNodes priority_queue<t_tmp> pq; actualNode.costAccumulated=0; actualNode.father=-1; for(list<int>::iterator nodes_it = net->second.netTree.begin(); nodes_it != net->second.netTree.end(); ++nodes_it){ actualNode.aStarCost=getClosestNodeDistance(*nodes_it, targetNodes); // actualNode.costAccumulated=calcDistance(*nodes_it, net->second.nodes.front() )/arbFactor ; actualNode.node=*nodes_it; pq.push(actualNode); graph[*nodes_it].setFather(trace_id, -1); } // int center=getCenter(net->second.nodes); //Loop until new sink is found while(!(pq.empty() || targetNodes.empty())){ //Remove lowest cost node from PQ actualNode=pq.top(); pq.pop(); if(actualNode.father!=-1){ if(graph[actualNode.node].isVisited(trace_id)) continue; graph[actualNode.node].setFather(trace_id, actualNode.father); if(getNet(actualNode.node) == net->first) break; } // cout << ++visited << ": visitando (" << getPosX(actualNode.node) << " " <<getPosY(actualNode.node)<< " " <<getPosZ(actualNode.node)<< ") " << actualNode.costAccumulated << "+" << actualNode.aStarCost-actualNode.costAccumulated << "=" << actualNode.aStarCost << " vindo de (" << getPosX(actualNode.father) << " " <<getPosY(actualNode.father)<< " " <<getPosZ(actualNode.father)<< ")"<<endl; //Test the neighbours and add to queue the valids if(getPosX(actualNode.node)+1 < sizeX) pqAddto(actualNode,pq, net->first,targetNodes,RT_EAST); if(getPosY(actualNode.node)+1 < sizeY) pqAddto(actualNode,pq, net->first,targetNodes,RT_NORTH); if(getPosX(actualNode.node)) pqAddto(actualNode,pq, net->first,targetNodes,RT_WEST); if(getPosY(actualNode.node)) pqAddto(actualNode,pq, net->first,targetNodes,RT_SOUTH); if(getPosZ(actualNode.node)) pqAddto(actualNode,pq, net->first,targetNodes,RT_DOWN); if(getPosZ(actualNode.node)+1 < sizeZ) pqAddto(actualNode,pq, 
net->first,targetNodes,RT_UP); } // cout << ++visited << ": visitando (" << getPosX(actualNode.node) << " " <<getPosY(actualNode.node)<< " " <<getPosZ(actualNode.node)<< ") " << actualNode.costAccumulated << "+" << actualNode.aStarCost-actualNode.costAccumulated << "=" << actualNode.aStarCost << " vindo de (" << getPosX(actualNode.father) << " " <<getPosY(actualNode.father)<< " " <<getPosZ(actualNode.father)<< ")"<<endl; //Trace the path back to the source node if(getNet(actualNode.node)==net->first && actualNode.father!=-1){ node = actualNode.node; targetNodes.remove(node); while (graph[node].getFather(trace_id)!=-1) { //Put these new nodes in the Routing Tree net->second.netTree.push_back(node); net->second.routeResult.push_back(node); if(!isSource(node)){ if(graph[node].nrNets){ bool &tmp=netlist[graph[node].net].conflict; if(!tmp){ tmp=true; ++conflicts; } net->second.conflict=true; } graph[node].net=net->first; ++graph[node].nrNets; } node = graph[node].getFather(trace_id); } net->second.routeResult.push_back(node); net->second.routeResult.push_back(-1); return true; } return false; }
/*
 * M2L_ws2: run the three direction passes (U/D, N/S, E/W) for the children
 * of `box`.
 *
 * Nothing is done unless `box` is a source with source children. For each
 * direction, M2X fills X1/X2 from the children of `box`, and the handle_T0/
 * handle_T1/handle_T2 routines distribute the data to the boxes of the
 * interaction list IL produced by gen_M2L_interaction_list2.
 *
 * Afterwards the counter noOfXin of every target box with target children in
 * neighbor2_list is decremented; when it reaches zero, X2L is called for the
 * three directions and the X[] arrays of that box's children are freed.
 */
void M2L_ws2(FmmvHandle *FMMV, Box *box)
{
	int s_exp = FMMV->s_exp;
	SIMD_ALIGN _FLOAT_ X1[8*FMM_S_EXP_MAX];
	SIMD_ALIGN _FLOAT_ X2[8*FMM_S_EXP_MAX];
	SIMD_ALIGN _FLOAT_ X[13*FMM_S_EXP_MAX];
	Box *nb;
	int i, j, k;
	Box *IL[992];
	Box *neighbor2_list[124];

	if (!isSource(box) || !hasSourceChilds(box)) return;

	gen_M2L_interaction_list2(FMMV, box, IL, neighbor2_list);

	/*** U/D lists ***/
	memset(X + s_exp*XU_all, 0, s_exp*sizeof(_FLOAT_));
	memset(X + s_exp*XD_all, 0, s_exp*sizeof(_FLOAT_));
	memset(X + s_exp*XU_D, 0, s_exp*sizeof(_FLOAT_));
	memset(X + s_exp*XD_D, 0, s_exp*sizeof(_FLOAT_));
	M2X(FMMV, 0, box, X1, X2);
	handle_T0(FMMV, box, T0_UD, X, X1, X2);
	/* the first 18 table entries use the *_D slot, the remaining 100 the *_all slot */
	handle_T1(FMMV, T1_U_ws2, 18, X, XU_D, XU, IL);
	handle_T1(FMMV, T1_D_ws2, 18, X, XD_D, XD, IL);
	handle_T1(FMMV, T1_U_ws2+18, 100, X, XU_all, XU, IL);
	handle_T1(FMMV, T1_D_ws2+18, 100, X, XD_all, XD, IL);

	/*** N/S lists ***/
	memset(X, 0, 12*s_exp*sizeof(_FLOAT_));
	M2X(FMMV, 1, box, X1, X2);
	handle_T0(FMMV, box, T0_NS, X, X1, X2);
	handle_T1(FMMV, T1_N_ws2, 12, X, XU_D, XN, IL);
	handle_T1(FMMV, T1_S_ws2, 12, X, XD_D, XS, IL);
	handle_T1(FMMV, T1_N_ws2+12, 60, X, XU_all, XN, IL);
	handle_T1(FMMV, T1_S_ws2+12, 60, X, XD_all, XS, IL);
	handle_T2(FMMV, T2_N_ws2, 24, X, XN, IL);
	handle_T2(FMMV, T2_S_ws2, 24, X, XS, IL);

	/*** E/W lists ***/
	memset(X, 0, 12*s_exp*sizeof(_FLOAT_));
	M2X(FMMV, 2, box, X1, X2);
	handle_T0(FMMV, box, T0_EW, X, X1, X2);
	handle_T1(FMMV, T1_E_ws2, 8, X, XU_D, XE, IL);
	handle_T1(FMMV, T1_W_ws2, 8, X, XD_D, XW, IL);
	handle_T1(FMMV, T1_E_ws2+8, 36, X, XU_all, XE, IL);
	handle_T1(FMMV, T1_W_ws2+8, 36, X, XD_all, XW, IL);
	handle_T2(FMMV, T2_E_ws2, 36, X, XE, IL);
	handle_T2(FMMV, T2_W_ws2, 36, X, XW, IL);

	/* `box` has delivered its data: update the counters of the receiving
	   boxes and convert/free where a counter reaches zero */
	for (i=0; i< 124; i++) {
		nb = neighbor2_list[i];
		if (isTarget(nb) && hasTargetChilds(nb)) {
			--(nb->noOfXin);
			--(FMMV->noOfStoredXin);
			if (nb->noOfXin == 0) {
				X2L(FMMV, 0, nb, 0);
				X2L(FMMV, 1, nb, 0);
				X2L(FMMV, 2, nb, 0);
				for (j=0; j<8; j++) if (nb->child[j]) {
					for (k=0; k<6; k++) {
						FREE_X(FMMV, nb->child[j]->X[k]);
						nb->child[j]->X[k] = 0;
					}
				}
			}
		}
	}
}
/*
 * non_adaptive_fmm_ws2: driver of the non-adaptive FMM with M2L_ws2 /
 * M2L_ws2_reduced as the multipole-to-local step.
 *
 *   arg->fh      the FMM handle
 *   arg->thread  which part of the work this call performs:
 *     STANDARD_THREAD   far field (upward pass, downward pass, evaluation
 *                       of the local expansions) followed by the direct
 *                       evaluation, without any locking
 *     FARFIELD_THREAD   far field only; EVAL_L is called with the mutexes
 *                       of the box and its parent held (only compiled
 *                       with USE_PTHREADS)
 *     NEARFIELD_THREAD  direct evaluation only, with per-box mutexes
 *                       (only compiled with USE_PTHREADS)
 *
 * Always returns 0.
 *
 * NOTE(review): the STANDARD_THREAD direct evaluation passes box1/box2 to
 * EVAL_DIRECT without the null checks used in the NEARFIELD_THREAD
 * branch -- presumably EVAL_DIRECT tolerates a null source box; confirm.
 */
void* non_adaptive_fmm_ws2(GenericFmmThreadArg *arg)
{
	FmmvHandle *FMMV = arg->fh;
	int thread = arg->thread;
	Box *box, *box1, *box2;
	int level;
	int i, j;
	/* kernel-specific callbacks taken from the handle */
	void (*GEN_M)(FmmvHandle *FMMV, Box *box) = FMMV->gen_M;
	void (*EVAL_L)(FmmvHandle *FMMV, Box *box) = FMMV->eval_L;
	void (*EVAL_DIRECT)(FmmvHandle *FMMV, Box *target, Box *source) = FMMV->eval_direct;

	/* start the timer of the whole far-field / near-field part */
	switch(thread) {
	case (FARFIELD_THREAD):
		stat_start(FMMV, STAT_FARFIELD);
		break;
	case (NEARFIELD_THREAD):
		stat_start(FMMV, STAT_NEARFIELD);
		break;
	default:
		break;
	}

	if ((thread==STANDARD_THREAD)||(thread==FARFIELD_THREAD)) {
		/*** Upward Pass ***/

		/* Form multipole expansions at finest level */
		stat_start(FMMV, STAT_GEN_M);
		if (FMMV->maxLevel>1) {
			for (box=FMMV->firstSourceBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextSourceBox) {
				GEN_M(FMMV, box);
			}
		}
		stat_stop(FMMV, STAT_GEN_M);

		/* Form multipole expansions at coarser levels by merging */
		stat_start(FMMV, STAT_M2M);
		init_M2M(FMMV, -1);
		for (level=FMMV->maxLevel-1; level>=2; level--) {
			init_M2M(FMMV, level);
			for (box=FMMV->firstSourceBoxOfLevel[level]; box!=0; box=box->nextSourceBox) {
				M2M(FMMV, box);
			}
		}
		finish_M2M(FMMV);
		stat_stop(FMMV, STAT_M2M);

		/*** Downward Pass ***/
		stat_start(FMMV, STAT_M2L);
		init_M2L(FMMV, -1);
		if (FMMV->reducedScheme) {
			for (level=2; level<=FMMV->maxLevel; level++) {
				init_M2L(FMMV, level);
				/* Convert multipole to exponential expansions and shift exponential expansions */
				/* the reduced scheme is driven from the boxes two levels above `level` */
				for (box=FMMV->firstSourceBoxOfLevel[level-2]; box!=0; box=box->nextSourceBox) {
					M2L_ws2_reduced(FMMV, box);
					/* free M of the grandchildren of box */
					for (i=0; i<FMM_CHILDS_PER_BOX; i++) {
						if (box->child[i]) {
							for (j=0; j<FMM_CHILDS_PER_BOX; j++) {
								if (box->child[i]->child[j]) {
									FREE_M(FMMV, box->child[i]->child[j]->M);
								}
							}
						}
					}
				}
			}
		}
		else {
			for (level=2; level<=FMMV->maxLevel; level++) {
				init_M2L(FMMV, level);
				/* Convert multipole to exponential expansions and shift exponential expansions */
				for (box=FMMV->firstSourceBoxOfLevel[level-1]; box!=0; box=box->nextSourceBox) {
					M2L_ws2(FMMV, box);
					/* free M of the children of box */
					for (i=0; i<FMM_CHILDS_PER_BOX; i++) {
						if (box->child[i]) FREE_M(FMMV, box->child[i]->M);
					}
				}
			}
		}
		finish_M2L(FMMV);
		stat_stop(FMMV, STAT_M2L);

		/* Shift local expansions from each parent to each of its children */
		stat_start(FMMV, STAT_L2L);
		init_L2L(FMMV, -1);
		for (level=2; level<=FMMV->maxLevel; level++) {
			init_L2L(FMMV, level-1);
			for (box=FMMV->firstTargetBoxOfLevel[level-1]; box!=0; box=box->nextTargetBox) {
				L2L(FMMV, box);
				FREE_L(FMMV, box->L);
			}
		}
		finish_L2L(FMMV);
		stat_stop(FMMV, STAT_L2L);

		/*** Evaluation of Potentials ***/
		stat_start(FMMV, STAT_EVAL_L);
		if (thread==STANDARD_THREAD) {
			if (FMMV->maxLevel>1) {
				for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
					/* Evaluate local expansions at finest level */
					EVAL_L(FMMV, box);
					FREE_L(FMMV, box->L);
				}
			}
		} /* if (thread==STANDARD_THREAD) */
#ifdef USE_PTHREADS
		else { /* if (thread==FARFIELD_THREAD) */
			if (FMMV->maxLevel>1) {
				for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
					/* Evaluate local expansions at finest level */
					/* lock order: parent first, then the box itself */
					pthread_mutex_lock(&(box->parent->mutex));
					pthread_mutex_lock(&(box->mutex));
					EVAL_L(FMMV, box);
					pthread_mutex_unlock(&(box->mutex));
					pthread_mutex_unlock(&(box->parent->mutex));
					FREE_L(FMMV, box->L);
				}
			}
		} /* if (thread==FARFIELD_THREAD) */
#endif
		stat_stop(FMMV, STAT_EVAL_L);
	} /* if ((thread==STANDARD_THREAD)||(thread==FARFIELD_THREAD)) */

	if (thread==STANDARD_THREAD) {
		/* direct evaluation, single-threaded variant */
		stat_start(FMMV, STAT_LIST1);
		if (FMMV->maxLevel<=1) {
			box=FMMV->firstTargetBoxOfLevel[0];
			EVAL_DIRECT(FMMV, box, box);
		}
		else {
			/* each box of level maxLevel-1 with itself */
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel-1]; box!=0; box=box->nextTargetBox) {
				EVAL_DIRECT(FMMV, box, box);
			}
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
				/* neighbors of the parent selected by theseParentNeighbors */
				for (i=0; i<FMM_CHILDS_PER_BOX-1; i++) {
					box1 = box->parent->neighbor[theseParentNeighbors[box->whichChild][i]];
					EVAL_DIRECT(FMMV, box, box1);
				}
				/* selected children of the other neighbors of the parent;
				   table row layout: [0] neighbor index, [1] number of
				   children, [2..] child indices */
				for (i=0; i<N_OTHER_NEIGHBORS; i++) {
					box1 = box->parent->neighbor[otherParentNeighborsChilds[box->whichChild][i][0]];
					if (isSource(box1)) {
						for (j=2; j<otherParentNeighborsChilds[box->whichChild][i][1]+2; j++) {
							box2 = box1->child[otherParentNeighborsChilds[box->whichChild][i][j]];
							EVAL_DIRECT(FMMV, box, box2);
						}
					}
				}
			}
		}
		stat_stop(FMMV, STAT_LIST1);
	} /* if (thread==STANDARD_THREAD) */
#ifdef USE_PTHREADS
	if (thread==NEARFIELD_THREAD) {
		/* direct evaluation, variant running next to the far-field thread */
		stat_start(FMMV, STAT_LIST1);
		if (FMMV->maxLevel<=1) {
			box=FMMV->firstTargetBoxOfLevel[0];
			EVAL_DIRECT(FMMV, box, box);
		}
		else {
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel-1]; box!=0; box=box->nextTargetBox) {
				pthread_mutex_lock(&(box->mutex));
				EVAL_DIRECT(FMMV, box, box);
				pthread_mutex_unlock(&(box->mutex));
			}
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
				/* the target box stays locked while all its sources are handled */
				pthread_mutex_lock(&(box->mutex));
				if (FMMV->targets) { /* no lock for source boxes */
					for (i=0; i<FMM_CHILDS_PER_BOX-1; i++) {
						box1 = box->parent->neighbor[theseParentNeighbors[box->whichChild][i]];
						if (box1) {
							EVAL_DIRECT(FMMV, box, box1);
						}
					}
					for (i=0; i<N_OTHER_NEIGHBORS; i++) {
						box1 = box->parent->neighbor[otherParentNeighborsChilds[box->whichChild][i][0]];
						if (isSource(box1)) {
							for (j=2; j<otherParentNeighborsChilds[box->whichChild][i][1]+2; j++) {
								box2 = box1->child[otherParentNeighborsChilds[box->whichChild][i][j]];
								if (box2) {
									EVAL_DIRECT(FMMV, box, box2);
								}
							}
						}
					}
				}
				else {
					/* FMMV->targets is not set: the source box is locked as
					   well for the duration of each EVAL_DIRECT call */
					for (i=0; i<FMM_CHILDS_PER_BOX-1; i++) {
						box1 = box->parent->neighbor[theseParentNeighbors[box->whichChild][i]];
						if (box1) {
							pthread_mutex_lock(&(box1->mutex));
							EVAL_DIRECT(FMMV, box, box1);
							pthread_mutex_unlock(&(box1->mutex));
						}
					}
					for (i=0; i<N_OTHER_NEIGHBORS; i++) {
						box1 = box->parent->neighbor[otherParentNeighborsChilds[box->whichChild][i][0]];
						if (isSource(box1)) {
							for (j=2; j<otherParentNeighborsChilds[box->whichChild][i][1]+2; j++) {
								box2 = box1->child[otherParentNeighborsChilds[box->whichChild][i][j]];
								if (box2) {
									pthread_mutex_lock(&(box2->mutex));
									EVAL_DIRECT(FMMV, box, box2);
									pthread_mutex_unlock(&(box2->mutex));
								}
							}
						}
					}
				}
				pthread_mutex_unlock(&(box->mutex));
			}
		}
		stat_stop(FMMV, STAT_LIST1);
	} /* if (thread==NEARFIELD_THREAD) */
#endif

	/* stop the timer started at the top */
	switch(thread) {
	case (FARFIELD_THREAD):
		stat_stop(FMMV, STAT_FARFIELD);
		break;
	case (NEARFIELD_THREAD):
		stat_stop(FMMV, STAT_NEARFIELD);
		break;
	default:
		break;
	}
	return 0;
}
/*
 * Runs one pass of the non-adaptive periodic FMM ("ws2" variant) on the
 * handle arg->fh.  arg->thread selects which part of the pass this call
 * performs:
 *
 *   STANDARD_THREAD   far-field phases (GEN_M, M2M, M2L, L2L, EVAL_L)
 *                     followed by the direct LIST1 evaluation; no locking.
 *   FARFIELD_THREAD   far-field phases only.  When USE_PTHREADS is defined
 *                     the lattice M2L on the level-0 box and the final
 *                     EVAL_L calls are wrapped in mutex lock/unlock pairs;
 *                     without USE_PTHREADS the EVAL_L loop is not compiled
 *                     for this mode.
 *   NEARFIELD_THREAD  direct LIST1 evaluation only, with per-box mutex
 *                     locking (compiled only when USE_PTHREADS is defined).
 *
 * For any other value of arg->thread none of the phases is executed.
 * Each phase is bracketed by stat_start()/stat_stop().
 *
 * Always returns 0.
 */
void* non_adaptive_fmm_periodic_ws2(GenericFmmThreadArg *arg)
{
	FmmvHandle *FMMV = arg->fh;
	int thread = arg->thread;
	Box *box, *box1, *box2;
	int level;
	int i, j, k, jj;

	/* Local aliases for the kernel callbacks stored in the handle. */
	void (*GEN_M)(FmmvHandle *FMMV, Box *box) = FMMV->gen_M;
	void (*EVAL_L)(FmmvHandle *FMMV, Box *box) = FMMV->eval_L;
	void (*EVAL_DIRECT)(FmmvHandle *FMMV, Box *target, Box *source) = FMMV->eval_direct;
	/* The periodic direct kernel additionally takes an offset (dx, dy[, dz]);
	 * the dz argument exists only when FMM_DIM>=3. */
	void (*EVAL_DIRECT_periodic)(FmmvHandle *FMMV, Box *target, Box *source, _FLOAT_ dx, _FLOAT_ dy
#if (FMM_DIM>=3)
		, _FLOAT_ dz
#endif
		) = FMMV->eval_direct_periodic;

	/* Start the overall timer that belongs to this thread mode (none for
	 * STANDARD_THREAD). */
	switch(thread) {
	case (FARFIELD_THREAD):
		stat_start(FMMV, STAT_FARFIELD);
		break;
	case (NEARFIELD_THREAD):
		stat_start(FMMV, STAT_NEARFIELD);
		break;
	default:
		break;
	}

	if ((thread==STANDARD_THREAD)||(thread==FARFIELD_THREAD)) {
		/*** Upward Pass ***/

		/* Form multipole expansions at finest level */
		stat_start(FMMV, STAT_GEN_M);
		for (box=FMMV->firstSourceBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextSourceBox) {
			GEN_M(FMMV, box);
		}
		stat_stop(FMMV, STAT_GEN_M);

		/* Form multipole expansions at coarser levels by merging */
		stat_start(FMMV, STAT_M2M);
		init_M2M(FMMV, -1);
		for (level=FMMV->maxLevel-1; level>=0; level--) {/* note: now compute M up to level 0 */
			for (box=FMMV->firstSourceBoxOfLevel[level]; box!=0; box=box->nextSourceBox) {
				/* NOTE(review): init_M2M(FMMV, level) is called once per box,
				 * not once per level - confirm this is intended. */
				init_M2M(FMMV, level);
				M2M(FMMV, box);
			}
		}
		finish_M2M(FMMV);
		stat_stop(FMMV, STAT_M2M);

		/*** Downward Pass ***/
		stat_start(FMMV, STAT_M2L);
		init_M2L(FMMV, -1);

		/******************************************************************/
		/* Periodic part: allocate L for the first level-0 source box, fill it
		 * from that box's M via periodic_lattice_M2L(), then release M.
		 * NOTE(review): the FMMV_MALLOC result is used without a check here. */
		FMMV->firstSourceBoxOfLevel[0]->L = (_FLOAT_ *) FMMV_MALLOC(FMMV, FMM_SIZE_OF_L(FMMV->pL)*sizeof(_FLOAT_));
#ifdef USE_PTHREADS
		if (thread==FARFIELD_THREAD) {
			pthread_mutex_lock(&(FMMV->firstSourceBoxOfLevel[0]->mutex));
		}
#endif
		periodic_lattice_M2L(FMMV, FMMV->firstSourceBoxOfLevel[0]->M, FMMV->firstSourceBoxOfLevel[0]->L);
#ifdef USE_PTHREADS
		if (thread==FARFIELD_THREAD) {
			pthread_mutex_unlock(&(FMMV->firstSourceBoxOfLevel[0]->mutex));
		}
#endif
		FREE_M(FMMV, FMMV->firstSourceBoxOfLevel[0]->M);
		/******************************************************************/

		if (FMMV->reducedScheme) {
			/* we need an artificial parent for root */
			/* Every child slot of the artificial parent refers to the root
			 * box and every neighbor slot refers to the artificial parent
			 * itself.
			 * NOTE(review): box->parent keeps pointing at this block-scoped
			 * object after the enclosing block ends, and the FARFIELD EVAL_L
			 * loop below dereferences box->parent for boxes of level
			 * maxLevel - confirm the root is never reached there. */
			Box parentOfRoot;
			box = FMMV->firstSourceBoxOfLevel[0];
			parentOfRoot.level = -1;
			box->whichChild=0;
			box->parent=&parentOfRoot;
			for (i=0; i<FMM_CHILDS_PER_BOX; i++) {
				parentOfRoot.child[i] = box;
			}
			for (i=0; i<26; i++) {
				parentOfRoot.neighbor[i] = &parentOfRoot;
			}
			for (i=0; i<6; i++) {
				parentOfRoot.X[i] = 0;
				parentOfRoot.X2[i] = 0;
			}
			init_M2L(FMMV, 1);
			M2L_ws2_reduced(FMMV, &parentOfRoot);
			/* M of the root's children is released after this step. */
			box = FMMV->firstSourceBoxOfLevel[0];
			for (i=0; i<FMM_CHILDS_PER_BOX; i++) {
				if (box->child[i]) {
					FREE_M(FMMV, box->child[i]->M);
				}
			}

			/* In the reduced scheme the M2L call for `level` is issued on the
			 * boxes two levels above (level-2); afterwards M of their
			 * grandchildren is released. */
			for (level=2; level<=FMMV->maxLevel; level++) {
				/* Convert multipole to exponential expansions and shift exponential expansions */
				init_M2L(FMMV, level);
				for (box=FMMV->firstSourceBoxOfLevel[level-2]; box!=0; box=box->nextSourceBox) {
					M2L_ws2_reduced(FMMV, box);
					for (i=0; i<FMM_CHILDS_PER_BOX; i++) {
						if (box->child[i]) {
							for (j=0; j<FMM_CHILDS_PER_BOX; j++) {
								if (box->child[i]->child[j]) {
									FREE_M(FMMV, box->child[i]->child[j]->M);
								}
							}
						}
					}
				}
			}
		}
		else {
			/* Non-reduced scheme: the M2L call for `level` is issued on the
			 * boxes one level above (level-1); afterwards M of their children
			 * is released. */
			for (level=1; level<=FMMV->maxLevel; level++) {
				/* Convert multipole to exponential expansions and shift exponential expansions */
				init_M2L(FMMV, level);
				for (box=FMMV->firstSourceBoxOfLevel[level-1]; box!=0; box=box->nextSourceBox) {
					M2L_ws2(FMMV, box);
					for (i=0; i<FMM_CHILDS_PER_BOX; i++) {
						if (box->child[i]) FREE_M(FMMV, box->child[i]->M);
					}
				}
			}
		}
		finish_M2L(FMMV);
		stat_stop(FMMV, STAT_M2L);

		/* Shift local expansions from each parent to each of its children */
		/* L of a box is released right after L2L has been called on it. */
		stat_start(FMMV, STAT_L2L);
		init_L2L(FMMV, -1);
		for (level=1; level<=FMMV->maxLevel; level++) {
			init_L2L(FMMV, level-1);
			for (box=FMMV->firstTargetBoxOfLevel[level-1]; box!=0; box=box->nextTargetBox) {
				L2L(FMMV, box);
				FREE_L(FMMV, box->L);
			}
		}
		finish_L2L(FMMV);
		stat_stop(FMMV, STAT_L2L);

		/*** Evaluation of Potentials ***/
		stat_start(FMMV, STAT_EVAL_L);
		if (thread==STANDARD_THREAD) {
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
				/* Evaluate local expansions at finest level */
				EVAL_L(FMMV, box);
				FREE_L(FMMV, box->L);
			}
		} /* if (thread==STANDARD_THREAD) */
#ifdef USE_PTHREADS
		else { /* if (thread==FARFIELD_THREAD) */
			/* Same loop as above, but EVAL_L runs while holding the mutex of
			 * the box and the mutex of its parent. */
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
				/* Evaluate local expansions at finest level */
				pthread_mutex_lock(&(box->mutex));
				pthread_mutex_lock(&(box->parent->mutex));
				EVAL_L(FMMV, box);
				pthread_mutex_unlock(&(box->mutex));
				pthread_mutex_unlock(&(box->parent->mutex));
				FREE_L(FMMV, box->L);
			}
		} /* if (thread==FARFIELD_THREAD) */
#endif
		stat_stop(FMMV, STAT_EVAL_L);
	} /* if ((thread==STANDARD_THREAD)||(thread==FARFIELD_THREAD)) */

	/* Direct (LIST1) evaluation, single-threaded variant without locking. */
	if (thread==STANDARD_THREAD) {
		stat_start(FMMV, STAT_LIST1);
		if (FMMV->maxLevel==0) {
			/* Only the level-0 box exists: evaluate it against itself and
			 * against itself with every offset (dx, dy[, dz]) in [-2, +2]
			 * except the all-zero offset. */
			int dx, dy;
#if (FMM_DIM>=3)
			int dz;
#endif
			box=FMMV->firstTargetBoxOfLevel[0];
			EVAL_DIRECT(FMMV, box, box);
			for (dx=-2; dx<=+2; dx++) {
				for (dy=-2; dy<=+2; dy++) {
#if (FMM_DIM>=3)
					for (dz=-2; dz<=+2; dz++) {
#endif
						if (!((dx==0)&&(dy==0)
#if (FMM_DIM>=3)
							&&(dz==0)
#endif
							)) {
							EVAL_DIRECT_periodic(FMMV, box, box, dx, dy
#if (FMM_DIM>=3)
								, dz
#endif
								);
						}
#if (FMM_DIM>=3)
					}
#endif
				}
			}
		}
		else {
			/* Self interaction is evaluated on the boxes of level maxLevel-1. */
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel-1]; box!=0; box=box->nextTargetBox) {
				EVAL_DIRECT(FMMV, box, box);
			}
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
				/* Parent neighbors listed in theseParentNeighbors[whichChild]
				 * are evaluated as whole boxes.  jj combines the parent's
				 * atBoundary mask with boundarysData[k] and selects the
				 * offset entry of pS that is passed to the kernel. */
				for (i=0; i<FMM_CHILDS_PER_BOX-1; i++) {
					box1 = box->parent->neighbor[k=theseParentNeighbors[box->whichChild][i]];
					if (isSource(box1)) { /*&& (box1->firstParticle < box->firstParticle) */
						jj=((box->parent->atBoundary) & (boundarysData[k]));
						EVAL_DIRECT_periodic(FMMV, box, box1, pS[jj].dx, pS[jj].dy
#if (FMM_DIM>=3)
							, pS[jj].dz
#endif
							);
					}
				}
				/* Remaining parent neighbors: per table row, entry [0] is the
				 * parent-neighbor index, entry [1] the number of listed
				 * children and entries [2..] the child indices to evaluate. */
				for (i=0; i<N_OTHER_NEIGHBORS; i++) {
					box1 = box->parent->neighbor[k=otherParentNeighborsChilds[box->whichChild][i][0]];
					if (isSource(box1)) {
						jj=((box->parent->atBoundary) & (boundarysData[k]));
						for (j=2; j<otherParentNeighborsChilds[box->whichChild][i][1]+2; j++) {
							box2 = box1->child[otherParentNeighborsChilds[box->whichChild][i][j]];
							/* NOTE(review): unlike the NEARFIELD_THREAD path
							 * below there is no `if (box2)` guard here, so a
							 * null child is handed to the kernel - confirm
							 * the kernel tolerates that. */
							EVAL_DIRECT_periodic(FMMV, box, box2, pS[jj].dx, pS[jj].dy
#if (FMM_DIM>=3)
								, pS[jj].dz
#endif
								);
						}
					}
				}
			}
		}
		stat_stop(FMMV, STAT_LIST1);
	} /* if (thread==STANDARD_THREAD) */

#ifdef USE_PTHREADS
	/* Direct (LIST1) evaluation, threaded variant: same traversal as above
	 * with the target box locked while it is processed. */
	if (thread==NEARFIELD_THREAD) {
		stat_start(FMMV, STAT_LIST1);
		if (FMMV->maxLevel==0) {
			int dx, dy;
#if (FMM_DIM>=3)
			int dz;
#endif
			box=FMMV->firstTargetBoxOfLevel[0];
			pthread_mutex_lock(&(box->mutex));
			EVAL_DIRECT(FMMV, box, box);
			for (dx=-2; dx<=+2; dx++) {
				for (dy=-2; dy<=+2; dy++) {
#if (FMM_DIM>=3)
					for (dz=-2; dz<=+2; dz++) {
#endif
						if (!((dx==0)&&(dy==0)
#if (FMM_DIM>=3)
							&&(dz==0)
#endif
							)) {
							EVAL_DIRECT_periodic(FMMV, box, box, dx, dy
#if (FMM_DIM>=3)
								, dz
#endif
								);
						}
#if (FMM_DIM>=3)
					}
#endif
				}
			}
			pthread_mutex_unlock(&(box->mutex));
		}
		else {
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel-1]; box!=0; box=box->nextTargetBox) {
				pthread_mutex_lock(&(box->mutex));
				EVAL_DIRECT(FMMV, box, box);
				pthread_mutex_unlock(&(box->mutex));
			}
			for (box=FMMV->firstTargetBoxOfLevel[FMMV->maxLevel]; box!=0; box=box->nextTargetBox) {
				/* The target box stays locked for all of its interactions. */
				pthread_mutex_lock(&(box->mutex));
				if (FMMV->targets) { /* no lock for source boxes */
					for (i=0; i<FMM_CHILDS_PER_BOX-1; i++) {
						box1 = box->parent->neighbor[k=theseParentNeighbors[box->whichChild][i]];
						if (isSource(box1)) { /*&& (box1->firstParticle < box->firstParticle) */
							jj=((box->parent->atBoundary) & (boundarysData[k]));
							EVAL_DIRECT_periodic(FMMV, box, box1, pS[jj].dx, pS[jj].dy
#if (FMM_DIM>=3)
								, pS[jj].dz
#endif
								);
						}
					}
					for (i=0; i<N_OTHER_NEIGHBORS; i++) {
						box1 = box->parent->neighbor[k=otherParentNeighborsChilds[box->whichChild][i][0]];
						if (isSource(box1)) { /* && (box1->firstParticle < box->firstParticle)) { */
							jj=((box->parent->atBoundary) & (boundarysData[k]));
							for (j=2; j<otherParentNeighborsChilds[box->whichChild][i][1]+2; j++) {
								box2 = box1->child[otherParentNeighborsChilds[box->whichChild][i][j]];
								if(box2) {
									EVAL_DIRECT_periodic(FMMV, box, box2, pS[jj].dx, pS[jj].dy
#if (FMM_DIM>=3)
										, pS[jj].dz
#endif
										);
								}
							}
						}
					}
				}
				else {
					/* FMMV->targets is unset: the source box is locked as
					 * well for the duration of each kernel call.
					 * NOTE(review): two box mutexes are held at once here
					 * (target, then source) - confirm the lock order cannot
					 * conflict with the FARFIELD_THREAD path. */
					for (i=0; i<FMM_CHILDS_PER_BOX-1; i++) {
						box1 = box->parent->neighbor[k=theseParentNeighbors[box->whichChild][i]];
						if (isSource(box1)) { /*&& (box1->firstParticle < box->firstParticle) */
							jj=((box->parent->atBoundary) & (boundarysData[k]));
							pthread_mutex_lock(&(box1->mutex));
							EVAL_DIRECT_periodic(FMMV, box, box1, pS[jj].dx, pS[jj].dy
#if (FMM_DIM>=3)
								, pS[jj].dz
#endif
								);
							pthread_mutex_unlock(&(box1->mutex));
						}
					}
					for (i=0; i<N_OTHER_NEIGHBORS; i++) {
						box1 = box->parent->neighbor[k=otherParentNeighborsChilds[box->whichChild][i][0]];
						if (isSource(box1)) { /* && (box1->firstParticle < box->firstParticle)) { */
							jj=((box->parent->atBoundary) & (boundarysData[k]));
							for (j=2; j<otherParentNeighborsChilds[box->whichChild][i][1]+2; j++) {
								box2 = box1->child[otherParentNeighborsChilds[box->whichChild][i][j]];
								if(box2) {
									pthread_mutex_lock(&(box2->mutex));
									EVAL_DIRECT_periodic(FMMV, box, box2, pS[jj].dx, pS[jj].dy
#if (FMM_DIM>=3)
										, pS[jj].dz
#endif
										);
									pthread_mutex_unlock(&(box2->mutex));
								}
							}
						}
					}
				}
				pthread_mutex_unlock(&(box->mutex));
			}
		}
		stat_stop(FMMV, STAT_LIST1);
	} /* if (thread==NEARFIELD_THREAD) */
#endif

	/* Stop the overall timer started at the top of the function. */
	switch(thread) {
	case (FARFIELD_THREAD):
		stat_stop(FMMV, STAT_FARFIELD);
		break;
	case (NEARFIELD_THREAD):
		stat_stop(FMMV, STAT_NEARFIELD);
		break;
	default:
		break;
	}
	return 0;
}
bool RODFNet::isSource(const RODFDetector& det, ROEdge* edge, std::vector<ROEdge*>& seen, const RODFDetectorCon& detectors, bool strict) const { if (seen.size() == 1000) { // !!! WRITE_WARNING("Quitting checking for being a source for detector '" + det.getID() + "' due to seen edge limit."); return false; } if (edge == getDetectorEdge(det)) { // maybe there is another detector at the same edge // get the list of this/these detector(s) const std::vector<std::string>& detsOnEdge = myDetectorsOnEdges.find(edge)->second; for (std::vector<std::string>::const_iterator i = detsOnEdge.begin(); i != detsOnEdge.end(); ++i) { if ((*i) == det.getID()) { continue; } const RODFDetector& sec = detectors.getDetector(*i); if (getAbsPos(sec) < getAbsPos(det)) { // ok, there is another detector on the same edge and it is // before this one -> no source return false; } } } // it's a source if no edges are approaching the edge if (!hasApproaching(edge)) { if (edge != getDetectorEdge(det)) { if (hasDetector(edge)) { return false; } } return true; } if (edge != getDetectorEdge(det)) { // ok, we are at one of the edges in front if (myAmInHighwayMode) { if (edge->getSpeed() >= 19.4) { if (hasDetector(edge)) { // we are still on the highway and there is another detector return false; } // the next is a hack for the A100 scenario... 
// We have to look into further edges herein edges const std::vector<ROEdge*>& appr = myApproachingEdges.find(edge)->second; size_t noOk = 0; size_t noFalse = 0; size_t noSkipped = 0; for (size_t i = 0; i < appr.size(); i++) { if (!hasDetector(appr[i])) { noOk++; } else { noFalse++; } } if ((noFalse + noSkipped) == appr.size()) { return false; } } } } if (myAmInHighwayMode) { if (edge->getSpeed() < 19.4 && edge != getDetectorEdge(det)) { // we have left the highway already // -> the detector will be a highway source if (!hasDetector(edge)) { return true; } } } if (myDetectorsOnEdges.find(edge) != myDetectorsOnEdges.end() && myDetectorEdges.find(det.getID())->second != edge) { return false; } // let's check the edges in front const std::vector<ROEdge*>& appr = myApproachingEdges.find(edge)->second; size_t noOk = 0; size_t noFalse = 0; size_t noSkipped = 0; seen.push_back(edge); for (size_t i = 0; i < appr.size(); i++) { bool had = std::find(seen.begin(), seen.end(), appr[i]) != seen.end(); if (!had) { if (isSource(det, appr[i], seen, detectors, strict)) { noOk++; } else { noFalse++; } } else { noSkipped++; } } if (!strict) { return (noFalse + noSkipped) != appr.size(); } else { return (noOk + noSkipped) == appr.size(); } }
void eval_direct(FmmvHandle *FMMV, Box *target, Box *source) #endif { #if ((FMM_KIND==FMM_ST_STANDARD)||(FMM_KIND==FMM_ST_GRAD) \ ||(FMM_KIND==FMM_ST_DIPOLE)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) if (!(isTarget(target)&&isSource(source))) return; #else if (!(target&&source)||(target->firstTarget>source->firstParticle)) return; #endif if (FMMV->beta!=0){ #ifdef PERIODIC eval_direct_yukawa_periodic(FMMV, target, source, dx, dy, dz); #else eval_direct_yukawa(FMMV, target, source); #endif return; } else { DEFINE_IDA_LOCAL_ALIASES(FMMV) _FLOAT_ x, y, z, one_over_r; _FLOAT_ xi,yi,zi, qj; #if ((FMM_KIND==FMM_STANDARD)||(FMM_KIND==FMM_GRAD) \ ||(FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD)) _FLOAT_ qi; #endif int i,j,ni,nj,i0,j0,j00,j1; #if ((FMM_KIND==FMM_GRAD)||(FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD) \ ||(FMM_KIND==FMM_ST_GRAD)||(FMM_KIND==FMM_ST_DIPOLE)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) _FLOAT_ one_o_r_3; #endif #if ((FMM_KIND==FMM_GRAD)||(FMM_KIND==FMM_DIPOLE_GRAD) \ ||(FMM_KIND==FMM_ST_GRAD)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) _FLOAT_ qj_o_r_3; #endif #if ((FMM_KIND==FMM_GRAD)||(FMM_KIND==FMM_DIPOLE_GRAD)) _FLOAT_ qi_o_r_3; #endif #if ((FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD) \ ||(FMM_KIND==FMM_ST_DIPOLE)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) _FLOAT_ mxj, myj, mzj; _FLOAT_ m_times_rj; #endif #if ((FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD)) _FLOAT_ mxi, myi, mzi; _FLOAT_ m_times_ri; #endif #if ((FMM_KIND==FMM_DIPOLE_GRAD) \ ||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) _FLOAT_ one_o_r_5; #endif i0 = target->firstTarget; ni = target->noOfTargets; j0 = source->firstParticle; nj = source->noOfParticles; #if ((FMM_KIND==FMM_ST_STANDARD)||(FMM_KIND==FMM_ST_GRAD) \ ||(FMM_KIND==FMM_ST_DIPOLE)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) j00 =j0; FMMV->noOfDirectInteractions += ni*nj; #else if (i0==j0) { FMMV->noOfDirectInteractions += (ni*(ni-1))/2; } else { FMMV->noOfDirectInteractions += (ni*(ni-1))/2; } #endif j1 = j0+nj; for (i=i0; i<i0+ni; i++) { #ifdef PERIODIC xi = 
access_tx(i) - dx; yi = access_ty(i) - dy; zi = access_tz(i) - dz; #else xi = access_tx(i); yi = access_ty(i); zi = access_tz(i); #endif #if ((FMM_KIND==FMM_STANDARD)||(FMM_KIND==FMM_GRAD) \ ||(FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD)) qi = access_q(i); #endif #if ((FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD)) mxi = access_mx(i); myi = access_my(i); mzi = access_mz(i); #endif #if ((FMM_KIND==FMM_ST_STANDARD)||(FMM_KIND==FMM_ST_GRAD) \ ||(FMM_KIND==FMM_ST_DIPOLE)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) #else j00 = (i<j0 ? j0 : i+1); #endif for (j=j00; j<j1; j++) { x = xi - access_x(j); y = yi - access_y(j); z = zi - access_z(j); #if (EVAL_DIRECT_ACCURACY==0) one_over_r = RECIP_SQRT0(x*x + y*y + z*z); #elif (EVAL_DIRECT_ACCURACY==1) one_over_r = RECIP_SQRT1(x*x + y*y + z*z); #elif (EVAL_DIRECT_ACCURACY==2) one_over_r = RECIP_SQRT2(x*x + y*y + z*z); #endif qj = access_q(j); #if ((FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD) \ ||(FMM_KIND==FMM_ST_DIPOLE)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) mxj = access_mx(j); myj = access_my(j); mzj = access_mz(j); one_o_r_3 = one_over_r*one_over_r*one_over_r; m_times_rj = x*mxj + y*myj + z*mzj; access_pot(i) += qj*one_over_r + m_times_rj*one_o_r_3; #if ((FMM_KIND==FMM_DIPOLE)||(FMM_KIND==FMM_DIPOLE_GRAD)) m_times_ri = x*mxi + y*myi + z*mzi; access_pot(j) += qi*one_over_r - m_times_ri*one_o_r_3; #endif #else access_pot(i) += qj*one_over_r; #if ((FMM_KIND==FMM_STANDARD)||(FMM_KIND==FMM_GRAD)) access_pot(j) += qi*one_over_r; #endif #endif #if ((FMM_KIND==FMM_GRAD)||(FMM_KIND==FMM_ST_GRAD)) one_o_r_3 = one_over_r*one_over_r*one_over_r; qj_o_r_3 = qj*one_o_r_3; access_gradx(i) -= x*qj_o_r_3; access_grady(i) -= y*qj_o_r_3; access_gradz(i) -= z*qj_o_r_3; #endif #if (FMM_KIND==FMM_GRAD) qi_o_r_3 = qi*one_o_r_3; access_gradx(j) += x*qi_o_r_3; access_grady(j) += y*qi_o_r_3; access_gradz(j) += z*qi_o_r_3; #endif #if ((FMM_KIND==FMM_DIPOLE_GRAD)||(FMM_KIND==FMM_ST_DIPOLE_GRAD)) one_o_r_5 = one_o_r_3*one_over_r*one_over_r; qj_o_r_3 = 
qj*one_o_r_3 + 3.0*m_times_rj*one_o_r_5; access_gradx(i) -= x*qj_o_r_3 - mxj*one_o_r_3; access_grady(i) -= y*qj_o_r_3 - myj*one_o_r_3; access_gradz(i) -= z*qj_o_r_3 - mzj*one_o_r_3; #endif #if (FMM_KIND==FMM_DIPOLE_GRAD) qi_o_r_3 = qi*one_o_r_3 - 3.0*m_times_ri*one_o_r_5; access_gradx(j) += x*qi_o_r_3 + mxi*one_o_r_3; access_grady(j) += y*qi_o_r_3 + myi*one_o_r_3; access_gradz(j) += z*qi_o_r_3 + mzi*one_o_r_3; #endif } } } }