void matrix_solver_t::add_term(std::size_t k, terminal_t *term) { if (term->m_otherterm->net().isRailNet()) { m_rails_temp[k]->add(term, -1, false); } else { int ot = get_net_idx(&term->m_otherterm->net()); if (ot>=0) { m_terms[k]->add(term, ot, true); } /* Should this be allowed ? */ else // if (ot<0) { m_rails_temp[k]->add(term, ot, true); log().fatal("found term with missing othernet {1}\n", term->name()); } } }
/*
 * Build the per-net bookkeeping used by the solver:
 *
 *  1. Append the terminals parked in m_rails_temp to each m_terms[k] and
 *     release the temporary lists. m_railstart records how many terminals
 *     were present before the parked ones were appended.
 *  2. Optionally sort the nets by m_railstart (see m_sort) and recompute the
 *     stored net indices, which the sort invalidated.
 *  3. Create the index lists m_nz (non-zero columns of row k), m_nzrd
 *     (non-zero columns right of the diagonal) and m_nzbd (rows below the
 *     diagonal which have an entry in column k).
 *  4. Register the per-net state with save().
 */
ATTR_COLD void matrix_solver_t::setup_matrix()
{
	const unsigned iN = m_nets.size();

	for (unsigned k = 0; k < iN; k++)
	{
		/* everything added from here on sits at index >= m_railstart */
		m_terms[k]->m_railstart = m_terms[k]->count();
		for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
			this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);

		m_rails_temp[k]->clear(); // no longer needed
		m_terms[k]->set_pointers();
	}

	for (unsigned k = 0; k < iN; k++)
		pfree(m_rails_temp[k]); // no longer needed

	m_rails_temp.clear();

	/* Sort in descending order by number of connected matrix voltages.
	 * The idea is, that for Gauss-Seidel algo the first voltage computed
	 * depends on the greatest number of previous voltages thus taking into
	 * account the maximum amount of information.
	 *
	 * This actually improves performance on popeye slightly. Average
	 * GS computations reduce from 2.509 to 2.370
	 *
	 * Smallest to largest : 2.613
	 * Unsorted            : 2.509
	 * Largest to smallest : 2.370
	 *
	 * Sorting as a general matrix pre-conditioning is mentioned in
	 * literature but I have found no articles about Gauss Seidel.
	 *
	 * For Gaussian Elimination however increasing order is better suited.
	 * FIXME: Even better would be to sort on elements right of the matrix diagonal.
	 *
	 */

	if (m_sort != NOSORT)
	{
		const int sort_order = (m_sort == DESCENDING ? 1 : -1);

		/* "k + 1 < iN" instead of "k < iN - 1": iN is unsigned, so iN - 1
		 * would wrap around to UINT_MAX for an empty net list.
		 */
		for (unsigned k = 0; k + 1 < iN; k++)
			for (unsigned i = k + 1; i < iN; i++)
			{
				const int diff = static_cast<int>(m_terms[k]->m_railstart)
						- static_cast<int>(m_terms[i]->m_railstart);
				if (diff * sort_order < 0)
				{
					std::swap(m_terms[i], m_terms[k]);
					std::swap(m_nets[i], m_nets[k]);
				}
			}

		/* The swaps above moved nets to new positions, so every stored
		 * index other than -1 has to be looked up again.
		 */
		for (unsigned k = 0; k < iN; k++)
		{
			int *other = m_terms[k]->net_other();
			for (unsigned i = 0; i < m_terms[k]->count(); i++)
				if (other[i] != -1)
					other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
		}
	}

	/* create a list of non zero elements. */
	for (unsigned k = 0; k < iN; k++)
	{
		terms_t * t = m_terms[k];
		/* pretty brutal */
		int *other = t->net_other();

		t->m_nz.clear();

		/* only the terminals in front of m_railstart contribute */
		for (unsigned i = 0; i < t->m_railstart; i++)
			if (!t->m_nz.contains(other[i]))
				t->m_nz.push_back(other[i]);

		t->m_nz.push_back(k);     // add diagonal

		/* and sort */
		psort_list(t->m_nz);
	}

	/* create a list of non zero elements right of the diagonal
	 * This list anticipates the population of array elements by
	 * Gaussian elimination.
	 */
	for (unsigned k = 0; k < iN; k++)
	{
		terms_t * t = m_terms[k];
		/* pretty brutal */
		int *other = t->net_other();

		if (k == 0)
			t->m_nzrd.clear();
		else
		{
			/* start from the previous row's list and drop every column
			 * which is not right of this row's diagonal
			 */
			t->m_nzrd = m_terms[k-1]->m_nzrd;
			unsigned j = 0;
			while (j < t->m_nzrd.size())
			{
				if (t->m_nzrd[j] < k + 1)
					t->m_nzrd.remove_at(j);
				else
					j++;
			}
		}

		for (unsigned i = 0; i < t->m_railstart; i++)
			if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
				t->m_nzrd.push_back(other[i]);

		/* and sort */
		psort_list(t->m_nzrd);
	}

	/* create a list of non zero elements below diagonal k
	 * This should reduce cache misses ...
	 */

	/* touched[row][col] == true marks element (row, col) as populated.
	 * It is seeded from m_nz and extended below while stepping through
	 * the elimination.
	 */
	bool **touched = new bool*[iN];
	for (unsigned k = 0; k < iN; k++)
		touched[k] = new bool[iN];

	for (unsigned k = 0; k < iN; k++)
	{
		for (unsigned j = 0; j < iN; j++)
			touched[k][j] = false;
		for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
			touched[k][m_terms[k]->m_nz[j]] = true;
	}

	unsigned ops = 0;
	for (unsigned k = 0; k < iN; k++)
	{
		ops++; // 1/A(k,k)
		for (unsigned row = k + 1; row < iN; row++)
		{
			if (touched[row][k])
			{
				ops++;
				if (!m_terms[k]->m_nzbd.contains(row))
					m_terms[k]->m_nzbd.push_back(row);
				/* every populated column of row k right of the diagonal
				 * becomes populated in this row as well
				 */
				for (unsigned col = k + 1; col < iN; col++)
					if (touched[k][col])
					{
						touched[row][col] = true;
						ops += 2;
					}
			}
		}
	}

	/* The scratch matrix is not read past this point; release it here
	 * instead of keeping it alive across the logging and save() calls.
	 */
	for (unsigned k = 0; k < iN; k++)
		delete [] touched[k];
	delete [] touched;

	log().verbose("Number of mults/adds for {1}: {2}", name(), ops);

	/* disabled debug dump of the m_nzrd lists */
	if (0)
		for (unsigned k = 0; k < iN; k++)
		{
			pstring line = pfmt("{1}")(k, "3");
			for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
				line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
			log().verbose("{1}", line);
		}

	/*
	 * save states
	 */
	for (unsigned k = 0; k < iN; k++)
	{
		pstring num = pfmt("{1}")(k);

		save(m_terms[k]->m_last_V, "lastV." + num);
		save(m_terms[k]->m_DD_n_m_1, "m_DD_n_m_1." + num);
		save(m_terms[k]->m_h_n_m_1, "m_h_n_m_1." + num);

		save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
		save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
		save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
	}
}