void expand(const SXMatrix& ex2, SXMatrix &ww, SXMatrix& tt){ casadi_assert(ex2.scalar()); SX ex = ex2.toScalar(); // Terms, weights and indices of the nodes that are already expanded std::vector<std::vector<SXNode*> > terms; std::vector<std::vector<double> > weights; std::map<SXNode*,int> indices; // Stack of nodes that are not yet expanded std::stack<SXNode*> to_be_expanded; to_be_expanded.push(ex.get()); while(!to_be_expanded.empty()){ // as long as there are nodes to be expanded // Check if the last element on the stack is already expanded if (indices.find(to_be_expanded.top()) != indices.end()){ // Remove from stack to_be_expanded.pop(); continue; } // Weights and terms std::vector<double> w; // weights std::vector<SXNode*> f; // terms if(to_be_expanded.top()->isConstant()){ // constant nodes are seen as multiples of one w.push_back(to_be_expanded.top()->getValue()); f.push_back(casadi_limits<SX>::one.get()); } else if(to_be_expanded.top()->isSymbolic()){ // symbolic nodes have weight one and itself as factor w.push_back(1); f.push_back(to_be_expanded.top()); } else { // binary node casadi_assert(to_be_expanded.top()->hasDep()); // make sure that the node is binary // Check if addition, subtracton or multiplication SXNode* node = to_be_expanded.top(); // If we have a binary node that we can factorize if(node->getOp() == OP_ADD || node->getOp() == OP_SUB || (node->getOp() == OP_MUL && (node->dep(0)->isConstant() || node->dep(1)->isConstant()))){ // Make sure that both children are factorized, if not - add to stack if (indices.find(node->dep(0).get()) == indices.end()){ to_be_expanded.push(node->dep(0).get()); continue; } if (indices.find(node->dep(1).get()) == indices.end()){ to_be_expanded.push(node->dep(1).get()); continue; } // Get indices of children int ind1 = indices[node->dep(0).get()]; int ind2 = indices[node->dep(1).get()]; // If multiplication if(node->getOp() == OP_MUL){ double fac; if(node->dep(0)->isConstant()){ // Multiplication where the first factor is a constant fac = node->dep(0)->getValue(); f = terms[ind2]; w = weights[ind2]; } else { // Multiplication where the second factor is a constant fac = node->dep(1)->getValue(); f = terms[ind1]; w = weights[ind1]; } for(int i=0; i<w.size(); ++i) w[i] *= fac; } else { // if addition or subtraction if(node->getOp() == OP_ADD){ // Addition: join both sums f = terms[ind1]; f.insert(f.end(), terms[ind2].begin(), terms[ind2].end()); w = weights[ind1]; w.insert(w.end(), weights[ind2].begin(), weights[ind2].end()); } else { // Subtraction: join both sums with negative weights for second term f = terms[ind1]; f.insert(f.end(), terms[ind2].begin(), terms[ind2].end()); w = weights[ind1]; w.reserve(f.size()); for(int i=0; i<weights[ind2].size(); ++i) w.push_back(-weights[ind2][i]); } // Eliminate multiple elements std::vector<double> w_new; w_new.reserve(w.size()); // weights std::vector<SXNode*> f_new; f_new.reserve(f.size()); // terms std::map<SXNode*,int> f_ind; // index in f_new for(int i=0; i<w.size(); i++){ // Try to locate the node std::map<SXNode*,int>::iterator it = f_ind.find(f[i]); if(it == f_ind.end()){ // if the term wasn't found w_new.push_back(w[i]); f_new.push_back(f[i]); f_ind[f[i]] = f_new.size()-1; } else { // if the term already exists w_new[it->second] += w[i]; // just add the weight } } w = w_new; f = f_new; } } else { // if we have a binary node that we cannot factorize // By default, w.push_back(1); f.push_back(node); } } // Save factorization of the node weights.push_back(w); terms.push_back(f); indices[to_be_expanded.top()] = terms.size()-1; // Remove node from stack to_be_expanded.pop(); } // Save expansion to output int thisind = indices[ex.get()]; ww = SXMatrix(weights[thisind]); vector<SX> termsv(terms[thisind].size()); for(int i=0; i<termsv.size(); ++i) termsv[i] = SX::create(terms[thisind][i]); tt = SXMatrix(termsv); }
void SXFunctionInternal::init() { // Call the init function of the base class XFunctionInternal<SXFunction, SXFunctionInternal, SX, SXNode>::init(); // Stack used to sort the computational graph stack<SXNode*> s; // All nodes vector<SXNode*> nodes; // Add the list of nodes int ind=0; for (vector<SX >::iterator it = outputv_.begin(); it != outputv_.end(); ++it, ++ind) { int nz=0; for (vector<SXElement>::iterator itc = it->begin(); itc != it->end(); ++itc, ++nz) { // Add outputs to the list s.push(itc->get()); sort_depth_first(s, nodes); // A null pointer means an output instruction nodes.push_back(static_cast<SXNode*>(0)); } } // Set the temporary variables to be the corresponding place in the sorted graph for (int i=0; i<nodes.size(); ++i) { if (nodes[i]) { nodes[i]->temp = i; } } // Sort the nodes by type constants_.clear(); operations_.clear(); for (vector<SXNode*>::iterator it = nodes.begin(); it != nodes.end(); ++it) { SXNode* t = *it; if (t) { if (t->isConstant()) constants_.push_back(SXElement::create(t)); else if (!t->isSymbolic()) operations_.push_back(SXElement::create(t)); } } // Use live variables? bool live_variables = getOption("live_variables"); // Input instructions vector<pair<int, SXNode*> > symb_loc; // Current output and nonzero, start with the first one int curr_oind, curr_nz=0; for (curr_oind=0; curr_oind<outputv_.size(); ++curr_oind) { if (outputv_[curr_oind].nnz()!=0) { break; } } // Count the number of times each node is used vector<int> refcount(nodes.size(), 0); // Get the sequence of instructions for the virtual machine algorithm_.resize(0); algorithm_.reserve(nodes.size()); for (vector<SXNode*>::iterator it=nodes.begin(); it!=nodes.end(); ++it) { // Current node SXNode* n = *it; // New element in the algorithm AlgEl ae; // Get operation ae.op = n==0 ? OP_OUTPUT : n->getOp(); // Get instruction switch (ae.op) { case OP_CONST: // constant ae.d = n->getValue(); ae.i0 = n->temp; break; case OP_PARAMETER: // a parameter or input symb_loc.push_back(make_pair(algorithm_.size(), n)); ae.i0 = n->temp; break; case OP_OUTPUT: // output instruction ae.i0 = curr_oind; ae.i1 = outputv_[curr_oind].at(curr_nz)->temp; ae.i2 = curr_nz; // Go to the next nonzero curr_nz++; if (curr_nz>=outputv_[curr_oind].nnz()) { curr_nz=0; curr_oind++; for (; curr_oind<outputv_.size(); ++curr_oind) { if (outputv_[curr_oind].nnz()!=0) { break; } } } break; default: // Unary or binary operation ae.i0 = n->temp; ae.i1 = n->dep(0).get()->temp; ae.i2 = n->dep(1).get()->temp; } // Number of dependencies int ndeps = casadi_math<double>::ndeps(ae.op); // Increase count of dependencies for (int c=0; c<ndeps; ++c) { refcount.at(c==0 ? ae.i1 : ae.i2)++; } // Add to algorithm algorithm_.push_back(ae); } // Place in the work vector for each of the nodes in the tree (overwrites the reference counter) vector<int> place(nodes.size()); // Stack with unused elements in the work vector stack<int> unused; // Work vector size size_t worksize = 0; // Find a place in the work vector for the operation for (vector<AlgEl>::iterator it=algorithm_.begin(); it!=algorithm_.end(); ++it) { // Number of dependencies int ndeps = casadi_math<double>::ndeps(it->op); // decrease reference count of children // reverse order so that the first argument will end up at the top of the stack for (int c=ndeps-1; c>=0; --c) { int ch_ind = c==0 ? it->i1 : it->i2; int remaining = --refcount.at(ch_ind); if (remaining==0) unused.push(place[ch_ind]); } // Find a place to store the variable if (it->op!=OP_OUTPUT) { if (live_variables && !unused.empty()) { // Try to reuse a variable from the stack if possible (last in, first out) it->i0 = place[it->i0] = unused.top(); unused.pop(); } else { // Allocate a new variable it->i0 = place[it->i0] = worksize++; } } // Save the location of the children for (int c=0; c<ndeps; ++c) { if (c==0) { it->i1 = place[it->i1]; } else { it->i2 = place[it->i2]; } } // If binary, make sure that the second argument is the same as the first one // (in order to treat all operations as binary) NOTE: ugly if (ndeps==1 && it->op!=OP_OUTPUT) { it->i2 = it->i1; } } if (verbose()) { if (live_variables) { userOut() << "Using live variables: work array is " << worksize << " instead of " << nodes.size() << endl; } else { userOut() << "Live variables disabled." << endl; } } // Allocate work vectors (symbolic/numeric) alloc_w(worksize); alloc(); s_work_.resize(worksize); // Reset the temporary variables for (int i=0; i<nodes.size(); ++i) { if (nodes[i]) { nodes[i]->temp = 0; } } // Now mark each input's place in the algorithm for (vector<pair<int, SXNode*> >::const_iterator it=symb_loc.begin(); it!=symb_loc.end(); ++it) { it->second->temp = it->first+1; } // Add input instructions for (int ind=0; ind<inputv_.size(); ++ind) { int nz=0; for (vector<SXElement>::iterator itc = inputv_[ind].begin(); itc != inputv_[ind].end(); ++itc, ++nz) { int i = itc->getTemp()-1; if (i>=0) { // Mark as input algorithm_[i].op = OP_INPUT; // Location of the input algorithm_[i].i1 = ind; algorithm_[i].i2 = nz; // Mark input as read itc->setTemp(0); } } } // Locate free variables free_vars_.clear(); for (vector<pair<int, SXNode*> >::const_iterator it=symb_loc.begin(); it!=symb_loc.end(); ++it) { if (it->second->temp!=0) { // Save to list of free parameters free_vars_.push_back(SXElement::create(it->second)); // Remove marker it->second->temp=0; } } // Initialize just-in-time compilation for numeric evaluation using OpenCL just_in_time_opencl_ = getOption("just_in_time_opencl"); if (just_in_time_opencl_) { #ifdef WITH_OPENCL freeOpenCL(); allocOpenCL(); #else // WITH_OPENCL casadi_error("Option \"just_in_time_opencl\" true requires CasADi " "to have been compiled with WITH_OPENCL=ON"); #endif // WITH_OPENCL } // Initialize just-in-time compilation for sparsity propagation using OpenCL just_in_time_sparsity_ = getOption("just_in_time_sparsity"); if (just_in_time_sparsity_) { #ifdef WITH_OPENCL spFreeOpenCL(); spAllocOpenCL(); #else // WITH_OPENCL casadi_error("Option \"just_in_time_sparsity\" true requires CasADi to " "have been compiled with WITH_OPENCL=ON"); #endif // WITH_OPENCL } if (CasadiOptions::profiling && CasadiOptions::profilingBinary) { profileWriteName(CasadiOptions::profilingLog, this, getOption("name"), ProfilingData_FunctionType_SXFunction, algorithm_.size()); int alg_counter = 0; // Iterator to free variables vector<SXElement>::const_iterator p_it = free_vars_.begin(); std::stringstream stream; for (vector<AlgEl>::const_iterator it = algorithm_.begin(); it!=algorithm_.end(); ++it) { stream.str(""); if (it->op==OP_OUTPUT) { stream << "output[" << it->i0 << "][" << it->i2 << "] = @" << it->i1; } else { stream << "@" << it->i0 << " = "; if (it->op==OP_INPUT) { stream << "input[" << it->i1 << "][" << it->i2 << "]"; } else { if (it->op==OP_CONST) { stream << it->d; } else if (it->op==OP_PARAMETER) { stream << *p_it++; } else { int ndep = casadi_math<double>::ndeps(it->op); casadi_math<double>::printPre(it->op, stream); for (int c=0; c<ndep; ++c) { if (c==0) { stream << "@" << it->i1; } else { casadi_math<double>::printSep(it->op, stream); stream << "@" << it->i2; } } casadi_math<double>::printPost(it->op, stream); } } } stream << std::endl; profileWriteSourceLine(CasadiOptions::profilingLog, this, alg_counter++, stream.str(), it->op); } } // Print if (verbose()) { userOut() << "SXFunctionInternal::init Initialized " << getOption("name") << " (" << algorithm_.size() << " elementary operations)" << endl; } }