/// Deformer entry point: copies the input mesh onto the requested outputGeom
/// element and replaces every vertex coordinate c with
/// envelope * cos(c) * sin(c) * tan(c), timing the loop so that the
/// vectorizable and the scalar variants can be compared.
///
/// @param plug  plug Maya asked to have computed; only outputGeom is handled.
/// @param data  data block holding this node's input and output values.
/// @return MStatus::kUnknownParameter for plugs this node does not compute,
///         MStatus::kFailure (or the failing call's status) on bad input,
///         otherwise the status of writing the points back to the mesh.
MStatus sseDeformer::compute(const MPlug& plug, MDataBlock& data)
{
	MStatus status;

	if (plug.attribute() != outputGeom) {
		printf("Ignoring requested plug\n");
		// Hand plugs we do not compute back to Maya instead of reporting
		// success; this matches splatDeformer::compute in this file.
		return MStatus::kUnknownParameter;
	}

	unsigned int index = plug.logicalIndex();
	MObject thisNode = this->thisMObject();

	// get input value
	MPlug inPlug(thisNode, input);
	inPlug.selectAncestorLogicalIndex(index, input);
	MDataHandle hInput = data.inputValue(inPlug, &status);
	// NOTE(review): MCheckStatus is defined outside this block; it is used
	// here, as in the original code, on the assumption that it reports the
	// message and returns from the function on failure.
	MCheckStatus(status, "ERROR getting input mesh\n");

	// get the input geometry
	MDataHandle inputData = hInput.child(inputGeom);
	if (inputData.type() != MFnData::kMesh) {
		printf("Incorrect input geometry type\n");
		return MStatus::kFailure;
	}

	// the output starts out as a copy of the input mesh
	MDataHandle outputData = data.outputValue(plug);
	outputData.copy(inputData);
	if (outputData.type() != MFnData::kMesh) {
		printf("Incorrect output mesh type\n");
		return MStatus::kFailure;
	}

	MObject oSurf = outputData.asMesh();
	if (oSurf.isNull()) {
		printf("Output surface is NULL\n");
		return MStatus::kFailure;
	}

	MFnMesh outMesh;
	status = outMesh.setObject(oSurf);
	MCheckStatus(status, "ERROR attaching output mesh\n");

	// get all points at once for demo purposes. Really should get points
	// from the current group using iterator
	MFloatPointArray pts;
	status = outMesh.getPoints(pts);
	MCheckStatus(status, "ERROR getting points\n");

	const unsigned int nPoints = pts.length();

	MDataHandle envData = data.inputValue(envelope, &status);
	MCheckStatus(status, "ERROR getting envelope\n");
	const float env = envData.asFloat();

	MDataHandle sseData = data.inputValue(sseEnabled, &status);
	MCheckStatus(status, "ERROR getting sseEnabled\n");
	// Named differently from the sseEnabled attribute to avoid shadowing it.
	const bool useSse = sseData.asBool();

	// NOTE: Using MTimer and possibly other classes disables
	// autovectorization with Intel <=10.1 compiler on OSX and Linux!!
	// Must compile this function with -fno-exceptions on OSX and
	// Linux to guarantee autovectorization is done. Use -fvec_report2
	// to check for vectorization status messages with Intel compiler.
	MTimer timer;
	timer.beginTimer();

	if (useSse) {
		// Inner loop will autovectorize. Around 3x faster than the
		// loop below it. It would be faster if first element was
		// guaranteed to be aligned on 16 byte boundary.
		//
		// The loop covers 4 consecutive floats starting at x, so it also
		// rewrites the float that follows z (presumably w; this relies on
		// MFloatPoint storing x, y, z, w contiguously - TODO confirm).
		for (unsigned int i = 0; i < nPoints; ++i) {
			float* ptPtr = &pts[i].x;
			for (int j = 0; j < 4; j++) {
				ptPtr[j] = env * (cosf(ptPtr[j]) * sinf(ptPtr[j]) * tanf(ptPtr[j]));
			}
		}
	} else {
		// This inner loop will not autovectorize.
		for (unsigned int i = 0; i < nPoints; ++i) {
			MFloatPoint& pt = pts[i];
			for (int j = 0; j < 3; j++) {
				pt[j] = env * (cosf(pt[j]) * sinf(pt[j]) * tanf(pt[j]));
			}
		}
	}

	timer.endTimer();
	if (useSse) {
		printf("SSE enabled, runtime %f\n", timer.elapsedTime());
	} else {
		printf("SSE disabled, runtime %f\n", timer.elapsedTime());
	}

	// Report a failed write instead of returning an unrelated earlier status.
	status = outMesh.setPoints(pts);
	MCheckStatus(status, "ERROR setting points\n");

	return status;
}
/// Executes the export command: parses the command flags and every -jobArg
/// string, validates and resolves each job's output file, creates one
/// AbcWriteJob per job, then steps Maya through the union of all requested
/// frames and lets each job evaluate the frames it is interested in.
///
/// @param args  command arguments. The top-level flags read here are help,
///              verbose, dontSkipUnwrittenFrames, preRollStartFrame and
///              jobArg; everything else is parsed out of the jobArg strings.
/// @return MS::kSuccess when every frame was processed or help was shown;
///         the argument-parser status when no jobArg was given;
///         MS::kFailure on an invalid job argument, an unusable output path,
///         a user interrupt, or a caught exception.
MStatus AbcBullet::doIt(const MArgList & args)
{
    try
    {
        MStatus status;

        // remembered so the current time can be restored once we are done
        MTime oldCurTime = MAnimControl::currentTime();

        MArgParser argData(syntax(), args, &status);

        if (argData.isFlagSet("help"))
        {
            MGlobal::displayInfo(util::getHelpText());
            return MS::kSuccess;
        }

        bool verbose = argData.isFlagSet("verbose");

        // If skipFrame is true, when going through the playback range of the
        // scene, as much frames are skipped when possible.  This could cause
        // a problem for, time dependent solutions like
        // particle system / hair simulation
        bool skipFrame = true;
        if (argData.isFlagSet("dontSkipUnwrittenFrames"))
            skipFrame = false;

        // DBL_MAX is the "no pre-roll requested" sentinel
        double startEvaluationTime = DBL_MAX;
        if (argData.isFlagSet("preRollStartFrame"))
        {
            double startAt = 0.0;
            argData.getFlagArgument("preRollStartFrame", 0, startAt);
            startEvaluationTime = startAt;
        }

        unsigned int jobSize = argData.numberOfFlagUses("jobArg");

        if (jobSize == 0)
            return status;

        // the frame range we will be iterating over for all jobs,
        // includes frames which are not skipped and the startAt offset
        std::set<double> allFrameRange;

        // this will eventually hold only the animated jobs.
        // its a list because we will be removing jobs from it
        std::list < AbcWriteJobPtr > jobList;

        for (unsigned int jobIndex = 0; jobIndex < jobSize; jobIndex++)
        {
            JobArgs jobArgs;
            MArgList jobArgList;
            argData.getFlagArgumentList("jobArg", jobIndex, jobArgList);
            MString jobArgsStr = jobArgList.asString(0);
            MStringArray jobArgsArray;

            jobArgs.verbose = verbose;

            {
                // parse the job arguments
                // e.g. -perFrameCallbackMel "print \"something\"" will be
                // split into
                //    [0] -perFrameCallbackMel
                //    [1] print "something"
                enum State {
                    kArgument,               // parsing an argument (not quoted)
                    kDoubleQuotedString,     // parsing a double quoted string
                    kSingleQuotedString,     // parsing a single quoted string
                };

                State state = kArgument;
                MString stringBuffer;
                for (unsigned int charIdx = 0; charIdx < jobArgsStr.numChars();
                    charIdx++)
                {
                    MString ch = jobArgsStr.substringW(charIdx, charIdx);
                    switch (state)
                    {
                    case kArgument:
                        if (ch == " ")
                        {
                            // space terminates the current argument
                            if (stringBuffer.length() > 0)
                            {
                                jobArgsArray.append(stringBuffer);
                                stringBuffer.clear();
                            }
                            // goto another argument
                            state = kArgument;
                        }
                        else if (ch == "\"")
                        {
                            if (stringBuffer.length() > 0)
                            {
                                // double quote is part of the argument
                                stringBuffer += ch;
                            }
                            else
                            {
                                // goto double quoted string
                                state = kDoubleQuotedString;
                            }
                        }
                        else if (ch == "'")
                        {
                            if (stringBuffer.length() > 0)
                            {
                                // single quote is part of the argument
                                stringBuffer += ch;
                            }
                            else
                            {
                                // goto single quoted string
                                state = kSingleQuotedString;
                            }
                        }
                        else
                        {
                            stringBuffer += ch;
                        }
                        break;

                    case kDoubleQuotedString:
                        // double quote terminates the current string
                        if (ch == "\"")
                        {
                            jobArgsArray.append(stringBuffer);
                            stringBuffer.clear();
                            state = kArgument;
                        }
                        else if (ch == "\\")
                        {
                            // escaped character; a trailing backslash with
                            // nothing after it is kept as a backslash
                            MString nextCh = (++charIdx < jobArgsStr.numChars())
                                ? jobArgsStr.substringW(charIdx, charIdx) : "\\";
                            if (nextCh == "n")       stringBuffer += "\n";
                            else if (nextCh == "t")  stringBuffer += "\t";
                            else if (nextCh == "r")  stringBuffer += "\r";
                            else if (nextCh == "\\") stringBuffer += "\\";
                            else if (nextCh == "'")  stringBuffer += "'";
                            else if (nextCh == "\"") stringBuffer += "\"";
                            else                     stringBuffer += nextCh;
                        }
                        else
                        {
                            stringBuffer += ch;
                        }
                        break;

                    case kSingleQuotedString:
                        // single quote terminates the current string
                        if (ch == "'")
                        {
                            jobArgsArray.append(stringBuffer);
                            stringBuffer.clear();
                            state = kArgument;
                        }
                        else if (ch == "\\")
                        {
                            // escaped character; a trailing backslash with
                            // nothing after it is kept as a backslash
                            MString nextCh = (++charIdx < jobArgsStr.numChars())
                                ? jobArgsStr.substringW(charIdx, charIdx) : "\\";
                            if (nextCh == "n")       stringBuffer += "\n";
                            else if (nextCh == "t")  stringBuffer += "\t";
                            else if (nextCh == "r")  stringBuffer += "\r";
                            else if (nextCh == "\\") stringBuffer += "\\";
                            else if (nextCh == "'")  stringBuffer += "'";
                            else if (nextCh == "\"") stringBuffer += "\"";
                            else                     stringBuffer += nextCh;
                        }
                        else
                        {
                            stringBuffer += ch;
                        }
                        break;
                    }
                }

                // the rest of the argument
                if (stringBuffer.length() > 0)
                {
                    jobArgsArray.append(stringBuffer);
                }
            }

            // without -frameRange the job covers only the current frame
            double startTime = oldCurTime.value();
            double endTime = oldCurTime.value();
            double strideTime = 1.0;
            bool hasRange = false;
            bool hasRoot = false;
            std::set <double> shutterSamples;
            std::string fileName;

            unsigned int numJobArgs = jobArgsArray.length();
            for (unsigned int i = 0; i < numJobArgs; ++i)
            {
                // flags are matched case-insensitively; their values are read
                // from jobArgsArray so they keep their original case
                MString arg = jobArgsArray[i];
                arg.toLowerCase();

                if (arg == "-f" || arg == "-file")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgFile, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    fileName = jobArgsArray[++i].asChar();
                }

                else if (arg == "-fr" || arg == "-framerange")
                {
                    if (i+2 >= numJobArgs || !jobArgsArray[i+1].isDouble() ||
                        !jobArgsArray[i+2].isDouble())
                    {
                        MString err = MStringResource::getString( kInvalidArgFrameRange, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    hasRange = true;
                    startTime = jobArgsArray[++i].asDouble();
                    endTime = jobArgsArray[++i].asDouble();

                    // make sure start frame is smaller or equal to endTime
                    if (startTime > endTime)
                    {
                        double temp = startTime;
                        startTime = endTime;
                        endTime = temp;
                    }
                }

                else if (arg == "-frs" || arg == "-framerelativesample")
                {
                    if (i+1 >= numJobArgs || !jobArgsArray[i+1].isDouble())
                    {
                        MString err = MStringResource::getString( kInvalidArgFrameRelativeSample, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    shutterSamples.insert(jobArgsArray[++i].asDouble());
                }

                else if (arg == "-nn" || arg == "-nonormals")
                {
                    jobArgs.noNormals = true;
                }

                else if (arg == "-ro" || arg == "-renderableonly")
                {
                    jobArgs.excludeInvisible = true;
                }

                else if (arg == "-s" || arg == "-step")
                {
                    // A zero, negative or NaN step would keep the frame loop
                    // further down from ever passing endTime (it would spin
                    // forever while growing transSamples), so such a value is
                    // rejected as an invalid step argument.
                    if (i+1 >= numJobArgs || !jobArgsArray[i+1].isDouble() ||
                        !(jobArgsArray[i+1].asDouble() > 0.0))
                    {
                        MString err = MStringResource::getString( kInvalidArgStep, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    strideTime = jobArgsArray[++i].asDouble();
                }

                else if (arg == "-sl" || arg == "-selection")
                {
                    jobArgs.useSelectionList = true;
                }

                else if (arg == "-sn" || arg == "-stripnamespaces")
                {
                    if (i+1 >= numJobArgs || !jobArgsArray[i+1].isUnsigned())
                    {
                        // the strip all namespaces case
                        // so we pick a very LARGE number
                        jobArgs.stripNamespace = 0xffffffff;
                    }
                    else
                    {
                        jobArgs.stripNamespace = jobArgsArray[++i].asUnsigned();
                    }
                }

                else if (arg == "-uv" || arg == "-uvwrite")
                {
                    jobArgs.writeUVs = true;
                }

                else if (arg == "-wcs" || arg == "-writecolorsets")
                {
                    jobArgs.writeColorSets = true;
                }

                else if (arg == "-wfs" || arg == "-writefacesets")
                {
                    jobArgs.writeFaceSets = true;
                }

                else if (arg == "-ws" || arg == "-worldspace")
                {
                    jobArgs.worldSpace = true;
                }

                else if (arg == "-wv" || arg == "-writevisibility")
                {
                    jobArgs.writeVisibility = true;
                }

                else if (arg == "-mfc" || arg == "-melperframecallback")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MGlobal::displayError(
                            "melPerFrameCallback incorrectly specified.");
                        return MS::kFailure;
                    }
                    jobArgs.melPerFrameCallback = jobArgsArray[++i].asChar();
                }

                else if (arg == "-pfc" || arg == "-pythonperframecallback")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgPythonPerframeCallback, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    jobArgs.pythonPerFrameCallback = jobArgsArray[++i].asChar();
                }

                else if (arg == "-mpc" || arg == "-melpostjobcallback")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgMelPostJobCallback, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    jobArgs.melPostCallback = jobArgsArray[++i].asChar();
                }

                else if (arg == "-ppc" || arg == "-pythonpostjobcallback")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgPythonPostJobCallback, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    jobArgs.pythonPostCallback = jobArgsArray[++i].asChar();
                }

                // geomArbParams - attribute filtering stuff
                else if (arg == "-atp" || arg == "-attrprefix")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgAttrPrefix, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    jobArgs.prefixFilters.push_back(jobArgsArray[++i].asChar());
                }

                else if (arg == "-a" || arg == "-attr")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgAttr, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    jobArgs.attribs.insert(jobArgsArray[++i].asChar());
                }

                // userProperties - attribute filtering stuff
                else if (arg == "-uatp" || arg == "-userattrprefix")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MString err = MStringResource::getString( kInvalidArgUserAttrPrefix, status );
                        MGlobal::displayError(err);
                        return MS::kFailure;
                    }
                    jobArgs.userPrefixFilters.push_back(jobArgsArray[++i].asChar());
                }

                else if (arg == "-u" || arg == "-userattr")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MGlobal::displayError("userAttr incorrectly specified.");
                        return MS::kFailure;
                    }
                    jobArgs.userAttribs.insert(jobArgsArray[++i].asChar());
                }

                else if (arg == "-rt" || arg == "-root")
                {
                    if (i+1 >= numJobArgs)
                    {
                        MGlobal::displayError("root incorrectly specified.");
                        return MS::kFailure;
                    }

                    hasRoot = true;
                    MString root = jobArgsArray[++i];

                    MSelectionList sel;
                    if (sel.add(root) != MS::kSuccess)
                    {
                        MString warn = root;
                        warn += " could not be select, skipping.";
                        MGlobal::displayWarning(warn);
                        continue;
                    }

                    unsigned int numRoots = sel.length();
                    for (unsigned int j = 0; j < numRoots; ++j)
                    {
                        MDagPath path;
                        if (sel.getDagPath(j, path) != MS::kSuccess)
                        {
                            MString warn = path.fullPathName();
                            warn += " (part of ";
                            warn += root;
                            warn += " ) not a DAG Node, skipping.";
                            MGlobal::displayWarning(warn);
                            continue;
                        }
                        jobArgs.dagPaths.insert(path);
                    }
                }

                else if (arg == "-ef" || arg == "-eulerfilter")
                {
                    jobArgs.filterEulerRotations = true;
                }

                else
                {
                    MString warn = "Ignoring unsupported flag: ";
                    warn += jobArgsArray[i];
                    MGlobal::displayWarning(warn);
                }
            } //  for i

            if (fileName == "")
            {
                MString error = "-file not specified.";
                MGlobal::displayError(error);
                return MS::kFailure;
            }

            {
                MString fileRule, expandName;
                MString alembicFileRule = "alembicCache";
                MString alembicFilePath = "cache/alembic";

                MString queryFileRuleCmd;
                queryFileRuleCmd.format("workspace -q -fre \"^1s\"",
                    alembicFileRule);
                MString queryFolderCmd;
                queryFolderCmd.format("workspace -en `workspace -q -fre \"^1s\"`",
                    alembicFileRule);

                // query the file rule for alembic cache
                MGlobal::executeCommand(queryFileRuleCmd, fileRule);
                if (fileRule.length() > 0)
                {
                    // we have alembic file rule, query the folder
                    MGlobal::executeCommand(queryFolderCmd, expandName);
                }
                else
                {
                    // alembic file rule does not exist, create it
                    MString addFileRuleCmd;
                    addFileRuleCmd.format("workspace -fr \"^1s\" \"^2s\"",
                        alembicFileRule, alembicFilePath);
                    MGlobal::executeCommand(addFileRuleCmd);

                    // save the workspace. maya may discard file rules on exit
                    MGlobal::executeCommand("workspace -s");

                    // query the folder
                    MGlobal::executeCommand(queryFolderCmd, expandName);
                }

                // resolve the expanded file rule
                if (expandName.length() == 0)
                {
                    expandName = alembicFilePath;
                }

                // get the path to the alembic file rule
                MFileObject directory;
                directory.setRawFullName(expandName);
                MString directoryName = directory.resolvedFullName();

                // make sure the cache folder exists
                if (!directory.exists())
                {
                    // create the cache folder
                    MString createFolderCmd;
                    createFolderCmd.format("sysFile -md \"^1s\"", directoryName);
                    MGlobal::executeCommand(createFolderCmd);
                }

                // resolve the relative path
                MFileObject absoluteFile;
                absoluteFile.setRawFullName(fileName.c_str());
#if MAYA_API_VERSION < 201300
                if (absoluteFile.resolvedFullName() !=
                    absoluteFile.expandedFullName())
                {
#else
                if (!MFileObject::isAbsolutePath(fileName.c_str()))
                {
#endif
                    // this is a relative path
                    MString absoluteFileName = directoryName + "/" +
                        fileName.c_str();
                    absoluteFile.setRawFullName(absoluteFileName);
                    fileName = absoluteFile.resolvedFullName().asChar();
                }
                else
                {
                    fileName = absoluteFile.resolvedFullName().asChar();
                }

                // check the path must exist before writing
                MFileObject absoluteFilePath;
                absoluteFilePath.setRawFullName(absoluteFile.path());
                if (!absoluteFilePath.exists())
                {
                    MString error;
                    error.format("Path ^1s does not exist!",
                        absoluteFilePath.resolvedFullName());
                    MGlobal::displayError(error);
                    return MS::kFailure;
                }

                // check the file is used by any AlembicNode in the scene
                MItDependencyNodes dgIter(MFn::kPluginDependNode);
                for (; !dgIter.isDone(); dgIter.next())
                {
                    MFnDependencyNode alembicNode(dgIter.thisNode());
                    if (alembicNode.typeName() != "AlembicNode")
                    {
                        continue;
                    }

                    MPlug abcFilePlug = alembicNode.findPlug("abc_File");
                    if (abcFilePlug.isNull())
                    {
                        continue;
                    }

                    MFileObject alembicFile;
                    alembicFile.setRawFullName(abcFilePlug.asString());
                    if (!alembicFile.exists())
                    {
                        continue;
                    }

                    if (alembicFile.resolvedFullName() ==
                        absoluteFile.resolvedFullName())
                    {
                        MString error = "Can't export to an Alembic file which is in use.";
                        MGlobal::displayError(error);
                        return MS::kFailure;
                    }
                }

                // probe that the target can be opened for writing before any
                // frame is evaluated
                std::ofstream ofs(fileName.c_str());
                if (!ofs.is_open())
                {
                    MString error = MString("Can't write to file: ") + fileName.c_str();
                    MGlobal::displayError(error);
                    return MS::kFailure;
                }
                ofs.close();
            }

            if (shutterSamples.empty())
            {
                shutterSamples.insert(0.0);
            }

            if (jobArgs.prefixFilters.empty())
            {
                jobArgs.prefixFilters.push_back("ABC_");
            }

            // the list of frames written into the abc file
            std::set<double> transSamples;
            std::set <double>::const_iterator shutter;
            std::set <double>::const_iterator shutterStart = shutterSamples.begin();
            std::set <double>::const_iterator shutterEnd = shutterSamples.end();
            // strideTime is strictly positive here (validated in the -step
            // handling above), so this loop terminates
            for (double frame = startTime; frame <= endTime; frame += strideTime)
            {
                for (shutter = shutterStart; shutter != shutterEnd; ++shutter)
                {
                    double curFrame = *shutter + frame;
                    transSamples.insert(curFrame);
                }
            }

            if (transSamples.empty())
            {
                transSamples.insert(startTime);
            }

            if (jobArgs.dagPaths.size() > 1)
            {
                // check for validity of the DagPath relationships complexity : n^2

                util::ShapeSet::const_iterator m, n;
                util::ShapeSet::const_iterator end = jobArgs.dagPaths.end();
                for (m = jobArgs.dagPaths.begin(); m != end; )
                {
                    MDagPath path1 = *m;
                    m++;
                    for (n = m; n != end; n++)
                    {
                        MDagPath path2 = *n;
                        if (util::isAncestorDescendentRelationship(path1,path2))
                        {
                            MString errorMsg = path1.fullPathName();
                            errorMsg += " and ";
                            errorMsg += path2.fullPathName();
                            errorMsg += " have an ancestor relationship.";
                            MGlobal::displayError(errorMsg);
                            return MS::kFailure;
                        }
                    }  // for n
                }  // for m
            }
            // no root is specified, and we aren't using a selection
            // so we'll try to translate the whole Maya scene by using all
            // children of the world as roots.
            else if (!hasRoot && !jobArgs.useSelectionList)
            {
                MSelectionList sel;
#if MAYA_API_VERSION >= 201100
                sel.add("|*", true);
#else
                // older versions of Maya will not be able to find top level nodes
                // within namespaces
                sel.add("|*");
#endif
                unsigned int numRoots = sel.length();
                for (unsigned int i = 0; i < numRoots; ++i)
                {
                    MDagPath path;
                    sel.getDagPath(i, path);
                    jobArgs.dagPaths.insert(path);
                }
            }
            else if (hasRoot && jobArgs.dagPaths.empty())
            {
                MString errorMsg = "No valid root nodes were specified.";
                MGlobal::displayError(errorMsg);
                return MS::kFailure;
            }
            else if (jobArgs.useSelectionList)
            {
                MSelectionList activeList;
                MGlobal::getActiveSelectionList(activeList);
                if (activeList.length() == 0)
                {
                    MString errorMsg =
                        "-selection specified but nothing is actively selected.";
                    MGlobal::displayError(errorMsg);
                    return MS::kFailure;
                }
            }

            AbcA::TimeSamplingPtr transTime;

            // shutter offsets of the first frame, converted with util::spf()
            std::vector<double> samples;
            for (shutter = shutterStart; shutter != shutterEnd; ++shutter)
            {
                samples.push_back((startTime + *shutter) * util::spf());
            }

            if (hasRange)
            {
                transTime.reset(new AbcA::TimeSampling(AbcA::TimeSamplingType(
                    static_cast<Alembic::Util::uint32_t>(samples.size()),
                    strideTime * util::spf()), samples));
            }
            else
            {
                transTime.reset(new AbcA::TimeSampling());
            }

            AbcWriteJobPtr job(new AbcWriteJob(fileName.c_str(),
                transSamples, transTime, jobArgs));

            jobList.push_front(job);

            // make sure we add additional whole frames, if we arent skipping
            // the inbetween ones
            if (!skipFrame && !allFrameRange.empty())
            {
                double localMin = *(transSamples.begin());
                std::set<double>::iterator last = transSamples.end();
                last--;
                double localMax = *last;

                double globalMin = *(allFrameRange.begin());
                last = allFrameRange.end();
                last--;
                double globalMax = *last;

                // if the min of our current frame range is beyond
                // what we know about, pad a few more frames
                if (localMin > globalMax)
                {
                    for (double f = globalMax; f < localMin; f++)
                    {
                        allFrameRange.insert(f);
                    }
                }

                // if the max of our current frame range is before
                // what we know about, pad a few more frames
                if (localMax < globalMin)
                {
                    for (double f = localMax; f < globalMin; f++)
                    {
                        allFrameRange.insert(f);
                    }
                }
            }

            // right now we just copy over the translation samples since
            // they are guaranteed to contain all the geometry samples
            allFrameRange.insert(transSamples.begin(), transSamples.end());
        }

        // add extra evaluation run up, if necessary
        if (startEvaluationTime != DBL_MAX && !allFrameRange.empty())
        {
            double firstFrame = *allFrameRange.begin();
            for (double f = startEvaluationTime; f < firstFrame; ++f)
            {
                allFrameRange.insert(f);
            }
        }

        std::set<double>::iterator it = allFrameRange.begin();
        std::set<double>::iterator itEnd = allFrameRange.end();

        MComputation computation;
        computation.beginComputation();

        // loop through every frame in the list, if a job has that frame in it's
        // list of transform or shape frames, then it will write out data and
        // call the perFrameCallback, if that frame is also the last one it has
        // to work on then it will also call the postCallback.
        // If it doesn't have this frame, then it does nothing

        MTimer timer;
        for (; it != itEnd; it++)
        {
            timer.beginTimer();

            MGlobal::viewFrame(*it);
            std::list< AbcWriteJobPtr >::iterator j = jobList.begin();
            std::list< AbcWriteJobPtr >::iterator jend = jobList.end();
            while (j != jend)
            {
                if (computation.isInterruptRequested())
                {
                    // close the computation opened by beginComputation()
                    // before bailing out, as the normal exit path does
                    computation.endComputation();
                    return MS::kFailure;
                }

                // eval() reports whether this was the job's last frame;
                // finished jobs are dropped from the list
                bool lastFrame = (*j)->eval(*it);

                if (lastFrame)
                {
                    j = jobList.erase(j);
                }
                else
                    j++;
            }
            timer.endTimer();

            if (verbose)
            {
                double frame = *it;
                MString info,arg1,arg2;
                arg1.set(frame);
                arg2.set(timer.elapsedTime());
                info.format( "processed frame: ^1s in ^2s seconds", arg1, arg2 );
                MGlobal::displayInfo(info);
            }
        }
        computation.endComputation();

        // set the time back
        MGlobal::viewFrame(oldCurTime);

        return MS::kSuccess;
    }
    catch (const Alembic::Util::Exception & e)
    {
        MString theError("Alembic Exception encountered: ");
        theError += e.what();
        MGlobal::displayError(theError);
        return MS::kFailure;
    }
    catch (const std::exception & e)
    {
        MString theError("std::exception encountered: ");
        theError += e.what();
        MGlobal::displayError(theError);
        return MS::kFailure;
    }
}
// Builds the IR for this comparison stage.
//
// The function creates the stage function and the user function, fills a
// "start" block with the return-index setup and loop-index initialisation,
// and then builds a nested loop whose innermost body calls the user function
// and post-processes its result:
//   - untiled: two loops (outer_loop_skeleton_1 -> inner_loop_skeleton_1)
//   - tiled:   four loops (outer_1 -> inner_1 -> outer_2 -> inner_2)
// Depending on the stage flags the outer loop is additionally wrapped in a
// timer (time_loop()), replaced by a progress-tracking split
// (track_progress()), or handed to parallelize_main_loop().
//
// Preconditions (asserted): tiling is not combined with progress tracking,
// and loop timing is not combined with parallelization.
//
// NOTE(review): get_loaded_args()[1] and get_loaded_args()[3] are used as the
// outer and inner loop bounds; presumably they are the element counts of the
// left (N) and right (M) inputs - confirm against create_stage_function().
//
// The order of the add_expr / insert_at / initialize calls below determines
// the order of the generated code, so statements must not be reordered.
void ComparisonStageIR::build_stage() {
    assert(!is_tiled() || (is_tiled() && !track_progress()));
    // timer is only allowed for serial loops (just use it to get avg iterations per second or something like that)
    assert(!time_loop() || (time_loop() && !is_parallelized()));

    set_stage_function(create_stage_function());
    set_user_function(create_user_function());

    // stuff before the loop
    // build the return idx
    MVar *loop_start = new MVar(MScalarType::get_long_type()); // don't make a constant b/c it should be updateable
    loop_start->register_for_delete();
    MStatement *set_loop_start = new MStatement(loop_start, MVar::create_constant<long>(0));
    set_loop_start->register_for_delete();
    MStatement *set_result = new MStatement(get_return_idx(), loop_start);
    set_result->register_for_delete();

    // the "start" block collects everything that is emitted before/around the loops
    set_start_block(new MBlock("start"));
    get_start_block()->register_for_delete();
    get_start_block()->add_expr(set_loop_start);
    get_start_block()->add_expr(set_result);

    // When we don't parallelize, then make the inner loop's index outside of both the loops rather than within
    // the outer loop. This is a hack for llvm because if we have an alloca call within each iteration of the outer loop,
    // we will be "leaking" stack space each time that is called, so moving it outside of the loop prevents that.
    // However, it makes it hard to work with when we then parallelize because the code sees that inner loop index as a
    // free variable that needs to be added to the closure. This is not fun because our index is now a pointer to an index
    // and then we would need to update the index by going through the pointer, etc. Basically, it would cause some hacks on the
    // LLVM side (and unless this becomes something that is needed in the future, I don't want to deal with it).
    // So instead, it is dealt with below. Without parallelization, the inner loop index is initialized outside of the
    // nested loop, and then updated to the correct start right before the inner loop begins execution.
    // When parallelization is turned on, the inner loop index is made INSIDE the outer loop. This is because the
    // parallelized outer loop calls a function every iteration which is the outer loop body, and then within that the
    // inner loop is created. alloca is scoped at the function level, so the inner loop index gets a single alloca
    // in this function call, and then the inner loop is created.
    // This may not be required of other possible back-end languages that we choose, but it will depend on their scoping rules.
    //
    // TL;DR LLVM has function scoping for allocainst, so if we create the inner loop index as so
    // val outer_index...
    // for outer_index...
    //   val inner_index...
    //   for inner_index...
    // every iteration of the outer loop adds space to the stack which isn't released until the function ends. So we want
    // val outer_index...
    // val inner_index...
    // for outer_index...
    //   for inner_index...
    MVar *inner_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());

    MBlock *preallocation_block = create_preallocator();
    get_start_block()->add_expr(preallocation_block);

    // The timer is always allocated, but it is only inserted into the start
    // block in the time_loop() branches below.
    MTimer *timer = nullptr;
    timer = new MTimer();
    timer->register_for_delete();

    // The *_2 skeletons stay nullptr in the untiled case; later code relies
    // on that (see the inner_loop_skeleton_2 ? ... : ... selections).
    MFor *outer_loop_skeleton_1 = nullptr;
    MFor *inner_loop_skeleton_1 = nullptr;
    MFor *outer_loop_skeleton_2 = nullptr;
    MFor *inner_loop_skeleton_2 = nullptr;
    MBlock *inner_loop_body = nullptr;

    // think of all comparisons as being in an NxM matrix where N is the left input and M is the right input.
    // N is the outermost iteration
    // Tile sizes are hard-coded to 2 in both dimensions here.
    tile_size_N = MVar::create_constant<long>(2);
    tile_size_M = MVar::create_constant<long>(2);
    // Left uninitialised here; assigned in both branches of the
    // track_progress() if/else further down before it is read.
    MVar *final_loop_bound;

    if (!is_tiled() || !is_tileable()) { // No tiling
        // To make sure that the inner loop doesn't get replace with a different bound if parallelizing, copy
        // the bound to a different variable and use that
        MVar *bound_copy = new MVar(MScalarType::get_long_type());
        bound_copy->register_for_delete();
        MStatement *set_copy = new MStatement(bound_copy, get_stage_function()->/*get_args()*/get_loaded_args()[3]);
        set_copy->register_for_delete();
        get_start_block()->add_expr(set_copy);

        // loop components
        MVar *outer_loop_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_skeleton_1 = create_stage_for_loop(outer_loop_start, MVar::create_constant<long>(1),
                                                      get_stage_function()->/*get_args()*/get_loaded_args()[1],
                                                      false, get_start_block());
        if (is_parallelizable() && is_parallelized()) {
            outer_loop_skeleton_1->set_exec_type(PARALLEL);
        }

        // Starting value of the inner loop for each outer iteration: either a
        // fresh 0, or (outer loop index + 1) computed inside the outer body.
        // NOTE(review): the second form presumably skips symmetric/self pairs
        // when both sides come from the same input - confirm.
        MVar *_inner_start = nullptr;
        if ((left_input || right_input) && !_force_commutative) {
            _inner_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        } else {
            MAdd *add = new MAdd(outer_loop_skeleton_1->get_loop_index(), MVar::create_constant<long>(1));
            outer_loop_skeleton_1->get_body_block()->add_expr(add);
            add->register_for_delete();
            _inner_start = add->get_result();
        }

        // the outer loop goes either directly into the start block or into
        // the timer's block (with the timer itself in the start block)
        if (!time_loop()) {
            get_start_block()->add_expr(outer_loop_skeleton_1);
        } else {
            get_start_block()->add_expr(timer);
            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        }

        // reset the shared inner index at the start of every outer iteration
        MStatement *set_inner_start = new MStatement(inner_start, _inner_start);
        set_inner_start->register_for_delete();
        outer_loop_skeleton_1->get_body_block()->add_expr(set_inner_start);
        MBlock *temp_block = new MBlock();
        temp_block->register_for_delete();
        inner_loop_skeleton_1 = create_stage_for_loop(inner_start, MVar::create_constant<long>(1), bound_copy, true, temp_block);
        // TODO hack, need to add the loop index initialization before the outer loop, but we have to add the outer loop before this since
        // the inner_start depends on the outer loop
        get_start_block()->insert_at(temp_block, get_start_block()->get_exprs().size() - 2); // insert right before the outer loop

        // stuff for calling the user function in the loop
        inner_loop_body = inner_loop_skeleton_1->get_body_block();

    } else if (is_tiled() && is_tileable()) { // tiling
        // loop components
        MDiv *_outer_1_bound = new MDiv(get_stage_function()->/*get_args()*/get_loaded_args()[1], tile_size_N);
        _outer_1_bound->register_for_delete();
        MDiv *_inner_1_bound = new MDiv(get_stage_function()->/*get_args()*/get_loaded_args()[3], tile_size_M);
        _inner_1_bound->register_for_delete();
        // compensate for when the number of elements isn't a multiple of the tile size
        MAdd *outer_1_bound = new MAdd(_outer_1_bound->get_result(), MVar::create_constant<long>(1));
        outer_1_bound->register_for_delete();
        MAdd *inner_1_bound = new MAdd(_inner_1_bound->get_result(), MVar::create_constant<long>(1));
        inner_1_bound->register_for_delete();
        get_start_block()->add_expr(_outer_1_bound);
        get_start_block()->add_expr(_inner_1_bound);
        get_start_block()->add_expr(outer_1_bound);
        get_start_block()->add_expr(inner_1_bound);

        // all four loop indices are initialised in the start block, i.e.
        // outside of every loop
        MVar *outer_loop_start_1 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_start_1->override_name("outer_loop_start_1");
        MVar *inner_loop_start_1 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        inner_loop_start_1->override_name("inner_loop_start_1");
        MVar *outer_loop_start_2 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_start_2->override_name("outer_loop_start_2");
        MVar *inner_loop_start_2 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        inner_loop_start_2->override_name("inner_loop_start_2");

        // n = 0 to N/tile_size_N + 1
        outer_loop_skeleton_1 = create_stage_for_loop(outer_loop_start_1, MVar::create_constant<long>(1), outer_1_bound->get_result(), true, get_start_block());
        outer_loop_skeleton_1->override_name("outer_loop_skeleton1");
        // (disabled: the outer loop is added to the start block/timer after
        // all four skeletons exist, see below)
//
//        if (!time_loop()) {
//            get_start_block()->add_expr(outer_loop_skeleton_1);
//        } else {
//            get_start_block()->add_expr(timer);
//            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
//        }
        // m = 0 to M/tile_size_M + 1
        inner_loop_skeleton_1 = create_stage_for_loop(inner_loop_start_1, MVar::create_constant<long>(1), inner_1_bound->get_result(), true, get_start_block());
        inner_loop_skeleton_1->override_name("inner_loop_skeleton1");
        // nn = 0 to tile_size_N
        outer_loop_skeleton_2 = create_stage_for_loop(outer_loop_start_2, MVar::create_constant<long>(1), tile_size_N, true, get_start_block());
        outer_loop_skeleton_2->override_name("outer_loop_skeleton2");
        // mm = 0 to tile_size_M
        inner_loop_skeleton_2 = create_stage_for_loop(inner_loop_start_2, MVar::create_constant<long>(1), tile_size_M, true, get_start_block());
        inner_loop_skeleton_2->override_name("inner_loop_skeleton2");

        if (!time_loop()) {
            get_start_block()->add_expr(outer_loop_skeleton_1);
        } else {
            get_start_block()->add_expr(timer);
            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        }

        // nest inner_1 -> outer_2 -> inner_2; inner_1 itself is attached to
        // outer_1's body further down (in the track_progress() if/else)
        inner_loop_skeleton_1->get_body_block()->add_expr(outer_loop_skeleton_2);
        outer_loop_skeleton_2->get_body_block()->add_expr(inner_loop_skeleton_2);

        inner_loop_body = inner_loop_skeleton_2->get_body_block();

    }

    // create_user_function_inputs fills user_arg_block through the out-pointer
    MBlock *user_arg_block;
    std::vector<MVar *> args = create_user_function_inputs(&user_arg_block, outer_loop_skeleton_1, outer_loop_skeleton_2,
                                                           inner_loop_skeleton_1, inner_loop_skeleton_2, nullptr, false,
                                                           nullptr, nullptr,
                                                           get_stage_function()->/*get_args()*/get_loaded_args()[1],
                                                           get_stage_function()->/*get_args()*/get_loaded_args()[3]);
    if (!is_tiled() || !is_tileable()) {
        inner_loop_body->add_expr(user_arg_block);
    } // if tiled, this is already added in the create_user_function_inputs
    // from here on everything is appended to the argument block itself
    inner_loop_body = user_arg_block;
    // position right after the argument setup; apply_buckets' result is
    // inserted here later, i.e. ahead of the user function call
    int bucket_idx = inner_loop_body->get_exprs().size();
    MFunctionCall *call = call_user_function(get_user_function(), args);
    inner_loop_body->add_expr(call);

    // handle the output of the user call
    MBlock *processed_call = process_user_function_call(call, NULL, false);
    inner_loop_body->add_expr(processed_call);

    // do any other postprocessing needed in the loop before the next iteration
    MBlock *extra = loop_extras();
    inner_loop_body->add_expr(extra);

    if (track_progress() && !is_parallelized()) {
        // still return the original loop bound
        MBlock *temp = new MBlock();
        temp->register_for_delete();
        final_loop_bound = outer_loop_skeleton_1->get_loop_bound();
        outer_loop_skeleton_1->get_body_block()->add_expr(inner_loop_skeleton_1);
        inner_loop_body->insert_at(apply_buckets(args[0], args[1], inner_loop_skeleton_2 ? inner_loop_skeleton_2 : inner_loop_skeleton_1), bucket_idx);
        std::pair<MFor *, MFor *> splits = ProgressTracker::create_progress_tracker(outer_loop_skeleton_1,
                                                                                    inner_loop_skeleton_1,
                                                                                    get_num_tracking_splits(), temp, true);
        // find the original outer_loop_skeleton_1 in the block and remove it. Then replace with the new one in splits.first
        // NOTE(review): if the loop is not found, idx ends up equal to the
        // number of expressions and is still passed to remove_at().
        int idx = 0;
        if (!time_loop()) {
            for (std::vector<MExpr *>::const_iterator iter = get_start_block()->get_exprs().cbegin();
                 iter != get_start_block()->get_exprs().cend(); iter++) {
                if (*iter == outer_loop_skeleton_1) {
                    break;
                }
                idx++;
            }
            get_start_block()->remove_at(idx);
        } else {
            for (std::vector<MExpr *>::const_iterator iter = timer->get_timer_block()->get_exprs().cbegin();
                 iter != timer->get_timer_block()->get_exprs().cend(); iter++) {
                if (*iter == outer_loop_skeleton_1) {
                    break;
                }
                idx++;
            }
            timer->get_timer_block()->remove_at(idx);
        }
        outer_loop_skeleton_1 = splits.first; // do the replacement
        // outer_loop_skeleton_1 added to temp block in the progress tracker function
        if (!time_loop()) {
            get_stage_function()->add_body_block(temp);
        } else {
            timer->get_timer_block()->insert_at(temp, idx);
        }
    } else {
        outer_loop_skeleton_1->get_body_block()->add_expr(inner_loop_skeleton_1);
        final_loop_bound = outer_loop_skeleton_1->get_loop_bound();
        inner_loop_body->insert_at(apply_buckets(args[0], args[1], inner_loop_skeleton_2 ? inner_loop_skeleton_2 : inner_loop_skeleton_1), bucket_idx);
    }

    // modify this loop if it needs to be parallelized
    if (is_parallelizable() && is_parallelized()) {
        parallelize_main_loop(get_start_block(), outer_loop_skeleton_1, inner_loop_skeleton_1);
    }
    // (disabled: the tiled loops are already nested inside the tiling branch above)
//
//    if (is_tiled() && is_tileable()) {
//        inner_loop_skeleton_1->get_body_block()->add_expr(outer_loop_skeleton_2);
//        outer_loop_skeleton_2->get_body_block()->add_expr(inner_loop_skeleton_2);
//    }

    // postprocessing after the outer loop is done (no postprocessing needed after the inner loop since it just goes back to the outer loop)
    MBlock *after_loop = time_loop() ? timer->get_after_timer_block() : outer_loop_skeleton_1->get_end_block();
    MBlock *finished = finish_stage(nullptr, final_loop_bound);
    MBlock *deletion = delete_fields();
    after_loop->add_expr(deletion);
    after_loop->add_expr(finished);
    get_stage_function()->insert_body_block_at(get_start_block(), 1); // insert before the temp block, which would have been added if doing tracking. Insert after the stage arg loading though.
    // the temp block has the loop now, so it can't come before everything else
}
/// Deformer compute: moves every point of the output geometry onto the
/// closest point of the deforming mesh.
///
/// @param plug  The plug being evaluated; anything other than outputGeom is
///              rejected with MStatus::kUnknownParameter.
/// @param data  Data block used to read the input geometry, the group id and
///              the deforming mesh, and to write the output geometry.
/// @return The status of the last checked data-block operation on success;
///         MStatus::kFailure when a geometry handle is not a mesh or when any
///         closest-point query fails; the failing status when the intersector
///         cannot be built.
///
/// On failure of the closest-point search the output keeps the unmodified
/// copy of the input geometry: partially snapped points are never written.
MStatus splatDeformer::compute(const MPlug& plug, MDataBlock& data)
{
    // do this if we are using an OpenMP implementation that is not the same as Maya's.
    // Even if it is the same, it does no harm to make this call.
    MThreadUtils::syncNumOpenMPThreads();

    MStatus status = MStatus::kUnknownParameter;
    if (plug.attribute() != outputGeom) {
        return status;
    }

    unsigned int index = plug.logicalIndex();
    MObject thisNode = this->thisMObject();

    // get input value
    MPlug inPlug(thisNode, input);
    inPlug.selectAncestorLogicalIndex(index, input);
    MDataHandle hInput = data.inputValue(inPlug, &status);
    MCheckStatus(status, "ERROR getting input mesh\n");

    // get the input geometry
    MDataHandle inputData = hInput.child(inputGeom);
    if (inputData.type() != MFnData::kMesh) {
        printf("Incorrect input geometry type\n");
        return MStatus::kFailure;
    }

    // get the input groupId - ignored for now...
    // NOTE: the attribute named groupId is looked up first; the local declared
    // on the next line shadows it from there on, so the order matters.
    MDataHandle hGroup = inputData.child(groupId);
    unsigned int groupId = hGroup.asLong();

    // get deforming mesh
    MDataHandle deformData = data.inputValue(deformingMesh, &status);
    MCheckStatus(status, "ERROR getting deforming mesh\n");
    if (deformData.type() != MFnData::kMesh) {
        printf("Incorrect deformer geometry type %d\n", deformData.type());
        return MStatus::kFailure;
    }

    MObject dSurf = deformData.asMeshTransformed();

    MDataHandle outputData = data.outputValue(plug);
    outputData.copy(inputData);
    if (outputData.type() != MFnData::kMesh) {
        printf("Incorrect output mesh type\n");
        return MStatus::kFailure;
    }

    MItGeometry iter(outputData, groupId, false);

    // create fast intersector structure; without it every query below would
    // fail, so bail out early with the real error instead
    MMeshIntersector intersector;
    status = intersector.create(dSurf);
    MCheckStatus(status, "ERROR creating mesh intersector\n");

    // get all points at once. Faster to query, and also better for
    // threading than using iterator
    MPointArray verts;
    iter.allPositions(verts);
    int nPoints = verts.length();

    // use bool variable as lightweight object for failure check in loop below.
    // It is an OpenMP reduction variable: every thread works on its own
    // private copy and the copies are OR-ed together after the loop, so no
    // thread ever writes a flag that another thread is reading.
    bool failed = false;

    MTimer timer;
    timer.beginTimer();

#ifdef _OPENMP
#pragma omp parallel for reduction(||:failed)
#endif
    for (int i = 0; i < nPoints; i++) {
        // Cannot break out of an OpenMP loop, so once this thread has seen a
        // failed intersection it skips the rest of its iterations
        if (failed) continue;

        // mesh point object must be in loop-local scope to avoid race conditions
        MPointOnMesh meshPoint;

        // Do intersection. Need to use per-thread status value as
        // MStatus has internal state and may trigger race conditions
        // if set from multiple threads.
        MStatus localStatus = intersector.getClosestPoint(verts[i], meshPoint);
        if (localStatus != MStatus::kSuccess) {
            // NOTE - we cannot break out of an OpenMP region, so set
            // bad status and skip remaining iterations
            failed = true;
            continue;
        }

        // default OpenMP scheduling breaks traversal into large
        // chunks, so low risk of false sharing here in array write.
        verts[i] = meshPoint.getPoint();
    }

    timer.endTimer();
    printf("Runtime for threaded loop %f\n", timer.elapsedTime());

    // do not push a half-deformed point set onto the output
    if (failed) {
        printf("Closest point failed\n");
        return MStatus::kFailure;
    }

    // write values back onto output using fast set method on iterator
    iter.setAllPositions(verts);

    return status;
}
/// Deformer compute: hands the world-space points of the input mesh and of
/// the deforming ("magnet") mesh to magnetForce() and writes the resulting
/// positions onto the output geometry.
///
/// Steps, in order:
///  1. validate the plug and fetch input, deforming and output geometry;
///  2. give each magnet vertex a polarity from its z coordinate relative to
///     the midpoint of the magnet's z extent;
///  3. flatten both point sets into x,y,z double arrays, offsetting the object
///     points by the transX/transY/transZ attribute values;
///  4. call magnetForce(), which is expected to update the object array in
///     place (its result is read back from that array);
///  5. store the shift of the object's bounding-box centre back into
///     transX/transY/transZ when tesla is non-zero;
///  6. write the new positions to the output in world space.
///
/// @param plug  The plug being evaluated; anything other than outputGeom is
///              rejected with MStatus::kUnknownParameter.
/// @param data  Data block supplying geometry and attribute values.
/// @return MStatus::kFailure for a non-mesh handle or a failed buffer
///         allocation; otherwise the status left by the last attribute read.
MStatus finalproject::compute(const MPlug& plug, MDataBlock& data)
{
    // do this if we are using an OpenMP implementation that is not the same as Maya's.
    // Even if it is the same, it does no harm to make this call.
    MThreadUtils::syncNumOpenMPThreads();

    MStatus status = MStatus::kUnknownParameter;
    if (plug.attribute() != outputGeom) {
        return status;
    }

    unsigned int index = plug.logicalIndex();
    MObject thisNode = this->thisMObject();

    // get input value
    MPlug inPlug(thisNode, input);
    inPlug.selectAncestorLogicalIndex(index, input);
    MDataHandle hInput = data.inputValue(inPlug, &status);
    MCheckStatus(status, "ERROR getting input mesh\n");

    // get the input geometry
    MDataHandle inputData = hInput.child(inputGeom);
    if (inputData.type() != MFnData::kMesh) {
        printf("Incorrect input geometry type\n");
        return MStatus::kFailure;
    }

    // get the input groupId - ignored for now...
    // NOTE: the attribute named groupId is looked up first; the local declared
    // on the next line shadows it from there on, so the order matters.
    MDataHandle hGroup = inputData.child(groupId);
    unsigned int groupId = hGroup.asLong();

    // get deforming mesh
    MDataHandle deformData = data.inputValue(deformingMesh, &status);
    MCheckStatus(status, "ERROR getting deforming mesh\n");
    if (deformData.type() != MFnData::kMesh) {
        printf("Incorrect deformer geometry type %d\n", deformData.type());
        return MStatus::kFailure;
    }

    MDataHandle offloadData = data.inputValue(offload, &status);

    // gathers world space positions of the object and the magnet
    MObject dSurf = deformData.asMeshTransformed();
    MObject iSurf = inputData.asMeshTransformed();
    MFnMesh fnDeformingMesh, fnInputMesh;
    fnDeformingMesh.setObject(dSurf);
    fnInputMesh.setObject(iSurf);

    MDataHandle outputData = data.outputValue(plug);
    outputData.copy(inputData);
    if (outputData.type() != MFnData::kMesh) {
        printf("Incorrect output mesh type\n");
        return MStatus::kFailure;
    }

    MItGeometry iter(outputData, groupId, false);

    // get all points at once. Faster to query, and also better for
    // threading than using iterator
    MPointArray objVerts;
    iter.allPositions(objVerts);
    const int objNumPoints = objVerts.length();

    MPointArray magVerts, tempverts;
    fnDeformingMesh.getPoints(magVerts);
    fnInputMesh.getPoints(tempverts);
    const int magNumPoints = magVerts.length();
    const int inputNumPoints = tempverts.length();

    // finds min and max z-coordinate values to determine middle point
    // (choice of z-axis was ours)
    double minZ = DBL_MAX, maxZ = -DBL_MAX;
    for (int i = 0; i < magNumPoints; i++) {
        minZ = magVerts[i].z < minZ ? magVerts[i].z : minZ;
        maxZ = magVerts[i].z > maxZ ? magVerts[i].z : maxZ;
    }
    const double middle = (minZ + maxZ) / 2;

    // All three scratch buffers live on the heap: a stack array sized by the
    // vertex count is not standard C++ and can overflow the stack on dense
    // meshes.
    double* polarity  = static_cast<double*>(malloc(sizeof(double) * magNumPoints));
    double* objdVerts = static_cast<double*>(malloc(sizeof(double) * objNumPoints * 3));
    double* magdVerts = static_cast<double*>(malloc(sizeof(double) * magNumPoints * 3));

    // malloc(0) may legitimately return a null pointer, so a null result is
    // only an error when a non-empty buffer was requested.
    if ((!polarity && magNumPoints > 0) ||
        (!objdVerts && objNumPoints > 0) ||
        (!magdVerts && magNumPoints > 0)) {
        printf("Failed to allocate vertex buffers\n");
        free(polarity);   // free(NULL) is a no-op
        free(objdVerts);
        free(magdVerts);
        return MStatus::kFailure;
    }

    // assigns polarity based on middle point of mesh
    for (int i = 0; i < magNumPoints; i++) {
        polarity[i] = magVerts[i].z > middle ? maxZ / magVerts[i].z
                                             : -minZ / magVerts[i].z;
    }

    // creates handles to use attribute data
    MDataHandle vecX = data.inputValue(transX, &status);
    MDataHandle vecY = data.inputValue(transY, &status);
    MDataHandle vecZ = data.inputValue(transZ, &status);

    // gathers previously stored coordinates of the center of the object
    double moveX = vecX.asFloat();
    double moveY = vecY.asFloat();
    double moveZ = vecZ.asFloat();

    // translates object based on the position stored in the attribute values
    // NOTE(review): pairs the i-th iterator point with the i-th mesh vertex,
    // i.e. assumes the iterator visits every vertex in mesh order - confirm.
    for (int i = 0; i < objNumPoints; i++) {
        objdVerts[i * 3]     = tempverts[i].x + moveX;
        objdVerts[i * 3 + 1] = tempverts[i].y + moveY;
        objdVerts[i * 3 + 2] = tempverts[i].z + moveZ;
    }

    for (int i = 0; i < magNumPoints; i++) {
        magdVerts[i * 3]     = magVerts[i].x;
        magdVerts[i * 3 + 1] = magVerts[i].y;
        magdVerts[i * 3 + 2] = magVerts[i].z;
    }

    double teslaData = data.inputValue(tesla, &status).asDouble();
    MDataHandle posiData = data.inputValue(positivelycharged, &status);

    // finds the pivot point (bounding-box extents, stored as
    // {minX, maxX, minY, maxY, minZ, maxZ}) of the object in world space
    // prior to being affected by the magnet
    double pivot[6] = {DBL_MAX, -DBL_MAX, DBL_MAX, -DBL_MAX, DBL_MAX, -DBL_MAX};
    for (int i = 0; i < inputNumPoints; i++) {
        pivot[0] = tempverts[i].x < pivot[0] ? tempverts[i].x : pivot[0];
        pivot[1] = tempverts[i].x > pivot[1] ? tempverts[i].x : pivot[1];
        pivot[2] = tempverts[i].y < pivot[2] ? tempverts[i].y : pivot[2];
        pivot[3] = tempverts[i].y > pivot[3] ? tempverts[i].y : pivot[3];
        pivot[4] = tempverts[i].z < pivot[4] ? tempverts[i].z : pivot[4];
        pivot[5] = tempverts[i].z > pivot[5] ? tempverts[i].z : pivot[5];
    }

    MTimer timer;
    timer.beginTimer();

    // main function call
    magnetForce(magNumPoints, objNumPoints, teslaData, magdVerts, objdVerts,
                polarity, posiData.asBool(), offloadData.asBool());

    timer.endTimer();
    printf("Runtime for threaded loop %f\n", timer.elapsedTime());

    for (int i = 0; i < objNumPoints; i++) {
        objVerts[i].x = objdVerts[i * 3 + 0];
        objVerts[i].y = objdVerts[i * 3 + 1];
        objVerts[i].z = objdVerts[i * 3 + 2];
    }

    // finds the pivot point of object in world space after being affected by
    // the magnet. Bounded by the size of objVerts (the array being read), so
    // the loop cannot run past its end if the mesh has more vertices than the
    // iterator returned.
    double objCenter[6] = {DBL_MAX, -DBL_MAX, DBL_MAX, -DBL_MAX, DBL_MAX, -DBL_MAX};
    for (int i = 0; i < objNumPoints; i++) {
        objCenter[0] = objVerts[i].x < objCenter[0] ? objVerts[i].x : objCenter[0];
        objCenter[1] = objVerts[i].x > objCenter[1] ? objVerts[i].x : objCenter[1];
        objCenter[2] = objVerts[i].y < objCenter[2] ? objVerts[i].y : objCenter[2];
        objCenter[3] = objVerts[i].y > objCenter[3] ? objVerts[i].y : objCenter[3];
        objCenter[4] = objVerts[i].z < objCenter[4] ? objVerts[i].z : objCenter[4];
        objCenter[5] = objVerts[i].z > objCenter[5] ? objVerts[i].z : objCenter[5];
    }

    // creates vector based on the two calculated pivot points
    moveX = (objCenter[0] + objCenter[1]) / 2 - (pivot[0] + pivot[1]) / 2;
    moveY = (objCenter[2] + objCenter[3]) / 2 - (pivot[2] + pivot[3]) / 2;
    moveZ = (objCenter[4] + objCenter[5]) / 2 - (pivot[4] + pivot[5]) / 2;

    // stores pivot vector for next computation
    if (teslaData != 0.0) {
        vecX.setFloat(moveX);
        vecY.setFloat(moveY);
        vecZ.setFloat(moveZ);
    }

    // write values back onto output using fast set method on iterator
    iter.setAllPositions(objVerts, MSpace::kWorld);

    free(polarity);
    free(objdVerts);
    free(magdVerts);

    return status;
}