int StVKReducedInternalForces::LoadFromStream(FILE * fin, int rTarget, int bigEndianMachine) { if (verbose) printf("Loading polynomials assuming little endian machine: %s.", (!bigEndianMachine) ? "TRUE" : "FALSE"); int header[4]; if ((int)(fread(header, sizeof(int), 4, fin)) < 4) { printf("Error: couldn't read from input cubic polynomial file.\n"); throw 1; } r = header[0]; int buffer; if (bigEndianMachine) { little2big(&r, &buffer, sizeof(int)); r = buffer; } if (rTarget > r) { printf("Error: the input cubic polynomial file has r=%d, but you requested %d > %d.\n", r, rTarget, r); throw 2; } // first read in the coefficients as if all modes requested if (verbose) printf(" r=%d\n", r); r2 = r * r; linearSize = header[1]; if (bigEndianMachine) { little2big(&linearSize, &buffer, sizeof(int)); linearSize = buffer; } quadraticSize = header[2]; if (bigEndianMachine) { little2big(&quadraticSize, &buffer, sizeof(int)); quadraticSize = buffer; } cubicSize = header[3]; if (bigEndianMachine) { little2big(&cubicSize, &buffer, sizeof(int)); cubicSize = buffer; } linearCoef_ = (double*) malloc (sizeof(double) * r * linearSize); if ((int)(fread(linearCoef_,sizeof(double),r*linearSize,fin)) < r*linearSize) { printf("Error: couldn't read from input cubic polynomial file.\n"); throw 1; } double bufferd; if (bigEndianMachine) { for(int i=0; i<r*linearSize; i++) { little2big(&linearCoef_[i], &bufferd, sizeof(double)); linearCoef_[i] = bufferd; } } quadraticCoef_ = (double*) malloc (sizeof(double) * r * quadraticSize); if ((int)(fread(quadraticCoef_,sizeof(double),r*quadraticSize,fin)) < r*quadraticSize) { printf("Error: couldn't read from input cubic polynomial file.\n"); throw 1; } if (bigEndianMachine) { for(int i=0; i<r*quadraticSize; i++) { little2big(&quadraticCoef_[i], &bufferd, sizeof(double)); quadraticCoef_[i] = bufferd; } } cubicCoef_ = (double*) malloc (sizeof(double) * r * cubicSize); if ((int)(fread(cubicCoef_,sizeof(double),r*cubicSize,fin)) < r*cubicSize) { 
printf("Error: couldn't read from input cubic polynomial file.\n"); throw 1; } if (bigEndianMachine) { for(int i=0; i<r*cubicSize; i++) { little2big(&cubicCoef_[i], &bufferd, sizeof(double)); cubicCoef_[i] = bufferd; } } if (rTarget >= 0) { int linearSizeTarget, quadraticSizeTarget, cubicSizeTarget; GetSizes(rTarget, &linearSizeTarget, &quadraticSizeTarget, &cubicSizeTarget); double * linearCoefTemp_ = (double*) malloc (sizeof(double) * rTarget * linearSizeTarget); double * quadraticCoefTemp_ = (double*) malloc (sizeof(double) * rTarget * quadraticSizeTarget); double * cubicCoefTemp_ = (double*) malloc (sizeof(double) * rTarget * cubicSizeTarget); for(int output=0; output<rTarget; output++) for(int i=0; i<rTarget; i++) { SetSizes(rTarget); int positionTarget = linearCoefPos(output, i); SetSizes(r); int position = linearCoefPos(output, i); linearCoefTemp_[positionTarget] = linearCoef_[position]; } for(int output=0; output<rTarget; output++) for(int i=0; i<rTarget; i++) for(int j=i; j<rTarget; j++) { SetSizes(rTarget); int positionTarget = quadraticCoefPos(output, i, j); SetSizes(r); int position = quadraticCoefPos(output, i, j); quadraticCoefTemp_[positionTarget] = quadraticCoef_[position]; } for(int output=0; output<rTarget; output++) for(int i=0; i<rTarget; i++) for(int j=i; j<rTarget; j++) for(int k=j; k<rTarget; k++) { SetSizes(rTarget); int positionTarget = cubicCoefPos(output, i, j, k); SetSizes(r); int position = cubicCoefPos(output, i, j, k); cubicCoefTemp_[positionTarget] = cubicCoef_[position]; } r = rTarget; SetSizes(r); free(linearCoef_); free(quadraticCoef_); free(cubicCoef_); linearCoef_ = linearCoefTemp_; quadraticCoef_ = quadraticCoefTemp_; cubicCoef_ = cubicCoefTemp_; } volumetricMesh = NULL; U = NULL; reducedGravityForce = NULL; precomputedIntegrals = NULL; numElementVertices = 0; muLame = NULL; InitBuffers(); addGravity = false; useSingleThread = 0; shallowCopy = 0; g=9.81; return 0; }
// Builds the polynomial coefficients of the reduced stiffness matrix by
// differentiating, term by term, the reduced internal force polynomials held
// by stVKReducedInternalForces:
//   free terms      <- linear force coefficients
//   linear terms    <- quadratic force coefficients
//   quadratic terms <- cubic force coefficients
// Only matrix entries (output, i) with i >= output are filled in.
// A non-zero `verbose` prints progress to stdout.
StVKReducedStiffnessMatrix::StVKReducedStiffnessMatrix(StVKReducedInternalForces * stVKReducedInternalForces, int verbose) : shallowCopy(0)
{
  r = stVKReducedInternalForces->Getr();
  r2 = r*r;

  if (verbose)
  {
    printf("Building the reduced stiffness matrix quadratic polynomials... r is %d\n",r);
    printf("Building free terms:");
  }

  // ---- free terms: one coefficient per stored matrix entry ----
  freeCoef_ = (double*) malloc (sizeof(double) * r * (r+1) / 2);

  for(int row=0; row<r; row++)
  {
    if (verbose)
      printf(" %d",row);

    // the derivative of a linear force term is its coefficient
    for(int col=row; col<r; col++)
      freeCoef_[freeCoefPos(row,col)] = stVKReducedInternalForces->linearCoef(row,col);
  }

  if (verbose)
    printf("\nBuilding linear terms:");

  // ---- linear terms: linearSize coefficients per stored matrix entry ----
  linearSize = StVKReducedInternalForces::GetLinearSize(r);
  linearCoef_ = (double*) malloc (sizeof(double) * r * (r+1) / 2 * linearSize);

  for(int row=0; row<r; row++)
  {
    if (verbose)
      printf(" %d",row);

    for(int col=row; col<r; col++)
    {
      for(int q=0; q<r; q++)
      {
        // the force coefficient is looked up with its two indices in ascending order
        const int lower = (q < col) ? q : col;
        const int upper = (q < col) ? col : q;

        double value = stVKReducedInternalForces->quadraticCoef(row,lower,upper);

        // d/dq_i of q_i^2 contributes a factor of two
        if (col == q)
          value *= 2;

        linearCoef_[linearCoefPos(row,col,q)] = value;
      }
    }
  }

  if (verbose)
    printf("\nBuilding quadratic terms:");

  // ---- quadratic terms: quadraticSize coefficients per stored matrix entry ----
  quadraticSize = StVKReducedInternalForces::GetQuadraticSize(r);
  quadraticCoef_ = (double*) malloc (sizeof(double) * r * (r+1) / 2 * quadraticSize);

  for(int row=0; row<r; row++)
  {
    if (verbose)
      printf(" %d",row);

    for(int col=row; col<r; col++)
    {
      for(int p=0; p<r; p++)
      {
        for(int q=p; q<r; q++)
        {
          // (i1,j1,k1) becomes (col,p,q) sorted in ascending order
          int i1 = col;
          int j1 = p;
          int k1 = q;
          int buffer;

          // the macro is left defined exactly as before, in case code further
          // down the file relies on it
          #define SWAP(i,j)\
            buffer = i;\
            i = j;\
            j = buffer;

          // three compare-and-swap passes sort the triple
          if (j1 < i1)
          {
            SWAP(i1,j1);
          }
          if (k1 < j1)
          {
            SWAP(j1,k1);
          }
          if (j1 < i1)
          {
            SWAP(i1,j1);
          }

          double value = stVKReducedInternalForces->cubicCoef(row,i1,j1,k1);

          // the factor is the power of q_col in the monomial q_col * q_p * q_q
          int power = 1;
          if (col == p)
            power++;
          if (col == q)
            power++;
          if (power > 1)
            value *= power;

          quadraticCoef_[quadraticCoefPos(row,col,p,q)] = value;
        }
      }
    }
  }

  if (verbose)
    printf("\n");

  InitBuffers();
}
void StVKReducedInternalForces::ProcessElements(int startElement, int endElement, double ** target) { double * linearCoef_ = this->linearCoef_; double * quadraticCoef_ = this->quadraticCoef_; double * cubicCoef_ = this->cubicCoef_; if (target != NULL) { linearCoef_ = target[0]; quadraticCoef_ = target[1]; cubicCoef_ = target[2]; } if (verbose >= 1) printf("Generating element data: element %d to %d...\n", startElement, endElement-1); int numVertices_ = volumetricMesh->getNumVertices(); // make auxiliary vectors double * qiqjBuffer = (double*) calloc(r2,sizeof(double)); double * qkBuffer = (double*) calloc(r2,sizeof(double)); double * coefs = (double*) calloc(r*r*r*r,sizeof(double)); void * elIter; precomputedIntegrals->AllocateElementIterator(&elIter); // Linear terms //if (verbose >= 1) //printf("Building linear terms:"); for(int el=startElement; el < endElement; el++) { precomputedIntegrals->PrepareElement(el, elIter); if (verbose >= 1) { if (el % 100 == 1) printf("%d ",el); fflush(NULL); } double lambda = lambdaLame[el]; double mu = muLame[el]; for(int i=0; i<r; i++) { for (int c=0; c<numElementVertices; c++) { Vec3d force(0.0,0.0,0.0); int vc = volumetricMesh->getVertexIndex(el, c); for (int a=0; a<numElementVertices; a++) { int va = volumetricMesh->getVertexIndex(el, a); Vec3d ua(U[ELT(3*numVertices_,3*va+0,i)], U[ELT(3*numVertices_,3*va+1,i)], U[ELT(3*numVertices_,3*va+2,i)]); force += lambda * (precomputedIntegrals->A(elIter,c,a) * ua) + (mu * precomputedIntegrals->B(elIter,a,c)) * ua + mu * (precomputedIntegrals->A(elIter,a,c) * ua); } // multiply Uc^T * force for(int output=0; output<r; output++) { linearCoef_[linearCoefPos(output, i)] += U[ELT(3*numVertices_,3*vc+0,output)] * force[0] + U[ELT(3*numVertices_,3*vc+1,output)] * force[1] + U[ELT(3*numVertices_,3*vc+2,output)] * force[2]; } } } } // Quadratic terms //if (verbose >= 1) //printf("\nBuilding quadratic terms:"); double ** forceBuffer = (double**) malloc (sizeof(double*) * numElementVertices); 
for(int c=0; c<numElementVertices; c++) forceBuffer[c] = (double*) calloc (3*r2,sizeof(double)); memset(quadraticCoef_, 0, sizeof(double) * r * quadraticSize); int * vertices = (int*) malloc (sizeof(int) * numElementVertices); for(int el=startElement; el < endElement; el++) { precomputedIntegrals->PrepareElement(el, elIter); if (verbose >= 1) { if (el % 100 == 1) printf("%d ",el); fflush(NULL); } double lambda = lambdaLame[el]; double mu = muLame[el]; for(int ver=0; ver<numElementVertices ;ver++) vertices[ver] = volumetricMesh->getVertexIndex(el, ver); for(int c=0; c<numElementVertices; c++) memset(forceBuffer[c],0,sizeof(double)*3*r2); for(int a=0; a<numElementVertices; a++) { for(int b=0; b<numElementVertices; b++) { // compute ua*ub for all possible i,j cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, r, r, 3, 1.0, &U[ELT(3*numVertices_,3*vertices[a],0)], 3*numVertices_, &U[ELT(3*numVertices_,3*vertices[b],0)], 3*numVertices_, 0.0, qiqjBuffer, r); for(int c=0; c<numElementVertices; c++) { Vec3d vec1 = 0.5 * lambda * precomputedIntegrals->C(elIter,c,a,b) + mu * precomputedIntegrals->C(elIter,a,b,c); Vec3d C = lambda * precomputedIntegrals->C(elIter,a,b,c) + mu * (precomputedIntegrals->C(elIter,c,a,b) + precomputedIntegrals->C(elIter,b,a,c)); for(int i=0; i<r; i++) { double * posa = &(U[ELT(3*numVertices_,3*vertices[a]+0,i)]); double Cdotua = C[0] * posa[0] + C[1] * posa[1] + C[2] * posa[2]; for(int j=0; j<r; j++) { double buffer = qiqjBuffer[ELT(r,i,j)]; double * posb = &(U[ELT(3*numVertices_,3*vertices[b]+0,j)]); int index = ELT(3,0,ELT(r,i,j)); forceBuffer[c][index+0] += buffer * vec1[0] + Cdotua * posb[0]; forceBuffer[c][index+1] += buffer * vec1[1] + Cdotua * posb[1]; forceBuffer[c][index+2] += buffer * vec1[2] + Cdotua * posb[2]; } } } // end c } // end b } // end a // generate unpacked coefficients for this element memset(coefs,0,sizeof(double)*r*r*r); for(int c=0; c<numElementVertices; c++) { // multiply Uc^T * forcesBuffer[c] 
cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, r, r2, 3, 1.0, &U[ELT(3*numVertices_,3*vertices[c],0)], 3*numVertices_, forceBuffer[c], 3, 1.0, coefs, r); } // pack and add for(int output=0; output<r; output++) { for(int i=0; i<r; i++) for(int j=0; j<r; j++) { int i1 = i; int j1 = j; if (j < i) { i1 = j; j1 = i; } quadraticCoef_[quadraticCoefPos(output,i1, j1)] += coefs[ELT(r,output,ELT(r,i,j))]; } } } // end el free(vertices); for(int c=0; c<numElementVertices; c++) free(forceBuffer[c]); free(forceBuffer); // cubic terms //if (verbose >= 1) //printf("\nBuilding cubic terms:\n"); memset(coefs,0,sizeof(double)*r*r*r*r); for(int el=startElement; el < endElement; el++) { precomputedIntegrals->PrepareElement(el, elIter); if (verbose >= 1) { if ((el % 50 == 1) || ((r > 30) && (el % 25 == 1))) printf("%d ",el); fflush(NULL); } double lambda = lambdaLame[el]; double mu = muLame[el]; for (int a=0; a<numElementVertices; a++) { int va = volumetricMesh->getVertexIndex(el, a); for(int b=0; b<numElementVertices; b++) { int vb = volumetricMesh->getVertexIndex(el, b); // fill up the buffers cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, r, r, 3, 1.0, &U[ELT(3*numVertices_,3*va,0)], 3*numVertices_, &U[ELT(3*numVertices_,3*vb,0)], 3*numVertices_, 0.0, qiqjBuffer, r); for(int i=0; i<r2; i++) qkBuffer[i] = 0; for(int c=0; c<numElementVertices; c++) { int vc = volumetricMesh->getVertexIndex(el, c); for(int d=0; d<numElementVertices; d++) { int vd = volumetricMesh->getVertexIndex(el, d); double factor = 0.5 * lambda * precomputedIntegrals->D(elIter,a,b,c,d) + mu * precomputedIntegrals->D(elIter,a,c,b,d); cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, r, r, 3, factor, &U[ELT(3*numVertices_,3*vc,0)], 3*numVertices_, &U[ELT(3*numVertices_,3*vd,0)], 3*numVertices_, 1.0, qkBuffer, r); } } // multiply qiqjBuffer * qkBuffer^T (tensor product) // both vectors are r^2 vectors cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, r2, r2, 1, 1.0, qiqjBuffer, r2, qkBuffer, r2, 1.0, 
coefs, r2); } // over b } // over a } for(int i=0; i < r*cubicSize; i++) cubicCoef_[i] = 0.0; // unpack for(int i=0; i<r; i++) for(int j=0; j<r; j++) for(int k=0; k<r; k++) for(int l=0; l<r; l++) { // sort the indices int i1=i; int j1=j; int k1=k; tripleSort(i1,j1,k1); //int pos = cubicCoefPos(l, i1, j1, k1); //int pos1 = ELT(r*r,ELT(r,i,j),ELT(r,l,k)); cubicCoef_[cubicCoefPos(l, i1, j1, k1)] += coefs[ELT(r*r,ELT(r,i,j),ELT(r,l,k))]; } free(qiqjBuffer); free(qkBuffer); free(coefs); precomputedIntegrals->ReleaseElementIterator(elIter); }