void Net::Train(const mxArray *mx_data, const mxArray *mx_labels) { //mexPrintMsg("Start training..."); ReadData(mx_data); ReadLabels(mx_labels); InitNorm(); std::srand(params_.seed_); size_t train_num = labels_.size1(); size_t numbatches = (size_t) ceil((ftype) train_num/params_.batchsize_); trainerror_.resize(params_.numepochs_, numbatches); for (size_t epoch = 0; epoch < params_.numepochs_; ++epoch) { std::vector<size_t> randind(train_num); for (size_t i = 0; i < train_num; ++i) { randind[i] = i; } if (params_.shuffle_) { std::random_shuffle(randind.begin(), randind.end()); } std::vector<size_t>::const_iterator iter = randind.begin(); for (size_t batch = 0; batch < numbatches; ++batch) { size_t batchsize = std::min(params_.batchsize_, (size_t)(randind.end() - iter)); std::vector<size_t> batch_ind = std::vector<size_t>(iter, iter + batchsize); iter = iter + batchsize; Mat data_batch = SubMat(data_, batch_ind, 1); Mat labels_batch = SubMat(labels_, batch_ind, 1); UpdateWeights(epoch, false); InitActiv(data_batch); Mat pred_batch; Forward(pred_batch, 1); InitDeriv(labels_batch, trainerror_(epoch, batch)); Backward(); CalcWeights(); UpdateWeights(epoch, true); if (params_.verbose_ == 2) { std::string info = std::string("Epoch: ") + std::to_string(epoch+1) + std::string(", batch: ") + std::to_string(batch+1); mexPrintMsg(info); } } // batch if (params_.verbose_ == 1) { std::string info = std::string("Epoch: ") + std::to_string(epoch+1); mexPrintMsg(info); } } // epoch //mexPrintMsg("Training finished"); }
/* Re-aligns chain c of a multiple alignment, block by block.
 *
 * Steps, in order:
 *   1. CalcWeights(ma, c) is called.
 *   2. A temporary WeightedResiduePositions with weight 1 is set up; its
 *      residue array is the SAME array as ma->residues[c].res (shared, not
 *      copied), so writes below modify ma->residues[c] in place.
 *   3. Every residue's coords are reset from the chain's own coordinates
 *      (ma->chains[c]->res[index].coords).
 *   4. For each block j, AlignAlignmentBlocks() is invoked with &matrices[j],
 *      and transformVect() is then called for every position in the block
 *      with that matrix (presumably writing the transformed chain coordinate
 *      into the shared residue array -- confirm transformVect's argument
 *      order).
 *
 * ma        multiple alignment being updated
 * c         index of the chain to realign
 * matrices  one Matrix per alignment block (indexed by block number)
 */
void RealignChain(MultipleAlignment *ma, int c, Matrix *matrices) {
	int resIdx, blockIdx, pos;
	WeightedResiduePositions chainView;

	CalcWeights(ma, c);

	chainView.weight = 1;
	/* Alias, not a copy: chainView.res and ma->residues[c].res are one array. */
	chainView.res = ma->residues[c].res;

	/* Restore the chain's original coordinates for every aligned residue. */
	for (resIdx = 0; resIdx < ma->numResidues; resIdx++) {
		chainView.res[resIdx].coords =
			ma->chains[c]->res[chainView.res[resIdx].index].coords;
	}

	for (blockIdx = 0; blockIdx < ma->numBlocks; blockIdx++) {
		Matrix *xform = &matrices[blockIdx];
		const int first = ma->blocks[blockIdx].first;
		const int last = ma->blocks[blockIdx].last;

		AlignAlignmentBlocks(first, first, ma->numChains, 1,
		                     ma->residues, &chainView,
		                     last - first + 1, xform, -1);

		/* Apply this block's matrix to each position in [first, last]. */
		for (pos = first; pos <= last; pos++) {
			transformVect(&chainView.res[pos].coords, xform,
			              &ma->chains[c]->res[chainView.res[pos].index].coords);
		}
	}
}
/// Returns a weighted blend of the 2x2 pixel neighbourhood whose first pixel
/// is at ((int)x, (int)y), using the four weights produced by CalcWeights(x, y).
///
/// Pixels are labelled p1, p2 (the two adjacent pixels loaded from row (int)y)
/// and p3, p4 (the two loaded from the next row). The weights are scaled by
/// CONST_256 and applied in 16-bit integer arithmetic; the sum is shifted
/// right by 8 and packed back to bytes.
///
/// No bounds checking is done: 8 bytes are read from each of rows (int)y and
/// (int)y + 1 starting at column (int)x, so the caller must keep that
/// neighbourhood inside the image.
/// NOTE(review): assumes Pixel is a 32-bit value of four 8-bit channels, so
/// that each 64-bit load covers exactly two pixels -- confirm.
inline Pixel GetPixelSSE(const Image* img, float x, float y)
{
	const int stride = img->width;
	const int col = (int)x;
	const int row = (int)y;
	const Pixel* row0 = img->data + col + row * stride; // address of p1
	const Pixel* row1 = row0 + stride;                  // address of p3

	const __m128i zero = _mm_setzero_si128();

	// One 64-bit load per row fetches two neighbouring pixels.
	const __m128i px12_8 = _mm_loadl_epi64((const __m128i*)row0);
	const __m128i px34_8 = _mm_loadl_epi64((const __m128i*)row1);

	// Float weights scaled by CONST_256 so they can be used as integers.
	const __m128 weightsScaled = _mm_mul_ps(CalcWeights(x, y), CONST_256);

	// Widen every 8-bit channel to 16 bits.
	const __m128i px12_16 = _mm_unpacklo_epi8(px12_8, zero);
	const __m128i px34_16 = _mm_unpacklo_epi8(px34_8, zero);

	// Weights as 32-bit ints (w4 w3 w2 w1), then narrowed to 16 bits.
	const __m128i weights32 = _mm_cvtps_epi32(weightsScaled);
	const __m128i weights16 = _mm_packs_epi32(weights32, zero);

	// Broadcast each weight across the four channels of its pixel:
	// pairW12 -> w2 w2 w2 w2 w1 w1 w1 w1, pairW34 -> w4 w4 w4 w4 w3 w3 w3 w3.
	const __m128i dupW12 = _mm_shufflelo_epi16(weights16, _MM_SHUFFLE(1, 1, 0, 0));
	const __m128i dupW34 = _mm_shufflelo_epi16(weights16, _MM_SHUFFLE(3, 3, 2, 2));
	const __m128i pairW12 = _mm_unpacklo_epi16(dupW12, dupW12);
	const __m128i pairW34 = _mm_unpacklo_epi16(dupW34, dupW34);

	// Weight each pixel (two pixels per multiply).
	const __m128i scaled12 = _mm_mullo_epi16(px12_16, pairW12);
	const __m128i scaled34 = _mm_mullo_epi16(px34_16, pairW34);

	// Add the two rows, then fold the upper 64 bits onto the lower 64 bits so
	// the low half holds the sum of all four weighted pixels.
	const __m128i rowSum = _mm_add_epi16(scaled12, scaled34);
	const __m128i rowSumHigh = _mm_shuffle_epi32(rowSum, _MM_SHUFFLE(3, 2, 3, 2));
	const __m128i total = _mm_add_epi16(rowSum, rowSumHigh);

	// Undo the 256 scale and narrow back to 8 bits per channel.
	const __m128i result16 = _mm_srli_epi16(total, 8);
	const __m128i result8 = _mm_packus_epi16(result16, zero);

	return _mm_cvtsi128_si32(result8);
}
/// Returns a weighted blend of the 2x2 pixel neighbourhood whose first pixel
/// is at ((int)x, (int)y), using the four weights produced by CalcWeights(x, y).
///
/// Unlike a per-pixel multiply, this variant first regroups the 16 loaded
/// bytes by channel (all first-channel bytes, then second, ...) and uses
/// multiply-add plus a horizontal add to form each output channel. Channel
/// names R/G/B/A below are labels for byte positions 0..3 of a pixel.
///
/// NOTE: despite the name, the body uses _mm_hadd_epi32 (an SSSE3 intrinsic)
/// and _mm_packus_epi32 (an SSE4.1 intrinsic).
///
/// No bounds checking is done: 8 bytes are read from each of rows (int)y and
/// (int)y + 1 starting at column (int)x, so the caller must keep that
/// neighbourhood inside the image.
/// NOTE(review): assumes Pixel is a 32-bit value of four 8-bit channels, so
/// that each 64-bit load covers exactly two pixels -- confirm.
inline Pixel GetPixelSSE3(const Image<Pixel>* img, float x, float y)
{
	const int stride = img->width;
	const int col = (int)x;
	const int row = (int)y;
	const Pixel* row0 = img->data + col + row * stride; // address of p1
	const Pixel* row1 = row0 + stride;                  // address of p3

	const __m128i zero = _mm_setzero_si128();

	// One 64-bit load per row fetches two neighbouring pixels.
	const __m128i px12 = _mm_loadl_epi64((const __m128i*)row0);
	const __m128i px34 = _mm_loadl_epi64((const __m128i*)row1);

	const __m128 weight = CalcWeights(x, y);

	// Regroup bytes from per-pixel order to per-channel order:
	//   interleaved : R1 R3 G1 G3 B1 B3 A1 A3 | R2 R4 G2 G4 B2 B4 A2 A4
	//   planar8     : R1 R2 R3 R4 G1 G2 G3 G4 B1 B2 B3 B4 A1 A2 A3 A4
	const __m128i interleaved = _mm_unpacklo_epi8(px12, px34);
	const __m128i interleavedHigh = _mm_unpackhi_epi64(interleaved, zero);
	const __m128i planar8 = _mm_unpacklo_epi8(interleaved, interleavedHigh);

	// Widen to 16 bits: low half holds R and G, high half holds B and A.
	const __m128i chanRG = _mm_unpacklo_epi8(planar8, zero);
	const __m128i chanBA = _mm_unpackhi_epi8(planar8, zero);

	// Weights scaled by CONST_256, converted to ints (w4 w3 w2 w1) and packed
	// to 16 bits twice over so they line up with both channels in a register.
	const __m128i weights32 = _mm_cvtps_epi32(_mm_mul_ps(weight, CONST_256));
	const __m128i weights16 = _mm_packs_epi32(weights32, weights32);

	// sumRG = [w1*R1 + w2*R2 | w3*R3 + w4*R4 | w1*G1 + w2*G2 | w3*G3 + w4*G4]
	const __m128i sumRG = _mm_madd_epi16(chanRG, weights16);
	// sumBA = [w1*B1 + w2*B2 | w3*B3 + w4*B4 | w1*A1 + w2*A2 | w3*A3 + w4*A4]
	const __m128i sumBA = _mm_madd_epi16(chanBA, weights16);

	// Horizontal add completes each channel's four-term sum (32-bit lanes),
	// then the 256 scale is removed.
	const __m128i channelSums = _mm_hadd_epi32(sumRG, sumBA);
	const __m128i channelVals = _mm_srli_epi32(channelSums, 8);

	// Narrow 32 -> 16 -> 8 bits with unsigned saturation.
	const __m128i packed16 = _mm_packus_epi32(channelVals, zero);
	const __m128i packed8 = _mm_packus_epi16(packed16, zero);

	return _mm_cvtsi128_si32(packed8);
}
/***************************************************************************** InitInterpolationWeights() *****************************************************************************/ void InitInterpolationWeights(MAPSIZE *Map, OPTIONSTRUCT *Options, TOPOPIX **TopoMap, uchar ****MetWeights, METLOCATION *Stats, int NStats) { const char *Routine = "InitInterpolationWeights"; uchar **BasinMask; int x; /* counter */ int y; /* counter */ int i; if (Options->GRIDMET) for (i = 0; i < NStats; i++) Stats[i].Elev = TopoMap[Stats[i].Loc.N][Stats[i].Loc.E].Dem; if (Options->MM5 == TRUE && Options->QPF == FALSE) { if (!((*MetWeights) = (uchar ***)calloc(Map->NY, sizeof(uchar **)))) ReportError("CalcWeights()", 1); for (y = 0; y < Map->NY; y++) if (!((*MetWeights)[y] = (uchar **)calloc(Map->NX, sizeof(uchar *)))) ReportError("CalcWeights()", 1); for (y = 0; y < Map->NY; y++) for (x = 0; x < Map->NX; x++) (*MetWeights)[y][x] = NULL; } else { if (!(BasinMask = (uchar **)calloc(Map->NY, sizeof(uchar *)))) ReportError((char *)Routine, 1); for (y = 0; y < Map->NY; y++) { if (!(BasinMask[y] = (uchar *)calloc(Map->NX, sizeof(uchar)))) ReportError((char *)Routine, 1); } for (y = 0; y < Map->NY; y++) for (x = 0; x < Map->NX; x++) BasinMask[y][x] = TopoMap[y][x].Mask; CalcWeights(Stats, NStats, Map->NX, Map->NY, BasinMask, MetWeights, Options); printf("\nSummary info on met stations used for current model run \n"); printf(" Name\t\tY\tX\tIn Mask\tDefined Elev\tActual Elev\n"); for (i = 0; i < NStats; i++) { if ((Stats[i].Loc.N > Map->NY || Stats[i].Loc.N < 0 || Stats[i].Loc.E > Map->NX || Stats[i].Loc.E < 0)) printf("%20s\t%d\t%d\t%5s\t%5.1f\t\t%5s\n", Stats[i].Name, Stats[i].Loc.N, Stats[i].Loc.E, "NA", Stats[i].Elev, "NA"); else printf("%20s\t%d\t%d\t%d\t%5.1f\t\t%5.1f\n", Stats[i].Name, Stats[i].Loc.N, Stats[i].Loc.E, BasinMask[Stats[i].Loc.N][Stats[i].Loc.E], Stats[i].Elev, TopoMap[Stats[i].Loc.N][Stats[i].Loc.E].Dem); } printf("\n"); for (y = 0; y < Map->NY; y++) free(BasinMask[y]); 
free(BasinMask); } }