/*
 * sa_find2: print every position at which a keyword occurs in a text file,
 * using a suffix array that was stored in a second file.
 *
 *   argv[1]  text file
 *   argv[2]  suffix array file (read as a raw array of long)
 *   argv[3]  keyword to look for
 *
 * One offset is printed per line, followed by the elapsed time reported by
 * getsec().  Returns 0 on success; exits with status 1 on a usage error or
 * when a file cannot be opened, sized, or read completely.
 */
int main(int argc, char **argv)
{
  double t1 = getsec();
  long i;

  /* Command-line arguments */
  if (argc < 4) {
    fprintf(stderr, "Usage: ./sa_find2 [textfile] [sa_file] [keyword]\n");
    exit(1);
  }
  const char * tfile = argv[1];
  const char * sfile = argv[2];
  char * keyword = argv[3];
  /* The keyword never changes, so measure it once instead of per candidate. */
  const size_t klen = strlen(keyword);

  /* Text file */
  FILE * fp = fopen(tfile, "r");
  if (fp == NULL) {perror("open"); exit(1);}
  fseek(fp, 0, SEEK_END);
  long sz = ftell(fp);  /* size of the text file == number of characters */
  if (sz < 0) {perror("ftell"); exit(1);}
  rewind(fp);
  /* Copy the file contents into s, leaving room for a terminating NUL */
  char * s = (char *)malloc((sz+1) * sizeof(char));
  if (s == NULL) {perror("malloc"); exit(1);}
  size_t nread = fread(s, sizeof(char), sz, fp);
  if (ferror(fp)) {perror("read"); exit(1);}
  /* Terminate at what was actually read so no uninitialised bytes are searched. */
  sz = (long)nread;
  s[sz] = '\0';
  fclose(fp);

  /* Suffix array file */
  fp = fopen(sfile, "rb");
  if (fp == NULL) {perror("open"); exit(1);}
  fseek(fp, 0, SEEK_END);
  long sa_bytes = ftell(fp);
  if (sa_bytes < 0) {perror("ftell"); exit(1);}
  long n = sa_bytes / (long)sizeof(long);  /* number of suffix array elements */
  rewind(fp);
  /* Copy the suffix array into memory */
  long * sa = (long *)malloc(n * sizeof(long));
  if (sa == NULL && n > 0) {perror("malloc"); exit(1);}
  if (fread(sa, sizeof(long), n, fp) != (size_t)n) {
    fprintf(stderr, "read: could not read %ld suffix array entries from %s\n", n, sfile);
    exit(1);
  }
  fclose(fp);

  /* Search.
   * NOTE(review): bsearch_ex is assumed to return the index of the first
   * suffix that can match the keyword -- confirm against its definition. */
  i = bsearch_ex(keyword, sa, s, n);
  while (i >= 0 && i < n) {
    if (sa[i] < 0 || sa[i] > sz) {
      fprintf(stderr, "suffix array entry %ld is outside the text (size %ld)\n", sa[i], sz);
      exit(1);
    }
    /* strncmp stops at the suffix's NUL, so a suffix shorter than the
     * keyword can never compare equal; no separate length check is needed. */
    if (strncmp(keyword, s + sa[i], klen) != 0) break;
    /* On a match, print the position in the original text.  This is the
     * suffix offset itself (sz minus the suffix length). */
    printf("%ld\n", sa[i]);
    i++;
  }

  /* Release buffers */
  free(s);
  free(sa);

  double t2 = getsec();
  printf("%lfsec\n", t2-t1);
  return 0;
}
bool Model::filter_AVX(const float *packed_input, float *packed_output, cv::Size size) { #ifdef COMPARE_RESULT float *packed_output_cv = (float*)malloc(sizeof(float) * size.width * size.height * nOutputPlanes); double t0 = getsec(); filter_CV(packed_input, packed_output_cv, size); double t1 = getsec(); /* 3x3 = 9 fma */ double ops = size.width * size.height * 9.0 * 2.0 * nOutputPlanes * nInputPlanes; std::vector<cv::Mat> output2; filter_AVX_impl(packed_input, packed_output, nInputPlanes, nOutputPlanes, biases, weights, size, nJob); double t2 = getsec(); printf("%d %d %f %f\n", nInputPlanes, nOutputPlanes, t1-t0, t2-t1); printf("ver2 : %f [Gflops]\n", (ops/(1000.0*1000.0*1000.0)) / (t2-t1)); printf("orig : %f [Gflops]\n", (ops/(1000.0*1000.0*1000.0)) / (t1-t0)); for (int i=0; i<size.width * size.height * nOutputPlanes; i++) { float v0 = packed_output_cv[i]; float v1 = packed_output[i]; float d = fabs(v0 - v1); float r0 = d/fabs(v0); float r1 = d/fabs(v1); float r = std::max(r0, r1); if (r > 0.1f && d > 0.0000001f) { printf("d=%.20f %.20f %.20f @ \n",r, v0, v1, i); exit(1); } } #else //double t1 = getsec(); filter_AVX_impl(packed_input, packed_output, nInputPlanes, nOutputPlanes, biases, weights, size, nJob); //double t2 = getsec(); //double ops = size.width * size.height * 9.0 * 2.0 * nOutputPlanes * nInputPlanes; //printf("ver2 : %f [Gflops], %f[msec]\n", (ops/(1000.0*1000.0*1000.0)) / (t2-t1), (t2-t1)*1000); #endif return true; }
/*
 * Accumulate squared deviations of a scaled image difference into *sd.
 *
 * For every voxel of img's data section where fimg exceeds the threshold
 * returned by parsecards, the residual  img - m*fimg  is formed.  If that
 * residual lies within sd0 of mn0 (or sd0 is negative, which disables the
 * clipping), (residual - mn)^2 is added to *sd.
 *
 * Nothing is done when img is not a valid image.  The parameter n is not
 * used by this function.
 */
void add_sd(imtype*img,imtype*fimg,double m,int*n,double*sd,double mn0,double sd0,double mn)
{
  int xlo,xhi,ylo,yhi,zlo,zhi;
  int ix,iy,iz;
  double resid;
  float fcut;

  if (!isimage(img)) return;

  /* Fetch the threshold for fimg; repeated when fimg is not scal.img. */
  parsecards(fimg,NULL,NULL,NULL,&fcut,NULL,NULL,NULL,NULL,0,1);
  if (fimg!=&(scal.img))
    parsecards(fimg,NULL,NULL,NULL,&fcut,NULL,NULL,NULL,NULL,0,1);

  getsec(img,datasec,&xlo,&xhi,&ylo,&yhi,&zlo,&zhi);

  for (iz=zlo;iz<zhi;iz++) {
    for (iy=ylo;iy<yhi;iy++) {
      for (ix=xlo;ix<xhi;ix++) {
        /* Skip voxels at or below the threshold. */
        if (!(fimg->data[iz][iy][ix]>fcut)) continue;

        resid=img->data[iz][iy][ix]-m*fimg->data[iz][iy][ix];
        if (fabs(resid-mn0)<=sd0 || sd0<0)
          (*sd)+=(resid-mn)*(resid-mn);
      }
    }
  }
}
/*
 * Write every function and its sections to fp.
 *
 * For each function delivered by stepfun() a header line is printed with the
 * name at the left and again right-aligned to `cols` columns, optionally
 * preceded by a form feed (ADOC_FORM_FEEDS in flags).  Each section heading
 * from stepsec() follows, then the section text after macro expansion with
 * body_macros.  Tabs are expanded through fexpand() when tabsize > 0.
 *
 * Returns 0 on success, or a non-zero value (a source line number) when
 * strexpand() fails; iteration stops after the section in which that happens.
 */
int gen_autodoc(FILE *fp, int cols, int tabsize, int flags, char **body_macros)
{
  int status= 0;
  char *name;

  for(name= stepfun(0); name && (status==0); name= stepfun(1))
  {
    char *heading;

    if(flags & ADOC_FORM_FEEDS)
      fputc('\f',fp);

    fprintf(fp,"%s%*s\n\n",name,(int)(cols-strlen(name)),name);

    for(heading= stepsec(0); heading && (status==0); heading= stepsec(1))
    {
      char *body;

      /* indent the section heading */
      if(*heading)
        fprintf(fp," %s\n",heading);

      body= getsec(heading);
      if(!body)
        continue;

      if(*body)
      {
        char *expanded= strexpand(body, body_macros);

        if(!expanded)
          status= __LINE__;
        else
        {
          if(tabsize > 0)
            fexpand(fp,tabsize,expanded);
          else
            fputs(expanded,fp);

          free(expanded);
        }
      }

      /* the separating newline is written even for an empty section body */
      fputc('\n',fp);
    }
  }

  return status;
}
int main(int argc, char** argv) { int ret = 1; // definition of command line arguments TCLAP::CmdLine cmd("waifu2x reimplementation using OpenCV", ' ', "1.0.0"); TCLAP::ValueArg<std::string> cmdInputFile("i", "input_file", "path to input image file (you should input full path)", true, "", "string", cmd); TCLAP::ValueArg<std::string> cmdOutputFile("o", "output_file", "path to output image file (you should input full path)", false, "(auto)", "string", cmd); std::vector<std::string> cmdModeConstraintV; cmdModeConstraintV.push_back("noise"); cmdModeConstraintV.push_back("scale"); cmdModeConstraintV.push_back("noise_scale"); TCLAP::ValuesConstraint<std::string> cmdModeConstraint(cmdModeConstraintV); TCLAP::ValueArg<std::string> cmdMode("m", "mode", "image processing mode", false, "noise_scale", &cmdModeConstraint, cmd); std::vector<int> cmdNRLConstraintV; cmdNRLConstraintV.push_back(1); cmdNRLConstraintV.push_back(2); TCLAP::ValuesConstraint<int> cmdNRLConstraint(cmdNRLConstraintV); TCLAP::ValueArg<int> cmdNRLevel("", "noise_level", "noise reduction level", false, 1, &cmdNRLConstraint, cmd); TCLAP::ValueArg<double> cmdScaleRatio("", "scale_ratio", "custom scale ratio", false, 2.0, "double", cmd); TCLAP::ValueArg<std::string> cmdModelPath("", "model_dir", "path to custom model directory (don't append last / )", false, "models_rgb", "string", cmd); TCLAP::ValueArg<int> cmdNumberOfJobs("j", "jobs", "number of threads launching at the same time", false, 0, "integer", cmd); TCLAP::SwitchArg cmdForceOpenCL("", "force-OpenCL", "force to use OpenCL on Intel Platform", cmd, false); TCLAP::SwitchArg cmdDisableGPU("", "disable-gpu", "disable GPU", cmd, false); TCLAP::ValueArg<int> cmdBlockSize("", "block_size", "block size", false, 0, "integer", cmd); // definition of command line argument : end // parse command line arguments try { cmd.parse(argc, argv); } catch (std::exception &e) { std::cerr << e.what() << std::endl; std::cerr << "Error : cmd.parse() threw exception" << 
std::endl; std::exit(-1); } std::string outputFileName = cmdOutputFile.getValue(); if (outputFileName == "(auto)") { outputFileName = cmdInputFile.getValue(); int tailDot = outputFileName.find_last_of('.'); outputFileName.erase(tailDot, outputFileName.length()); outputFileName = outputFileName + "(" + cmdMode.getValue() + ")"; std::string &mode = cmdMode.getValue(); if(mode.find("noise") != mode.npos){ outputFileName = outputFileName + "(Level" + std::to_string(cmdNRLevel.getValue()) + ")"; } if(mode.find("scale") != mode.npos){ outputFileName = outputFileName + "(x" + std::to_string(cmdScaleRatio.getValue()) + ")"; } outputFileName += ".png"; } enum W2XConvGPUMode gpu = W2XCONV_GPU_AUTO; if (cmdDisableGPU.getValue()) { gpu = W2XCONV_GPU_DISABLE; } else if (cmdForceOpenCL.getValue()) { gpu = W2XCONV_GPU_FORCE_OPENCL; } W2XConv *converter = w2xconv_init(gpu, cmdNumberOfJobs.getValue(), 1); double time_start = getsec(); switch (converter->target_processor.type) { case W2XCONV_PROC_HOST: printf("CPU: %s\n", converter->target_processor.dev_name); break; case W2XCONV_PROC_CUDA: printf("CUDA: %s\n", converter->target_processor.dev_name); break; case W2XCONV_PROC_OPENCL: printf("OpenCL: %s\n", converter->target_processor.dev_name); break; } int bs = cmdBlockSize.getValue(); int r = w2xconv_load_models(converter, cmdModelPath.getValue().c_str()); if (r < 0) { goto error; } { int nrLevel = 0; if (cmdMode.getValue() == "noise" || cmdMode.getValue() == "noise_scale") { nrLevel = cmdNRLevel.getValue(); } double scaleRatio = 1; if (cmdMode.getValue() == "scale" || cmdMode.getValue() == "noise_scale") { scaleRatio = cmdScaleRatio.getValue(); } r = w2xconv_convert_file(converter, outputFileName.c_str(), cmdInputFile.getValue().c_str(), nrLevel, scaleRatio, bs); } if (r < 0) { goto error; } { double time_end = getsec(); double gflops_proc = (converter->flops.flop/(1000.0*1000.0*1000.0)) / converter->flops.filter_sec; double gflops_all = 
(converter->flops.flop/(1000.0*1000.0*1000.0)) / (time_end-time_start); std::cout << "process successfully done! (all:" << (time_end - time_start) << "[sec], " << gflops_all << "[GFLOPS], filter:" << converter->flops.filter_sec << "[sec], " << gflops_proc << "[GFLOPS])" << std::endl; } ret = 0; error: if (ret != 0) { char *err = w2xconv_strerror(&converter->last_error); puts(err); w2xconv_free(err); } w2xconv_fini(converter); return ret; }
/**
 * Converts one image: optional denoising followed by optional scaling.
 *
 * @param conv           converter state; elapsed time is added to
 *                       conv->flops.process_sec
 * @param src            source image (cloned, never modified)
 * @param image_dst      receives a clone of the processed working image
 * @param denoise_level  0 disables denoising, otherwise passed to apply_denoise
 * @param scale          target scale factor; 1.0 disables scaling
 * @param blockSize      forwarded to apply_denoise / apply_scale
 * @return always 0
 *
 * Scaling runs the 2x model enough times to reach or exceed `scale`, then
 * shrinks the result with a linear resize when `scale` is not exactly that
 * power of two.
 */
int w2xconv_convert(struct W2XConv *conv, const cv::Mat& src, cv::Mat& image_dst, int denoise_level, double scale, int blockSize)
{
	double time_start = getsec();

	bool is_rgb = (conv->impl->scale2_models[0]->getNInputPlanes() == 3);

	bool png_rgb = true;
	float bkgd_r = 1.0f;
	float bkgd_g = 1.0f;
	float bkgd_b = 1.0f;

	cv::Mat image_src = src.clone();
	enum w2xc::image_format fmt;

	int src_depth = CV_MAT_DEPTH(image_src.type());
	int src_cn = CV_MAT_CN(image_src.type());
	cv::Mat image = cv::Mat(image_src.size(), CV_32FC3);
	cv::Mat alpha;

	if (is_rgb) {
		// NOTE(review): png_rgb is hard-wired to true, so every input that is
		// not 4-channel takes the unsigned short path regardless of
		// src_depth -- confirm this is intended.
		if (png_rgb) {
			if (src_cn == 4) {
				// save alpha
				alpha = cv::Mat(image_src.size(), CV_32FC1);
				if (src_depth == CV_16U) {
					preproc_rgba2rgb<unsigned short, 65535, 2, 0>(&image, &alpha, &image_src,
						bkgd_r, bkgd_g, bkgd_b);
				} else {
					preproc_rgba2rgb<unsigned char, 255, 2, 0>(&image, &alpha, &image_src,
						bkgd_r, bkgd_g, bkgd_b);
				}
			} else {
				preproc_rgb2rgb<unsigned short, 65535, 2, 0>(&image, &image_src);
			}
		} else {
			preproc_rgb2rgb<unsigned char, 255, 2, 0>(&image, &image_src);
		}
		fmt = w2xc::IMAGE_RGB_F32;
	} else {
		//if (png_rgb) {
		//	if (src_cn == 4) {
		//		// save alpha
		//		alpha = cv::Mat(image_src.size(), CV_32FC1);
		//		if (src_depth == CV_16U) {
		//			preproc_rgba2yuv<unsigned short, 65535, 2, 0>(&image, &alpha, &image_src,
		//				bkgd_r, bkgd_g, bkgd_b);
		//		} else {
		//			preproc_rgba2yuv<unsigned char, 255, 2, 0>(&image, &alpha, &image_src,
		//				bkgd_r, bkgd_g, bkgd_b);
		//		}
		//	} else {
		//		preproc_rgb2yuv<unsigned short, 65535, 2, 0>(&image, &image_src);
		//	}
		//} else {
		//	preproc_rgb2yuv<unsigned char, 255, 2, 0>(&image, &image_src);
		//}
		if (image_src.channels() > 1)
			cv::cvtColor(image_src, image, CV_RGB2YUV);
		else
			image = image_src.clone();
		fmt = w2xc::IMAGE_Y;
	}

	if (denoise_level != 0) {
		apply_denoise(conv, image, denoise_level, blockSize, fmt);
	}

	if (scale != 1.0) {
		// calculate iteration times of 2x scaling and shrink ratio which will use at last
		//
		// Only factors above 1 need 2x passes.  For scale <= 0.5 the
		// logarithm is negative, which used to give a negative pass count and
		// a shrink ratio measured against a fractional power of two; for a
		// non-positive scale the int conversion was undefined.
		// NOTE(review): assumes apply_scale performs no work for a
		// non-positive iteration count -- confirm against its definition.
		int iterTimesTwiceScaling = 0;
		if (scale > 1.0) {
			iterTimesTwiceScaling = static_cast<int>(std::ceil(std::log2(scale)));
		}
		const double upscaledFactor = std::pow(2.0, static_cast<double>(iterTimesTwiceScaling));

		double shrinkRatio = 0.0;
		if (static_cast<int>(scale) != upscaledFactor) {
			shrinkRatio = scale / upscaledFactor;
		}

		apply_scale(conv, image, iterTimesTwiceScaling, blockSize, fmt);

		if (shrinkRatio != 0.0) {
			cv::Size lastImageSize = image.size();
			lastImageSize.width = static_cast<int>(lastImageSize.width * shrinkRatio);
			lastImageSize.height = static_cast<int>(lastImageSize.height * shrinkRatio);
			cv::resize(image, image, lastImageSize, 0, 0, cv::INTER_LINEAR);
		}
	}

	if (alpha.empty()) {
		//image_dst = cv::Mat(image.size(), CV_MAKETYPE(src_depth,3));
		//if (is_rgb) {
		//	if (src_depth == CV_16U) {
		//		postproc_rgb2rgb<unsigned short, 65535, 2, 0>(&image_dst, &image);
		//	} else {
		//		postproc_rgb2rgb<unsigned char, 255, 2, 0>(&image_dst, &image);
		//	}
		//} else {
		//	if (src_depth == CV_16U) {
		//		postproc_yuv2rgb<unsigned short, 65535, 0, 2>(&image_dst, &image);
		//	} else {
		//		postproc_yuv2rgb<unsigned char, 255, 0, 2>(&image_dst, &image);
		//	}
		//}
		image_dst = image.clone();
	} else {
		// NOTE(review): alpha is brought to the output size but is not merged
		// into image_dst while the postproc calls below stay disabled.
		if (image.size() != alpha.size()) {
			cv::resize(alpha, alpha, image.size(), 0, 0, cv::INTER_LINEAR);
		}
		// The former 4-channel pre-allocation of image_dst was overwritten by
		// this clone straight away, so it has been dropped.
		image_dst = image.clone();
		//cv::cvtColor(image, image_dst, )
		//if (is_rgb) {
		//	if (src_depth == CV_16U) {
		//		postproc_rgb2rgba<unsigned short, 65535, 2, 0>(&image_dst, &image, &alpha, bkgd_r, bkgd_g, bkgd_b);
		//	} else {
		//		postproc_rgb2rgba<unsigned char, 255, 2, 0>(&image_dst, &image, &alpha, bkgd_r, bkgd_g, bkgd_b);
		//	}
		//} else {
		//	if (src_depth == CV_16U) {
		//		postproc_yuv2rgba<unsigned short, 65535, 0, 2>(&image_dst, &image, &alpha, bkgd_r, bkgd_g, bkgd_b);
		//	} else {
		//		postproc_yuv2rgba<unsigned char, 255, 0, 2>(&image_dst, &image, &alpha, bkgd_r, bkgd_g, bkgd_b);
		//	}
		//}
	}

	double time_end = getsec();

	conv->flops.process_sec += time_end - time_start;

	//printf("== %f == \n", conv->impl->env.transfer_wait);

	return 0;
}
int main(int argc, char *argv[]) { int i,j/*,k,test*/; int ndomain,total,add; int gottrans; int T_FLAG; /* char c; */ char *env; char *deffile,*keyword,*value; FILE *PARMS,*TRANS,*PDB; struct parameters *parms; struct domain_loc *domain; if(argc<2) exit_error(); /* get environment variable */ if((env=getenv("STAMPDIR"))==NULL) { fprintf(stderr,"error: environment variable STAMPDIR must be set\n"); exit(-1); } parms=(struct parameters*)malloc(sizeof(struct parameters)); strcpy(parms[0].stampdir,env); /* read in default parameters from $STAMPDIR/stamp.defaults */ deffile=(char*)malloc(1000*sizeof(char)); #if defined(_MSC_VER) sprintf(deffile,"%s\\stamp.defaults",env); #else sprintf(deffile,"%s/stamp.defaults",env); #endif if((PARMS=fopen(deffile,"r"))==NULL) { fprintf(stderr,"error: default parameter file %s does not exist\n",deffile); exit(-1); } if(getpars(PARMS,parms)==-1) exit(-1); fclose(PARMS); /* define DSSP directory file name */ sprintf(&parms[0].dsspfile[0],"%s/dssp.directories",env); /* now search the command line for commands */ keyword=(char*)malloc(1000*sizeof(char)); value=(char*)malloc(1000*sizeof(char)); for(i=1; i<argc; ++i) { if(argv[i][0]!='-') exit_error(); strcpy(keyword,&argv[i][1]); if(i+1<argc) strcpy(value,argv[i+1]); else strcpy(value,"none"); for(j=0; j<strlen(keyword); ++j) keyword[j]=ltou(keyword[j]); /* change to upper case */ T_FLAG=(value[0]=='Y' || value[0]=='y' || value[0]=='1' || value[0]=='T' || value[0]=='t' || value[0]=='o' || value[0]=='O'); /* enables one to write '1', 'YES', 'Yes', 'yes', 'T_FLAG', 'True' or 'true' to * set any boolean parmsiable to one */ if((strcmp(&argv[i][1],"l")==0) || (strcmp(&argv[i][1],"f")==0) || (strcmp(&argv[i][1],"p")==0)) { if(i+1>=argc) exit_error(); /* listfile name */ strcpy(parms[0].listfile,argv[i+1]); i++; } else if(strcmp(&argv[i][1],"P")==0) { /* want to read in parameter file */ if(i+1>=argc) exit_error(); if((PARMS=fopen(argv[i+1],"r"))==NULL) { fprintf(stderr,"error opening file 
%s\n",argv[i+1]); exit(-1); } if(getpars(PARMS,parms)==-1) exit(-1); fclose(PARMS); i++; } else if(strcmp(&argv[i][1],"o")==0) { /* output file */ if(i+1>=argc) exit_error(); strcpy(parms[0].logfile,argv[i+1]); i++; } else if(strcmp(&argv[i][1],"help")==0) { help_exit_error(); } else if((strcmp(&argv[i][1],"V")==0) || (strcmp(&argv[i][1],"v")==0)) { parms[0].verbose=1; strcpy(parms[0].logfile,"stdout"); } else if(strcmp(&argv[i][1],"s")==0) { parms[0].SCAN=1; parms[0].TREEWISE=parms[0].PAIRWISE=0; } else if(strcmp(&argv[i][1],"n")==0) { if(i+1>=argc) exit_error(); sscanf(argv[i+1],"%d",&parms[0].NPASS); i++; if(parms[0].NPASS!=1 && parms[0].NPASS!=2) exit_error(); } else if(strcmp(keyword,"PAIRPEN") == 0 || strcmp(keyword,"PEN")==0 || strcmp(keyword,"SECOND_PAIRPEN")==0) { sscanf(value,"%f",&parms[0].second_PAIRPEN); i++; } else if(strcmp(keyword,"FIRST_PAIRPEN")==0) { sscanf(value,"%f",&parms[0].first_PAIRPEN); i++; } else if(strcmp(keyword,"MAXPITER") == 0 || strcmp(keyword,"MAXSITER") == 0) { sscanf(value,"%d",&parms[0].MAXPITER); i++; } else if(strcmp(keyword,"MAXTITER") == 0) { sscanf(value,"%d",&parms[0].MAXTITER); i++; } else if(strcmp(keyword,"TREEPEN") == 0 || strcmp(keyword,"SECOND_TREEPEN")==0) { sscanf(value,"%f",&parms[0].second_TREEPEN); i++; } else if(strcmp(keyword,"FIRST_TREEPEN")==0) { sscanf(value,"%f",&parms[0].first_TREEPEN); i++; } else if(strcmp(keyword,"SCORETOL") == 0) { sscanf(value,"%f",&parms[0].SCORETOL); i++; } else if(strcmp(keyword,"CLUSTMETHOD") == 0) { sscanf(value,"%d",&parms[0].CLUSTMETHOD); i++; } else if(strcmp(keyword,"E1") == 0 || strcmp(keyword,"SECOND_E1")==0) { sscanf(value,"%f",&parms[0].second_E1); i++; } else if(strcmp(keyword,"E2") == 0 || strcmp(keyword,"SECOND_E2")==0) { sscanf(value,"%f",&parms[0].second_E2); i++; } else if(strcmp(keyword,"FIRST_E1")==0) { sscanf(value,"%f",&parms[0].first_E1); i++; } else if(strcmp(keyword,"FIRST_E2")==0) { sscanf(value,"%f",&parms[0].first_E2); i++; } else 
if(strcmp(keyword,"NPASS")==0) { sscanf(value,"%d",&parms[0].NPASS); i++; if(parms[0].NPASS!=1 && parms[0].NPASS!=2) { fprintf(stderr,"error: NPASS must be either 1 or 2\n"); return -1; } } else if(strcmp(keyword,"CUTOFF") == 0 || strcmp(keyword,"SECOND_CUTOFF")==0) { sscanf(value,"%f",&parms[0].second_CUTOFF); i++; } else if(strcmp(keyword,"FIRST_CUTOFF")==0) { sscanf(value,"%f",&parms[0].first_CUTOFF); i++; } else if(strcmp(keyword,"TREEPLOT") == 0) { parms[0].TREEPLOT=T_FLAG; i++; } else if(strcmp(keyword,"PAIRPLOT") == 0) { parms[0].PAIRPLOT=T_FLAG; i++; } else if(strcmp(keyword,"NALIGN") == 0) { sscanf(value,"%d",&parms[0].NALIGN); i++; } else if(strcmp(keyword,"DISPALL") == 0) { parms[0].DISPALL=T_FLAG; i++; } else if(strcmp(keyword,"HORIZ") ==0) { parms[0].HORIZ=T_FLAG; i++; } else if(strcmp(keyword,"ADD") ==0) { sscanf(value,"%f",&parms[0].ADD); i++; } else if(strcmp(keyword,"NMEAN") ==0) { sscanf(value,"%f",&parms[0].NMEAN); i++; } else if(strcmp(keyword,"NSD") ==0) { sscanf(value,"%f",&parms[0].NSD); i++; } else if(strcmp(keyword,"STATS") ==0) { parms[0].STATS=T_FLAG; i++; } else if(strcmp(keyword,"NA") == 0) { sscanf(value,"%f",&parms[0].NA); i++; } else if(strcmp(keyword,"NB") == 0) { sscanf(value,"%f",&parms[0].NB); i++; } else if(strcmp(keyword,"NASD") == 0) { sscanf(value,"%f",&parms[0].NASD); i++; } else if(strcmp(keyword,"NBSD") == 0) { sscanf(value,"%f",&parms[0].NBSD); i++; } else if(strcmp(keyword,"PAIRWISE") == 0) { parms[0].PAIRWISE=T_FLAG; i++; } else if(strcmp(keyword,"TREEWISE") == 0) { parms[0].TREEWISE=T_FLAG; i++; } else if(strcmp(keyword,"ORDFILE") == 0) { strcpy(parms[0].ordfile,value); i++; } else if(strcmp(keyword,"TREEFILE") == 0) { strcpy(parms[0].treefile,value); i++; } else if(strcmp(keyword,"PLOTFILE") == 0) { strcpy(parms[0].plotfile,value); i++; } else if(strcmp(keyword,"PREFIX") == 0 || strcmp(keyword,"TRANSPREFIX")==0 || strcmp(keyword,"STAMPPREFIX")==0) { strcpy(parms[0].transprefix,value); i++; } else 
if(strcmp(keyword,"MATFILE") == 0) { strcpy(parms[0].matfile,value); i++; } else if(strcmp(keyword,"THRESH") ==0) { sscanf(value,"%f",&parms[0].THRESH); i++; } else if(strcmp(keyword,"TREEALIGN")==0) { parms[0].TREEALIGN=T_FLAG; i++; } else if(strcmp(keyword,"TREEALLALIGN")==0) { parms[0].TREEALLALIGN=T_FLAG; i++; } else if(strcmp(keyword,"PAIRALIGN")==0 || strcmp(keyword,"SCANALIGN")==0) { parms[0].PAIRALIGN=T_FLAG; i++; } else if(strcmp(keyword,"PAIRALLALIGN")==0 || strcmp(keyword,"SCANALLALIGN")==0) { parms[0].PAIRALLALIGN=T_FLAG; i++; } else if(strcmp(keyword,"PRECISION")==0) { sscanf(value,"%d",&parms[0].PRECISION); i++; } else if(strcmp(keyword,"MAX_SEQ_LEN")==0) { sscanf(value,"%d",&parms[0].MAX_SEQ_LEN); i++; } else if(strcmp(keyword,"ROUGHFIT")==0) { parms[0].ROUGHFIT=T_FLAG; i++; } else if(strcmp(keyword,"ROUGH")==0) { parms[0].ROUGHFIT=1; } else if(strcmp(keyword,"ROUGHOUT")==0) { parms[0].roughout=1; } else if(strcmp(keyword,"ROUGHOUTFILE")==0) { if(i+1>=argc) exit_error(); strcpy(&parms[0].roughoutfile[0],argv[i+1]); i++; parms[0].roughout=1; } else if(strcmp(keyword,"BOOLCUT")==0 || strcmp(keyword,"SECOND_BOOLCUT")==0) { sscanf(value,"%f",&parms[0].second_BOOLCUT); i++; } else if(strcmp(keyword,"FIRST_BOOLCUT")==0) { sscanf(value,"%f",&parms[0].first_BOOLCUT); i++; } else if(strcmp(keyword,"SCANSLIDE")==0) { sscanf(value,"%d",&parms[0].SCANSLIDE); i++; } else if(strcmp(keyword,"SCAN")==0) { parms[0].SCAN=T_FLAG; i++; if(T_FLAG) parms[0].PAIRWISE=parms[0].TREEWISE=0; } else if(strcmp(keyword,"SCANMODE")==0) { sscanf(value,"%d",&parms[0].SCANMODE); i++; if(parms[0].SCANMODE==1) parms[0].PAIRALIGN=1; } else if(strcmp(keyword,"SCANCUT")==0) { sscanf(value,"%f",&parms[0].SCANCUT); i++; } else if(strcmp(keyword,"SECSCREEN")==0) { parms[0].SECSCREEN=T_FLAG; i++; } else if(strcmp(keyword,"SECSCREENMAX")==0) { sscanf(value,"%f",&parms[0].SECSCREENMAX); i++; } else if(strcmp(keyword,"SCANTRUNC")==0) { parms[0].SCANTRUNC=T_FLAG; i++; } else 
if(strcmp(keyword,"SCANTRUNCFACTOR")==0) { sscanf(value,"%f",&parms[0].SCANTRUNCFACTOR); i++; } else if(strcmp(keyword,"DATABASE")==0) { strcpy(&parms[0].database[0],value); i++; } else if(strcmp(keyword,"SCANFILE")==0) { strcpy(&parms[0].scanfile[0],value); i++; } else if(strcmp(keyword,"LOGFILE")==0) { strcpy(&parms[0].logfile[0],value); i++; } else if(strcmp(keyword,"SECTYPE")==0) { sscanf(value,"%d",&parms[0].SECTYPE); i++; } else if(strcmp(keyword,"SCANSEC")==0) { sscanf(value,"%d",&parms[0].SCANSEC); i++; } else if(strcmp(keyword,"SECFILE")==0) { strcpy(&parms[0].secfile[0],value); i++; parms[0].SECTYPE=2; } else if(strcmp(keyword,"BOOLEAN")==0) { parms[0].BOOLEAN=T_FLAG; i++; } else if(strcmp(keyword,"BOOLMETHOD")==0) { sscanf(value,"%d",&parms[0].BOOLMETHOD); i++; } else if(strcmp(keyword,"LISTFILE")==0) { strcpy(&parms[0].listfile[0],value); i++; } else if(strcmp(keyword,"STAMPDIR")==0) { strcpy(&parms[0].stampdir[0],value); i++; } else if(strcmp(keyword,"CLUST")==0) { parms[0].CLUST=T_FLAG; i++; } else if(strcmp(keyword,"COLUMNS")==0) { sscanf(value,"%d",&parms[0].COLUMNS); i++; } else if(strcmp(keyword,"SW")==0) { sscanf(value,"%d",&parms[0].SW); i++; } else if(strcmp(keyword,"CCFACTOR")==0) { sscanf(value,"%f",&parms[0].CCFACTOR); i++; } else if(strcmp(keyword,"CCADD")==0) { parms[0].CCADD=T_FLAG; i++; } else if(strcmp(keyword,"MINFIT")==0) { sscanf(value,"%d",&parms[0].MINFIT); i++; } else if(strcmp(keyword,"ROUGHALIGN")==0) { strcpy(parms[0].roughalign,value); i++; } else if(strcmp(keyword,"FIRST_THRESH")==0) { sscanf(value,"%f",&parms[0].first_THRESH); i++; } else if(strcmp(keyword,"MIN_FRAC")==0) { sscanf(value,"%f",&parms[0].MIN_FRAC); i++; } else if(strcmp(keyword,"SCORERISE")==0) { parms[0].SCORERISE=T_FLAG; i++; } else if(strcmp(keyword,"SKIPAHEAD")==0) { parms[0].SKIPAHEAD=T_FLAG; i++; } else if(strcmp(keyword,"SCANSCORE")==0) { sscanf(value,"%d",&parms[0].SCANSCORE); i++; } else if(strcmp(keyword,"PAIROUTPUT")==0) { parms[0].PAIROUTPUT=T_FLAG; 
i++; } else if(strcmp(keyword,"ALLPAIRS")==0) { parms[0].ALLPAIRS=T_FLAG; i++; } else if (strcmp(keyword,"ATOMTYPE")==0) { parms[0].ATOMTYPE=T_FLAG; i++; } else if(strcmp(keyword,"DSSP")==0) { parms[0].DSSP=T_FLAG; i++; } else if(strcmp(keyword,"SLOWSCAN")==0) { parms[0].SLOWSCAN=T_FLAG; i++; } else if(strcmp(keyword,"SLOW")==0) { parms[0].SLOWSCAN=1; } else if(strcmp(keyword,"CUT")==0) { parms[0].CO=1; } else if(strcmp(&argv[i][1],"slide")==0) { if(i+1>=argc) exit_error(); sscanf(argv[i+1],"%d",&parms[0].SCANSLIDE); i++; } else if(strcmp(&argv[i][1],"d")==0) { /* database file */ if(i+1>=argc) exit_error(); strcpy(&parms[0].database[0],argv[i+1]); i++; } else if(strcmp(&argv[i][1],"pen1")==0) { if(i+1>=argc) exit_error(); sscanf(argv[i+1],"%f",&parms[0].first_PAIRPEN); i++; } else if(strcmp(&argv[i][1],"pen2")==0) { if(i+1>=argc) exit_error(); sscanf(argv[i+1],"%f",&parms[0].second_PAIRPEN); i++; } else if(strcmp(&argv[i][1],"prefix")==0) { if(i+1>=argc) exit_error(); strcpy(&parms[0].transprefix[0],argv[i+1]); i++; } else if(strcmp(&argv[i][1],"scancut")==0) { if(i+1>=argc) exit_error(); sscanf(argv[i+1],"%f",&parms[0].SCANCUT); i++; } else if(strcmp(&argv[i][1],"opd")==0) { parms[0].opd=1; } else { exit_error(); } } free(keyword); free(value); /* make the names of all the output files using the prefix */ sprintf(&parms[0].ordfile[0],"%s.ord",parms[0].transprefix); sprintf(&parms[0].treefile[0],"%s.tree",parms[0].transprefix); sprintf(&parms[0].plotfile[0],"%s.plot",parms[0].transprefix); sprintf(&parms[0].matfile[0],"%s.mat",parms[0].transprefix); sprintf(&parms[0].roughalign[0],"%s_align.rough",parms[0].transprefix); sprintf(&parms[0].scanfile[0],"%s.scan",parms[0].transprefix); if(strcmp(parms[0].logfile,"stdout")==0 || strcmp(parms[0].logfile,"STDOUT")==0) { parms[0].LOG=stdout; } else if(strcmp(parms[0].logfile,"silent")==0 || strcmp(parms[0].logfile,"SILENT")==0) { #if defined(_MSC_VER) parms[0].LOG=stdout; #else parms[0].LOG=fopen("/dev/null","w"); #endif 
} else { if((parms[0].LOG=fopen(parms[0].logfile,"w"))==NULL) { fprintf(stderr,"error opening file %s\n",parms[0].logfile); exit(-1); } } if(strcmp(parms[0].logfile,"silent")==0) { printf("\nSTAMP Structural Alignment of Multiple Proteins\n"); printf(" by Robert B. Russell & Geoffrey J. Barton \n"); printf(" Please cite PROTEINS, v14, 309-323, 1992\n\n"); } fprintf(parms[0].LOG,"-------------------------------------------------------------------------------\n"); fprintf(parms[0].LOG," S t A M P\n"); fprintf(parms[0].LOG," Structural Alignment of\n"); fprintf(parms[0].LOG," Multiple Proteins\n"); fprintf(parms[0].LOG," By Robert B. Russell & Geoffrey J. Barton \n"); fprintf(parms[0].LOG," Last Modified: %s\n",lastmod); fprintf(parms[0].LOG," Please cite Ref: Russell and GJ Barton, PROTEINS, v14, 309-323, 1992\n"); fprintf(parms[0].LOG,"-------------------------------------------------------------------------------\n\n"); fprintf(parms[0].LOG,"STAMPDIR has been set to %s\n\n\n",parms[0].stampdir); /* read in coordinate locations and initial transformations */ if((TRANS = fopen(parms[0].listfile,"r")) == NULL) { fprintf(stderr,"error: file %s does not exist\n",parms[0].listfile); exit(-1); } /* determine the number of domains specified */ ndomain=count_domain(TRANS); domain=(struct domain_loc*)malloc(ndomain*sizeof(struct domain_loc)); rewind(TRANS); if(getdomain(TRANS,domain,&ndomain,ndomain,&gottrans,parms[0].stampdir,parms[0].DSSP,parms[0].LOG)==-1) exit(-1); fclose(TRANS); fprintf(parms[0].LOG,"Details of this run:\n"); if(parms[0].PAIRWISE) fprintf(parms[0].LOG,"PAIRWISE mode specified\n"); if(parms[0].TREEWISE) fprintf(parms[0].LOG,"TREEWISE mode specified\n"); if(parms[0].SCAN) fprintf(parms[0].LOG,"SCAN mode specified\n"); if(!parms[0].SCAN) { /* if no MINFIT has been given, then take the smallest length and divide it by two */ if(parms[0].MINFIT==-1) { parms[0].MINFIT=parms[0].MAXLEN; for(i=0; i<ndomain; ++i) if(domain[i].ncoords<parms[0].MINFIT) 
parms[0].MINFIT=domain[i].ncoords; parms[0].MINFIT/=2; } fprintf(parms[0].LOG," pairwise score file: %s\n",parms[0].matfile); if(parms[0].TREEWISE) { fprintf(parms[0].LOG," tree order file: %s\n",parms[0].ordfile); fprintf(parms[0].LOG," tree file: %s\n",parms[0].treefile); fprintf(parms[0].LOG," tree plot file: %s\n",parms[0].plotfile); } } else { fprintf(parms[0].LOG," SCANMODE set to %d\n",parms[0].SCANMODE); fprintf(parms[0].LOG," SCANSCORE set to %d\n",parms[0].SCANSCORE); fprintf(parms[0].LOG," (see documentation for an explanation)\n"); if(parms[0].opd==1) fprintf(parms[0].LOG," Domains will be skipped after the first match is found\n"); if(parms[0].SCANMODE==1) { fprintf(parms[0].LOG," Transformations for Sc values greater than %f are to be output\n",parms[0].SCANCUT); fprintf(parms[0].LOG," to the file %s\n",parms[0].transprefix); } else { fprintf(parms[0].LOG," Only the scores are to be output to the file %s\n",parms[0].scanfile); } fprintf(parms[0].LOG," secondary structures are "); switch(parms[0].SCANSEC) { case 0: fprintf(parms[0].LOG," not to be considered\n"); break; case 1: fprintf(parms[0].LOG," to be from DSSP\n"); break; case 2: fprintf(parms[0].LOG," to be read in from %s\n",parms[0].secfile); break; default: fprintf(parms[0].LOG," not to be considered\n"); } if(parms[0].SECSCREEN) { fprintf(parms[0].LOG," An initial screen on secondary structure content is to performed when possible\n"); fprintf(parms[0].LOG," Secondary structure summaries farther than %6.2f %% apart result in\n",parms[0].SECSCREENMAX); fprintf(parms[0].LOG," a comparison being ignored\n"); } fprintf(parms[0].LOG," Initial fits are to be performed by aligning the N-terminus of the query\n with every %d residue of the database sequence\n",parms[0].SCANSLIDE); fprintf(parms[0].LOG," of the query along the database structure.\n"); if(parms[0].SCANTRUNC) { fprintf(parms[0].LOG," If sequences in the database are > %5.3f x the query sequence length\n",parms[0].SCANTRUNCFACTOR); 
fprintf(parms[0].LOG," then a fraction of the the database structure, corresponding to this\n"); fprintf(parms[0].LOG," of length %5.3f x the query, will be considered\n",parms[0].SCANTRUNCFACTOR); fprintf(parms[0].LOG," comparisons are to be ignored if the database structure is less than\n %6.4f x the length of the query structure\n",parms[0].MIN_FRAC); } fprintf(parms[0].LOG," Domain database file to be scanned %s\n",parms[0].database); } if(parms[0].TREEWISE) fprintf(parms[0].LOG," output files prefix: %s\n",parms[0].transprefix); fprintf(parms[0].LOG,"\n\nParameters:\n"); fprintf(parms[0].LOG,"Rossmann and Argos parameters:\n"); if(parms[0].NPASS==2) { fprintf(parms[0].LOG," Two fits are to be performed, the first fit with:\n"); fprintf(parms[0].LOG," E1=%7.3f,",parms[0].first_E1); fprintf(parms[0].LOG," E2=%7.3f,",parms[0].first_E2); fprintf(parms[0].LOG," CUT=%7.3f,",parms[0].first_CUTOFF); fprintf(parms[0].LOG," PAIRPEN=%7.3f,",parms[0].first_PAIRPEN); fprintf(parms[0].LOG," TREEPEN=%7.3f\n",parms[0].first_TREEPEN); /* fprintf(parms[0].LOG," E1=%7.3f, E2=%7.3f, CUT=%7.3f, PAIRPEN=%7.3f, TREEPEN=%7.3f\n", parms[0].first_E1,parms[0].first_E2,parms[0].first_CUTOFF,parms[0].first_PAIRPEN,parms[0].first_TREEPEN); */ fprintf(parms[0].LOG," The second fit with:\n"); } else fprintf(parms[0].LOG," One fit is to performed with:\n"); fprintf(parms[0].LOG," E1=%7.3f, E2=%7.3f, CUT=%7.3f, PAIRPEN=%7.3f, TREEPEN=%7.3f\n", parms[0].second_E1,parms[0].second_E2,parms[0].second_CUTOFF,parms[0].second_PAIRPEN,parms[0].second_TREEPEN); if(parms[0].BOOLEAN) { fprintf(parms[0].LOG," BOOLEAN mode specified\n"); fprintf(parms[0].LOG," A boolean matrix will be calculated corresponding to whether\n"); fprintf(parms[0].LOG," positions have Pij values greater than:\n"); if(parms[0].NPASS==2) fprintf(parms[0].LOG," %7.3f, for the first fit and\n",parms[0].first_BOOLCUT); fprintf(parms[0].LOG," %7.3f",parms[0].second_BOOLCUT); if(parms[0].NPASS==2) fprintf(parms[0].LOG," for the second 
fit.\n"); else fprintf(parms[0].LOG,".\n"); fprintf(parms[0].LOG," In the multiple case, this criteria must be satisfied for *all*\n"); fprintf(parms[0].LOG," possible pairwise comparisons\n"); } if(parms[0].SW==1) { fprintf(parms[0].LOG," Corner cutting is to be performed\n"); fprintf(parms[0].LOG," Corner cutting length: %6.2f\n",parms[0].CCFACTOR); if(parms[0].CCADD) fprintf(parms[0].LOG," The length difference is to be added to this value\n"); } else { fprintf(parms[0].LOG," The entire SW matrix is to be calculated and used\n"); } fprintf(parms[0].LOG," The minimum length of alignment to be evaluated further is %3d residues\n",parms[0].MINFIT); fprintf(parms[0].LOG,"\n"); fprintf(parms[0].LOG," Convergence tolerance SCORETOL= %f %%\n", parms[0].SCORETOL); fprintf(parms[0].LOG," Other parameters:\n"); fprintf(parms[0].LOG," MAX_SEQ_LEN=%d, MAXPITER=%d, MAXTITER=%d\n", parms[0].MAX_SEQ_LEN,parms[0].MAXPITER,parms[0].MAXTITER); fprintf(parms[0].LOG," PAIRPLOT (SCANPLOT) = %d, TREEPLOT = %d, PAIRALIGN (SCANALIGN) = %d, TREEALIGN = %d\n", parms[0].PAIRPLOT,parms[0].TREEPLOT,parms[0].PAIRALIGN,parms[0].TREEALIGN); fprintf(parms[0].LOG," PAIRALLALIGN (SCANALLALIGN) = %d, TREEALLALIGN = %d\n",parms[0].PAIRALLALIGN,parms[0].TREEALLALIGN); if(!parms[0].BOOLEAN) { fprintf(parms[0].LOG,"\n\nDetails of Confidence value calculations:\n"); if(parms[0].STATS) fprintf(parms[0].LOG," actual mean and standard deviations are to be\n used for determination of Pij' values.\n"); else { fprintf(parms[0].LOG," pre-set mean and standard deviations are to be used\n and multiple comparisons are to be corrected.\n"); fprintf(parms[0].LOG," mean Xt = %f, standard deviation SDt = %f\n", parms[0].NMEAN,parms[0].NSD); fprintf(parms[0].LOG," for the multiple case:\n"); fprintf(parms[0].LOG," pairwise means are to be calculated from:\n Xp = exp(%6.4f * log(length) + %6.4f)\n",parms[0].NA,parms[0].NB); fprintf(parms[0].LOG," and pairwise standard deviations from:\n SDp = exp(%6.4f * log(length) + 
%6.4f)\n",parms[0].NASD,parms[0].NBSD); fprintf(parms[0].LOG," the mean to be used is calculated from: \n Xc = (Xm/Xp) * Xt).\n"); fprintf(parms[0].LOG," and the standard deviation from: \n SDc = (SDm/SDp)*SDt).\n"); } /* End of if(parms[0].STATS) */ } else { fprintf(parms[0].LOG," Positional values will consist of one's or zeros depending on whether\n"); fprintf(parms[0].LOG," a position satisfies the BOOLEAN criterion above\n"); fprintf(parms[0].LOG," The score (Sp) for each alignment will be a sum of these positions.\n"); } /* end of if(parms[0].BOOLEAN */ if(!parms[0].SCAN && parms[0].TREEWISE) { fprintf(parms[0].LOG,"\n\nTree is to be generated by "); if(parms[0].CLUSTMETHOD==0) fprintf(parms[0].LOG,"1/rms values.\n"); if(parms[0].CLUSTMETHOD==1) { fprintf(parms[0].LOG,"scores from path tracings modified as follows:\n"); fprintf(parms[0].LOG," Sc = (Sp/Lp) * ((Lp-ia)/La) * ((Lp-ib)/Lb),\n"); fprintf(parms[0].LOG," where Sp is the actual score, Lp is the path length.\n"); fprintf(parms[0].LOG," and La & Lb are the lengths of the structures considered.\n"); } /* End of if(parms[0].METHOD==2) */ } fprintf(parms[0].LOG,"\n\n"); fprintf(parms[0].LOG,"Reading coordinates...\n"); for(i=0; i<ndomain; ++i) { fprintf(parms[0].LOG,"Domain %3d %s %s\n ",i+1,domain[i].filename,domain[i].id); if((PDB=openfile(domain[i].filename,"r"))==NULL) { fprintf(stderr,"error opening file %s\n",domain[i].filename); exit(-1); } domain[i].ncoords=0; domain[i].coords=(int**)malloc(parms[0].MAX_SEQ_LEN*sizeof(int*)); domain[i].aa=(char*)malloc((parms[0].MAX_SEQ_LEN+1)*sizeof(char)); domain[i].numb=(struct brookn*)malloc((parms[0].MAX_SEQ_LEN)*sizeof(struct brookn)); total=0; fprintf(parms[0].LOG," "); for(j=0; j<domain[i].nobj; ++j) { if(!parms[0].DSSP) { if(igetca(PDB,&domain[i].coords[total],&domain[i].aa[total],&domain[i].numb[total], &add,domain[i].start[j],domain[i].end[j],domain[i].type[j],(parms[0].MAX_SEQ_LEN-total), 
domain[i].reverse[j],parms[0].PRECISION,parms[0].ATOMTYPE,parms[0].LOG)==-1) { fprintf(stderr,"Error in domain %s object %d \n",domain[i].id,j+1); exit(-1); } } else { if(igetcadssp(PDB,&domain[i].coords[total],&domain[i].aa[total],&domain[i].numb[total], &add,domain[i].start[j],domain[i].end[j],domain[i].type[j],(parms[0].MAX_SEQ_LEN-total), domain[i].reverse[j],parms[0].PRECISION,parms[0].LOG)==-1) exit(-1); } switch(domain[i].type[j]) { case 1: fprintf(parms[0].LOG," all residues"); break; case 2: fprintf(parms[0].LOG," chain %c",domain[i].start[j].cid); break; case 3: fprintf(parms[0].LOG," from %c %4d %c to %c %4d %c", domain[i].start[j].cid,domain[i].start[j].n,domain[i].start[j].in, domain[i].end[j].cid,domain[i].end[j].n,domain[i].end[j].in); break; } fprintf(parms[0].LOG,"%4d CAs ",add); total+=add; closefile(PDB,domain[i].filename); PDB=openfile(domain[i].filename,"r"); } domain[i].ncoords=total; fprintf(parms[0].LOG,"=> %4d CAs in total\n",domain[i].ncoords); fprintf(parms[0].LOG,"Applying the transformation... 
\n"); printmat(domain[i].R,domain[i].V,3,parms[0].LOG); fprintf(parms[0].LOG," ...to these coordinates.\n"); matmult(domain[i].R,domain[i].V,domain[i].coords,domain[i].ncoords,parms[0].PRECISION); closefile(PDB,domain[i].filename); } fprintf(parms[0].LOG,"\n\n"); fprintf(parms[0].LOG,"Secondary structure...\n"); for(i=0; i<ndomain; ++i) domain[i].sec=(char*)malloc(parms[0].MAX_SEQ_LEN*sizeof(char)); switch(parms[0].SECTYPE) { case 0: { fprintf(parms[0].LOG,"No secondary structure assignment will be considered\n"); for(i=0; i<ndomain; ++i) { for(j=0; j<domain[i].ncoords; ++j) domain[i].sec[j]='?'; domain[i].sec[j]='\0'; } parms[0].SECSCREEN=0; } break; case 1: { fprintf(parms[0].LOG,"Will try to find Kabsch and Sander DSSP assignments\n"); if(getks(domain,ndomain,parms)!=0) parms[0].SECSCREEN=0; } break; case 2: { fprintf(parms[0].LOG,"Reading in secondary structure assignments from file: %s\n",parms[0].secfile); if(getsec(domain,ndomain,parms)!=0) parms[0].SECSCREEN=0; } break; default: { fprintf(stderr,"error: unrecognised secondary structure assignment option\n"); exit(-1); } } fprintf(parms[0].LOG,"\n\n"); if(parms[0].SCAN) { i=0; fprintf(parms[0].LOG,"Scanning with domain %s\n",&(domain[i].id[0])); if(strcmp(parms[0].logfile,"silent")==0) { printf("Results of scan will be written to file %s\n",parms[0].scanfile); printf("Fits = no. of fits performed, Sc = STAMP score, RMS = RMS deviation\n"); printf("Align = alignment length, Nfit = residues fitted, Eq. = equivalent residues\n"); printf("Secs = no. equiv. secondary structures, %%I = seq. identity, %%S = sec. str. identity\n"); printf("P(m) = P value (p=1/10) calculated after Murzin (1993), JMB, 230, 689-694\n"); printf("\n"); printf(" Domain1 Domain2 Fits Sc RMS Len1 Len2 Align Fit Eq. 
Secs %%I %%S P(m)\n"); } if(parms[0].SLOWSCAN==1) { if(slow_scan(domain[i],parms)==-1) exit(-1); } else { if(scan(domain[i],parms)==-1) exit(-1); } if(strcmp(parms[0].logfile,"silent")==0) printf("See the file %s.scan\n",parms[0].transprefix); fprintf(parms[0].LOG,"\n"); } else { if(parms[0].ROUGHFIT) if(roughfit(domain,ndomain,parms)==-1) exit(-1); if(parms[0].PAIRWISE) if(pairwise(domain,ndomain,parms)==-1) exit(-1); if(parms[0].TREEWISE) if(treewise(domain,ndomain,parms)==-1) exit(-1); } /* end of if(parms[0].SCAN... */ /* freeing memory to keep purify happy */ /* for(i=0; i<ndomain; ++i) { free(domain[i].aa); free(domain[i].sec); free(domain[i].v); free(domain[i].V); for(j=0; j<3; ++j) { free(domain[i].R[j]); free(domain[i].r[j]); } free(domain[i].R); free(domain[i].r); for(j=0; j<domain[i].ncoords; ++j) free(domain[i].coords[j]); free(domain[i].coords); free(domain[i].type); free(domain[i].start); free(domain[i].end); free(domain[i].reverse); free(domain[i].numb); } */ free(domain); exit(0); }
bool Model::filter_AVX_OpenCL(ComputeEnv *env, Buffer *packed_input_buf, Buffer *packed_output_buf, cv::Size size, enum runtype rt) { int vec_width; int weight_step; unsigned int eax=0, ebx=0, ecx=0, edx=0; bool have_fma = false, have_avx = false; int nJob = modelUtility::getInstance().getNumberOfJobs(); #ifdef __GNUC__ __get_cpuid(1, &eax, &ebx, &ecx, &edx); #else int cpuInfo[4]; __cpuid(cpuInfo, 1); eax = cpuInfo[0]; ebx = cpuInfo[1]; ecx = cpuInfo[2]; edx = cpuInfo[3]; #endif if ((ecx & 0x18000000) == 0x18000000) { have_avx = true; } if (ecx & (1<<12)) { have_fma = true; } bool gpu = (rt == RUN_OPENCL) || (rt == RUN_CUDA); if (gpu) { weight_step = GPU_VEC_WIDTH; vec_width = GPU_VEC_WIDTH; } else { weight_step = nOutputPlanes; vec_width = VEC_WIDTH; } float *weight_flat = (float*)_mm_malloc(sizeof(float)*nInputPlanes*weight_step*3*3, 64); float *fbiases_flat = (float*)_mm_malloc(sizeof(float) * biases.size(), 64); for (int i=0; i<(int)biases.size(); i++) { fbiases_flat[i] = biases[i]; } if (nOutputPlanes == 1) { if (gpu) { for (int ii=0; ii<nInputPlanes; ii++) { cv::Mat &wm = weights[ii]; const float *src0 = (float*)wm.ptr(0); const float *src1 = (float*)wm.ptr(1); const float *src2 = (float*)wm.ptr(2); float *dst = weight_flat + ii * 9; dst[0] = src0[0]; dst[1] = src0[1]; dst[2] = src0[2]; dst[3] = src1[0]; dst[4] = src1[1]; dst[5] = src1[2]; dst[6] = src2[0]; dst[7] = src2[1]; dst[8] = src2[2]; } } else { for (int ii=0; ii<nInputPlanes; ii++) { cv::Mat &wm = weights[ii]; const float *src0 = (float*)wm.ptr(0); const float *src1 = (float*)wm.ptr(1); const float *src2 = (float*)wm.ptr(2); int ii_0 = ii % vec_width; int ii_1 = (ii / vec_width) * vec_width; float *dst = weight_flat + ii_1 * 9 + ii_0; dst[0 * vec_width] = src0[0]; dst[1 * vec_width] = src0[1]; dst[2 * vec_width] = src0[2]; dst[3 * vec_width] = src1[0]; dst[4 * vec_width] = src1[1]; dst[5 * vec_width] = src1[2]; dst[6 * vec_width] = src2[0]; dst[7 * vec_width] = src2[1]; dst[8 * vec_width] = src2[2]; 
} } } else if (gpu && nInputPlanes == 1) { for (int oi=0; oi<nOutputPlanes; oi++) { cv::Mat &wm = weights[oi]; const float *src0 = (float*)wm.ptr(0); const float *src1 = (float*)wm.ptr(1); const float *src2 = (float*)wm.ptr(2); float *dst = weight_flat + oi * 9; dst[0] = src0[0]; dst[1] = src0[1]; dst[2] = src0[2]; dst[3] = src1[0]; dst[4] = src1[1]; dst[5] = src1[2]; dst[6] = src2[0]; dst[7] = src2[1]; dst[8] = src2[2]; } } else if (nOutputPlanes == 3) { /* | o0 | o1 | o2 ... | * |i0 i1 i2 ... i127|i0 i1 i2 ... i127| ... |*/ for (int oi=0; oi<nOutputPlanes; oi++) { for (int ii=0; ii<nInputPlanes; ii++) { int mi = oi*nInputPlanes+ii; cv::Mat &wm = weights[mi]; const float *src0 = (float*)wm.ptr(0); const float *src1 = (float*)wm.ptr(1); const float *src2 = (float*)wm.ptr(2); float *dst = weight_flat + (oi * nInputPlanes * 9) + ii; dst[0*nInputPlanes] = src0[0]; dst[1*nInputPlanes] = src0[1]; dst[2*nInputPlanes] = src0[2]; dst[3*nInputPlanes] = src1[0]; dst[4*nInputPlanes] = src1[1]; dst[5*nInputPlanes] = src1[2]; dst[6*nInputPlanes] = src2[0]; dst[7*nInputPlanes] = src2[1]; dst[8*nInputPlanes] = src2[2]; } } } else if (gpu && (nInputPlanes == 3) && (nOutputPlanes == 32)) { /* | i0 | i1 | i2 .. iN-1| * |o0 o1 o2 o3..o31|o0 .... o32| .... | * |<- ->| * | 32 | * | x 9 | */ for (int oi=0; oi<nOutputPlanes; oi++) { for (int ii=0; ii<nInputPlanes; ii++) { int mi = oi*nInputPlanes+ii; cv::Mat &wm = weights[mi]; const float *src0 = (float*)wm.ptr(0); const float *src1 = (float*)wm.ptr(1); const float *src2 = (float*)wm.ptr(2); float *dst = weight_flat + (ii * nOutputPlanes * 9) + oi; dst[0*nOutputPlanes] = src0[0]; dst[1*nOutputPlanes] = src0[1]; dst[2*nOutputPlanes] = src0[2]; dst[3*nOutputPlanes] = src1[0]; dst[4*nOutputPlanes] = src1[1]; dst[5*nOutputPlanes] = src1[2]; dst[6*nOutputPlanes] = src2[0]; dst[7*nOutputPlanes] = src2[1]; dst[8*nOutputPlanes] = src2[2]; } } } else { /* | i0 | i1 | i2 .. iN-1| i0 | i1 | .. * |o0 o1 o2 o3|o0 o1 o2 o3| .... 
|o4 o5 o6 o7|o4 o5 o6 o7| .. * |<- ->| * | VEC_WIDTH | * | x 9 | */ for (int oi=0; oi<nOutputPlanes; oi++) { for (int ii=0; ii<nInputPlanes; ii++) { int mi = oi*nInputPlanes+ii; cv::Mat &wm = weights[mi]; const float *src0 = (float*)wm.ptr(0); const float *src1 = (float*)wm.ptr(1); const float *src2 = (float*)wm.ptr(2); int oi_0 = oi % vec_width; int oi_1 = (oi / vec_width) * vec_width; float *dst = weight_flat + ((ii*weight_step + oi_1) * 9) + oi_0; dst[0*vec_width] = src0[0]; dst[1*vec_width] = src0[1]; dst[2*vec_width] = src0[2]; dst[3*vec_width] = src1[0]; dst[4*vec_width] = src1[1]; dst[5*vec_width] = src1[2]; dst[6*vec_width] = src2[0]; dst[7*vec_width] = src2[1]; dst[8*vec_width] = src2[2]; } } } bool compare_result = false; #ifdef COMPARE_RESULT if (nOutputPlanes == 3) { compare_result = true; } #endif size_t in_size = size.width * size.height * sizeof(float) * nInputPlanes; size_t out_size = size.width * size.height * sizeof(float) * nOutputPlanes; if (compare_result) { Buffer *packed_output_cv_buf = new Buffer(env, sizeof(float) * size.width * size.height * nOutputPlanes); double t0 = getsec(); filter_CV(env, packed_input_buf, packed_output_cv_buf, size); //filter_FMA_impl(packed_input, packed_output_cv, // nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size, nJob); double t1 = getsec(); /* 3x3 = 9 fma */ double ops = size.width * size.height * 9.0 * 2.0 * nOutputPlanes * nInputPlanes; std::vector<cv::Mat> output2; if (rt == RUN_OPENCL) { filter_OpenCL_impl(env, packed_input_buf, packed_output_buf, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } else if (rt == RUN_CUDA) { filter_CUDA_impl(env, packed_input_buf, packed_output_buf, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } else { const float *packed_input = (float*)packed_input_buf->get_read_ptr_host(env, in_size); float *packed_output = (float*)packed_output_buf->get_write_ptr_host(env); if (have_fma) { 
filter_FMA_impl(env, packed_input, packed_output, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } else { filter_AVX_impl(env, packed_input, packed_output, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } } double t2 = getsec(); printf("(w=%d,h=%d) (ip=%d,op=%d) %f %f %f[gflops]\n", size.width, size.height, nInputPlanes, nOutputPlanes, t1-t0, t2-t1, ops/(1000*1000*1000)); printf("ver2 : %f [Gflops]\n", (ops/(1000.0*1000.0*1000.0)) / (t2-t1)); printf("orig : %f [Gflops]\n", (ops/(1000.0*1000.0*1000.0)) / (t1-t0)); int error_count = 0; float *packed_output_cv = (float*)packed_output_cv_buf->get_read_ptr_host(env, out_size); float *packed_output = (float*)packed_output_buf->get_read_ptr_host(env, out_size); for (int i=0; i<size.width * size.height * nOutputPlanes; i++) { float v0 = packed_output_cv[i]; float v1 = packed_output[i]; float d = fabs(v0 - v1); float r0 = d/fabs(v0); float r1 = d/fabs(v1); float r = (std::max)(r0, r1); if (r > 0.1f && d > 0.000001f) { int plane = i % nOutputPlanes; int pixpos = i / nOutputPlanes; int xpos = pixpos % size.width; int ypos = pixpos / size.width; printf("d=%.20f %.20f %.20f @ (%d,%d,%d,%d) \n",r, v0, v1, xpos, ypos, plane, i); error_count++; if (error_count >= 256) { exit(1); } } } if (error_count != 0) { exit(1); } delete packed_output_cv_buf; } else { if (rt == RUN_OPENCL) { filter_OpenCL_impl(env, packed_input_buf, packed_output_buf, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } else if (rt == RUN_CUDA) { filter_CUDA_impl(env, packed_input_buf, packed_output_buf, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } else { if (!have_avx) { filter_CV(env, packed_input_buf, packed_output_buf, size); } else { const float *packed_input = (float*)packed_input_buf->get_read_ptr_host(env, in_size); float *packed_output = (float*)packed_output_buf->get_write_ptr_host(env); 
if (have_fma) { filter_FMA_impl(env, packed_input, packed_output, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } else if (have_avx) { filter_AVX_impl(env, packed_input, packed_output, nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, size.width, size.height, nJob); } } } } _mm_free(fbiases_flat); _mm_free(weight_flat); return true; }