THNETWORK *THLoadNetwork(const char *path) { char tmppath[255]; int i, longsize = 8; THNETWORK *net = calloc(1, sizeof(*net)); sprintf(tmppath, "%s/model.net", path); net->netobj = malloc(sizeof(*net->netobj)); lasterror = loadtorch(tmppath, net->netobj, longsize); if(lasterror == ERR_CORRUPTED) lasterror = loadtorch(tmppath, net->netobj, longsize = 4); if(lasterror) { free(net->netobj); free(net); return 0; } if(th_debug) printobject(net->netobj, 0); net->net = Object2Network(net->netobj); if(!net->net) { lasterror = ERR_WRONGOBJECT; freeobject(net->netobj); free(net->netobj); free(net); return 0; } net->std[0] = net->std[1] = net->std[2] = 1; net->mean[0] = net->mean[1] = net->mean[2] = 0; sprintf(tmppath, "%s/stat.t7", path); net->statobj = malloc(sizeof(*net->statobj)); lasterror = loadtorch(tmppath, net->statobj, longsize); if(!lasterror) { if(net->statobj->type != TYPE_TABLE || net->statobj->table->nelem != 2) { lasterror = ERR_WRONGOBJECT; freenetwork(net->net); freeobject(net->netobj); free(net->netobj); freeobject(net->statobj); free(net->statobj); free(net); return 0; } for(i = 0; i < net->statobj->table->nelem; i++) if(net->statobj->table->records[i].name.type == TYPE_STRING) { if(!strcmp(net->statobj->table->records[i].name.string.data, "mean")) memcpy(net->mean, net->statobj->table->records[i].value.tensor->storage->data, sizeof(net->mean)); else if(!strcmp(net->statobj->table->records[i].name.string.data, "std")) memcpy(net->std, net->statobj->table->records[i].value.tensor->storage->data, sizeof(net->std)); } } else { free(net->statobj); net->statobj = 0; } THUseSpatialConvolutionMM(net, 2); return net; }
int main(int argc, char **argv) { THNETWORK *net; float *result; int i, n = 0, rc, outwidth, outheight, runs = 1, print = 0, alg = 1, nbatch = 1; const char *modelsdir = 0, *inputfile = 0; for(i = 1; i < argc; i++) { if(argv[i][0] != '-') continue; switch(argv[i][1]) { case 'm': if(i+1 < argc) modelsdir = argv[++i]; break; case 'i': if(i+1 < argc) inputfile = argv[++i]; break; case 'a': if(i+1 < argc) alg = atoi(argv[++i]); break; case 'p': print = 1; break; case 'r': if(i+1 < argc) runs = atoi(argv[++i]); break; case 'b': if(i+1 < argc) { nbatch = atoi(argv[++i]); if(nbatch > 256 || nbatch < 1) nbatch = 256; } break; } } if(!modelsdir || !inputfile) { fprintf(stderr, "Syntax: test -m <models directory> -i <input file>\n"); fprintf(stderr, " [-r <number of runs] [-p(rint results)]\n"); fprintf(stderr, " [-a <alg=0:norm,1:MM (default),2:virtMM,3:cuDNN,4:cudNNhalf>]\n"); fprintf(stderr, " [-b <nbatch>]\n"); return -1; } if(alg == 4) { alg = 3; THCudaHalfFloat(1); } THInit(); net = THLoadNetwork(modelsdir); if(net) { THMakeSpatial(net); if(alg == 0) THUseSpatialConvolutionMM(net, 0); else if(alg == 1 || alg == 2) THUseSpatialConvolutionMM(net, alg); else if(alg == 3) { THNETWORK *net2 = THCreateCudaNetwork(net); if(!net2) THError("CUDA not compiled in"); THFreeNetwork(net); net = net2; } if(strstr(inputfile, ".t7")) { struct thobject input_o; rc = loadtorch(inputfile, &input_o, 8); if(!rc) { THFloatTensor *in = THFloatTensor_newFromObject(&input_o); // In CuDNN the first one has to do some initializations, so don't count it for timing if(alg == 3) THProcessFloat(net, in->storage->data, 1, in->size[2], in->size[1], &result, &outwidth, &outheight); t = seconds(); for(i = 0; i < runs; i++) n = THProcessFloat(net, in->storage->data, 1, in->size[2], in->size[1], &result, &outwidth, &outheight); t = (seconds() - t) / runs; THFloatTensor_free(in); freeobject(&input_o); } else printf("Error loading %s\n", inputfile); } else { img_t image; rc = loadimage(inputfile, &image); if(!rc) { unsigned char *bitmaps[256]; for(i = 0; i < nbatch; i++) bitmaps[i] = image.bitmap; // In CuDNN the first one has to do some initializations, so don't count it for timing if(alg == 3) THProcessImages(net, &image.bitmap, 1, image.width, image.height, 3*image.width, &result, &outwidth, &outheight, 0); t = seconds(); for(i = 0; i < runs; i++) n = THProcessImages(net, bitmaps, nbatch, image.width, image.height, 3*image.width, &result, &outwidth, &outheight, 0); t = (seconds() - t) / runs; #ifdef USECUDAHOSTALLOC cudaFreeHost(image.bitmap); #else free(image.bitmap); #endif } else printf("Error loading image %s\n", inputfile); } if(print) for(i = 0; i < n; i++) printf("(%d,%d,%d): %f\n", i/(outwidth*outheight), i % (outwidth*outheight) / outwidth, i % outwidth, result[i]); printf("1 run processing time: %lf\n", t); THFreeNetwork(net); } else printf("The network could not be loaded: %d\n", THLastError()); #ifdef MEMORYDEBUG debug_memorydump(stderr); #endif return 0; }