/*
 * Python entry point for the assignment-problem solver.
 *
 * Given a cost matrix as input, return the column numbers that are paired
 * with row numbers (1....N) that give the minimum cost assignment.
 *
 * args: a tuple holding one NumPy array. Its first dimension is taken as
 *       DIM and its buffer is read as DIM*DIM consecutive "long unsigned
 *       int" values.
 *       NOTE(review): the dtype and C-contiguity of the array are assumed,
 *       not checked - confirm against the Python caller.
 *
 * Returns a new 1-D NumPy int array of length DIM filled in by apc(), or
 * NULL with a Python exception set on failure.
 */
static PyObject *_Assign(PyObject *self, PyObject *args)
{
    /**********************************/
    /*   Initialize input variables   */
    /**********************************/
    PyArrayObject *Mat_ = NULL;
    /* "O!" makes the parser reject anything that is not an ndarray, so the
     * array accessors below never run on an arbitrary Python object. */
    if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &Mat_)) {
        printf("Mao says: Inputs / outputs not correctly specified!\n");
        return NULL;
    }
    if (PyArray_NDIM(Mat_) < 1) {
        PyErr_SetString(PyExc_ValueError,
                        "cost matrix must have at least one dimension");
        return NULL;
    }

    /**********************************/
    /*   Initialize local variables   */
    /**********************************/
    int inf = 1e9;  /* Infinity parameter (needs to be bigger than costs) */
    int ans = 0;    /* The minimized cost (written by apc, not returned)  */
    size_t k;       /* Flat element index                                  */

    /* Dimensions of the matrix. apc() takes an int, so refuse sizes that
     * would be silently truncated by the conversion. */
    npy_intp rows = PyArray_DIM(Mat_, 0);
    int DIM = (int) rows;
    if ((npy_intp) DIM != rows) {
        PyErr_SetString(PyExc_ValueError, "cost matrix is too large");
        return NULL;
    }

    /* Element count in size_t: DIM*DIM overflows int for DIM > 46340. */
    size_t count = (size_t) DIM * (size_t) DIM;
    if ((size_t) PyArray_SIZE(Mat_) < count) {
        /* Without this the copy loop below would read past the buffer. */
        PyErr_SetString(PyExc_ValueError,
                        "cost matrix must hold at least DIM*DIM elements");
        return NULL;
    }
    const long unsigned int *Mat = (const long unsigned int *) PyArray_DATA(Mat_);

    /* Allocate the assignment array that is handed back to Python. */
    npy_intp dim1[1];
    dim1[0] = DIM;
    PyArrayObject *idx_ = (PyArrayObject *) PyArray_SimpleNew(1, dim1, NPY_INT);
    if (idx_ == NULL) {
        return NULL;  /* PyArray_SimpleNew has already set the exception */
    }
    int *idx = (int *) PyArray_DATA(idx_);

    /* The matrix passed from Python is "long unsigned int", which causes
     * problems for apc. Create a new matrix with "int" elements instead. */
    int *Mat_Int = (int *) calloc(count, sizeof(int));
    if (Mat_Int == NULL && count > 0) {
        /* calloc(0, ...) may legitimately return NULL; only a non-empty
         * request that fails is an out-of-memory condition. */
        Py_DECREF(idx_);
        return PyErr_NoMemory();
    }
    for (k = 0; k < count; k++) {
        Mat_Int[k] = (int) Mat[k];
    }

    /* Solve the assignment problem. */
    apc(DIM, Mat_Int, inf, &ans, idx);

    free(Mat_Int);
    return PyArray_Return(idx_);
}
intermodule_singleton_instantiator() : ppref(0), pc(0) { bool done = false; try{ managed_shared_memory seg(interprocess::create_only, get_singleton_unique_name(), 16384); //Register cleanup??? We can't because the address of a local function might be the //address of a dll, and that dll can be UNLOADED!!! } catch(interprocess_exception &ex){ if(ex.get_error_code() != already_exists_error){ managed_shared_memory seg(open_only, "unique_name", 16384); seg.find_or_construct<referenced_instance*>(unique_instance)(); } else{ throw; } } { ppref=seg.find_or_construct<referenced_instance*>(unique_instance)(); if(*ppref){ /* As in some OSes Boost.Interprocess memory segments can outlive * their associated processes, there is a possibility that we * retrieve a dangling pointer (coming from a previous aborted run, * for instance). Try to protect against this by checking that * the contents of the pointed object are consistent. */ if(std::strcmp(segment_name,(*ppref)->segment_name)!=0){ *ppref=0; /* dangling pointer! */ } else ++((*ppref)->ref); } } if(!*ppref){ std::auto_ptr<referenced_instance> apc( new referenced_instance(segment_name)); interprocess::scoped_lock<interprocess::named_mutex> lock(mutex); ppref=seg.find_or_construct<referenced_instance*>( typeid(C).name())((referenced_instance*)0); if(!*ppref)*ppref=apc.release(); ++((*ppref)->ref); } pc=&(*ppref)->c; }
instantiator(): mutex(interprocess::open_or_create,compute_mutex_name()), seg(interprocess::open_or_create,compute_segment_name(),16384), ppref(0), pc(0) { /* Instance creation is done according to a two-phase protocol so * that we call "new" in an unlocked situation, thus minimizing the * chance of leaving dangling locks due to catastrophic failure. */ { interprocess::scoped_lock<interprocess::named_mutex> lock(mutex); ppref=seg.find_or_construct<referenced_instance*>( typeid(C).name())((referenced_instance*)0); if(*ppref){ /* As in some OSes Boost.Interprocess memory segments can outlive * their associated processes, there is a possibility that we * retrieve a dangling pointer (coming from a previous aborted run, * for instance). Try to protect against this by checking that * the contents of the pointed object are consistent. */ if(std::strcmp(segment_name,(*ppref)->segment_name)!=0){ *ppref=0; /* dangling pointer! */ } else ++((*ppref)->ref); } } if(!*ppref){ std::auto_ptr<referenced_instance> apc( new referenced_instance(segment_name)); interprocess::scoped_lock<interprocess::named_mutex> lock(mutex); ppref=seg.find_or_construct<referenced_instance*>( typeid(C).name())((referenced_instance*)0); if(!*ppref)*ppref=apc.release(); ++((*ppref)->ref); } pc=&(*ppref)->c; }
int main(int argc, char **argv) { char *rawfilename = NULL; int numiter = 250; int use_apc = 1; int use_normalization = 0; conjugrad_float_t lambda_single = F001; // 0.01 conjugrad_float_t lambda_pair = FInf; conjugrad_float_t lambda_pair_factor = F02; // 0.2 int conjugrad_k = 5; conjugrad_float_t conjugrad_eps = 0.01; parse_option *optList, *thisOpt; char *optstr; char *old_optstr = malloc(1); old_optstr[0] = 0; optstr = concat("r:i:n:w:k:e:l:ARh?", old_optstr); free(old_optstr); #ifdef OPENMP int numthreads = 1; old_optstr = optstr; optstr = concat("t:", optstr); free(old_optstr); #endif #ifdef CUDA int use_def_gpu = 0; old_optstr = optstr; optstr = concat("d:", optstr); free(old_optstr); #endif #ifdef MSGPACK char* msgpackfilename = NULL; old_optstr = optstr; optstr = concat("b:", optstr); free(old_optstr); #endif optList = parseopt(argc, argv, optstr); free(optstr); char* msafilename = NULL; char* matfilename = NULL; char* initfilename = NULL; conjugrad_float_t reweighting_threshold = F08; // 0.8 while(optList != NULL) { thisOpt = optList; optList = optList->next; switch(thisOpt->option) { #ifdef OPENMP case 't': numthreads = atoi(thisOpt->argument); #ifdef CUDA use_def_gpu = -1; // automatically disable GPU if number of threads specified #endif break; #endif #ifdef CUDA case 'd': use_def_gpu = atoi(thisOpt->argument); break; #endif #ifdef MSGPACK case 'b': msgpackfilename = thisOpt->argument; break; #endif case 'r': rawfilename = thisOpt->argument; break; case 'i': initfilename = thisOpt->argument; break; case 'n': numiter = atoi(thisOpt->argument); break; case 'w': reweighting_threshold = (conjugrad_float_t)atof(thisOpt->argument); break; case 'l': lambda_pair_factor = (conjugrad_float_t)atof(thisOpt->argument); break; case 'k': conjugrad_k = (int)atoi(thisOpt->argument); break; case 'e': conjugrad_eps = (conjugrad_float_t)atof(thisOpt->argument); break; case 'A': use_apc = 0; break; case 'R': use_normalization = 1; break; case 'h': case '?': usage(argv[0], 
1); break; case 0: if(msafilename == NULL) { msafilename = thisOpt->argument; } else if(matfilename == NULL) { matfilename = thisOpt->argument; } else { usage(argv[0], 0); } break; default: die("Unknown argument"); } free(thisOpt); } if(msafilename == NULL || matfilename == NULL) { usage(argv[0], 0); } FILE *msafile = fopen(msafilename, "r"); if( msafile == NULL) { printf("Cannot open %s!\n\n", msafilename); return 2; } #ifdef JANSSON char* metafilename = malloc(2048); snprintf(metafilename, 2048, "%s.meta.json", msafilename); FILE *metafile = fopen(metafilename, "r"); json_t *meta; if(metafile == NULL) { // Cannot find .meta.json file - create new empty metadata meta = meta_create(); } else { // Load metadata from matfile.meta.json meta = meta_read_json(metafile); fclose(metafile); } json_object_set(meta, "method", json_string("ccmpred")); json_t *meta_step = meta_add_step(meta, "ccmpred"); json_object_set(meta_step, "version", json_string(__VERSION)); json_t *meta_parameters = json_object(); json_object_set(meta_step, "parameters", meta_parameters); json_t *meta_steps = json_array(); json_object_set(meta_step, "iterations", meta_steps); json_t *meta_results = json_object(); json_object_set(meta_step, "results", meta_results); #endif int ncol, nrow; unsigned char* msa = read_msa(msafile, &ncol, &nrow); fclose(msafile); int nsingle = ncol * (N_ALPHA - 1); int nvar = nsingle + ncol * ncol * N_ALPHA * N_ALPHA; int nsingle_padded = nsingle + N_ALPHA_PAD - (nsingle % N_ALPHA_PAD); int nvar_padded = nsingle_padded + ncol * ncol * N_ALPHA * N_ALPHA_PAD; #ifdef CURSES bool color = detect_colors(); #else bool color = false; #endif logo(color); #ifdef CUDA int num_devices, dev_ret; struct cudaDeviceProp prop; dev_ret = cudaGetDeviceCount(&num_devices); if(dev_ret != CUDA_SUCCESS) { num_devices = 0; } if(num_devices == 0) { printf("No CUDA devices available, "); use_def_gpu = -1; } else if (use_def_gpu < -1 || use_def_gpu >= num_devices) { printf("Error: %d is not a valid 
device number. Please choose a number between 0 and %d\n", use_def_gpu, num_devices - 1); exit(1); } else { printf("Found %d CUDA devices, ", num_devices); } if (use_def_gpu != -1) { cudaError_t err = cudaSetDevice(use_def_gpu); if(cudaSuccess != err) { printf("Error setting device: %d\n", err); exit(1); } cudaGetDeviceProperties(&prop, use_def_gpu); printf("using device #%d: %s\n", use_def_gpu, prop.name); size_t mem_free, mem_total; err = cudaMemGetInfo(&mem_free, &mem_total); if(cudaSuccess != err) { printf("Error getting memory info: %d\n", err); exit(1); } size_t mem_needed = nrow * ncol * 2 + // MSAs sizeof(conjugrad_float_t) * nrow * ncol * 2 + // PC, PCS sizeof(conjugrad_float_t) * nrow * ncol * N_ALPHA_PAD + // PCN sizeof(conjugrad_float_t) * nrow + // Weights (sizeof(conjugrad_float_t) * ((N_ALPHA - 1) * ncol + ncol * ncol * N_ALPHA * N_ALPHA_PAD)) * 4; setlocale(LC_NUMERIC, ""); printf("Total GPU RAM: %'17lu\n", mem_total); printf("Free GPU RAM: %'17lu\n", mem_free); printf("Needed GPU RAM: %'17lu ", mem_needed); if(mem_needed <= mem_free) { printf("✓\n"); } else { printf("⚠\n"); } #ifdef JANSSON json_object_set(meta_parameters, "device", json_string("gpu")); json_t* meta_gpu = json_object(); json_object_set(meta_parameters, "gpu_info", meta_gpu); json_object_set(meta_gpu, "name", json_string(prop.name)); json_object_set(meta_gpu, "mem_total", json_integer(mem_total)); json_object_set(meta_gpu, "mem_free", json_integer(mem_free)); json_object_set(meta_gpu, "mem_needed", json_integer(mem_needed)); #endif } else { printf("using CPU"); #ifdef JANSSON json_object_set(meta_parameters, "device", json_string("cpu")); #endif #ifdef OPENMP printf(" (%d thread(s))", numthreads); #ifdef JANSSON json_object_set(meta_parameters, "cpu_threads", json_integer(numthreads)); #endif #endif printf("\n"); } #else // CUDA printf("using CPU"); #ifdef JANSSON json_object_set(meta_parameters, "device", json_string("cpu")); #endif #ifdef OPENMP printf(" (%d thread(s))\n", 
numthreads); #ifdef JANSSON json_object_set(meta_parameters, "cpu_threads", json_integer(numthreads)); #endif #endif // OPENMP printf("\n"); #endif // CUDA conjugrad_float_t *x = conjugrad_malloc(nvar_padded); if( x == NULL) { die("ERROR: Not enough memory to allocate variables!"); } memset(x, 0, sizeof(conjugrad_float_t) * nvar_padded); // Auto-set lambda_pair if(isnan(lambda_pair)) { lambda_pair = lambda_pair_factor * (ncol - 1); } // fill up user data struct for passing to evaluate userdata *ud = (userdata *)malloc( sizeof(userdata) ); if(ud == 0) { die("Cannot allocate memory for user data!"); } ud->msa = msa; ud->ncol = ncol; ud->nrow = nrow; ud->nsingle = nsingle; ud->nvar = nvar; ud->lambda_single = lambda_single; ud->lambda_pair = lambda_pair; ud->weights = conjugrad_malloc(nrow); ud->reweighting_threshold = reweighting_threshold; if(initfilename == NULL) { // Initialize emissions to pwm init_bias(x, ud); } else { // Load potentials from file read_raw(initfilename, ud, x); } // optimize with default parameters conjugrad_parameter_t *param = conjugrad_init(); param->max_iterations = numiter; param->epsilon = conjugrad_eps; param->k = conjugrad_k; param->max_linesearch = 5; param->alpha_mul = F05; param->ftol = 1e-4; param->wolfe = F02; int (*init)(void *) = init_cpu; int (*destroy)(void *) = destroy_cpu; conjugrad_evaluate_t evaluate = evaluate_cpu; #ifdef OPENMP omp_set_num_threads(numthreads); if(numthreads > 1) { init = init_cpu_omp; destroy = destroy_cpu_omp; evaluate = evaluate_cpu_omp; } #endif #ifdef CUDA if(use_def_gpu != -1) { init = init_cuda; destroy = destroy_cuda; evaluate = evaluate_cuda; } #endif init(ud); #ifdef JANSSON json_object_set(meta_parameters, "reweighting_threshold", json_real(ud->reweighting_threshold)); json_object_set(meta_parameters, "apc", json_boolean(use_apc)); json_object_set(meta_parameters, "normalization", json_boolean(use_normalization)); json_t *meta_regularization = json_object(); json_object_set(meta_parameters, 
"regularization", meta_regularization); json_object_set(meta_regularization, "type", json_string("l2")); json_object_set(meta_regularization, "lambda_single", json_real(lambda_single)); json_object_set(meta_regularization, "lambda_pair", json_real(lambda_pair)); json_object_set(meta_regularization, "lambda_pair_factor", json_real(lambda_pair_factor)); json_t *meta_opt = json_object(); json_object_set(meta_parameters, "optimization", meta_opt); json_object_set(meta_opt, "method", json_string("libconjugrad")); json_object_set(meta_opt, "float_bits", json_integer((int)sizeof(conjugrad_float_t) * 8)); json_object_set(meta_opt, "max_iterations", json_integer(param->max_iterations)); json_object_set(meta_opt, "max_linesearch", json_integer(param->max_linesearch)); json_object_set(meta_opt, "alpha_mul", json_real(param->alpha_mul)); json_object_set(meta_opt, "ftol", json_real(param->ftol)); json_object_set(meta_opt, "wolfe", json_real(param->wolfe)); json_t *meta_msafile = meta_file_from_path(msafilename); json_object_set(meta_parameters, "msafile", meta_msafile); json_object_set(meta_msafile, "ncol", json_integer(ncol)); json_object_set(meta_msafile, "nrow", json_integer(nrow)); if(initfilename != NULL) { json_t *meta_initfile = meta_file_from_path(initfilename); json_object_set(meta_parameters, "initfile", meta_initfile); json_object_set(meta_initfile, "ncol", json_integer(ncol)); json_object_set(meta_initfile, "nrow", json_integer(nrow)); } double neff = 0; for(int i = 0; i < nrow; i++) { neff += ud->weights[i]; } json_object_set(meta_msafile, "neff", json_real(neff)); ud->meta_steps = meta_steps; #endif printf("\nWill optimize %d %ld-bit variables\n\n", nvar, sizeof(conjugrad_float_t) * 8); if(color) { printf("\x1b[1m"); } printf("iter\teval\tf(x) \t║x║ \t║g║ \tstep\n"); if(color) { printf("\x1b[0m"); } conjugrad_float_t fx; int ret; #ifdef CUDA if(use_def_gpu != -1) { conjugrad_float_t *d_x; cudaError_t err = cudaMalloc((void **) &d_x, sizeof(conjugrad_float_t) * 
nvar_padded); if (cudaSuccess != err) { printf("CUDA error No. %d while allocation memory for d_x\n", err); exit(1); } err = cudaMemcpy(d_x, x, sizeof(conjugrad_float_t) * nvar_padded, cudaMemcpyHostToDevice); if (cudaSuccess != err) { printf("CUDA error No. %d while copying parameters to GPU\n", err); exit(1); } ret = conjugrad_gpu(nvar_padded, d_x, &fx, evaluate, progress, ud, param); err = cudaMemcpy(x, d_x, sizeof(conjugrad_float_t) * nvar_padded, cudaMemcpyDeviceToHost); if (cudaSuccess != err) { printf("CUDA error No. %d while copying parameters back to CPU\n", err); exit(1); } err = cudaFree(d_x); if (cudaSuccess != err) { printf("CUDA error No. %d while freeing memory for d_x\n", err); exit(1); } } else { ret = conjugrad(nvar_padded, x, &fx, evaluate, progress, ud, param); } #else ret = conjugrad(nvar_padded, x, &fx, evaluate, progress, ud, param); #endif printf("\n"); printf("%s with status code %d - ", (ret < 0 ? "Exit" : "Done"), ret); if(ret == CONJUGRAD_SUCCESS) { printf("Success!\n"); } else if(ret == CONJUGRAD_ALREADY_MINIMIZED) { printf("Already minimized!\n"); } else if(ret == CONJUGRADERR_MAXIMUMITERATION) { printf("Maximum number of iterations reached.\n"); } else { printf("Unknown status code!\n"); } printf("\nFinal fx = %f\n\n", fx); FILE* out = fopen(matfilename, "w"); if(out == NULL) { printf("Cannot open %s for writing!\n\n", matfilename); return 3; } conjugrad_float_t *outmat = conjugrad_malloc(ncol * ncol); FILE *rawfile = NULL; if(rawfilename != NULL) { printf("Writing raw output to %s\n", rawfilename); rawfile = fopen(rawfilename, "w"); if(rawfile == NULL) { printf("Cannot open %s for writing!\n\n", rawfilename); return 4; } write_raw(rawfile, x, ncol); } #ifdef MSGPACK FILE *msgpackfile = NULL; if(msgpackfilename != NULL) { printf("Writing msgpack raw output to %s\n", msgpackfilename); msgpackfile = fopen(msgpackfilename, "w"); if(msgpackfile == NULL) { printf("Cannot open %s for writing!\n\n", msgpackfilename); return 4; } #ifndef 
JANSSON void *meta = NULL; #endif } #endif sum_submatrices(x, outmat, ncol); if(use_apc) { apc(outmat, ncol); } if(use_normalization) { normalize(outmat, ncol); } write_matrix(out, outmat, ncol, ncol); #ifdef JANSSON json_object_set(meta_results, "fx_final", json_real(fx)); json_object_set(meta_results, "num_iterations", json_integer(json_array_size(meta_steps))); json_object_set(meta_results, "opt_code", json_integer(ret)); json_t *meta_matfile = meta_file_from_path(matfilename); json_object_set(meta_results, "matfile", meta_matfile); if(rawfilename != NULL) { json_object_set(meta_results, "rawfile", meta_file_from_path(rawfilename)); } if(msgpackfilename != NULL) { json_object_set(meta_results, "msgpackfile", meta_file_from_path(msgpackfilename)); } fprintf(out, "#>META> %s", json_dumps(meta, JSON_COMPACT)); if(rawfile != NULL) { fprintf(rawfile, "#>META> %s", json_dumps(meta, JSON_COMPACT)); } #endif if(rawfile != NULL) { fclose(rawfile); } #ifdef MSGPACK if(msgpackfile != NULL) { write_raw_msgpack(msgpackfile, x, ncol, meta); fclose(msgpackfile); } #endif fflush(out); fclose(out); destroy(ud); conjugrad_free(outmat); conjugrad_free(x); conjugrad_free(ud->weights); free(ud); free(msa); free(param); printf("Output can be found in %s\n", matfilename); return 0; }