// Serialize a rectangle as a two-element sequence: the value form of
// LT(iRect) followed by the value form of WH(iRect), each produced by the
// matching spAsVal overload.
static Val_Any spAsVal(const ZRectPOD& iRect)
	{
	Seq_Any theResult;

	// Element 0: LT(iRect).
	theResult.Append(spAsVal(LT(iRect)));

	// Element 1: WH(iRect).
	theResult.Append(spAsVal(WH(iRect)));

	return theResult;
	}
// Create a new image with this image's WH() and copy this image into it.
//
//   bCopyAllFrames - forwarded to CopyFrom().
//   bDraw, bText   - forwarded to Create().
//   eType          - pixel format for the copy; iePixelFormat::Count means
//                    "use this image's own PixelFormat()".
//
// Returns the new image, or nullptr when Create() yields nothing or when
// CopyFrom() reports failure (the partially built image is Release()d first).
iePImage ieImage::CreateCopy(bool bCopyAllFrames, bool bDraw, bool bText, iePixelFormat eType) const
{
    // Resolve the "same as source" sentinel into a concrete format.
    iePixelFormat eCopyFormat = eType;
    if (eCopyFormat == iePixelFormat::Count)
        eCopyFormat = PixelFormat();

    iePImage pimCopy = Create(eCopyFormat, WH(), bDraw, bText);
    if (pimCopy) {
        if (!ieFailed(pimCopy->CopyFrom(this, bCopyAllFrames)))
            return pimCopy;

        // Copy failed: drop the new image before reporting failure.
        pimCopy->Release();
    }

    return nullptr;
}
/*
 * build_elem_elem() -- build the element->element (face neighbor) connectivity.
 *
 * Input:
 *   exo  mesh database. exo->elem_node_conn_exists and
 *        exo->node_elem_conn_exists must both be set; otherwise an error is
 *        raised through EH() and the function returns without doing anything.
 *
 * Output (stored in *exo):
 *   elem_elem_pntr[num_elems+1]  fencepost offsets into elem_elem_list,
 *                                one slot per face of each element.
 *   elem_elem_list[]             for each element face, the name of the
 *                                element sharing that face, or -1 when no
 *                                neighbor was identified.
 *   elem_elem_conn_exists        set to TRUE.
 *   elem_elem_xadj / elem_elem_adjncy
 *                                only when Linear_Solver == FRONT: the same
 *                                graph with the -1 entries removed and all
 *                                indices shifted to 1-based numbering.
 *
 * Method: for every face the side node list is built, and the node->element
 * lists of those nodes are intersected one node at a time. The surviving set
 * is expected to hold this element plus at most one neighbor.
 */
void build_elem_elem(Exo_DB *exo)
{
  int ce;
  int count;
  int e;
  int ebi;
  int elem;
  int ename;
  int face;
  int i;
  int index;
  int len_curr;
  int len_prev;
  int len_intr;
  int length, length_new, num_faces;
  int iel;
  int ioffset;
  int n;
  int neighbor_name = -1;
  int node;
  int num_elem_sides;
  int num_nodes;
  int snl[MAX_NODES_PER_SIDE];    /* Side Node List - NOT Saturday Night Live! */
  char err_msg[MAX_CHAR_ERR_MSG];

  /*
   * Integer arrays used to find intersection sets of node->element lists.
   */
  int prev_set[MAX_EPN];          /* list of elements attached to previous node */
  int curr_set[MAX_EPN];          /* list of elements attached to "this" node */
  int interset[MAX_EPN];          /* values of hits between */
  int ip[MAX_EPN];                /* indices of hits for prev_set[] */
  int ic[MAX_EPN];                /* indices of hits for curr_set[] */

  /*
   * If the element->node and node->element connectivities have not been
   * built, then we won't be able to do this task.
   */
  if ( ! exo->elem_node_conn_exists || ! exo->node_elem_conn_exists )
    {
      EH(-1, "Build elem->node before node->elem.");
      return;
    }

  /*
   * The number of elements connected via conventional faces may be deduced
   * from the number of elements and their type.
   */
  exo->elem_elem_pntr = (int *) smalloc((exo->num_elems+1)*sizeof(int));

  length = 0;
  for ( i=0; i<exo->num_elem_blocks; i++)
    {
      length += exo->eb_num_elems[i] * get_num_faces(exo->eb_elem_type[i]);
    }

  exo->elem_elem_list = (int *) smalloc(length*sizeof(int));

  /*
   * Initialize...
   */
  for ( i=0; i<length; i++)
    {
      exo->elem_elem_list[i] = UNASSIGNED_YET;
    }

  /*
   * Walk through the elements, block by block.
   */
  count = 0;
  elem  = 0;
  for ( ebi=0; ebi<exo->num_elem_blocks; ebi++)
    {
      num_elem_sides = get_num_faces(exo->eb_elem_type[ebi]);

      for ( e=0; e<exo->eb_num_elems[ebi]; e++,elem++)
        {
          exo->elem_elem_pntr[elem] = count;
          count += num_elem_sides;

          /*
           * Look at each side of the element, collecting a unique
           * list of integers corresponding to the minimum number of nodes
           * needed to identify an entire side.
           *
           * Typically, the same number of nodes as space dimensions are
           * needed, with exceptions being the various "sides" of shells,
           * beams and trusses...
           */
          for ( face=0; face<num_elem_sides; face++)
            {
              /*
               * Start every face from "no neighbor". Without this reset the
               * warning-only branch below would store whatever neighbor the
               * previous face happened to find.
               */
              neighbor_name = -1;

              /*
               * Given the element and the face construct the
               * list of node numbers that determine that side.
               *
               * Later, we might not need *all* the nodes on a side,
               * particularly for high order elements. It may suffice
               * to check only as many nodes as space dimensions that
               * the element lives in...
               */
              num_nodes = build_side_node_list(elem, face, ebi, exo, snl);
#ifdef DEBUG
              fprintf(stderr, "Elem %d, face %d has %d nodes: ", elem, face, num_nodes);
              for ( i=0; i<num_nodes; i++)
                {
                  fprintf(stderr, " %d", snl[i]);
                }
              fprintf(stderr, "\n");
#endif /* DEBUG */

              /*
               * Cross check: for each node in the side there is a list
               * of elements connected to it. Beginning with all the
               * elements connected to the first node (except for this given
               * element), cross check with all the elements connected with
               * the 2nd node to build an intersection set of one element.
               */
              for ( i=0; i<MAX_EPN; i++)
                {
                  prev_set[i] = -1;
                  curr_set[i] = -1;
                  interset[i] = -1;
                }
              len_prev = 0;
              len_curr = 0;
              len_intr = 0;

              for ( n=0; n<num_nodes; n++)
                {
                  /*
                   * Copy this node's element list into a clean "curr_set" array
                   * that will be intersected with any previously gathered
                   * lists of elements that qualify as promiscuously in
                   * contact with nodes...
                   */
                  node = snl[n];

                  for ( i=0; i<MAX_EPN; i++)
                    {
                      curr_set[i] = -1;
                    }
                  len_curr = 0;
#ifdef DEBUG
                  fprintf(stderr, "Traversing n->e connectivity of node %d\n", node);
#endif /* DEBUG */

                  for ( ce=exo->node_elem_pntr[node]; ce<exo->node_elem_pntr[node+1]; ce++)
                    {
                      int shell_on_shell = 0;
                      int flippy_flop = 0;
                      int nbr_ebid;
                      int nbr_num_elem_sides;

                      ename = exo->node_elem_list[ce];
#ifdef DEBUG
                      fprintf(stderr, "\telem %d\n", ename);
#endif /* DEBUG */

                      /*
                       * The self element name is accumulated too; the
                       * consistency checks after the node loop rely on it.
                       */

                      /*
                       * PKN: The current Goma use of ->elem_elem...
                       * is such that this connectivity should list
                       * connections like QUAD-BAR or HEX-SHELL.
                       * So, I'll add this dimension matching conditional
                       */

                      /* PRS (Summer 2012): Need to change this for shell stacks which have the same dim */

                      /* We need however to consider a special case (as of 8/30/2012
                       * this is a SHELL-on-SHELL stack. Viz. two materials, each a shell material
                       * which share not a side but a face. Since faces of shells are sides
                       * in patran speak, we need some special logic. We need to avoid adding
                       * the friend shell element (neighboring material) to the current shell element
                       * even though each material has the same number of sides.
                       */
                      nbr_ebid = fence_post(ename, exo->eb_ptr, exo->num_elem_blocks+1);
                      EH(nbr_ebid, "Bad element block ID!");
                      nbr_num_elem_sides = get_num_faces(exo->eb_elem_type[nbr_ebid]);

                      /* Exactly one of the two block ids lies below 100. */
                      if (exo->eb_id[ebi] < 100 && exo->eb_id[nbr_ebid] >= 100) flippy_flop = 1;
                      if (exo->eb_id[ebi] >= 100 && exo->eb_id[nbr_ebid] < 100) flippy_flop = 1;

                      if ((nbr_ebid != ebi) &&
                          (strstr(exo->eb_elem_type[nbr_ebid], "SHELL")) &&
                          (strstr(exo->eb_elem_type[ebi], "SHELL")) &&
                          flippy_flop)
                        {
                          shell_on_shell = 1;
                        }

                      if (nbr_num_elem_sides == num_elem_sides && !shell_on_shell)
                        {
                          /*
                           * curr_set[] is a fixed MAX_EPN array; refuse to
                           * write past its end.
                           */
                          if ( len_curr >= MAX_EPN )
                            {
                              EH(-1, "Too many elements attached to one node; increase MAX_EPN.");
                              break;
                            }
                          curr_set[len_curr] = ename;
                          len_curr++;
                        }
                    }

                  /*
                   * The first node is special - we'll just compare
                   * it with itself by making the "previous set" just the
                   * same as the current set...
                   */
                  if ( n == 0 )
                    {
                      for ( i=0; i<MAX_EPN; i++)
                        {
                          prev_set[i] = curr_set[i];
                        }
                      len_prev = len_curr;
                    }
#ifdef DEBUG
                  fprintf(stderr, "\ncurr_set: ");
                  for ( i=0; i<len_curr; i++)
                    {
                      fprintf(stderr, "%d ", curr_set[i]);
                    }
                  fprintf(stderr, "\nprev_set: ");
                  for ( i=0; i<len_prev; i++)
                    {
                      fprintf(stderr, "%d ", prev_set[i]);
                    }
#endif /* DEBUG */

                  /*
                   * First, clean the intersection list and the list of
                   * hit indices in the previous and current lists.
                   *
                   * Then find the intersection of the previous and current
                   * sets of elements attached to the previous and current
                   * nodes...
                   */
                  for ( i=0; i<MAX_EPN; i++)
                    {
                      interset[i] = -1;
                      ip[i]       = -1;
                      ic[i]       = -1;
                    }
                  len_intr = int_intersect(prev_set, curr_set, len_prev, len_curr, ip, ic);
#ifdef DEBUG
                  fprintf(stderr, "num_hits = %d\n", len_intr);
#endif /* DEBUG */

                  /*
                   * Now, let's make the intersection set the next previous
                   * set of elements, a standard for comparison. We should
                   * eventually boil down to either one or zero elements
                   * that qualify...
                   */
                  for ( i=0; i<MAX_EPN; i++)
                    {
                      prev_set[i] = -1;
                    }
                  for ( i=0; i<len_intr; i++)
                    {
                      prev_set[i] = curr_set[ic[i]];
                    }
                  len_prev = len_intr;
                }
#ifdef DEBUG
              fprintf(stderr, "Element [%d], face [%d], local_node [%d]\n", elem, face, n);
              fprintf(stderr, "Intersection set length = %d\n", len_intr);
#endif /* DEBUG */

              /*
               * Now consider the different cases.
               */
              if ( len_intr == 2 )
                {
                  /*
                   * The boiled list contains self and one other element.
                   */
                  if ( prev_set[0] == elem )
                    {
                      neighbor_name = prev_set[1];
                    }
                  else
                    {
                      neighbor_name = prev_set[0];
                      if ( prev_set[1] != elem )
                        {
                          sr = sprintf(err_msg, "2 elems ( %d %d ) 1 should be %d!",
                                       prev_set[0], prev_set[1], elem);
                          EH(-1, err_msg);
                        }
                    }
                }
              else if ( len_intr == 1 && prev_set[0] == elem )
                {
                  /*
                   * The boiled list has one member, this element.
                   *
                   * The face must connect either to outer space or to
                   * another processor. Both cases are recorded as -1 for
                   * now; identifying a neighbor processor that owns all of
                   * the face nodes has not been implemented.
                   */
                  neighbor_name = -1;
                }
              /*
               * Pathological cases that normally should not occur....
               */
              else if ( len_intr == 0 )
                {
                  sr = sprintf(err_msg, "Elem %d, face %d should self contain!",
                               elem, face);
                  EH(-1, err_msg);
                }
              else if ( len_intr == 1 && prev_set[0] != elem )
                {
                  sr = sprintf(err_msg, "Elem %d, face %d only connects with elem %d ?",
                               elem, face, prev_set[0]);
                  EH(-1, err_msg);
                }
              else
                {
                  /* Warning only: the face keeps the -1 set at the top of the loop. */
                  sr = sprintf(err_msg,
                               "Unknown elem-elem connection elem %d, face %d, len_intr=%d",
                               elem, face, len_intr);
                  WH(-1, err_msg);
                }

              /*
               * Now we know how to assign the neighbor name for this face
               * of the element.
               */
              index = exo->elem_elem_pntr[elem] + face;
              exo->elem_elem_list[index] = neighbor_name;

            } /* end face loop this elem */

        } /* end elem loop this elemblock */

    } /* end elem block loop */

  exo->elem_elem_pntr[exo->num_elems] = count; /* last fencepost */

  exo->elem_elem_conn_exists = TRUE;

  if (Linear_Solver == FRONT)
    {
      /*
       * Now that we have elem_elem_pntr and elem_elem_list for our parallel
       * world, we are going to use them also for optimal element bandwidth
       * reduction ordering. We will use METIS, but METIS requires the CSR
       * format, which is compressed, viz. we need to remove the -1s. Here
       * we go
       */

      /* First check for the assumption that all blocks have same number of
         element faces. Stop if they don't and issue an error to the next
         aspiring developer */
      for ( i=0; i<exo->num_elem_blocks; i++)
        {
          if (get_num_faces(exo->eb_elem_type[0]) !=
              get_num_faces(exo->eb_elem_type[i]) )
            {
              EH(-1,"Stop! We cannot reorder these elements with METIS with elemement type changes");
            }
        }

      /* Now begin */
      exo->elem_elem_xadj = (int *) smalloc((exo->num_elems+1)*sizeof(int));

      /* Recompute length of adjacency list by removing external edges */
      length_new = 0;
      for (i = 0; i < length; i++)
        {
          if (exo->elem_elem_list[i] != -1) length_new++;
        }
      exo->elem_elem_adjncy = alloc_int_1(length_new, -1);

      /*
       * Now convert. Big assumption here that all blocks have the same
       * element type. Can be furbished later since this is just for the
       * frontal solver.
       *
       * ioffset counts the -1 entries dropped so far, so the compressed
       * fencepost of element iel+1 is its original fencepost minus ioffset.
       * This yields the same xadj[] as decrementing every later entry once
       * per dropped face, but in a single pass.
       */
      num_faces = (exo->num_elems > 0) ? get_num_faces(exo->eb_elem_type[0]) : 0;

      exo->elem_elem_xadj[0] = exo->elem_elem_pntr[0];
      ioffset = 0;
      for (iel = 0; iel < exo->num_elems; iel++)
        {
          for (i = iel*num_faces; i < (iel+1)*num_faces; i++)
            {
              if (exo->elem_elem_list[i] == -1)
                {
                  ioffset++;
                }
              else
                {
                  exo->elem_elem_adjncy[i - ioffset] = exo->elem_elem_list[i];
                }
            }
          exo->elem_elem_xadj[iel+1] = exo->elem_elem_pntr[iel+1] - ioffset;
        }

      /* convert to Fortran style */
      for (e=0; e<exo->num_elems+1 ; e++) exo->elem_elem_xadj[e]++;
      for ( i=0; i<length_new; i++) exo->elem_elem_adjncy[i]++;

    } /* End FRONTAL_SOLVER if */

  /*
   * Verification that every element/face has assigned something besides
   * the initial default value of "unassigned".
   *
   * For your convenience - FORTRAN 1-based numbering.
   */
#ifdef DEBUG
  for ( e=0; e<exo->num_elems; e++)
    {
      fprintf(stdout, "Elem %3d:", e+1);
      for ( ce=exo->elem_elem_pntr[e]; ce<exo->elem_elem_pntr[e+1]; ce++)
        {
          if ( exo->elem_elem_list[ce] == -1 )
            {
              fprintf(stdout, " spc");
            }
          else if ( exo->elem_elem_list[ce] < -1 )
            {
              fprintf(stdout, " prc");
            }
          else
            {
              fprintf(stdout, " %3d", exo->elem_elem_list[ce] + 1);
            }
          if ( exo->elem_elem_list[ce] == UNASSIGNED_YET )
            {
              /* Report the offending element itself, not the sentinel value. */
              sr = sprintf(err_msg, "You need to plug a leak at elem (%d) face (%d)",
                           e + 1, ce - exo->elem_elem_pntr[e] + 1);
              EH(-1, err_msg);
            }
        }
      fprintf(stdout, "\n");
    }
#endif /* DEBUG */

#if FALSE
  demo_elem_elem_conn(exo);
#endif

  return;
}
/*
 * read_hyst_elem_var() -- look up one element variable by name and read it.
 *
 * Scans var_names[0..num_vars-1] for a case-insensitive match of `name`.
 * For every match, the variable (1-based index j+1) is read for the element
 * block eb_ptr->Elem_Blk_Id at `time_step` into ev_tmp, which must hold at
 * least eb_ptr->Num_Elems_In_Block doubles. The read status goes through EH().
 *
 * Returns 1 if at least one matching variable was read, 0 otherwise.
 */
static int read_hyst_elem_var(int exoid, int time_step, const char *name,
                              int num_vars, char **var_names,
                              ELEM_BLK_STRUCT *eb_ptr, double *ev_tmp)
{
  int j;
  int error;
  int found = 0;

  for (j = 0; j < num_vars; j++)
    {
      if (!strcasecmp(name, var_names[j]))
        {
          /* Found variable so load it into the scratch buffer */
          error = ex_get_elem_var(exoid, time_step, j+1,
                                  eb_ptr->Elem_Blk_Id,
                                  eb_ptr->Num_Elems_In_Block,
                                  ev_tmp);
          EH(error, "ex_get_elem_var");
          found = 1;
        }
    }

  return found;
}

/*****************************************************************
 *
 * set_init_Element_Storage()
 *
 * Like its predecessor init_element_storage, this function actually
 * places initial values for the draining and wetting curves for the
 * TANH_HYST function, according to the request in the material property
 * database cards for the current material.
 *
 * Input:
 *   eb_ptr  element block whose ElemStorage[] entries are filled in.
 *   mn      index into mp_glob[] / elc_glob[] selecting the material.
 *
 * When mp_glob[mn]->SaturationModel is TANH_HYST, the per-integration-point
 * fields p_cap_QP, Sat_QP_tn and sat_curve_type are set either from element
 * variables stored in ExoAuxFile (Guess_Flag 5 or 6) or from
 * mp_glob[mn]->u_saturation[].
 *
 * When elc_glob[mn]->thermal_expansion_model is SHRINKAGE, the per-point
 * field solidified is set to 0.0.
 *
 * NOTE(review): Guess_Flag == 5 is rejected by the first check in each
 * section, so, assuming EH(-1, ...) does not return, only Guess_Flag == 6
 * can reach the "read from file" branches -- confirm that this is intended.
 *
 *****************************************************************/
void set_init_Element_Storage(ELEM_BLK_STRUCT *eb_ptr, int mn)
{
  int ip_total, i, ip;
  double sat_switch = 0.0, pc_switch = 0.0, Draining_curve, *ev_tmp;
  int error, num_dim, num_nodes;
  int num_elem, num_elem_blk, num_node_sets, num_side_sets, time_step;
  float version;                  /* version number of EXODUS II */
  int exoid;                      /* ID of the open EXODUS II file */
  char title[MAX_LINE_LENGTH];    /* title of the EXODUS II database */
  float ret_float;                /* any returned float */
  char ret_char[3];               /* any returned character */
  int num_vars;                   /* number of var_type variables */
  char **var_names = NULL;        /* array containing num_vars variable names */
  char appended_name[MAX_VAR_NAME_LNGTH];

  /* Nothing to do in this section if the model is not hysteretic in nature */
  if (mp_glob[mn]->SaturationModel == TANH_HYST)
    {
      ip_total = eb_ptr->IP_total;
      Draining_curve = mp_glob[mn]->u_saturation[8];

      if (Guess_Flag ==4 || Guess_Flag == 5)
        {
          EH(-1,"Not a smooth restart for hysteretic saturation function. If you really want to do this use read_exoII_file or call us");
        }

      if (Guess_Flag == 5 || Guess_Flag == 6)
        {
          WH(-1,"Initializing Hysteretic Curve values at all Gauss points with read_exoII_file");
          CPU_word_size = sizeof(double);
          IO_word_size  = 0;

          exoid = ex_open(ExoAuxFile, EX_READ, &CPU_word_size, &IO_word_size , &version);
          EH(exoid, "ex_open");

          error = ex_get_init(exoid, title, &num_dim, &num_nodes, &num_elem,
                              &num_elem_blk, &num_node_sets, &num_side_sets);
          EH(error, "ex_get_init for efv or init guess");

          /*
           * Obtain the number of time steps in the exodus file, time_step,
           * We will read only from the last time step
           */
          error = ex_inquire(exoid, EX_INQ_TIME, &time_step, &ret_float, ret_char);
          EH(error, "ex_inquire");

          /*
           * Get the number of element variables in the file, and allocate
           * space for storage of their names.
           */
          error = ex_get_var_param(exoid, "e", &num_vars);
          EH(error, "ex_get_var_param");

          /* First extract all element variable names in exoII database */
          if (num_vars > 0)
            {
              var_names = alloc_VecFixedStrings(num_vars, (MAX_STR_LENGTH+1));
              error = ex_get_var_names(exoid, "e", num_vars, var_names);
              EH(error, "ex_get_var_names");
              for (i = 0; i < num_vars; i++) strip(var_names[i]);
            }
          else
            {
              fprintf(stderr, "Warning: no element variables for saturation stored in exoII input file.\n");
            }

          /*****THIS IS WHERE YOU LOAD THEM UP ******/

          ev_tmp = (double *) smalloc(eb_ptr->Num_Elems_In_Block* sizeof(double));

          /*
           * Each lookup starts from "not found", so a variable missing for
           * one integration point can never silently reuse data read for a
           * different variable.
           */
          for (ip = 0; ip < ip_total; ip++)
            {
              sprintf(appended_name, "sat_curve_type%d", ip );
              if (read_hyst_elem_var(exoid, time_step, appended_name,
                                     num_vars, var_names, eb_ptr, ev_tmp))
                {
                  for (i = 0; i < eb_ptr->Num_Elems_In_Block; i++)
                    {
                      eb_ptr->ElemStorage[i].sat_curve_type[ip] = ev_tmp[i];
                    }
                }
              else
                {
                  EH(-1,"Cannot find an element variable for sat. hysteresis");
                }

              sprintf(appended_name, "sat_switch%d", ip );
              if (read_hyst_elem_var(exoid, time_step, appended_name,
                                     num_vars, var_names, eb_ptr, ev_tmp))
                {
                  for (i = 0; i < eb_ptr->Num_Elems_In_Block; i++)
                    {
                      eb_ptr->ElemStorage[i].Sat_QP_tn[ip] = ev_tmp[i];
                    }
                }
              else
                {
                  EH(-1,"Cannot find an element variable for sat. hysteresis");
                }

              sprintf(appended_name, "pc_switch%d", ip );
              if (read_hyst_elem_var(exoid, time_step, appended_name,
                                     num_vars, var_names, eb_ptr, ev_tmp))
                {
                  for (i = 0; i < eb_ptr->Num_Elems_In_Block; i++)
                    {
                      eb_ptr->ElemStorage[i].p_cap_QP[ip] = ev_tmp[i];
                    }
                }
              else
                {
                  EH(-1,"Cannot find an element variable for sat. hysteresis");
                }
            }

          error = ex_close(exoid);
          safer_free((void **) &var_names);
          free(ev_tmp);
        }
      else /* Initialize as dictated by input cards */
        {
          /*
           * Read the constants from mp_glob[mn], the same material that
           * supplied Draining_curve above, rather than from the global "mp".
           */
          if (Draining_curve == 1.0)
            {
              sat_switch = mp_glob[mn]->u_saturation[0];
              pc_switch = 1.e-12;
            }
          else if (Draining_curve == 0.0)
            {
              double sat_max = mp_glob[mn]->u_saturation[0];
              double sat_min = mp_glob[mn]->u_saturation[4];
              double alpha_w = mp_glob[mn]->u_saturation[3];
              double beta_w  = mp_glob[mn]->u_saturation[2];

              pc_switch = 1.e12*alpha_w;
              sat_switch = sat_max - ( sat_max - sat_min)*0.5*(1.0+tanh( beta_w - alpha_w/pc_switch ) ) ;
            }
          else
            {
              EH(-1,"TANH_HYST must have 1.0 or 0.0 in 9th spot");
            }

          for (i = 0; i < eb_ptr->Num_Elems_In_Block; i++)
            {
              for (ip = 0; ip < ip_total; ip++)
                {
                  eb_ptr->ElemStorage[i].p_cap_QP[ip] = pc_switch;
                  eb_ptr->ElemStorage[i].Sat_QP_tn[ip] = sat_switch;
                  eb_ptr->ElemStorage[i].sat_curve_type[ip] = Draining_curve;
                }
            }
        }
    }

  if (elc_glob[mn]->thermal_expansion_model == SHRINKAGE)
    {
      ip_total = eb_ptr->IP_total;

      if (Guess_Flag ==4 || Guess_Flag == 5)
        {
          EH(-1,"Not a smooth restart for solidification shrinkage model.Use read_exoII_file or call us");
        }
      if (Guess_Flag == 5 || Guess_Flag == 6)
        {
          EH(-1,"Initializing solidified shrinkage model from exoII file not available yet. Use zero");
        }

      /* Load em up as all unsolidified */
      for (ip = 0; ip < ip_total; ip++)
        {
          for (i = 0; i < eb_ptr->Num_Elems_In_Block; i++)
            {
              eb_ptr->ElemStorage[i].solidified[ip] = 0.0;
            }
        }
    }
}
int main(int argc, char **argv) /* * Initial main driver for GOMA. Derived from a (1/93) release of * the rf_salsa program by * * Original Authors: John Shadid (1421) * Scott Hutchinson (1421) * Harry Moffat (1421) * * Date: 12/3/92 * * * Updates and Changes by: * Randy Schunk (9111) * P. A. Sackinger (9111) * R. R. Rao (9111) * R. A. Cairncross (Univ. of Delaware) * Dates: 2/93 - 6/96 * * Modified for continuation * Ian Gates * Dates: 2/98 - 10/98 * Dates: 7/99 - 8/99 * * Last modified: Wed June 26 14:21:35 MST 1994 [email protected] * Hello. * * Note: Many modifications from an early 2/93 pre-release * version of rf_salsa were made by various persons * in order to test ideas about moving/deforming meshes... */ { /* Local Declarations */ double time_start, total_time; /* timing variables */ #ifndef PARALLEL /* struct tm *tm_ptr; additional serial timing variables */ time_t now; #endif int error; int i; int j; char **ptmp; char *yo; struct Command_line_command **clc=NULL; /* point to command line structure */ int nclc = 0; /* number of command line commands */ /********************** BEGIN EXECUTION ***************************************/ #ifdef FP_EXCEPT feenableexcept ((FE_OVERFLOW | FE_DIVBYZERO | FE_INVALID)); #endif /* assume number of commands is less than or equal to the number of * arguments in the command line minus 1 (1st is program name) */ /* * Get the name of the executable, yo */ yo = argv[0]; #ifdef PARALLEL MPI_Init(&argc, &argv); time_start = MPI_Wtime(); #endif /* PARALLEL */ #ifndef PARALLEL (void)time(&now); time_start = (double)now; #endif /* PARALLEL */ time_goma_started = time_start; Argv = argv; Argc = argc; #ifdef PARALLEL /* * Determine the parallel processing status, if any. We need to know * pretty early if we're "one of many" or the only process. 
*/ error = MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc); error = MPI_Comm_rank(MPI_COMM_WORLD, &ProcID); /* * Setup a default Proc_config so we can use utility routines * from Aztec */ AZ_set_proc_config(Proc_Config, MPI_COMM_WORLD); /* set the output limit flag if need be */ if( Num_Proc > DP_PROC_PRINT_LIMIT ) Unlimited_Output = FALSE; #ifdef HAVE_MPE_H error = MPE_Init_log(); #endif /* HAVE_MPE_H */ Dim = 0; /* for any hypercube legacy code... */ #endif /* PARALLEL */ #ifndef PARALLEL Dim = 0; ProcID = 0; Num_Proc = 1; #endif /* PARALLEL */ /* * HKM - Change the ieee exception handling based on the machine and * the level of debugging/speed desired. This call currently causes * core dumps for floating point exceptions. */ handle_ieee(); log_msg("--------------"); log_msg("GOMA begins..."); /* * Some initial stuff that only the master process does. */ if ( ProcID == 0 ) { if (argc > 1) { log_msg("Preprocessing command line options."); clc = (struct Command_line_command **) smalloc( argc * sizeof(struct Command_line_command *)); for (i=0; i<argc; i++) { clc[i] = (struct Command_line_command *) smalloc(sizeof(struct Command_line_command)); clc[i]->type = 0; /* initialize command line structure */ clc[i]->i_val = 0; clc[i]->r_val = 0.; clc[i]->string = (char *) smalloc(MAX_COMMAND_LINE_LENGTH*sizeof(char)); for ( j=0; j<MAX_COMMAND_LINE_LENGTH; j++) { clc[i]->string[j] = '\0'; } #ifdef DEBUG fprintf(stderr, "clc[%d]->string is at 0x%x\n", i, clc[i]->string); fprintf(stderr, "clc[%d] is at 0x%x\n", i, clc[i]); #endif } } strcpy(Input_File, "input"); strcpy(Echo_Input_File , "echo_input"); if (argc > 1) translate_command_line(argc, argv, clc, &nclc); ECHO("OPEN", Echo_Input_File); echo_command_line( argc, argv, Echo_Input_File ); print_code_version(); ptmp = legal_notice; while ( strcmp(*ptmp, LAST_LEGAL_STRING) != 0 ) { fprintf(stderr, "%s", *ptmp++); } } /* * Allocate the uniform problem description structure and * the problem description structures on all processors */ 
error = pd_alloc(); EH(error, "pd_alloc problem"); #ifdef DEBUG fprintf(stderr, "P_%d at barrier after pd_alloc\n", ProcID); #ifdef PARALLEL error = MPI_Barrier(MPI_COMM_WORLD); #endif #endif log_msg("Allocating mp, gn, ..."); error = mp_alloc(); EH(error, "mp_alloc problem"); error = gn_alloc(); EH(error, "gn_alloc problem"); error = ve_alloc(); EH(error, "ve_alloc problem"); error = elc_alloc(); EH(error, "elc_alloc problem"); error = elc_rs_alloc(); EH(error, "elc_alloc problem"); error = cr_alloc(); EH(error, "cr_alloc problem"); error = evp_alloc(); EH(error, "evp_alloc problem"); error = tran_alloc(); EH(error, "tran_alloc problem"); error = eigen_alloc(); EH(error, "eigen_alloc problem"); error = cont_alloc(); EH(error, "cont_alloc problem"); error = loca_alloc(); EH(error, "loca_alloc problem"); error = efv_alloc(); EH(error, "efv_alloc problem"); #ifdef DEBUG fprintf(stderr, "P_%d at barrier before read_input_file()\n", ProcID); #ifdef PARALLEL error = MPI_Barrier(MPI_COMM_WORLD); #endif #endif /* * Read ASCII input file, data files, related exodusII FEM databases. */ if ( ProcID == 0 ) { log_msg("Reading input file ..."); read_input_file(clc, nclc); /* Read ascii input file get file names */ /* update inputed data to account for command line arguments that * might override the input deck... */ log_msg("Overriding any input file specs w/ any command line specs..."); if (argc > 1) apply_command_line(clc, nclc); #ifdef DEBUG DPRINTF(stderr, "apply_command_line() is done.\n"); #endif } /* * The user-defined material properties, etc. available to goma users * mean that some dynamically allocated data needs to be communicated. * * To handle this, sizing information from the input file scan is * broadcast in stages so that the other processors can allocate space * accordingly to hold the data. * * Note: instead of handpacking a data structure, use MPI derived datatypes * to gather and scatter. Pray this is done efficiently. 
Certainly it costs * less from a memory standpoint. */ #ifdef PARALLEL /* * Make sure the input file was successully processed before moving on */ check_parallel_error("Input file error"); /* * This is some sizing information that helps fit a little bit more * onto the ark later on. */ #ifdef DEBUG fprintf(stderr, "P_%d at barrier before noahs_raven()\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif noahs_raven(); #ifdef DEBUG fprintf(stderr, "P_%d at barrier before MPI_Bcast of Noahs_Raven\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif MPI_Bcast(MPI_BOTTOM, 1, Noahs_Raven->new_type, 0, MPI_COMM_WORLD); #ifdef DEBUG fprintf(stderr, "P_%d at barrier after Bcast/before raven_landing()\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif /* * Get the other processors ready to handle ark data. */ raven_landing(); #ifdef DEBUG fprintf(stderr, "P_%d at barrier before noahs_ark()\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif /* * This is the main body of communicated information, including some * whose sizes were determined because of advanced legwork by the raven. */ noahs_ark(); MPI_Bcast(MPI_BOTTOM, 1, Noahs_Ark->new_type, 0, MPI_COMM_WORLD); /* * Chemkin was initialized on processor zero during the input file * process. Now, distribute it to all processors */ #ifdef USE_CHEMKIN if (Chemkin_Needed) { chemkin_initialize_mp(); } #endif /* * Once the ark has landed, there are additional things that will need to * be sent by dove. Example: BC_Types[]->u-BC arrays. * */ ark_landing(); noahs_dove(); MPI_Bcast(MPI_BOTTOM, 1, Noahs_Dove->new_type, 0, MPI_COMM_WORLD); #endif /* End of ifdef PARALLEL */ /* * We sent the packed line to all processors that contained geometry * creation commands. Now we need to step through it and create * geometry as we go (including possibly reading an ACIS .sat file). 
* */ /* Check to see if BRK File option exists and if so check if file exits */ if (Brk_Flag == 1) { check_for_brkfile(Brk_File); } check_parallel_error("Error encountered in check for brkfile"); /* Now break the exodus files */ if (Num_Proc > 1 && ProcID == 0 && Brk_Flag == 1) { call_brk(); } check_parallel_error("Error in brking exodus files"); MPI_Barrier(MPI_COMM_WORLD); /* * For parallel execution, assume the following variables will be changed * to reflect the multiple file aspect of the problem. * * FEM file = file.exoII --> file_3of15.exoII * * Output EXODUS II file = out.exoII --> out_3of15.exoII * */ /* * Allocate space for structures holding the EXODUS II finite element * database information and for the Distributed Processing information. * * These are mostly skeletons with pointers that get allocated in the * rd_exoII and rd_dpi routines. Remember to free up those arrays first * before freeing the major pointers. */ EXO_ptr = alloc_struct_1(Exo_DB, 1); init_exo_struct(EXO_ptr); DPI_ptr = alloc_struct_1(Dpi, 1); init_dpi_struct(DPI_ptr); log_msg("Reading mesh from EXODUS II file..."); error = read_mesh_exoII(EXO_ptr, DPI_ptr); /* * Missing files on any processor are detected at a lower level * forcing a return to the higher level * rd_exo --> rd_mesh --> main * Shutdown now, if any of the exodus files weren't found */ if (error < 0) { #ifdef PARALLEL MPI_Finalize(); #endif return(-1); } /* * All of the MPI_Type_commit() calls called behind the scenes that build * the dove, ark and raven really allocated memory. Let's free it up now that * the initial information has been communicated. */ #ifdef PARALLEL MPI_Type_free(&(Noahs_Raven->new_type)); MPI_Type_free(&(Noahs_Ark->new_type)); MPI_Type_free(&(Noahs_Dove->new_type)); #endif /* * Setup the rest of the Problem Description structure that depends on * the mesh that was read in from the EXODUS II file... * * Note that memory allocation and some setup has already been performed * in mm_input()... 
*/ error = setup_pd(); EH( error, "Problem setting up Problem_Description."); /* * Let's check to see if we need the large elasto-plastic global tensors * and allocate them if so */ error = evp_tensor_alloc(EXO_ptr); EH( error, "Problems setting up evp tensors"); /* * Now that we know about what kind of problem we're solving and the * mesh information, let's allocate space for elemental assembly structures * */ #ifdef DEBUG DPRINTF(stderr, "About to assembly_alloc()...\n"); #endif log_msg("Assembly allocation..."); error = assembly_alloc(EXO_ptr); EH( error, "Problem from assembly_alloc"); if (Debug_Flag) { DPRINTF(stderr, "%s: setting up EXODUS II output files...\n", yo); } /* * These are not critical - just niceties. Also, they should not overburden * your db with too much of this - they're capped verbiage compliant routines. */ add_qa_stamp(EXO_ptr); add_info_stamp(EXO_ptr); #ifdef DEBUG fprintf(stderr, "added qa and info stamps\n"); #endif /* * If the output EXODUS II database file is different from the input * file, then we'll need to replicate all the basic mesh information. * But, remember that if we're parallel, that the output file names must * be multiplexed first... 
*/ if ( Num_Proc > 1 ) { multiname(ExoFileOut, ProcID, Num_Proc); multiname(Init_GuessFile, ProcID, Num_Proc); if ( strcmp( Soln_OutFile, "" ) != 0 ) { multiname(Soln_OutFile, ProcID, Num_Proc); } if( strcmp( ExoAuxFile, "" ) != 0 ) { multiname(ExoAuxFile, ProcID, Num_Proc); } if( efv->Num_external_field != 0 ) { for( i=0; i<efv->Num_external_field; i++ ) { multiname(efv->file_nm[i], ProcID, Num_Proc); } } } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * Preprocess the exodus mesh * -> Allocate pointers to structures containing element * side bc info, First_Elem_Side_BC_Array, and * element edge info, First_Elem_Edge_BC_Array. * -> Determine Unique_Element_Types[] array */ #ifdef DEBUG fprintf(stderr, "pre_process()...\n"); #endif log_msg("Pre processing of mesh..."); #ifdef PARALLEL error = MPI_Barrier(MPI_COMM_WORLD); #endif pre_process(EXO_ptr); /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * Load up a few key indeces in the bfd prototype basis function structures * and make sure that each active eqn/vbl has a bf[v] that points to the * right bfd[]...needs pre_process to find out the number of unique * element types in the problem. 
*/ #ifdef DEBUG fprintf(stderr, "bf_init()...\n"); #endif log_msg("Basis function initialization..."); error = bf_init(EXO_ptr); EH( error, "Problem from bf_init"); /* * check for parallel errors before continuing */ check_parallel_error("Error encountered in problem setup"); /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * Allocate space for each communication exchange description. */ #ifdef PARALLEL #ifdef DEBUG fprintf(stderr, "P_%d: Parallel cx allocation\n", ProcID); #endif if (DPI_ptr->num_neighbors > 0) { cx = alloc_struct_1(Comm_Ex, DPI_ptr->num_neighbors); Request = alloc_struct_1(MPI_Request, Num_Requests * DPI_ptr->num_neighbors); Status = alloc_struct_1(MPI_Status, Num_Requests * DPI_ptr->num_neighbors); } #endif /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * SET UP THE PROBLEM * * Setup node-based structures * Finalise how boundary conditions are to be handled * Determine what unknowns are at each owned node and then tell * neighboring processors about your nodes * Set up communications pattern for fast unknown updates between * processors. */ (void) setup_problem(EXO_ptr, DPI_ptr); /* * check for parallel errors before continuing */ check_parallel_error("Error encountered in problem setup"); /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * CREATE BRK_FILE IF ONE DOES NOT EXIST * * If no Brk_File exists but the option was configured in the input or * optional command we create one now and exit from goma. 
*/ if ( Brk_Flag == 2 ) { write_brk_file(Brk_File, EXO_ptr); exit(0); } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * WRITE OUT INITIAL INFO TO EXODUS FILE */ /* * Only have to initialize the exodus file if we are using different * files for the output versus the input mesh */ if (strcmp(ExoFile, ExoFileOut)) { /* * Temporarily we'll need to renumber the nodes and elements in the * mesh to be 1-based. After writing, return to the 0 based indexing * that is more convenient in C. */ #ifdef DEBUG fprintf(stderr, "1-base; wr_mesh; 0-base\n"); #endif one_base(EXO_ptr); wr_mesh_exo(EXO_ptr, ExoFileOut, 0); zero_base(EXO_ptr); /* * If running on a distributed computer, augment the plain finite * element information of EXODUS with the description of how this * piece fits into the global problem. */ if (Num_Proc > 1) { #ifdef PARALLEL #ifdef DEBUG fprintf(stderr, "P_%d at barrier before wr_dpi()\n", ProcID); fprintf(stderr, "P_%d ExoFileOut = \"%s\"\n", ProcID, ExoFileOut); error = MPI_Barrier(MPI_COMM_WORLD); #endif #endif wr_dpi(DPI_ptr, ExoFileOut, 0); } } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * SOLVE THE PROBLEM */ if (Debug_Flag) { switch (Continuation) { case ALC_ZEROTH: P0PRINTF("%s: continue_problem (zeroth order) ...\n", yo); break; case ALC_FIRST: P0PRINTF("%s: continue_problem (first order) ...\n", yo); break; case HUN_ZEROTH: P0PRINTF("%s: hunt_problem (zeroth order) ...\n", yo); break; case HUN_FIRST: P0PRINTF("%s: hunt_problem (first order) ...\n", yo); break; case LOCA: P0PRINTF("%s: do_loca ...\n", yo); break; default: P0PRINTF("%s: solve_problem...\n", yo); break; } } #ifdef DEBUG switch 
(Continuation) { case ALC_ZEROTH: DPRINTF(stderr, "%s: continue_problem (zeroth order) ...\n", yo); break; case ALC_FIRST: DPRINTF(stderr, "%s: continue_problem (first order) ...\n", yo); break; case HUN_ZEROTH: DPRINTF(stderr, "%s: hunt_problem (zeroth order) ...\n", yo); break; case HUN_FIRST: DPRINTF(stderr, "%s: hunt_problem (first order) ...\n", yo); break; case LOCA: DPRINTF(stderr, "%s: do_loca ...\n", yo); break; default: DPRINTF(stderr, "%s: solve_problem...\n", yo); break; } #endif if( TimeIntegration == TRANSIENT) { Continuation = ALC_NONE; if (Debug_Flag) { P0PRINTF("%s: solve_problem...TRANSIENT superceded Continuation...\n", yo); } #ifdef DEBUG DPRINTF(stderr, "%s: solve_problem...TRANSIENT superceded Continuation...\n", yo); #endif solve_problem(EXO_ptr, DPI_ptr, NULL); } switch (Continuation) { case ALC_ZEROTH: case ALC_FIRST: log_msg("Solving continuation problem"); continue_problem(cx, EXO_ptr, DPI_ptr); break; case HUN_ZEROTH: case HUN_FIRST: log_msg("Solving hunt problem"); hunt_problem(cx, EXO_ptr, DPI_ptr); break; case LOCA: log_msg("Solving continuation problem with LOCA"); error = do_loca(cx, EXO_ptr, DPI_ptr); break; default: log_msg("Solving problem"); if (loca_in->Cont_Alg == LOCA_LSA_ONLY) { error = do_loca(cx, EXO_ptr, DPI_ptr); } else if(TimeIntegration != TRANSIENT) { solve_problem(EXO_ptr, DPI_ptr, NULL); } break; } #ifdef PARALLEL MPI_Barrier(MPI_COMM_WORLD); #endif if (ProcID == 0 && Brk_Flag == 1 && Num_Proc > 1) { fix_output(); } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * PRINT A MESSAGE TO STDOUT SAYING WE ARE DONE */ P0PRINTF("\n-done\n\n"); /***********************************************************************/ /***********************************************************************/ 
/***********************************************************************/ /* * FREE MEMORY ALLOCATED BY THE PROGRAM */ /* * free the element block / element based structures */ free_element_blocks(EXO_ptr); /* * free nodal based structures */ free_nodes(); #ifdef FREE_PROBLEM free_problem ( EXO_ptr, DPI_ptr ); #endif /* * Free command line stuff */ if ( ProcID == 0 ) { if ( argc > 1 ) { for (i=0; i<argc; i++) { #ifdef DEBUG fprintf(stderr, "clc[%d]->string &= 0x%x\n", i, clc[i]->string); fprintf(stderr, "clc[%d] &= 0x%x\n", i, clc[i]); #endif safer_free((void **) &(clc[i]->string)); safer_free((void **) (clc + i)); } safer_free((void **) &clc); } } /* * Free exodus database structures */ free_exo(EXO_ptr); safer_free((void **) &EXO_ptr); if ( Num_Proc > 1 ) { free_dpi(DPI_ptr); } else { free_dpi_uni(DPI_ptr); } safer_free((void **) &DPI_ptr); /* * Remove front scratch file [/tmp/lu.'pid'.0] */ if (Linear_Solver == FRONT) { unlerr = unlink(front_scratch_directory); WH(unlerr, "Unlink problem with front scratch file"); } #ifdef PARALLEL total_time = ( MPI_Wtime() - time_start )/ 60. ; DPRINTF(stderr, "\nProc 0 runtime: %10.2f Minutes.\n\n",total_time); MPI_Finalize(); #endif #ifndef PARALLEL (void)time(&now); total_time = (double)(now) - time_start; fprintf(stderr, "\nProc 0 runtime: %10.2f Minutes.\n\n",total_time/60); #endif fflush(stdout); fflush(stderr); log_msg("GOMA ends normally."); return (0); }
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; const GSVector2i& rtsize = rt->GetSize(); const GSVector2& rtscale = rt->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; GSTexture* rtcopy = NULL; ASSERT(m_dev != NULL); GSDeviceDX* dev = (GSDeviceDX*)m_dev; if(DATE) { if(dev->HasStencil()) { GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = { {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, }; dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); } else { rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); // I'll use VertexTrace when I consider it more trustworthy dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); } } // dev->BeginScene(); // om GSDeviceDX::OMDepthStencilSelector om_dssel; if(context->TEST.ZTE) { om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; } else { om_dssel.ztst = ZTST_ALWAYS; } if(m_fba) { om_dssel.fba = context->FBA.FBA; } GSDeviceDX::OMBlendSelector om_bsel; if(!IsOpaque()) { om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; om_bsel.d = context->ALPHA.D; if(env.PABE.PABE) { if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) { // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader // cannot output anything over 0x80 (== 1.0) blending with 0x80 or 
turning it off gives the same result om_bsel.abe = 0; } else { //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. //ASSERT(0); } } } om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); // vs GSDeviceDX::VSSelector vs_sel; vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; vs_sel.rtcopy = !!rtcopy; // The real GS appears to do no masking based on the Z buffer format and writing larger Z values // than the buffer supports seems to be an error condition on the real GS, causing it to crash. // We are probably receiving bad coordinates from VU1 in these cases. if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) { if(context->ZBUF.PSM == PSM_PSMZ24) { if(m_vt.m_max.p.z > 0xffffff) { ASSERT(m_vt.m_min.p.z > 0xffffff); // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. if (m_vt.m_min.p.z > 0xffffff) { vs_sel.bppz = 1; om_dssel.ztst = ZTST_ALWAYS; } } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { if(m_vt.m_max.p.z > 0xffff) { ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo // Fixme : Same as above, I guess. if (m_vt.m_min.p.z > 0xffff) { vs_sel.bppz = 2; om_dssel.ztst = ZTST_ALWAYS; } } } } GSDeviceDX::VSConstantBuffer vs_cb; float sx = 2.0f * rtscale.x / (rtsize.x << 4); float sy = 2.0f * rtscale.y / (rtsize.y << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, //because DX10 and DX9 have a different pixel center.) // //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
if(rt->LikelyOffset) { // DX9 has pixelcenter set to 0.0, so give it some value here if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; } vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); // gs GSDeviceDX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; gs_sel.prim = m_vt.m_primclass; // ps GSDeviceDX::PSSelector ps_sel; GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; if(DATE) { if(dev->HasStencil()) { om_dssel.date = 1; } else { ps_sel.date = 1 + context->TEST.DATM; } } if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) { ps_sel.colclip = 1; } ps_sel.clr1 = om_bsel.IsCLR1(); ps_sel.fba = context->FBA.FBA; ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; if(UserHacks_AlphaHack) ps_sel.aout = 1; if(PRIM->FGE) { ps_sel.fog = 1; ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; } if(context->TEST.ATE) { ps_sel.atst = context->TEST.ATST; switch(ps_sel.atst) { case ATST_LESS: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); break; case ATST_GREATER: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); break; default: ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; break; } } else { ps_sel.atst = ATST_ALWAYS; } if(tex) { ps_sel.wms = context->CLAMP.WMS; ps_sel.wmt = context->CLAMP.WMT; ps_sel.fmt = tex->m_fmt; ps_sel.aem = env.TEXA.AEM; ps_sel.tfx = context->TEX0.TFX; ps_sel.tcc = context->TEX0.TCC; ps_sel.ltf = m_filter == 2 ? 
m_vt.IsLinear() : m_filter; ps_sel.rt = tex->m_target; int w = tex->m_texture->GetWidth(); int h = tex->m_texture->GetHeight(); int tw = (int)(1 << context->TEX0.TW); int th = (int)(1 << context->TEX0.TH); GSVector4 WH(tw, th, w, h); if(PRIM->FST) { vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); //Maybe better? //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); ps_sel.fst = 1; } ps_cb.WH = WH; ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); GSVector4 clamp(ps_cb.MskFix); GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); ps_cb.MinMax = clamp / WH.xyxy(); ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; ps_ssel.ltf = ps_sel.ltf; } else {
/**
 * Advance the termination state by one iteration and report whether the
 * factorization should stop.
 *
 * The residue is the root-mean-square difference between V and W * H, taken
 * only over the entries of V that are non-zero. When the relative improvement
 * of the residue falls below the tolerance (after the first few iterations),
 * the current W and H are stored so they can be restored on termination.
 *
 * @param W Basis matrix of output; replaced by the stored copy on
 *     termination if a copy is being held.
 * @param H Encoding matrix of output; replaced by the stored copy on
 *     termination if a copy is being held.
 * @return true if the successive-drop limit or the iteration limit has been
 *     reached, false otherwise.
 */
bool IsConverged(arma::mat& W, arma::mat& H)
{
  // Keep the previous residue around before it is recomputed.
  residueOld = residue;

  const arma::mat approximation = W * H;
  const size_t numRows = V->n_rows;
  const size_t numCols = V->n_cols;

  // Accumulate the squared error over the non-zero entries of V.
  double squaredError = 0;
  size_t nonZeroCount = 0;
  for (size_t row = 0; row < numRows; ++row)
  {
    for (size_t col = 0; col < numCols; ++col)
    {
      const double observed = (*V)(row, col);
      if (observed == 0)
        continue;

      const double difference = observed - approximation(row, col);
      squaredError += difference * difference;
      ++nonZeroCount;
    }
  }
  residue = sqrt(squaredError / nonZeroCount);

  ++iteration;

  // Relative improvement too small (only considered after iteration 4).
  const bool stalled =
      (residueOld - residue) / residueOld < tolerance && iteration > 4;

  if (stalled)
  {
    // First of a run of successive drops: snapshot W, H and the residues.
    if (reverseStepCount == 0 && !isCopy)
    {
      isCopy = true;
      this->W = W;
      this->H = H;
      c_index = residue;
      c_indexOld = residueOld;
    }
    ++reverseStepCount;
  }
  else
  {
    // Tolerance satisfied: restart the successive-drop count and discard the
    // snapshot once the residue is at or below the stored minimum.
    reverseStepCount = 0;
    if (isCopy && residue <= c_indexOld)
      isCopy = false;
  }

  const bool finished = (reverseStepCount == reverseStepTolerance) ||
                        (iteration > maxIterations);
  if (!finished)
    return false;

  // A held snapshot represents the minimum-residue point; hand it back.
  if (isCopy)
  {
    W = this->W;
    H = this->H;
    residue = c_index;
  }
  return true;
}
/*
 * ns_data_value_or_zero -- read one entry of the solution vector.
 *
 * Returns x[idx], or 0.0 when idx is negative (the "not found" result of
 * Index_Solution), so a missing unknown contributes nothing instead of
 * reading before the start of the array.
 */
static double
ns_data_value_or_zero(const double x[], const int idx)
{
  return (idx < 0) ? 0.0 : x[idx];
}

/*
 * ns_data_print -- append the value of a requested quantity at every node of
 *                  a node set to a text file.
 *
 * p              post-processing request: quantity, element block id, node
 *                set id, species number, output file name, quantity name,
 *                format flag and first-time flag. p->mat_num is updated, and
 *                p->first_time is cleared once the header line is written.
 * x              solution vector.
 * exo            mesh database (connectivity is used for "theta" and for
 *                element-interpolated pressure).
 * time_value     current time/iteration, printed or used as abscissa.
 * time_step_size value reported for the "timestepsize" quantity.
 *
 * Always returns 1.
 *
 * Besides unknowns found directly in x, the quantity name selects derived
 * values: "theta", "timestepsize", "cputime", "wallclocktime", "speed",
 * "ac_pres", "light_comp" and "nonvolatile".
 */
int
ns_data_print(pp_Data * p,
              double x[],
              const Exo_DB * exo,
              const double time_value,
              const double time_step_size)
{
  const int quantity = p->data_type;
  int mat_num = p->mat_num;
  const int elemBlock_id = p->elem_blk_id;
  const int node_set_id = p->ns_id;
  const int species_id = p->species_number;
  const char * filenm = p->data_filenm;
  const char * qtity_str = p->data_type_name;
  const char * format_flag = p->format_flag;
  int * first_time = &(p->first_time);
  static int err=0;
  int num_nodes_on_side;
  int ebIndex_first = -1;
  int block_found = 1;          /* cleared when a requested block is absent */
  int local_side[2];
  int side_nodes[3];            /* Assume quad has no more than 3 per side. */
  int elem_list[4], elem_ct=0, face, ielem, node2;
  int local_node[4];
  const int max_elems = (int)(sizeof(elem_list)/sizeof(elem_list[0]));
  int first_elem;
  int node = -1;
  int idx, idy, idz, id_var;
  int iprint;
  int nsp;                      /* node set pointer for this node set */
  dbl x_pos, y_pos, z_pos;
  int j, wspec;
  int doPressure = 0;
#ifdef PARALLEL
  double some_time=0.0;
#endif
  double abscissa=0;
  double ordinate=0;
  double component;
  double n1[3], n2[3];
  double xi[3];

  /*
   * Find an element block that has the desired material id.
   */
  if (elemBlock_id != -1) {
    for (j = 0; j < exo->num_elem_blocks; j++) {
      if (elemBlock_id == exo->eb_id[j]) {
        ebIndex_first = j;
        break;
      }
    }
    if (ebIndex_first == -1) {
      sprintf(err_msg,
              "Can't find an element block with the elem Block id %d\n",
              elemBlock_id);
      if (Num_Proc == 1) {
        EH(-1, err_msg);
      }
      /*
       * The block is not known here (the error above is only raised for
       * single-processor runs). Do not index Matilda[] with -1; leave
       * p->mat_num and pd untouched and skip the per-node output below.
       */
      block_found = 0;
    } else {
      mat_num = Matilda[ebIndex_first];
      p->mat_num = mat_num;
      pd = pd_glob[mat_num];
    }
  } else {
    mat_num = -1;
    p->mat_num = -1;
    pd = pd_glob[0];
  }

  nsp = match_nsid(node_set_id);
  if (nsp != -1) {
    node = Proc_NS_List[Proc_NS_Pointers[nsp]];
  } else {
    sprintf(err_msg, "Node set ID %d not found.", node_set_id);
    if (Num_Proc == 1) EH(-1,err_msg);
  }

  /* first right time stamp or run stamp to separate the sets */
  print_sync_start(FALSE);

  if (*first_time) {
    if (format_flag[0] != '\0') {
      if (ProcID == 0) {
        uf = fopen(filenm,"a");
        if (uf != NULL) {
          fprintf(uf,"# %s %s @ nsid %d node (%d) \n",
                  format_flag, qtity_str, node_set_id, node );
          *first_time = FALSE;
          fclose(uf);
        }
      }
    }
  }

  if (format_flag[0] == '\0') {
    if (ProcID == 0) {
      if ((uf = fopen(filenm,"a")) != NULL) {
        fprintf(uf,"Time/iteration = %e \n", time_value);
        fprintf(uf," %s Node_Set %d Species %d\n",
                qtity_str,node_set_id,species_id);
        fflush(uf);
        fclose(uf);
      }
    }
  }

  if (nsp != -1 && block_found) {
    for (j = 0; j < Proc_NS_Count[nsp]; j++) {
      node = Proc_NS_List[Proc_NS_Pointers[nsp]+j];
      if (node < num_internal_dofs + num_boundary_dofs ) {
        /*
         * Current position of the node: reference coordinate plus mesh
         * displacement when a displacement unknown exists.
         */
        idx = Index_Solution(node, MESH_DISPLACEMENT1, 0, 0, -1);
        if (idx == -1) {
          x_pos = Coor[0][node];
          WH(idx, "Mesh variable not found. May get undeformed coords.");
        } else {
          x_pos = Coor[0][node] + x[idx];
        }
        idy = Index_Solution(node, MESH_DISPLACEMENT2, 0, 0, -1);
        if (idy == -1) {
          y_pos = Coor[1][node];
        } else {
          y_pos = Coor[1][node] + x[idy];
        }
        z_pos = 0.;
        if (pd->Num_Dim == 3) {
          idz = Index_Solution(node, MESH_DISPLACEMENT3, 0, 0, -1);
          if (idz == -1) {
            z_pos = Coor[2][node];
          } else {
            z_pos = Coor[2][node] + x[idz];
          }
        }

        if (quantity == MASS_FRACTION) {
          id_var = Index_Solution(node, quantity, species_id, 0, mat_num);
        } else if (quantity < 0) {
          id_var = -1;
        } else {
          id_var = Index_Solution(node, quantity, 0, 0, mat_num);
        }

        /*
         * In the easy case, the variable can be found somewhere in the
         * big vector of unknowns. But sometimes we want a derived quantity
         * that requires a little more work than an array reference.
         *
         * For now, save the good result if we have it.
         */
        if (id_var != -1) {
          ordinate = x[id_var];
          iprint = 1;
        } else {
          /*
           * If we have an element based interpolation, let's calculate
           * the interpolated value
           */
          if (quantity == PRESSURE) {
            if ((pd->i[PRESSURE] == I_P1) ||
                ( (pd->i[PRESSURE] > I_PQ1) &&
                  (pd->i[PRESSURE] < I_Q2_HVG) )) {
              doPressure = 1;
            }
          }
          iprint = 0;
        }

        /*
         * If the quantity is "theta", an interior angle that only
         * makes sense at a point, in 2D, we'll need to compute it.
         */
        if (strncasecmp(qtity_str, "theta", 5 ) == 0 || doPressure) {
          /*
           * Look for the two sides connected to this node...?
           *
           * Premise:
           *    1. The node appears in only one element(removed-RBS,6/14/06)
           *    2. Exactly two sides emanate from the node.
           *    3. Quadrilateral.
           *
           * Apologies to people who wish to relax premise 1. I know
           * there are some obtuse angles out there that benefit from
           * having more than one element at a vertex. With care, this
           * procedure could be extended to cover that case as well.
           */
          if ( ! exo->node_elem_conn_exists ) {
            EH(-1, "Cannot compute angle without node_elem_conn.");
          }

          elem_list[0] = exo->node_elem_list[exo->node_elem_pntr[node]];
          first_elem = elem_list[0];

          /*
           * Find out where this node appears in the elements local
           * node ordering scheme...
           */
          local_node[0] = in_list(node,
                                  exo->elem_node_pntr[first_elem],
                                  exo->elem_node_pntr[first_elem+1],
                                  exo->elem_node_list);
          EH(local_node[0], "Can not find node in elem node connectivity!?! ");
          local_node[0] -= exo->elem_node_pntr[first_elem];

          /*
           * check for neighbors
           *
           * The element count is restarted for every node; otherwise a node
           * whose first element lies in another material would inherit the
           * previous node's count and could overrun elem_list[].
           */
          elem_ct = 0;
          if (mat_num == find_mat_number(first_elem, exo)) {
            elem_ct = 1;
          } else {
            WH(-1,"block id doesn't match first element");
          }

          /*
           * Neighbors are always looked up through the first element, even
           * when slot 0 of elem_list[] is reused for a neighbor.
           */
          for (face=0 ; face<ei->num_sides ; face++) {
            ielem = exo->elem_elem_list[exo->elem_elem_pntr[first_elem]+face];
            if (ielem != -1) {
              node2 = in_list(node,
                              exo->elem_node_pntr[ielem],
                              exo->elem_node_pntr[ielem+1],
                              exo->elem_node_list);
              if (node2 != -1 &&
                  (mat_num == find_mat_number(ielem, exo)) &&
                  elem_ct < max_elems) {   /* never write past the arrays */
                elem_list[elem_ct] = ielem;
                local_node[elem_ct] = node2;
                local_node[elem_ct] -= exo->elem_node_pntr[ielem];
                elem_ct++;
              }
            }
          }

          /*
           * Note that indeces are zero based!
           */
          ordinate = 0.0;
          for (ielem = 0 ; ielem < elem_ct ; ielem++) {
            if (local_node[ielem] < 0 || local_node[ielem] > 3 ) {
              if (strncasecmp(qtity_str, "theta", 5 ) == 0) {
                EH(-1, "Node out of bounds.");
              }
            }

            /*
             * Now, determine the local name of the sides adjacent to this
             * node...this works for the exo patran convention for quads...
             *
             * Again, local_node and local_side are zero based...
             */
            local_side[0] = (local_node[ielem]+3)%4;
            local_side[1] = local_node[ielem];

            /*
             * With the side names, we can find the normal vector.
             * Again, assume the sides live on the same element.
             */
            load_ei(elem_list[ielem], exo, 0);

            /*
             * We abuse the argument list under the conditions that
             * we're going to do read-only operations and that
             * we're not interested in old time steps, time derivatives
             * etc.
             */
            if (x == x_static) { /* be the least disruptive possible */
              err = load_elem_dofptr(elem_list[ielem], exo, x_static,
                                     x_old_static, xdot_static,
                                     xdot_old_static, x_static, 1);
            } else {
              err = load_elem_dofptr(elem_list[ielem], exo, x, x, x, x, x, 1);
            }

            /*
             * What are the local coordinates of the nodes in a quadrilateral?
             */
            find_nodal_stu(local_node[ielem], ei->ielem_type,
                           xi, xi+1, xi+2);

            err = load_basis_functions(xi, bfd);
            EH( err, "problem from load_basis_functions");

            err = beer_belly();
            EH( err, "beer_belly");

            err = load_fv();
            EH( err, "load_fv");

            err = load_bf_grad();
            EH( err, "load_bf_grad");

            err = load_bf_mesh_derivs();
            EH(err, "load_bf_mesh_derivs");

            if (doPressure) {
              ordinate = fv->P;
              iprint = 1;
            } else {
              /* First, one side... */
              get_side_info(ei->ielem_type, local_side[0]+1,
                            &num_nodes_on_side, side_nodes);
              surface_determinant_and_normal(elem_list[ielem],
                                             exo->elem_node_pntr[elem_list[ielem]],
                                             ei->num_local_nodes,
                                             ei->ielem_dim-1,
                                             local_side[0]+1,
                                             num_nodes_on_side,
                                             side_nodes);
              n1[0] = fv->snormal[0];
              n1[1] = fv->snormal[1];

              /* Second, the adjacent side of the quad... */
              get_side_info(ei->ielem_type, local_side[1]+1,
                            &num_nodes_on_side, side_nodes);
              surface_determinant_and_normal(elem_list[ielem],
                                             exo->elem_node_pntr[elem_list[ielem]],
                                             ei->num_local_nodes,
                                             ei->ielem_dim-1,
                                             local_side[1]+1,
                                             num_nodes_on_side,
                                             side_nodes);
              n2[0] = fv->snormal[0];
              n2[1] = fv->snormal[1];

              /* cos (theta) = n1.n2 / ||n1|| ||n2|| */
              ordinate += 180. - (180./M_PI)*acos((n1[0]*n2[0] + n1[1]*n2[1])/
                                                  (sqrt(n1[0]*n1[0]+n1[1]*n1[1])*
                                                   sqrt(n2[0]*n2[0]+n2[1]*n2[1])));
            }
            iprint = 1;
          } /*ielem loop */
        } else if (strncasecmp(qtity_str, "timestepsize", 12 ) == 0 ) {
          ordinate = time_step_size;
          iprint = 1;
        } else if (strncasecmp(qtity_str, "cputime", 7 ) == 0 ) {
          ordinate = ut();
          iprint = 1;
        } else if (strncasecmp(qtity_str, "wallclocktime", 13 ) == 0 ) {
          /* Get these from extern via main...*/
#ifdef PARALLEL
          some_time = MPI_Wtime();
          ordinate = some_time - time_goma_started;
#endif
#ifndef PARALLEL
          time_t now=0;
          (void)time(&now);
          ordinate = (double)(now) - time_goma_started;
#endif
          iprint = 1;
        } else if (strncasecmp(qtity_str, "speed", 5 ) == 0 ) {
          /*
           * Components that do not exist at this node (e.g. the third
           * velocity in a 2D problem) contribute zero.
           */
          id_var = Index_Solution(node, VELOCITY1, 0, 0, mat_num);
          component = ns_data_value_or_zero(x, id_var);
          ordinate = SQUARE(component);
          id_var = Index_Solution(node, VELOCITY2, 0, 0, mat_num);
          component = ns_data_value_or_zero(x, id_var);
          ordinate += SQUARE(component);
          id_var = Index_Solution(node, VELOCITY3, 0, 0, mat_num);
          component = ns_data_value_or_zero(x, id_var);
          ordinate += SQUARE(component);
          ordinate = sqrt(ordinate);
          iprint = 1;
        } else if (strncasecmp(qtity_str, "ac_pres", 7 ) == 0 ) {
          id_var = Index_Solution(node, ACOUS_PREAL, 0, 0, mat_num);
          component = ns_data_value_or_zero(x, id_var);
          ordinate = SQUARE(component);
          id_var = Index_Solution(node, ACOUS_PIMAG, 0, 0, mat_num);
          component = ns_data_value_or_zero(x, id_var);
          ordinate += SQUARE(component);
          ordinate = sqrt(ordinate);
          iprint = 1;
        } else if (strncasecmp(qtity_str, "light_comp", 10 ) == 0 ) {
          id_var = Index_Solution(node, LIGHT_INTP, 0, 0, mat_num);
          ordinate = ns_data_value_or_zero(x, id_var);
          id_var = Index_Solution(node, LIGHT_INTM, 0, 0, mat_num);
          ordinate += ns_data_value_or_zero(x, id_var);
          iprint = 1;
        } else if (strncasecmp(qtity_str, "nonvolatile", 11 ) == 0 ) {
          /*
           * NOTE(review): mp_glob[mat_num] is indexed with -1 when no
           * element block id was given -- confirm callers always supply one
           * for this quantity.
           */
          ordinate = 1.0;
          for (wspec = 0 ; wspec < pd->Num_Species_Eqn ; wspec++) {
            id_var = Index_Solution(node, MASS_FRACTION, wspec, 0, mat_num);
            ordinate -= ns_data_value_or_zero(x, id_var)*
                        mp_glob[mat_num]->molar_volume[wspec];
          }
          iprint = 1;
        } else {
          WH(id_var, "Requested print variable is not defined at all nodes. May get 0.");
          if (id_var == -1) iprint = 0;
        }

        /*
         * Append this node's result: either "x y z value" or, when a format
         * flag is given, "abscissa<TAB>value" with the abscissa chosen by
         * the first letter of the flag (t, x, y or z; anything else gives 0).
         */
        if ((uf=fopen(filenm,"a")) != NULL) {
          if (format_flag[0] == '\0') {
            if (iprint) {
              fprintf(uf," %e %e %e %e \n", x_pos, y_pos, z_pos, ordinate);
            }
          } else {
            if (strncasecmp(format_flag, "t", 1) == 0 ) {
              abscissa = time_value;
            } else if (strncasecmp(format_flag, "x", 1) == 0 ) {
              abscissa = x_pos;
            } else if (strncasecmp(format_flag, "y", 1) == 0 ) {
              abscissa = y_pos;
            } else if (strncasecmp(format_flag, "z", 1) == 0 ) {
              abscissa = z_pos;
            } else {
              abscissa = 0;
            }
            if (iprint) {
              fprintf(uf, "%.16g\t%.16g\n", abscissa, ordinate);
            }
          }
          fclose(uf);
        }
      }
    }
  }

  print_sync_end(FALSE);

  return(1);
} /* END of routine ns_data_print */
int ns_data_sens_print(const struct Post_Processing_Data_Sens *p, const double x[], /* solution vector */ double **x_sens, /* solution sensitivity vector */ const double time_value) /* current time */ { const int node_set_id = p->ns_id; const int quantity = p->data_type; const int mat_id = p->mat_id; const int species_id = p->species_number; const int sens_type = p->sens_type; const int sens_id = p->sens_id; const int sens_flt = p->sens_flt; const int sens_flt2 = p->sens_flt2; const char *filenm = p->data_filenm; const char *qtity_str = p->data_type_name; const int sens_ct = p->vector_id; int node; int idx, idy, idz, id_var; int nsp; /* node set pointer for this node set */ dbl x_pos, y_pos, z_pos; int j; nsp = match_nsid(node_set_id); if( nsp != -1 ) { node = Proc_NS_List[Proc_NS_Pointers[nsp]]; } else { sprintf(err_msg, "Node set ID %d not found.", node_set_id); if( Num_Proc == 1 ) EH(-1,err_msg); } /* first right time stamp or run stamp to separate the sets */ print_sync_start(TRUE); if (ProcID == 0 && (uf=fopen(filenm,"a")) != NULL) { fprintf(uf,"Time/iteration = %e \n", time_value); fprintf(uf," %s Node_Set %d \n", qtity_str,node_set_id); if(sens_type == 1) { fprintf(uf,"Sensitivity_type BC ID %d Float %d\n",sens_id,sens_flt); } else if(sens_type == 2) { fprintf(uf,"Sensitivity_type MT NO %d Prop. %d\n",sens_id,sens_flt); } else if(sens_type == 3) { fprintf(uf,"Sensitivity_type AC ID %d Float %d\n",sens_id,sens_flt); } else if(sens_type == 4) { fprintf(uf,"Sensitivity_type UM NO %d Prop. 
%d %d\n",sens_id,sens_flt,sens_flt2); } else if(sens_type == 5) { fprintf(uf,"Sensitivity_type UF ID %d Float %d\n",sens_id,sens_flt); } else if(sens_type == 6) { fprintf(uf,"Sensitivity_type AN ID %d Float %d\n",sens_id,sens_flt); } fflush(uf); fclose(uf); } if( nsp != -1 ) { for (j = 0; j < Proc_NS_Count[nsp]; j++) { node = Proc_NS_List[Proc_NS_Pointers[nsp]+j]; if( node < num_internal_dofs + num_boundary_dofs ) { idx = Index_Solution (node, MESH_DISPLACEMENT1, 0, 0, -1); if (idx == -1) { x_pos = Coor[0][node]; WH(idx, "Mesh variable not found. May get undeformed coords."); } else { x_pos = Coor[0][node] + x[idx]; } idy = Index_Solution (node, MESH_DISPLACEMENT2, 0, 0, -1); if (idy == -1) { y_pos = Coor[1][node]; } else { y_pos = Coor[1][node] + x[idy]; } z_pos = 0.; if (pd->Num_Dim == 3) { idz = Index_Solution(node, MESH_DISPLACEMENT3, 0, 0, -1); if (idz == -1) { z_pos = Coor[2][node]; } else { z_pos = Coor[2][node] + x[idz]; } } if(quantity == MASS_FRACTION) { id_var = Index_Solution(node, quantity, species_id, 0, mat_id); } else { id_var = Index_Solution(node, quantity, 0, 0, mat_id); } WH(id_var, "Requested print variable is not defined at all nodes. May get 0."); if ((uf=fopen(filenm,"a")) != NULL) { if (id_var != -1) { fprintf(uf, " %e %e %e %e \n", x_pos, y_pos, z_pos, x_sens[sens_ct][id_var]); } fclose(uf); } } } } print_sync_end(TRUE); return(1); } /* END of routine ns_data_sens_print */