/*
** Name: CS_cp_mbx_create	- mailbox creation and initialization
**
** Description:
**	This subroutine is called from CSinitiate().
**
**	It performs three steps:
**	    a) creates a mailbox whose logical name is II_CPRES_xx_pid, where
**	       xx is the (optional) installation code and pid is the
**	       process ID in hex;
**	    b) queues a read on the mailbox via cpres_q_read_io() (the
**	       completion routine for that read is CS_cp_mbx_complete);
**	    c) marks the mailbox for deletion, so it goes away when the
**	       process dies.
**
** Inputs:
**	num_sessions		- Number of sessions for the process.
**				  Not referenced by this routine; the
**				  mailbox buffer quota is passed as 0 so the
**				  DEFMBXBUFQUO default applies (see History).
**
** Outputs:
**	sys_err			- reason for error; sys_err->error receives
**				  the sys$crembx status on failure.
**
** Returns:
**	OK
**	E_CS00F8_CSMBXCRE_NOPRIV  - sys$crembx returned SS$_NOPRIV
**	E_CS00F7_CSMBXCRE_ERROR	  - any other sys$crembx status that is
**				    not SS$_NORMAL
**
** Side Effects:
**	Sets cpres_mbx_chan to the mailbox's channel
**	Defines the system-wide logical name II_CPRES_xx_pid
**	Resets cpres_channels_sem and cpres_num_channels_assigned to 0
**
** History:
**	Summer, 1992 (bryanp)
**	    Working on the new portable logging and locking system.
**	08-Nov-2007 (jonj)
**	    Use of "num_sessions" is totally bogus. CS_cp_mbx_create() is called
**	    before the startup parms are determined from config.dat (where we'd
**	    find "connect_limit"), so SCD hard-codes num_sessions = 32, resulting
**	    in CS_CP_MIN_MSGS == 5 always being used, which is way too small.
**	    Instead, default to the (configurable) VMS sysgen parameter
**	    DEFMBXBUFQUO.
**	    Also, create mailbox as read-only. Writers will assign write-only
**	    channels.
*/
STATUS
CS_cp_mbx_create(i4 num_sessions, CL_ERR_DESC *sys_err)
{
    struct dsc$descriptor_s	mbx_desc;
    char			logical_name[100];
    char			*install_code;
    PID				my_pid;
    i4				sts;

    CL_CLEAR_ERR(sys_err);

    /*
    ** Compose the logical name; the installation code is included
    ** only when II_INSTALLATION is defined and non-empty.
    */
    PCpid(&my_pid);
    NMgtAt("II_INSTALLATION", &install_code);

    if (!install_code || !*install_code)
	STprintf(logical_name, "II_CPRES_%x", (i4)my_pid);
    else
	STprintf(logical_name, "II_CPRES_%s_%x", install_code, (i4)my_pid);

    /* Fixed-length text string descriptor over the name just built. */
    mbx_desc.dsc$b_class = DSC$K_CLASS_S;
    mbx_desc.dsc$b_dtype = DSC$K_DTYPE_T;
    mbx_desc.dsc$w_length = STlength(logical_name);
    mbx_desc.dsc$a_pointer = logical_name;

    sts = sys$crembx(
		1,				/* Mailbox is "permanent" */
		&cpres_mbx_chan,		/* where to put channel */
		(i4)sizeof(CS_CP_WAKEUP_MSG),	/* maximum message size */
		0,				/* buffer quota (DEFMBXBUFQUO) */
		0,				/* prot mask = all priv */
		PSL$C_USER,			/* acmode */
		&mbx_desc,			/* logical name descriptor */
		CMB$M_READONLY,			/* flags */
		0);				/* nullarg */

    if (sts != SS$_NORMAL)
    {
	/* Hand the raw VMS status back and pick the matching CS error. */
	sys_err->error = sts;
	return ((sts == SS$_NOPRIV) ? E_CS00F8_CSMBXCRE_NOPRIV
				    : E_CS00F7_CSMBXCRE_ERROR);
    }

    /* Hang a read */
    cpres_q_read_io();

    /* Mark for deletion, so it disappears when we exit. */
    sys$delmbx(cpres_mbx_chan);

    cpres_num_channels_assigned = 0;
    cpres_channels_sem = 0;

    return (OK);
}
/*{ ** Name: II_GetIngresErrorMessage - Lookup error message from message file ** ** Description: ** Return the text associated with the Ingres status. ** The function returns the text associated with the Ingres status including ** the decoded facility representation of the status in the text. ** ** Inputs: ** ingres_status Retrieve the text message associated with this ** value. ** message_length Length of the buffer area to write the message ** including additional space for the end of string ** marker. ** message Pointer to the memory area where the retrieved ** message is to be written. ** ** Outputs: ** message_length If the function returns II_SUCCESSFUL the ** message_length field contains the number of ** storage octets in the message area that were used. ** message The message text associated with the Ingres ** status. ** ** Returns: ** II_SUCCESSFUL The message was retrieved successfully ** II_NULL_PARAM The parameters passed are invalid; no output ** fields are updated. ** II_INSUFFICIENT_BUFFER A larger message area is required. ** II_GET_MESSAGE_ERROR An error orccured during the execution of this ** function. ** ** Example: ** # include "tngapi.h" ** ** II_INT4 status; ** II_INT4 ingres_status = 0xC0132; ** II_INT4 message_length = 20; ** II_CHAR message[MAX_IIERR_LEN + 1; ** ** status = II_GetIngresMessage( ingres_status, &message_length, message ); ** if (status != II_SUCCESSFUL) ** { ** if (status == II_INSUFFICIENT_BUFFER) ** { ** printf( "A larger buffer is required\n" ); ** } ** } ** ** message_length = MAX_IIERR_LEN + 1; ** status = II_GetIngresMessage( ingres_status, &message_length, message ); ** if (status != II_SUCCESSFUL) ** { ** printf( "II_GetIngresMessage failed\n" ); ** } ** ** History: ** 20-Jan-2005 (fanra01) ** Created. 
}*/ int II_GetIngresMessage( unsigned int ingres_status, int* message_length, char* message ) { i4 status; CL_ERR_DESC clerror; i4 flags; i4 msglen = 0; CL_CLEAR_ERR( &clerror ); while(TRUE) { /* ** If any of the parameters are not valid, stop. */ if ((message == NULL) || (message_length == NULL) || (*message_length == 0)) { status = II_NULL_PARAM; break; } /* ** Call ERslookup to get the message text from the message file. ** Let the value of flags be: ** ~ER_TEXTONLY is not set, the error is decoded to include ** the error identifier. ** ER_NOPARAM there are no parameter values provided to be ** entered into the message string. ** Let the value of language be: ** -1 use the default language. */ flags = ER_NOPARAM; status = ERslookup( ingres_status, NULL, flags, NULL, message, *message_length, -1, &msglen, &clerror, 0, NULL ); switch(status) { case OK: *message_length = STlength( message ); status = II_SUCCESSFUL; break; case ER_TOOSMALL: status = II_INSUFFICIENT_BUFFER; break; default: status = II_GET_MESSAGE_ERROR; break; } break; } return(status); }
/*
** Name: gcn_request
**
** Description:
**	Issues a synchronous GCA_REQUEST to the partner "/IINMSVR",
**	prefixed with "<gcn_host>::" when a host is supplied.  When a host
**	was given and the partner's protocol level is below
**	GCA_PROTOCOL_LEVEL_50, the association is released and
**	E_GC000A_INT_PROT_LVL is reported through gcn_checkerr().
**
** Inputs:
**	gcn_host	Host name, or NULL for no host prefix.
**
** Outputs:
**	assoc_no	Association id from the request (set on success only).
**	protocol	Peer protocol level from the request (set on
**			success only).
**
** Returns:
**	OK
**	E_GC0013_ASSFL_MEM	Target name buffer could not be allocated.
**	other			Status left by gca_call()/gcn_checkerr().
*/
STATUS
gcn_request( char *gcn_host, i4 *assoc_no, i4 *protocol )
{
    GCA_RQ_PARMS	rq;
    STATUS		rc = OK;
    char		stack_buf[ GC_HOSTNAME_MAX + 12 ];
    char		*partner;
    i4			need;

    /*
    ** Size the partner name: 10 bytes for the fixed part, plus the
    ** host name and its "::" separator when a host is given.
    */
    need = 10;
    if ( gcn_host )
	need += STlength( gcn_host ) + 2;

    /* Use the stack buffer when it is big enough, else allocate. */
    if ( need <= sizeof( stack_buf ) )
	partner = stack_buf;
    else
	partner = (char *)MEreqmem( 0, need, FALSE, NULL );

    if ( ! partner )
	return( E_GC0013_ASSFL_MEM );

    if ( gcn_host )
	STprintf( partner, "%s%s/IINMSVR", gcn_host, "::" );
    else
	STprintf( partner, "%s%s/IINMSVR", "", "" );

    /*
    ** Prepare for and make the GCA_REQUEST service call.
    */
    MEfill( sizeof( rq ), '\0', (PTR)&rq );
    rq.gca_peer_protocol = GCA_PROTOCOL_LEVEL_63;
    rq.gca_partner_name = partner;

    gca_call( &gcn_gca_cb, GCA_REQUEST, (GCA_PARMLIST *)&rq,
	      GCA_SYNC_FLAG, NULL, GCN_RCV_TIMEOUT, &rc );

    gcn_checkerr( "GCA_REQUEST", &rc, rq.gca_status, &rq.gca_os_status );

    if ( rc == OK )
    {
	if ( gcn_host  &&  rq.gca_peer_protocol < GCA_PROTOCOL_LEVEL_50 )
	{
	    /* Remote partner's protocol level is too low: drop it. */
	    gcn_release( rq.gca_assoc_id );
	    CL_CLEAR_ERR( &rq.gca_os_status );
	    gcn_checkerr( "GCA_REQUEST", &rc,
			  E_GC000A_INT_PROT_LVL, &rq.gca_os_status );
	}
	else
	{
	    *assoc_no = rq.gca_assoc_id;
	    *protocol = rq.gca_peer_protocol;
	}
    }

    /* Release the name buffer if it came from MEreqmem(). */
    if ( partner != stack_buf )
	MEfree( (PTR)partner );

    return( rc );
}
/*{
** Name: ERlog  - Send message to the error logger.
**
** Description:
**      Appends a message, followed by a newline, to the system specific
**      error logger (currently the error log file "errlog.log" in the
**      location resolved by NMloc(LOG, ...)).  The file is opened on the
**      first call and the descriptor is kept for later calls.
**
** Inputs:
**      message                         Address of buffer containing the message.
**      msg_length                      Length of the message.
**
** Outputs:
**      err_code                        Operating system error code.
**      Returns:
**          OK
**          ER_BADOPEN                  (listed historically; the open
**                                      failures below return ER_BADSEND)
**          ER_BADSEND                  Log file could not be located,
**                                      opened or written.
**          ER_BADPARAM                 message is null or msg_length is 0.
**      Exceptions:
**          none
**
** Side Effects:
**          Keeps the log file descriptor in a function-static variable.
**
** History:
**      29-sep-1992 (pholman)
**          First created, taken from original (6.4) version of ERsend
**          CL Committee approved, 18-Sep-1992
*/
STATUS
ERlog(char *message, i4 msg_length, CL_ERR_DESC *err_code)
{
    /* -2: open not yet attempted; -1: locate/open failed; else the fd */
    static int	log_fd = -2;
    LOCATION	log_loc;
    char	*log_path;
    int		write_failed;
#ifdef xCL_074_WRITEV_READV_EXISTS
    char	newline = '\n';
    struct iovec vec[2];
    i4		nvec = 2;
#endif

    /* Check for bad parameters. */
    CL_CLEAR_ERR( err_code );

    if (!message || !msg_length)
	return (ER_BADPARAM);

    /* First call: locate and open the log file for appending. */
    if (log_fd == -2)
    {
	if (NMloc(LOG, FILENAME, "errlog.log", &log_loc) != OK)
	{
	    log_fd = -1;
	    return(ER_BADSEND);
	}

	LOtos(&log_loc, &log_path);
	log_fd = open(log_path, O_WRONLY|O_CREAT|O_APPEND, 0666);

	if (log_fd == -1)
	{
	    SETCLERR(err_code, 0, ER_open);
	    return(ER_BADSEND);
	}
    }

#ifdef xCL_074_WRITEV_READV_EXISTS
    /* Message and trailing newline go out in a single writev(). */
    vec[0].iov_base = (caddr_t)message;
    vec[0].iov_len = msg_length;
    vec[1].iov_base = (caddr_t)&newline;
    vec[1].iov_len = 1;

    write_failed = (writev(log_fd, vec, nvec) != (msg_length + 1));
#else
    write_failed = (write(log_fd, message, msg_length) != msg_length ||
		    write(log_fd, "\n", 1) != 1);
#endif

    if (write_failed)
    {
	SETCLERR(err_code, 0, ER_write);
	return(ER_BADSEND);
    }

    return(OK);
}
/*{
** Name: TMperfstat()	- Return performance statistics available from the OS.
**
** Description:
**	Returns the performance statistics available from the OS for
**	the current process.  As best as is possible, per platform,
**	TMperfstat() provides a snapshot of all performance statistics
**	available and returns them by filling in the TM_PERFSTAT structure
**	passed in by the caller.  Implementations of this function attempt to
**	collect the data with the minimum possible effect on the state being
**	measured (i.e. a single system call rather than multiple system
**	calls where possible).
**
**	The members of the TM_PERFSTAT structure are public and are defined in
**	<tm.h>; see <tm.h> for a description of the current members.
**
**	Some statistics may not be available on some operating systems; if
**	a statistic is not available then TMperfstat() will initialize the
**	field to the value "-1".  In the case of SYSTIME structure members
**	one must test both members of the structure for -1.
**
**	It is expected that over time new CL proposals will add members to
**	the TM_PERFSTAT structure as new OS performance statistics become
**	available.  Clients which access the structure members by name,
**	and do not make structure order assumptions, will be upward compatible
**	with future structure definition changes.
**
**	This implementation asks wnt_perfstat() for the statistics and, when
**	that does not return OK, calls default_perfstat() on the same
**	structure; the wnt_perfstat() status is what the caller receives.
**
** Inputs:
**	stat				pointer to a TM_PERFSTAT structure.
**
** Outputs:
**	stat				TM_PERFSTAT structure is filled in by
**					this routine with available OS
**					statistics.  Unavailable fields will be
**					set to -1.
**	sys_err				cleared on entry, then passed to the
**					helper routines.
**
**	Returns:
**	    OK - success.
**	    TM_NOSUPPORT - no support for gathering system statistics.
**
**	Exceptions:
**	    none
**
** Side Effects:
**	    none
**
** History:
**      26-jul-93 (mikem)
**         Created.
**	29-aug-1995 (shero03)
**	    Obtain statistics from NT.
*/
STATUS
TMperfstat(
TM_PERFSTAT	*stat,
CL_SYS_ERR	*sys_err)
{
    STATUS	rc;

    CL_CLEAR_ERR(sys_err);

    rc = wnt_perfstat(stat, sys_err);
    if (rc == OK)
	return(rc);

    /* NT statistics unavailable: fall back, but report the NT status. */
    (void) default_perfstat(stat, sys_err);

    return(rc);
}
/*{ ** Name: LKcreate_list - Create a lock list. ** ** Description: ** This function associates a unique identifier with a list that can ** contain a list of locks. Deadlock detection is performed between ** lists, as this is the way of identifying different threads of ** execution. A lock list can be related to another list by passing ** the lock list identifier of the related list. The related list ** cannot be released until all dependent lists have been released. ** This mechanism can be used to associate locks in different lists ** with the same thread. This works if the same lock does not appear ** in the related lock list and the created lock list. Only one ** level of lock lists are supported currently, this means that ** a related lock list cannot be related to another list. ** ** Using the LK_RECOVER flag an existing lock list can be searched for ** and it's identifer returned. Using the LK_ASSIGN flag a unique number ** will be supplied for you and a lock identifer will be returned. ** ** If the LK_SHARED flag is specified, then the lock list will be ** shareable by other processes. This means that other LK clients may ** connect to the same lock list and make lock calls on it (release locks ** already held on it, convert them, or request new locks). ** ** If the caller specifies LK_CONNECT, then the request is to connect ** to an already existing shared lock list. The caller should not specify ** the LK_ASSIGN flag, but should pass in the key of the shared list ** in the 'unique_id' argument. A lock list id will be returned to use ** to reference the shared lock list. The lock list attributes (maxlocks, ** related list, interruptable ..) are ignored if LK_CONNECT is specified. ** The lock list attributes are set when the list is actually created. ** ** Note that two processes using a common shared lock list may use ** different lock list id's to reference the same lock list. Lock list ** id's are unique to each connection to a shared list. 
** ** Inputs: ** flags Create list options: ** LK_RECOVER - assume ownership of an ** already existing lock list. This ** is used only by the recovery system. ** LK_NONPROTECT - list holds locks that ** can be released without recovery. ** LK_ASSIGN - assign a unique lock list ** key to the created list. If not ** specified, then the caller supplies ** the key (unique_id argument). ** LK_NOINTERRUPT - lock requests on this ** list are not interruptable. ** LK_MASTER - lock list owned by recovery ** process. ** LK_SHARED - create shared lock list. ** LK_CONNECT - connect to existing shared ** lock list. ** unique_id A unique value used to identify a ** lock domain. ** related_lli The lock list identifier of the list ** that should be related to this list. ** count The maximum number of locks on the ** lock list. ** Outputs: ** lock_list_id The lock list identifier assigned to ** this list. ** Returns: ** OK Successful completion. ** LK_BADPARAM Something wrong with a parameter. ** LK_NOLOCKS No more lock resources available. ** ** Exceptions: ** none ** ** Side Effects: ** none ** ** Implementation Notes: ** ** Shared Lock Lists : Shared Lock Lists are lists that can be referenced ** concurrently by multiple processes. They are identified by a ** llb_status of LLB_SHARED. ** ** When a client creates a Shared Lock List, two lists are actually ** created: ** ** - The actual shared list is created - this is the llb that ** all locks will be held on. All of the information regarding ** the number of locks held, max number of locks, related lists, ** etc - are kept on this lock list. ** ** - A second lock list is created that is used as the creating ** process's handle to the shared lock list. Its status type is ** LLB_PARENT_SHARED and its purpose is mainly to point to the ** shared lock list. No locks are actually held on this list. 
The ** llb_pid, llb_ast, and llb_astp fields are set to the values that ** are specific to the creating process (the shared list cannot hold ** these values since they may be different for each process). The ** llb_shared_llb field holds a pointer to the shared list. ** ** The lock list id of the LLB_PARENT_SHARED list is returned to the ** caller as the lock list id. All lock requests should be made on ** this lock list (the actual id of the shared list is unknown to the ** calling process). ** ** When a client connects to an already existing shared list, an ** LLB_PARENT_SHARED handle is created and pointed at the shared list. ** As in the create shared call, the id returned to the caller is the ** id of the handle lock list. ** ** All routines which recieve requests on a lock ** list of the type LLB_PARENT_SHARED will handle the request as though ** it were made on the shared list. If the caller must be suspended then ** the pid, ast, and ast parm are used from the PARENT_SHARED list. ** ** The llb_connect_count of the shared lock list indicates the number of ** handles that exist to the shared list. Each time an LLB_PARENT_SHARED ** list is deallocated, the reference count of the shared list must ** be decremented. When the last handle is released, the shared list ** is released (and all locks released). ** ** History: ** Summer, 1992 (bryanp) ** Working on the new portable logging and locking system. ** 1-Apr-1993 (daveb) ** Init the llb_sid field so it's always valid. ** 24-may-1993 (bryanp) ** Optimize away PCpid calls by using LK_my_pid global variable. ** 26-jul-1993 (bryanp) ** All lock lists created by the CSP get marked LLB_MASTER_CSP, even ** if the caller forgot to pass in the LK_MASTER_CSP flag. ** Added some casts to pacify compilers. ** 23-aug-1993 (bryanp) ** When adopting ownership of a lock list using the LK_RECOVER flag, ** we must free the new LLB we allocated so that we don't leak. 
** 31-jan-1994 (rogerk) ** The RCP adopts ownership of transactions it needs to recover ** by calling this routine with the LK_RECOVER flag. The routine ** always allocates a new llb, but in the LK_RECOVER mode, it ends ** up looking up the orphaned llb and updating it. The newly ** allocated llb is discarded. Fixed bug in this code which was ** storing the caller's session id into the llb_sid field of the ** about-to-be-discarded llb rather than the target orphan llb. ** 17-nov-1994 (medji01) ** Mutex Granularity Project ** - Changed LK_xmutex() calls to pass the mutex address. ** - Acquire LKD semaphore in LK_create_list() before ** searching active transaction chain for insertion point. ** - Acquire LKD and LLB semaphores in LK_create_list() ** when a process-specific connection to an existing shared ** LLB is made. ** 28-mar-1995 (medji01) ** 64K+ Lock Limit Project ** - Removed references to id_instance in LK_ID. ** - Changed error messages to reference id_id instead ** of id_instance. ** 19-jul-1995 (canor01) ** Mutex Granularity Project ** - Acquire LKD semaphore before calling LK_deallocate_cb() ** 4-sep-1995 (dougb) ** Init new llb_gsearch_count field. ** 12-Dec-1995 (jenjo02) ** Don't acquire LKD semaphore before calling LK_deallocate_cb(); ** new mutex protection added to that function. ** Maintenance of lkd_llb_inuse moved to LK_allocate|deallocate_cb(). ** A host of new queue/list mutexes (mutexi?) to reduce overabuse ** of lkd_mutex. Tried to be more sensible with mutex latching/ ** unlatching, keeping the lock for as short a time as possible. ** Protect lkd_stat.next_id with lkd_mutex to prevent feeble ** but possible concurrent update by multiple processes. ** 10-jan-1996 (canor01) ** Mutex granularization: ** - acquire LKD semaphore before calling LK_allocate_cb() ** 12-Jan-1996 (jenjo02) ** Moved assignment of llb_name to LK_allocate_cb() where ** it's done under the protection of the lkb_mutex. 
** 30-may-1996 (pchang) ** Added LLB as a parameter to the call to LK_allocate_cb() to enable ** the reservation of SBK table resource for use by the recovery ** process. (B76879) ** 11-Oct-1996 (jenjo02) ** When inheriting a lock list using LK_RECOVER, also update the ** pid and sid of its related lock list, if any. ** 28-Feb-2002 (jenjo02) ** LK_MULTITHREAD is now an attribute of a lock list, not just ** a run-time flag. It means a lock list is subject to being ** concurrently used by more than one thread, hence special ** care must be taken to prevent corruption of the list. ** 27-Apr-2007 (jonj) ** Mark shared LLB as MULTITHREAD as well as the sharers. ** 23-Sep-2010 (jonj) B124486 ** If LK_RECOVER_LLID, scrutinize the lock list more ** closely - must look like an LLB and match on id_instance. */ STATUS LKcreate_list( i4 flags, LK_LLID related_lli, LK_UNIQUE *unique_id, LK_LLID *lock_list_id, i4 count, CL_ERR_DESC *sys_err) { LKD *lkd = (LKD *)LGK_base.lgk_lkd_ptr; LLB *related_llb = NULL; LLB *shared_llb = NULL; LLB *llb; LLB *end; LLB *next_llb; LLB *prev_llb; LLB *proc_next_llb; LLB_ID *recov_list_id; LLB *recov_llb; LLB_ID *input_related_lli = (LLB_ID *) &related_lli; STATUS status; STATUS return_status; SIZE_TYPE *lbk_table; SIZE_TYPE llb_offset; SIZE_TYPE end_offset; i4 err_code; STATUS local_status; CL_ERR_DESC local_sys_err; i4 assigned_llbname[2]; LK_WHERE("LK_create_list") CL_CLEAR_ERR(sys_err); /* Must be within range, and not be related to another list. 
*/ lkd->lkd_stat.create_list++; if (input_related_lli->id_id) { if (input_related_lli->id_id == 0 || (i4)input_related_lli->id_id > lkd->lkd_lbk_count) { uleFormat(NULL, E_CL102A_LK_CREATE_BADPARAM, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 2, 0, input_related_lli->id_id, 0, lkd->lkd_lbk_count); return (LK_BADPARAM); } lbk_table = (SIZE_TYPE *)LGK_PTR_FROM_OFFSET(lkd->lkd_lbk_table); related_llb = (LLB *) LGK_PTR_FROM_OFFSET(lbk_table[input_related_lli->id_id]); if (related_llb->llb_type != LLB_TYPE || related_llb->llb_related_llb) { uleFormat(NULL, E_CL102B_LK_CREATE_BADPARAM, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 3, 0, input_related_lli->id_id, 0, related_llb->llb_type, 0, related_llb->llb_id.id_id); return (LK_BADPARAM); } LGK_VERIFY_ADDR( related_llb, sizeof(LLB) ); } if (lock_list_id == (LK_LLID *) NULL) { uleFormat(NULL, E_CL102C_LK_CREATE_BADPARAM, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 0); return(LK_BADPARAM); } /* ** LK_RECOVER_LLID: ** ** The RCP may try to assume ownership of a lock list for which ** it has the lock list id only. */ if ( flags & LK_RECOVER_LLID) { recov_list_id = (LLB_ID *)lock_list_id; if ((i4) recov_list_id->id_id > lkd->lkd_lbk_count) { uleFormat(NULL, E_CL103D_LK_REQUEST_BAD_PARAM, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 2, 0, recov_list_id->id_id, 0, lkd->lkd_lbk_count); return (LK_BADPARAM); } /* ** Change owner of lock list to new process. This is typically ** performed by the recovery process when it requests ** ownership of an orphaned lock list. ** Set the lock list block fields such as PID and SID to the ** new owner. 
*/ lbk_table = (SIZE_TYPE *)LGK_PTR_FROM_OFFSET(lkd->lkd_lbk_table); recov_llb = (LLB *)LGK_PTR_FROM_OFFSET(lbk_table[recov_list_id->id_id]); LGK_VERIFY_ADDR( recov_llb, sizeof(LLB) ); /* Must look like an LLB and match id_instance */ if ( recov_llb->llb_type != LLB_TYPE || recov_llb->llb_id.id_instance != recov_list_id->id_instance ) { uleFormat(NULL, E_CL102B_LK_CREATE_BADPARAM, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 3, 0, recov_list_id->id_id, 0, recov_llb->llb_type, 0, recov_llb->llb_id.id_id); return (LK_BADPARAM); } recov_llb->llb_status |= LLB_RECOVER; recov_llb->llb_status &= ~LLB_NOINTERRUPT; CSget_cpid(&recov_llb->llb_cpid); /* ** If there's a related llb, update its pid and sid as well. ** dmxe_resume()'s about to inherit the related lock list ** and it must reflect the inheriting session's process information. */ if (recov_llb->llb_related_llb) { related_llb = (LLB *)LGK_PTR_FROM_OFFSET(recov_llb->llb_related_llb); STRUCT_ASSIGN_MACRO(recov_llb->llb_cpid, related_llb->llb_cpid); LGK_VERIFY_ADDR( related_llb, sizeof(LLB) ); } return (OK); } /* ** Allocate a lock list block. ** ** This also results in the assignment of a unique llb_name ** and llb_cpid and the initialization of the LLB. */ if ((llb = (LLB *)LK_allocate_cb(LLB_TYPE, (LLB *)NULL)) == 0) { uleFormat(NULL, E_DMA011_LK_NO_LLBS, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 0); return (LK_NOLOCKS); } /* ** If not LK_ASSIGN, then unique id must be specified. Normally ** the id cannot be zero, but if LK_CONNECT then let it be anything, ** the request will fail below if the specified id does not exist. */ if ((flags & LK_ASSIGN) == 0) { if ((unique_id == 0) || ((unique_id->lk_uhigh == 0) && ((flags & LK_CONNECT) == 0))) { (VOID) LK_unmutex(&lkd->lkd_llb_q_mutex); uleFormat(NULL, E_CL1031_LK_BAD_UNIQUE_ID, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 2, 0, unique_id, 0, (unique_id ? 
unique_id->lk_uhigh : -1)); LK_deallocate_cb( llb->llb_type, (PTR)llb, llb ); return (LK_BADPARAM); } /* ** Save the unique id assigned to the allocated LLB - ** we may need it later. */ assigned_llbname[0] = llb->llb_name[0]; assigned_llbname[1] = llb->llb_name[1]; llb->llb_name[0] = unique_id->lk_uhigh; llb->llb_name[1] = unique_id->lk_ulow; } /* ** Find the position in the sorted lock list that this ** lock list block should be placed. The list is sorted ** by unique identifier in descending order. Since unique ** identifiers are normally every increasing numbers, this ** search should terminate on the first comparison. */ if (local_status = LK_mutex(SEM_EXCL, &lkd->lkd_llb_q_mutex)) { uleFormat(NULL, local_status, &local_sys_err, ULE_LOG, NULL, 0, 0, 0, &err_code, 0); status = E_DMA02D_LKCLIST_SYNC_ERROR; STRUCT_ASSIGN_MACRO(local_sys_err, *sys_err); return (LK_NOLOCKS); } end_offset = LGK_OFFSET_FROM_PTR(&lkd->lkd_llb_next); next_llb = (LLB *)LGK_PTR_FROM_OFFSET(lkd->lkd_llb_next); for (llb_offset = lkd->lkd_llb_next; llb_offset != end_offset; llb_offset = next_llb->llb_q_next) { next_llb = (LLB *)LGK_PTR_FROM_OFFSET(llb_offset); if (llb->llb_name[0] > next_llb->llb_name[0] || (llb->llb_name[0] == next_llb->llb_name[0] && llb->llb_name[1] > next_llb->llb_name[1])) { break; } if (llb->llb_name[0] == next_llb->llb_name[0] && llb->llb_name[1] == next_llb->llb_name[1]) { /* Mutex the apparently matching list */ (VOID)LK_mutex(SEM_EXCL, &next_llb->llb_mutex); /* ** Unique id already exists. This is an error unless the 'flag' ** argument is LK_CONNECT or LK_RECOVER. ** ** If the request flag is LK_RECOVER, then this is the recovery ** process requesting an orphaned lock list. ** ** If the request flag is LK_CONNECT then the caller is requesting ** to create a connection to a shared lock list. In this case ** the lock list must be a shared lock list. */ if ((flags & LK_CONNECT) && (next_llb->llb_status & LLB_SHARED)) { /* ** Save pointer to the shared list. 
The llb will be formatted ** below to serve as a reference to this lock list. */ shared_llb = next_llb; LGK_VERIFY_ADDR( shared_llb, sizeof(LLB) ); /* ** The llb_name field was assigned above to the key of the ** shared lock list in order to find the list to connect to. ** Now that it is found, reassign the unique llb_name ** given when the LLB was allocated, then ** research the list for the spot for this key. */ llb->llb_name[0] = assigned_llbname[0]; llb->llb_name[1] = assigned_llbname[1]; llb_offset = lkd->lkd_llb_next; next_llb = (LLB *)LGK_PTR_FROM_OFFSET(llb_offset); next_llb = (LLB *)LGK_PTR_FROM_OFFSET(next_llb->llb_q_prev); continue; } else { (VOID) LK_unmutex(&lkd->lkd_llb_q_mutex); LK_deallocate_cb( llb->llb_type, (PTR)llb, llb ); } if (flags & LK_RECOVER) { /* ** Change owner of lock list to new process. This is typically ** performed by the recovery process when it requests ** ownership of an orphaned lock list. We don't need the new ** lock list block we just allocated, so deallocate it. ** Set the lock list block fields such as PID and SID to the ** new owner. It might be nice to maintain both new and old, ** so you know who the old owner was, but we don't have the ** fields for this in the LLB. */ if ( CXconfig_settings( CX_HAS_MASTER_CSP_ROLE ) ) next_llb->llb_status |= LLB_MASTER_CSP; next_llb->llb_status |= LLB_RECOVER; next_llb->llb_status &= ~LLB_NOINTERRUPT; CSget_cpid(&next_llb->llb_cpid); /* ** If there's a related llb, update its session info as well. ** dmxe_resume()'s about to inherit the related lock list ** and it must reflect the inheriting session's process information. 
*/ if (next_llb->llb_related_llb) { related_llb = (LLB *)LGK_PTR_FROM_OFFSET(next_llb->llb_related_llb); STRUCT_ASSIGN_MACRO(next_llb->llb_cpid, related_llb->llb_cpid); } *lock_list_id = *(LK_LLID *)&next_llb->llb_id; (VOID)LK_unmutex(&next_llb->llb_mutex); return (OK); } uleFormat(NULL, E_CL1032_LK_DUPLICATE_LOCK_ID, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 2, 0, next_llb->llb_name[0], 0, next_llb->llb_name[1]); (VOID)LK_unmutex(&next_llb->llb_mutex); return (LK_BADPARAM); } } /* ** Lock list not found. ** If this is a request to connect to an already existing list, then ** return an error since the lock list did not exist. */ if ((flags & LK_RECOVER) || ((flags & LK_CONNECT) && (shared_llb == NULL))) { (VOID) LK_unmutex(&lkd->lkd_llb_q_mutex); uleFormat(NULL, E_CL1050_LK_LOCKID_NOTFOUND, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 3, 0, llb->llb_name[0], 0, llb->llb_name[1], 0, flags); LK_deallocate_cb( llb->llb_type, (PTR)llb, llb ); return (LK_BADPARAM); } /* ** We are now creating a new lock list. ** LLB has been removed from the free list ** and initialized by LK_allocate_cb(). ** ** Insert it in the active queue before the entry ** pointed to by "next". ** ** llb_cpid was filled in by the LLB allocation code. */ llb->llb_shared_llb = 0; llb->llb_related_llb = 0; llb->llb_max_lkb = lkd->lkd_max_lkb; if (count) { /* Rather than using the default maximum logical locks. */ llb->llb_max_lkb = count; } /* Update the llb_status. */ if (flags & LK_NONPROTECT) { llb->llb_status |= LLB_NONPROTECT; llb->llb_max_lkb = lkd->lkd_max_lkb / 2; } if (flags & LK_MASTER) llb->llb_status |= LLB_MASTER; if (flags & LK_MASTER_CSP) llb->llb_status |= LLB_MASTER_CSP; if (flags & LK_NOINTERRUPT) llb->llb_status |= LLB_NOINTERRUPT; if (flags & LK_MULTITHREAD) llb->llb_status |= LLB_MULTITHREAD; /* ** LLB_MASTER_CSP attribute is now only used by lock lists created ** by the master CSP during recovery. 
*/ if ( CXconfig_settings( CX_HAS_MASTER_CSP_ROLE ) ) llb->llb_status |= LLB_MASTER_CSP; /* ** Now insert LLB into the active queue before the ** entry pointed to by "next_llb": */ llb->llb_q_next = LGK_OFFSET_FROM_PTR(next_llb); llb->llb_q_prev = next_llb->llb_q_prev; prev_llb = (LLB *)LGK_PTR_FROM_OFFSET(next_llb->llb_q_prev); prev_llb->llb_q_next = next_llb->llb_q_prev = LGK_OFFSET_FROM_PTR(llb); if (flags & LK_SHARED) { /* ** This is a request to create a shared lock list. We have just ** allocated the shared lock list, now create a list to use as the ** caller's connection to the shared lock list. ** ** The function of allocating an LLB also assigned a unique ** llb_name, llb_cpid and initialized the remainder. */ return_status = OK; shared_llb = llb; if ((llb = (LLB *)LK_allocate_cb(LLB_TYPE, (LLB *)NULL)) == 0) return_status = LK_NOLOCKS; else { /* Look for spot of new lock list in sorted lock list. */ end_offset = LGK_OFFSET_FROM_PTR(&lkd->lkd_llb_next); for (llb_offset = lkd->lkd_llb_next; llb_offset != end_offset; llb_offset = proc_next_llb->llb_q_next) { proc_next_llb = (LLB *)LGK_PTR_FROM_OFFSET(llb_offset); if (llb->llb_name[0] > proc_next_llb->llb_name[0] || (llb->llb_name[0] == proc_next_llb->llb_name[0] && llb->llb_name[1] > proc_next_llb->llb_name[1])) break; if (llb->llb_name[0] == proc_next_llb->llb_name[0] && llb->llb_name[1] == proc_next_llb->llb_name[1]) { uleFormat(NULL, E_CL1051_LK_DUPLICATE_LOCK_ID, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, (char *)NULL, 0L, (i4 *)NULL, &err_code, 2, 0, llb->llb_name[0], 0, llb->llb_name[1]); return_status = LK_BADPARAM; break; } } } /* ** If an error was encountered, then we have to free the lock list ** already allocated for the shared lock list. 
*/ if (return_status != OK) { next_llb = (LLB *)LGK_PTR_FROM_OFFSET(shared_llb->llb_q_next); prev_llb = (LLB *)LGK_PTR_FROM_OFFSET(shared_llb->llb_q_prev); next_llb->llb_q_prev = shared_llb->llb_q_prev; prev_llb->llb_q_next = shared_llb->llb_q_next; (VOID) LK_unmutex(&lkd->lkd_llb_q_mutex); LK_deallocate_cb( shared_llb->llb_type, (PTR)shared_llb, shared_llb ); return (return_status); } /* ** We are creating a shared lock list. The list in 'shared_llb' which was ** formatted above is the actual shared list. The callers unique ** handle to the list is llb. This list is formatted with ** the information from 'shared_llb' and assigned LLB_PARENT_SHARED status. */ /* Mark LLB as SHARED, leave NONPROTECT up to the caller */ shared_llb->llb_status |= (LLB_SHARED | LLB_MULTITHREAD); shared_llb->llb_connect_count = 0; /* Insert PARENT_SHARED list into the active queue */ llb->llb_q_next = LGK_OFFSET_FROM_PTR(proc_next_llb); llb->llb_q_prev = proc_next_llb->llb_q_prev; prev_llb = (LLB *)LGK_PTR_FROM_OFFSET(proc_next_llb->llb_q_prev); prev_llb->llb_q_next = proc_next_llb->llb_q_prev = LGK_OFFSET_FROM_PTR(llb); /* Fall thru to update the SHARED list */ } /* Release the active LLB queue */ (VOID) LK_unmutex(&lkd->lkd_llb_q_mutex); if ( shared_llb ) { /* ** We are allocating a process-specific connection to a shared lock ** list. Mark the proc llb as a shared reference and point it to the ** real shared lock list. Also increment the reference count. ** ** The shared lock list has been mutexed. */ /* Mark LLB as PARENT_SHARED, NONPROTECT as it will contain no locks */ llb->llb_status |= (LLB_PARENT_SHARED | LLB_NONPROTECT); llb->llb_status &= ~LLB_MULTITHREAD; llb->llb_shared_llb = LGK_OFFSET_FROM_PTR(shared_llb); /* Increment reference count to shared list */ shared_llb->llb_connect_count++; (VOID) LK_unmutex(&shared_llb->llb_mutex); } if (related_llb) { /* Bump related count. 
*/ llb->llb_related_llb = LGK_OFFSET_FROM_PTR(related_llb); (VOID) LK_unmutex(&llb->llb_mutex); (VOID) LK_mutex(SEM_EXCL, &related_llb->llb_mutex); related_llb->llb_related_count++; (VOID) LK_unmutex(&related_llb->llb_mutex); } else (VOID) LK_unmutex(&llb->llb_mutex); /* Return the lock_list_id. */ *lock_list_id = *(LK_LLID *)&llb->llb_id; return (OK); }
/*{ ** Name: DIrename - Renames a file. ** ** Description: ** The DIrename will change the name of a file. ** The file MUST be closed. The file can be renamed ** but the path cannot be changed. A fully qualified ** filename must be provided for old and new names. ** This includes the type qualifier extension. ** ** Inputs: ** di_io_unused UNUSED DI_IO pointer (always set to 0 by caller) ** path Pointer to the path name. ** pathlength Length of path name. ** oldfilename Pointer to old file name. ** oldlength Length of old file name. ** newfilename Pointer to new file name. ** newlength Length of new file name. ** Outputs: ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** DI_BADRNAME Any i/o error during rename. ** DI_BADPARAM Parameter(s) in error. ** DI_DIRNOTFOUND Path not found. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 26-mar-87 (mmm) ** Created new for 6.0. ** 06-feb-89 (mikem) ** Clear the CL_ERR_DESC. ** 15-apr-1992 (bryanp) ** Remove DI_IO argument and no longer support renaming open files. ** 30-nov-1992 (rmuth) ** - Prototype. ** - DIlru error checking ** 17-sep-1994 (nanpr01) ** - Needs to check for interrupted system calls specially for ** SIGUSR2. Curren implementation of 1 more retry is optimistic. ** In lot of UNIX systems, link, unlink, rename cannot be ** interrupted(HP-UX).But Solaris returns EINTR. Bug # 57938. ** 10-oct-1994 (nanpr01) ** - Wrong number of parameter in DIlru_flush. Bug # 64169 ** 20-Feb-1998 (jenjo02) ** DIlru_flush() prototype changed, it now computes the number of ** FDs to close instead of being passed an arbitrary number. ** Cleaned up handling of errno, which will be invalid after calling ** DIlru_flush(). ** 15-Apr-2004 (fanch01) ** Force closing of LRU file descriptors when a rename error is ** is encountered. Only occurs on a rename failure and the only ** file that is closed is the file associated with the error. 
** Relieves problems on filesystems which don't accomodate renaming ** open files. "Interesting" semaphore usage is consistent with other ** DI usage. ** 21-Apr-2004 (schka24) ** retry declaration got misplaced somehow, fix so it compiles. ** 26-Jul-2005 (schka24) ** Don't flush fd's on any random rename failure. Do a better job ** of re-verifying the fd and di-io after locking the fd when we're ** searching for a file-open conflict. ** 30-Sep-2005 (jenjo02) ** htb_fd_list_mutex, fd_mutex are now CS_SYNCH objects. ** 15-Nov-2010 (kschendel) SIR 124685 ** Delete unused variables. */ STATUS DIrename( DI_IO *di_io_unused, char *path, u_i4 pathlength, char *oldfilename, u_i4 oldlength, char *newfilename, u_i4 newlength, CL_ERR_DESC *err_code) { char oldfile[DI_FULL_PATH_MAX]; char newfile[DI_FULL_PATH_MAX]; STATUS ret_val; CL_ERR_DESC local_err; /* unix variables */ int os_ret; /* retry variables */ i4 retry = 0, failflag = 0; /* default returns */ ret_val = OK; if ((pathlength > DI_PATH_MAX) || (pathlength == 0) || (oldlength > DI_FILENAME_MAX) || (oldlength == 0) || (newlength > DI_FILENAME_MAX) || (newlength == 0)) return (DI_BADPARAM); /* get null terminated path and filename for old file */ MEcopy((PTR) path, pathlength, (PTR) oldfile); oldfile[pathlength] = '/'; MEcopy((PTR) oldfilename, oldlength, (PTR) &oldfile[pathlength + 1]); oldfile[pathlength + oldlength + 1] = '\0'; /* get null terminated path and filename for new file */ MEcopy((PTR) path, pathlength, (PTR) newfile); newfile[pathlength] = '/'; MEcopy((PTR) newfilename, newlength, (PTR) &newfile[pathlength + 1]); newfile[pathlength + newlength + 1] = '\0'; do { if (retry > 0 && failflag++ == 0) TRdisplay("%@ DIrename: retry on %t/%t\n", pathlength, path, oldlength, oldfilename); retry = 0; CL_CLEAR_ERR( err_code ); #ifdef xCL_035_RENAME_EXISTS /* Now rename the file. 
*/ while ((os_ret = rename(oldfile, newfile)) == -1) { SETCLERR(err_code, 0, ER_rename); if (err_code->errnum != EINTR) break; } #else /* xCL_035_RENAME_EXISTS */ /* Now rename the file. */ while ((os_ret = link(oldfile, newfile)) == -1) { SETCLERR(err_code, 0, ER_rename); if (err_code->errnum != EINTR) break; } if (os_ret != -1) { while ((os_ret = unlink(oldfile)) == -1) { if (err_code->errnum != EINTR) break; } } #endif /* xCL_035_RENAME_EXISTS */ /* if the rename failed, see if we're holding the file open */ if (os_ret == -1 && htb_initialized) { QUEUE *p, *q, *next; CS_synch_lock(&htb->htb_fd_list_mutex); q = &htb->htb_fd_list; for (p = q->q_prev; p != q; p = next) { DI_FILE_DESC *di_file = (DI_FILE_DESC *) p; DI_IO *di_io = (DI_IO *) di_file->fd_uniq.uniq_di_file; next = p->q_prev; if (di_io != NULL && di_file->fd_state == FD_IN_USE && di_io->io_type == DI_IO_ASCII_ID && pathlength == di_io->io_l_pathname && oldlength == di_io->io_l_filename) { CS_synch_unlock(&htb->htb_fd_list_mutex); CS_synch_lock(&di_file->fd_mutex); /* Make sure it's still the right ** DI_IO and compare the filename */ if ((DI_IO *) di_file->fd_uniq.uniq_di_file == di_io && di_file->fd_state == FD_IN_USE && di_file->fd_unix_fd != -1 && !(di_io->io_open_flags & DI_O_NOT_LRU_MASK) && di_io->io_type == DI_IO_ASCII_ID && pathlength == di_io->io_l_pathname && MEcmp((PTR) di_io->io_pathname, path, pathlength) == 0 && oldlength == di_io->io_l_filename && MEcmp((PTR) di_io->io_filename, oldfilename, oldlength) == 0) { /* have a match, print out stats */ /* try to close it */ CS_synch_unlock(&di_file->fd_mutex); DIlru_close(di_io, &local_err); retry++; } else CS_synch_unlock(&di_file->fd_mutex); CS_synch_lock(&htb->htb_fd_list_mutex); } } CS_synch_unlock(&htb->htb_fd_list_mutex); } } while (retry); if (os_ret == -1) { if ((err_code->errnum == ENOTDIR) || (err_code->errnum == EACCES)) { ret_val = DI_DIRNOTFOUND; } else { ret_val = DI_BADRNAME; } } else CL_CLEAR_ERR( err_code ); return(ret_val); }
/*{
** Name: LGK_initialize()	- initialize the lg/lk shared mem segment.
**
** Description:
**	This routine is called by the LGinitialize or LKinitialize routine.  It
**	assumes that a previous caller has allocated the shared memory segment.
**
**	If it discovers that the shared memory segment has not yet been
**	initialized, it calls the LG and LK initialize-memory routines to do so.
**
**	When attaching to an existing segment the process claims the first
**	vacant slot in mem_pid[] and records "lgk_info" in the mem_info[]
**	entry of that same slot (LGK_base.lgk_pid_slot).
**
** Inputs:
**	flag		- bit mask of:
**			  LOCK_LGK_MEMORY to lock the shared data segment
**			  LGK_IS_CSP if process is CSP process this node.
**			  LOCK_LGK_MUST_ATTACH to refuse to create the
**			  segment if it does not already exist.
**	lgk_info	- descriptive text for this process, copied into
**			  the shared mem_info[] slot when attaching.
**
** Outputs:
**	sys_err		- place for system-specific error information.
**
**	Returns:
**	    OK	- success
**	    !OK - failure (CS*() routine failure, segment not mapped, ...)
**		  Values set by this routine include
**		  E_DMA802_LGKINIT_ERROR, E_DMA435_WRONG_LGKMEM_VERSION,
**		  E_DMA812_LGK_NO_SEGMENT, the status from CXinitialize(),
**		  and FAIL (VMS exit handler declaration failure).
**
**  History:
**	Summer, 1992 (bryanp)
**	    Working on the new portable logging and locking system.
**	19-oct-1992 (bryanp)
**	    Check memory version number when attaching.
**	22-oct-1992 (bryanp)
**	    Change LGLKDATA.MEM to lglkdata.mem.
**      23-Oct-1992 (daveb)
**          name the semaphore too.
**	13-feb-1993 (keving)
**	    Remove support for II_LGK_MEMORY_SIZE. If II_LG_MEMSIZE
**	    is not set then calculate memory size from PM values.
**	24-may-1993 (bryanp)
**	    If the shared memory is the wrong version, don't install the
**	    at_exit handlers (the rundown routines won't be able to interpret
**	    the memory properly).
**	26-jul-1993 (jnash)
**	    Add 'flag' param lock the LGK data segment.
**	20-sep-1993 (bryanp)
**	    In addition to calling PCatexit, call (on VMS) sys$dclexh, since
**		there are some situations (image death and image rundown without
**		process rundown) which are caught neither by PCatexit (since
**		PCexit isn't run) nor by check-dead threads (since process
**		rundown never happened). This fixes a hole where an access-
**		violating ckpdb or auditdb command never got cleaned up.
**	31-jan-1994 (bryanp)
**	    Back out a few "features" which are proving countereffective:
**	    1) Don't bother checking mem_creator_pid to see if the previous
**		creator of the shared memory has died. This was an attempt to
**		gracefully re-use sticky shared memory following a system crash,
**		but it is suspected as being the culprit in a series of system
**		failures by re-initializing the shared memory at inopportune
**		times.
**	    2) Don't complain if the shared memory already exists but is of a
**		different size than you expected. Just go ahead and try to use
**		it anyway.
**	21-feb-1994 (bryanp)
**	    Reverse item (1) of the above 31-jan-1994 change and re-enable the
**		graceful re-use of shared memory. People weren't happy with
**		having to run ipcclean and csinstall all the time.
**	23-may-1994 (bryanp)
**	    On VMS, disable ^Y for LG/LK-aware processes. We don't want to allow
**		^Y because you might interrupt the process right in the middle
**		of an LG or LK operation, while holding the shared memory
**		semaphore, and this would then wedge the whole installation.
**
**      17-May-1994 (daveb) 59127
**          Attach lgk_mem semaphore if we're attaching to the segment.
**      30-jan-1995 (lawst01) bug 61984
**          Use memory needed calculation from the 'lgk_calculate_size'
**          function to determine the size of the shared memory pool for
**          logging and locking. If the II_LG_MEMSIZE variable is specified
**          with a value larger than needed use the supplied value. If
**          lgk_calculate_size is unable to calculate a size then use the
**          magic number of 400000.  In addition issue a warning message
**          and continue executing in the event the number of pages
**          allocated is less than the number requested.
**	24-apr-1997 (nanpr01)
**	    Reinstate Bryanp's change. In the process of fixing bug 61984
**	    by Steve Lawrence and subsequent undo of Steve's fix by Nick
**	    Ireland on 25-jun-96 (nick) caused the if 0 code removed.
**	    Part of the Steve's change was not reinstated such as not returning
**	    the status and exit and continue.
**	    1. Don't complain if the shared memory already exists but is of a
**	       different size than you expected. Just go ahead and try to use
**	       it.
**      18-aug-1998 (hweho01)
**          Reclaim the kernel resource if LG/LK shared memory segment is
**          reinitialized. If the shared segment is re-used (the previous creator
**          of the shared segment has died), the cross-process semaphores get
**          initialized more than once at the same locations. That cause the
**          kernel resource leaks on DG/UX (OS release 4.11MU04). To fix the
**          problem, CS_cp_sem_cleanup() is called to destroy all the
**          semaphores before LG/LK shared segment get recreated.
**          CS_cp_sem_cleanup() is made dependent on xCL_NEED_SEM_CLEANUP and
**          OS_THREADS_USED, it returns immediately for most platforms.
**	27-Mar-2000 (jenjo02)
**	    Added test for crossed thread types, refuse connection
**	    to LGK memory with E_DMA811_LGK_MT_MISMATCH.
**	18-apr-2001 (devjo01)
**	    s103715 (Portable cluster support)
**	    - Add CX mem requirement calculations.
**	    - Add LGK_IS_CSP flag to indicate that LGK memory is being
**	      initialized for a CSP process.
**	    - Add basic CX initialization.
**	19-sep-2002 (devjo01)
**	    If running NUMA clustered allocate memory out of local RAD.
**	30-Apr-2003 (jenjo02)
**	    Rearchitected to silence long-tolerated race conditions.
**	    BUG 110121.
**	27-feb-2004 (devjo01)
**	    Rework allocation of CX shared memory to be compatible
**	    with race condition fix introduced for bug 110121.
**	29-Dec-2008 (jonj)
**	    If lgk_calculate_size() returns FAIL, the total memory
**	    needed exceeds MAX_SIZE_TYPE and we can't continue, but
**	    tell what we can about the needs of the various bits of
**	    memory before quitting.
**	06-Aug-2009 (wanfr01)
**	    Bug 122418 - Return E_DMA812 if LOCK_LGK_MUST_ATTACH is
**	    is passed in and memory segment does not exist
**      20-Nov-2009 (maspa05) bug 122642
**          In order to synchronize creation of UUIDs across servers added
**          a semaphore and a 'last time' variable into LGK memory.
**      14-Dec-2009 (maspa05) bug 122642
**          #ifdef out the above change for Windows. The rest of the change
**          does not apply to Windows so the variables aren't defined.
*/
STATUS
LGK_initialize(
    i4              flag,
    CL_ERR_DESC     *sys_err,
    char            *lgk_info)
{
    PTR         ptr;
    SIZE_TYPE   memleft;
    SIZE_TYPE   size;
    STATUS      ret_val;
    STATUS      mem_exists;
    char        mem_name[15];
    SIZE_TYPE   allocated_pages;
    i4          me_flags;
    i4          me_locked_flag;
    SIZE_TYPE   memory_needed;
    char        *nm_string;
    SIZE_TYPE   pages;
    LGK_MEM     *lgk_mem;
    i4          err_code;
    SIZE_TYPE   min_memory;
    i4          retries;
    i4          i;
    i4          attached;
    PID         *my_pid_slot;
    i4          clustered;
    u_i4        nodes;
    SIZE_TYPE   cxmemreq;
    PTR         pcxmem;
    LGLK_INFO   lgkcount;
    char        instid[4];

    CL_CLEAR_ERR(sys_err);

    /*
    ** if LGK_base is set then this routine has already been called.  It is
    ** set up so that both LGinitialize and LKinitialize calls it, but only
    ** the first call does anything.
    */
    if (LGK_base.lgk_mem_ptr)
        return(OK);

    PCpid( &LGK_my_pid );

    /* An unparsable II_LG_MEMSIZE is treated as "not supplied" */
    memory_needed = 0;
    NMgtAt("II_LG_MEMSIZE", &nm_string);
    if (nm_string && *nm_string)
#if defined(LP64)
        if (CVal8(nm_string, (long*)&memory_needed))
#else
        if (CVal(nm_string, (i4 *)&memory_needed))
#endif /* LP64 */
            memory_needed = 0;

    /* Always calculate memory needed from PM resource settings */
    /* and compare with supplied value, if supplied value is less */
    /* than minimum then use minimum */
    /*
    ** NOTE(review): if lgk_get_counts() fails, lgkcount is left as-is
    ** and is still read below for the clustered case -- confirm that
    ** lgk_get_counts() fills it in on failure.
    */
    min_memory = 0;
    if ( OK == lgk_get_counts(&lgkcount, FALSE))
    {
        if ( lgk_calculate_size(FALSE, &lgkcount, &min_memory) )
        {
            /*
            ** Memory exceeds MAX_SIZE_TYPE, can't continue.
            **
            ** Do calculation again, this time with "wordy"
            ** so user can see allocation bits, then quit.
            */
            lgk_calculate_size(TRUE, &lgkcount, &min_memory);
            return (E_DMA802_LGKINIT_ERROR);
        }
    }
    if (min_memory)
        memory_needed = (memory_needed < min_memory) ? min_memory
                                                     : memory_needed;
    else
        memory_needed = (memory_needed < 400000 ) ? 400000
                                                  : memory_needed;

    clustered = (i4)CXcluster_enabled();
    cxmemreq = 0;
    if ( clustered )
    {
        if ( OK != CXcluster_nodes( &nodes, NULL ) )
            nodes = 0;
        cxmemreq = CXshm_required( 0, nodes, lgkcount.lgk_max_xacts,
                                   lgkcount.lgk_max_locks,
                                   lgkcount.lgk_max_resources );
        if ( MAX_SIZE_TYPE - memory_needed < cxmemreq )
        {
            /*
            ** Memory exceeds MAX_SIZE_TYPE, can't continue.
            **
            ** Do calculation again, this time with "wordy"
            ** so user can see allocation bits, then quit.
            */
            SIprintf("Total LG/LK/CX allocation exceeds max of %lu bytes by %lu\n"
                     "Adjust logging/locking configuration values and try again\n",
                     MAX_SIZE_TYPE,
                     cxmemreq - (MAX_SIZE_TYPE - memory_needed));
            lgk_calculate_size(TRUE, &lgkcount, &min_memory);
            return (E_DMA802_LGKINIT_ERROR);
        }
        memory_needed += cxmemreq;
    }

    /* Round up to whole pages unless the round-up itself would overflow */
    if ( memory_needed < MAX_SIZE_TYPE - ME_MPAGESIZE )
        pages = (memory_needed + ME_MPAGESIZE - 1) / ME_MPAGESIZE;
    else
        pages = memory_needed / ME_MPAGESIZE;

    /*
    ** Lock the LGK segment if requested to do so
    */
    if (flag & LOCK_LGK_MEMORY)
        me_locked_flag = ME_LOCKED_MASK;
    else
        me_locked_flag = 0;

    me_flags = (me_locked_flag | ME_MSHARED_MASK | ME_IO_MASK |
                ME_CREATE_MASK | ME_NOTPERM_MASK | ME_MZERO_MASK);
    if (CXnuma_user_rad())
        me_flags |= ME_LOCAL_RAD;

    STcopy("lglkdata.mem", mem_name);

    /*
    ** In general, we just want to attach to the shared memory and detect if
    ** we are the first process to do so. However, there are ugly race
    ** conditions to consider, as well as complications because the shared
    ** memory may be left around following a system crash.
    **
    ** First we attempt to create the shared memory. Usually it already exists,
    ** so we check for and handle the case of "already exists".
    */

    /*
    ** (jenjo02)
    **
    ** Restructured to better handle all those ugly race conditions
    ** which are easily reproduced by running two scripts, one that
    ** continuously executes "lockstat" while the other is starting
    ** and stopping Ingres.
    **
    ** For example,
    **
    **		lockstat A acquires and init's the memory
    **		RCP attaches to "A" memory
    **		lockstat A terminates normally
    **		lockstat B attaches to "A" memory, sees that
    **		"A"s pid is no longer alive, and
    **		reinitializes the memory, much to
    **		the RCP's chagrin.
    ** or (more commonly)
    **
    **		lockstat A acquires and begins to init the mem
    **		RCP attaches to "A" memory which is
    **		still being zero-filled by lockstat,
    **		checks the version number (zero),
    **		and fails with a E_DMA434 mismatch.
    **
    ** The fix utilizes the mem_ext_sem to synchronize multiple
    ** processes; if the semaphore hasn't been initialized or
    ** if mem_version_no is zero, we'll wait one second and retry,
    ** up to 60 seconds before giving up. This gives the creating
    ** process time to complete initialization of the memory.
    **
    ** Up to LGK_MAX_PIDS are allowed to attach to the shared
    ** memory. When a process attaches it sets its PID in the
    ** first vacant slot in lgk_mem->mem_pid[]; if there are
    ** no vacant slots, the attach is refused. When the process
    ** terminates normally by calling LGK_rundown(), it zeroes
    ** its PID slot.
    **
    ** When attaching to an existing segment, we check if
    ** there are any live processes still using the memory;
    ** if so, we can't destroy it (no matter who created it).
    ** If there are no live processes attached to the memory,
    ** we destroy and reallocate it (based on current config.dat
    ** settings).
    */

    for ( retries = 0; ;retries++ )
    {
        LGK_base.lgk_mem_ptr = (PTR)NULL;

        /* Give up if unable to get memory in one minute */
#if defined(conf_CLUSTER_BUILD)
        if (retries > 1)
#else
        if ( retries )
#endif
        {
            if ( retries < 60 )
                PCsleep(1000);
            else
            {
                /* Another process has it blocked way too long */
                uleFormat(NULL, E_DMA800_LGKINIT_GETMEM, (CL_ERR_DESC *)NULL,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 0);
                /* Unable to attach allocated shared memory segment. */
                return (E_DMA802_LGKINIT_ERROR);
            }
        }

        ret_val = MEget_pages(me_flags, pages, mem_name, (PTR*)&lgk_mem,
                              &allocated_pages, sys_err);

        /* Non-zero create status means "did not create"; remember it */
        if ( (mem_exists = ret_val) != OK )
        {
            if (ret_val == ME_ALREADY_EXISTS)
            {
                /* Segment is there: attach without the create flags */
                ret_val = MEget_pages((me_locked_flag |
                                       ME_MSHARED_MASK | ME_IO_MASK),
                                      pages, mem_name, (PTR*)&lgk_mem,
                                      &allocated_pages, sys_err);
#if defined(conf_CLUSTER_BUILD)
                if (ret_val && !retries)
                    continue;  /* try one more time */
#endif
            }
            if (ret_val)
            {
                uleFormat(NULL, ret_val, sys_err,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 0);
                uleFormat(NULL, E_DMA800_LGKINIT_GETMEM, (CL_ERR_DESC *)NULL,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 0);
                /* Unable to attach allocated shared memory segment. */
                return (E_DMA802_LGKINIT_ERROR);
            }
        }
        else if (flag & LOCK_LGK_MUST_ATTACH)
        {
            /* Do not use the shared segment you just allocated */
            MEfree_pages((PTR)lgk_mem, allocated_pages, sys_err);
            return (E_DMA812_LGK_NO_SEGMENT);
        }

        size = allocated_pages * ME_MPAGESIZE;

        /* Expose this process to the memory */
        LGK_base.lgk_mem_ptr = (PTR)lgk_mem;

        if ( mem_exists )
        {
            /*
            ** Memory exists.
            **
            ** Try to acquire the semaphore. If it's
            ** uninitialized, retry from the top.
            **
            ** If the version is zero, then another
            ** process is initializing the memory;
            ** keep retrying until the version is
            ** filled in.
            **
            */
            ret_val = CSp_semaphore(1, &lgk_mem->mem_ext_sem);
            if ( ret_val )
            {
                if ( ret_val != E_CS000A_NO_SEMAPHORE )
                {
                    uleFormat(NULL, ret_val, sys_err,
                              ULE_LOG, NULL, NULL, 0, NULL, &err_code, 0);
                    ret_val = E_DMA802_LGKINIT_ERROR;
                    break;
                }
                continue;
            }

            /* Retry if still being init'd by another process */
            if ( !lgk_mem->mem_version_no )
            {
                CSv_semaphore(&lgk_mem->mem_ext_sem);
                continue;
            }

            /*
            ** Check pids which appear to be attached to
            ** the memory:
            **
            ** If any process is still alive, then we
            ** assume the memory is consistent and use it.
            **
            ** If a process is now dead, it terminated
            ** without going through LGK_rundown
            ** to zero its PID slot, zero it now.
            **
            ** If there are no live PIDs attached to
            ** the memory, we destroy and recreate it.
            */
            my_pid_slot = (PID*)NULL;
            attached = 0;

            for ( i = 0; i < LGK_MAX_PIDS; i++ )
            {
                if ( lgk_mem->mem_pid[i] &&
                     PCis_alive(lgk_mem->mem_pid[i]) )
                {
                    attached++;
                }
                else
                {
                    /* Vacate the slot */
                    if (lgk_mem->mem_pid[i])
                    {
                        uleFormat(NULL, E_DMA499_DEAD_PROCESS_INFO,
                                  (CL_ERR_DESC *)NULL,
                                  ULE_LOG, NULL, NULL, 0, NULL,
                                  &err_code, 2,
                                  0, lgk_mem->mem_pid[i],
                                  0, lgk_mem->mem_info[i].info_txt);
                    }

                    lgk_mem->mem_pid[i] = (PID)0;
                    lgk_mem->mem_info[i].info_txt[0] = EOS;

                    /* Use first vacant slot for this process */
                    if ( !my_pid_slot )
                    {
                        my_pid_slot = &lgk_mem->mem_pid[i];
                        LGK_base.lgk_pid_slot = i;
                    }
                }
                /* Quit when both questions answered */
                if ( attached && my_pid_slot )
                    break;
            }

            /* If no living pids attached, destroy/reallocate */
            if ( !attached )
            {
                CSv_semaphore(&lgk_mem->mem_ext_sem);
                if ( LGK_destroy(allocated_pages, sys_err) )
                {
                    ret_val = E_DMA802_LGKINIT_ERROR;
                    break;
                }
                continue;
            }

            /* All attached pids alive? */
            if ( !my_pid_slot )
            {
                /* ... then there's no room for this process */
                uleFormat(NULL, E_DMA80A_LGK_ATTACH_LIMIT,
                          (CL_ERR_DESC *)NULL,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 1,
                          0, attached);
                ret_val = E_DMA802_LGKINIT_ERROR;
            }
            else if (lgk_mem->mem_version_no != LGK_MEM_VERSION_CURRENT)
            {
                uleFormat(NULL, E_DMA434_LGK_VERSION_MISMATCH,
                          (CL_ERR_DESC *)NULL,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 2,
                          0, lgk_mem->mem_version_no,
                          0, LGK_MEM_VERSION_CURRENT);
                ret_val = E_DMA435_WRONG_LGKMEM_VERSION;
            }
            /*
            ** Don't allow mixed connections of MT/non-MT processes.
            ** Among other things, the mutexing mechanisms are
            ** incompatible!
            */
            else if ( (CS_is_mt() &&
                       (lgk_mem->mem_status & LGK_IS_MT) == 0) ||
                      (!CS_is_mt() &&
                       lgk_mem->mem_status & LGK_IS_MT) )
            {
                uleFormat(NULL, E_DMA811_LGK_MT_MISMATCH,
                          (CL_ERR_DESC *)NULL,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 2,
                          0, (lgk_mem->mem_status & LGK_IS_MT)
                                ? "OS" : "INTERNAL",
                          0, (CS_is_mt())
                                ? "OS" : "INTERNAL");
                ret_val = E_DMA802_LGKINIT_ERROR;
            }
            else
            {
                /*
                ** CX memory (if any) will lie immediately past LGK header.
                */
                pcxmem = (PTR)(lgk_mem + 1);
                pcxmem = (PTR)ME_ALIGN_MACRO(pcxmem, sizeof(ALIGN_RESTRICT));

                LGK_base.lgk_lkd_ptr =
                    (char *)LGK_base.lgk_mem_ptr + lgk_mem->mem_lkd;
                LGK_base.lgk_lgd_ptr =
                    (char *)LGK_base.lgk_mem_ptr + lgk_mem->mem_lgd;

                /* Stuff our pid in first vacant slot */
                *my_pid_slot = LGK_my_pid;

                /*
                ** Record our info text in the SAME slot as our pid.
                ** The scan index "i" cannot be used here: the scan
                ** stops at whichever slot answered the second of its
                ** two questions, which is a live process's slot when
                ** the vacant slot was found first.
                */
                STlcopy(lgk_info,
                        lgk_mem->mem_info[LGK_base.lgk_pid_slot].info_txt,
                        LGK_INFO_SIZE-1);
            }

#if defined(VMS) || defined(UNIX)
            /* set up pointers to reference the uuid mutex and last time
             * variable */

            if (!ID_uuid_sem_ptr)
                ID_uuid_sem_ptr=&lgk_mem->id_uuid_sem;

            if (!ID_uuid_last_time_ptr)
                ID_uuid_last_time_ptr=&lgk_mem->uuid_last_time;

            if (!ID_uuid_last_cnt_ptr)
                ID_uuid_last_cnt_ptr=&lgk_mem->uuid_last_cnt;
#endif

            CSv_semaphore(&lgk_mem->mem_ext_sem);
        }
        else
        {
            /* Memory did not exist */

            /* Zero the version to keep other processes out */
            lgk_mem->mem_version_no = 0;

#if defined(VMS) || defined(UNIX)
            /* set up the uuid mutex and last time pointers to
             * reference the objects in shared memory */
            {
                STATUS id_stat;

                ID_uuid_sem_ptr=&lgk_mem->id_uuid_sem;
                ID_uuid_last_time_ptr=&lgk_mem->uuid_last_time;
                ID_uuid_last_cnt_ptr=&lgk_mem->uuid_last_cnt;
                *ID_uuid_last_cnt_ptr=0;
                ID_UUID_SEM_INIT(ID_uuid_sem_ptr,CS_SEM_MULTI,"uuid sem",
                                 &id_stat);
            }
#endif

            /* ... then initialize the mutex */
            CSw_semaphore(&lgk_mem->mem_ext_sem, CS_SEM_MULTI,
                          "LGK mem ext sem" );

            /* Record if memory created for MT or not */
            if ( CS_is_mt() )
                lgk_mem->mem_status = LGK_IS_MT;

            /*
            ** memory is as follows:
            **
            **	-----------------------------------------------------------|
            **	| LGK_MEM struct (keep track of this mem)	               |
            **	|							       |
            **	-----------------------------------------------------------|
            **	| If a clustered installation memory reserved for CX       |
            **	|							       |
            **	------------------------------------------------------------
            **	| LKD - database of info for lk system                     |
            **	|							       |
            **	------------------------------------------------------------
            **	| LGD - database of info for lg system                     |
            **	|							       |
            **	------------------------------------------------------------
            **	| memory manipulated by LGKm_* routines for structures used |
            **	| by both the lk and lg systems.                           |
            **	|							       |
            **	------------------------------------------------------------
            */

            /* put the LGK_MEM struct at head of segment leaving ptr pointing
            ** at next aligned piece of memory
            */

            /*
            ** CX memory (if any) will lie immediately past LGK header.
            */
            pcxmem = (PTR)(lgk_mem + 1);
            pcxmem = (PTR)ME_ALIGN_MACRO(pcxmem, sizeof(ALIGN_RESTRICT));

            LGK_base.lgk_lkd_ptr = pcxmem + cxmemreq;
            LGK_base.lgk_lkd_ptr = (PTR) ME_ALIGN_MACRO(LGK_base.lgk_lkd_ptr,
                                                sizeof(ALIGN_RESTRICT));
            lgk_mem->mem_lkd = (i4)((char *)LGK_base.lgk_lkd_ptr -
                                    (char *)LGK_base.lgk_mem_ptr);

            LGK_base.lgk_lgd_ptr = (PTR) ((char *) LGK_base.lgk_lkd_ptr +
                                          sizeof(LKD));
            LGK_base.lgk_lgd_ptr = (PTR) ME_ALIGN_MACRO(LGK_base.lgk_lgd_ptr,
                                                sizeof(ALIGN_RESTRICT));
            lgk_mem->mem_lgd = (i4)((char *)LGK_base.lgk_lgd_ptr -
                                    (char *)LGK_base.lgk_mem_ptr);

            /* now initialize the rest of memory for allocation */

            /* how much memory is left? */
            ptr = ((char *)LGK_base.lgk_lgd_ptr + sizeof(LGD));
            memleft = size - (((char *) ptr) -
                              ((char *) LGK_base.lgk_mem_ptr));

            if ( (ret_val = lgkm_initialize_mem(memleft, ptr)) == OK &&
                 (ret_val = LG_meminit(sys_err)) == OK &&
                 (ret_val = LK_meminit(sys_err)) == OK )
            {
                /* Clear array of attached pids and pid info */
                for ( i = 0; i < LGK_MAX_PIDS; i++ )
                {
                    lgk_mem->mem_pid[i] = (PID)0;
                    lgk_mem->mem_info[i].info_txt[0] = EOS;
                }

                /* Set the creator pid */
                LGK_base.lgk_pid_slot = 0;
                lgk_mem->mem_creator_pid = LGK_my_pid;

                /* Set the version, releasing other processes */
                lgk_mem->mem_version_no = LGK_MEM_VERSION_CURRENT;
            }
            else
            {
                uleFormat(NULL, ret_val, (CL_ERR_DESC *)NULL,
                          ULE_LOG, NULL, NULL, 0, NULL, &err_code, 0);
                ret_val = E_DMA802_LGKINIT_ERROR;

                /* Destroy the shared memory */
                LGK_destroy(allocated_pages, sys_err);
            }
        }

        if ( ret_val == OK )
        {
            PCatexit(LGK_rundown);

            if ( clustered )
            {
                /*
                ** Perform preliminary cluster connection and CX memory init.
                */

                /* Get installation code */
                NMgtAt("II_INSTALLATION", &nm_string);
                if ( nm_string )
                {
                    instid[0] = *(nm_string);
                    /*
                    ** Only look at the second character when the first
                    ** is not the terminator; otherwise we would read
                    ** past the end of an empty string.
                    */
                    instid[1] = (instid[0] != EOS) ? *(nm_string+1) : EOS;
                }
                else
                {
                    instid[0] = 'A';
                    instid[1] = 'A';
                }
                instid[2] = '\0';
                ret_val = CXinitialize( instid, pcxmem, flag & LGK_IS_CSP );
                if ( ret_val )
                {
                    /* Report error returned from CX */
                    uleFormat(NULL, ret_val, (CL_ERR_DESC *)NULL,
                              ULE_LOG, NULL, NULL, 0, NULL, &err_code, 0 );
                    break;
                }
            }

#ifdef VMS
            {
                static $EXHDEF  exit_block;
                i4              ctrl_y_mask = 0x02000000;

                /*
                ** On VMS, programs like the dmfjsp and logstat run as images in
                ** the shell process. That is, the system doesn't start and stop
                ** a process for each invocation of the program, it just starts
                ** and stops an image in the same process. This means that if
                ** the program should die, the image may be rundown but the process
                ** will remain, which means that the check-dead threads of other
                ** processes in the installation will not feel that they need to
                ** rundown this process, since it's still alive.
                **
                ** By declaring an exit handler, which will get a chance to run
                ** even if PCexit isn't called, we improve our chances of getting
                ** to perform rundown processing if we should die unexpectedly.
                **
                ** Furthermore, we ask DCL to disable its ^Y processing, which
                ** lessens the chance that the user will interrupt us while we
                ** are holding the semaphore.
                */
                exit_block.exh$g_func = LGK_rundown;
                exit_block.exh$l_argcount = 1;
                exit_block.exh$gl_value = &exit_block.exh$l_status;

                if (sys$dclexh(&exit_block) != SS$_NORMAL)
                    ret_val = FAIL;

                lib$disable_ctrl(&ctrl_y_mask, 0);
            }
#endif
        }
        break;
    }

    /* On any failure, forget the segment so a later call starts over */
    if ( ret_val )
        LGK_base.lgk_mem_ptr = NULL;

    return(ret_val);
}
/*{ ** Name: DIread - Read a page of a file. ** ** Description: ** The DIread routine is used to read pages of a direct access ** file. For the large block read option, the number of pages ** to read is an input parameter to this routine. It will ** return the number of pages it read, since at ** end of file it may read less pages than requested. ** If multiple page reads are requested, the buffer is assumed ** to be large enough to hold n pages. The size of a page is ** determined at create. ** ** BUG FIX WORKAROUND (b4854): ** ** The current mainline code (in the case of reading from a ** temporary file) expects that a DIread past logical ** end of file, but within allocated end of file will return with ** no error. The value of the data retrieved is undefined. To ** make the current code work, DIread on unix has been changed to ** meet these requirements, but it hoped that mainline code in the ** future will be changed to not rely on this behaviour. ** ** The buffer address into which the data is to be read is examined to ** see if it is in shared memory. If so, we then instruct the slave to ** read the page(s) directly into the target buffer. Otherwise, we read ** the page(s) into the buffer in the server segment, and then copy the ** data from the server segment to the target address. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** n Pointer to value of number of pages to read. ** page Value indicating page to begin reading. ** buf Pointer to the area to hold ** page(s) being read. ** ** Outputs: ** n Number of pages read. ** f Updates the file control block. ** buf Pointer to the page read. ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** DI_BADFILE Bad file context. ** DI_BADREAD Error reading file. ** DI_BADPARAM Parameter(s) in error. ** DI_ENDFILE Not all blocks read. ** DI_BADLRU_RELEASE Error releasing file. 
** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 26-mar-87 (mmm) ** Created new for 6.0. ** 23-mar-89 (mmm) ** bug fix for b4854 (see bug fix workaround comments in header and ** in code.) ** 25-Apr-89 (GordonW) ** Don't use "bcopy" but use MECOPY macro call. bcopy is unportable. ** 07-may-89 (russ) ** Add missing semicolon to MECOPY_VAR_MACRO. ** 2-Feb-90 (anton) ** Don't always copy CL_ERR_DESC ** 6-Feb-90 (jkb) ** Add IIdio_read so direct io is available for Sequent. ** 5-aug-1991 (bryanp) ** Added support for I/O directly to server shared memory, bypassing ** the copy through the server segment if possible. ** 12-dec-1991 (bryanp) ** Added support for DIread on a raw log file for LG's use. In this ** case, all that had to happen was to replace the direct "lseek" ** call with a call to the IIdio code which supports file size ** determination for raw log files. ** 03-mar-1992 (jnash) ** Fix LG slave problem noted when Sun mmap() support ** introduced, change slave logic to send to the slave the ** segment id "key" rather than "segid" (segid value not ** the same in the slave). ** 30-October-1992 (rmuth) ** Prototype. ** 30-nov-1992 (rmuth) ** - Use DI_sense to find out the size of a file. ** - DIlru error checking ** 10-dec-1993 (rmuth) ** If fail the past io_allocated_eof test then make sure that we ** unset the errno value in CL_ERR_DESC set by SETCLERR. This was ** causing confusion as we were logging random errno's to the ** errlog.log ** 31-jan-94 (mikem) ** sir #57671 ** The transfer size of slave I/O is now stored in ** Cs_srv_block.cs_size_io_buf, rather than a constant ** DI_FILE_BUF_SIZE. ** 20-jun-1995 (amo ICL) ** Added call on DI_async_read for async io ** 14-Oct-2005 (jenjo02) ** Chris's file descriptor properties now cached in io_fprop ** (file properties) and established on the first open, ** not every open. 
*/ STATUS DIread( DI_IO *f, i4 *n, i4 page, char *buf, CL_ERR_DESC *err_code ) { STATUS small_status = OK, big_status = OK, r_status; i4 num_of_pages; i4 last_page_to_read; DI_OP diop; CL_ERR_DESC lerr_code ; /* default returns */ CL_CLEAR_ERR( err_code ); num_of_pages = *n; last_page_to_read = page + num_of_pages - 1; *n = 0; diop.di_flags = 0; if (num_of_pages <= 0) return (DI_BADPARAM); /* ** check file control block pointer, return if bad. */ if (f->io_type != DI_IO_ASCII_ID) return( DI_BADFILE ); /* Count another read */ f->io_stat.read++; if (big_status = DIlru_open(f, FALSE, &diop, err_code)) return(big_status); /* ** Sanity check to make sure we are reading within the bounds of ** the file. Note: we may still be reading garbage pages--it is ** up to the upper layers to guarantee that we are not doing this */ if (last_page_to_read > f->io_alloc_eof ) { i4 real_eof; /* ** DI_sense updates f->io_alloc_eof with the protection ** of io_sem (OS_THREADS), so there's no need to ** duplicate that update here. */ big_status = DI_sense(f, &diop, &real_eof, err_code); if (big_status == OK) { if (last_page_to_read > f->io_alloc_eof) { small_status = DI_ENDFILE; SETCLERR(err_code, 0, ER_read); /* ** The above sets errno as errno will be left over from ** a previous call zero it out to avoid confusion. */ err_code->errnum = 0; } } } if (big_status == OK && small_status == OK) { if (Di_slave) { big_status = DI_slave_read( f, &diop, buf, page, num_of_pages, n, err_code ); } else # if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO) if (Di_async_io) { big_status = DI_async_read( f, &diop, buf, page, num_of_pages, n, err_code ); } else # endif /* OS_THREADS_USED || xCL_ASYNC_IO */ { big_status = DI_inproc_read( f, &diop, buf, page, num_of_pages, n, err_code ); } } r_status = DIlru_release(&diop, &lerr_code); if ( big_status ) return( big_status ); else if (small_status) return( small_status); return(r_status); }
/*{
** Name: DIforce - Forces all pages to the disk.
**
** Description:
**      The DIforce routine is used to force all pages held by an operating
**      system to disk.  The routine should wait for completion of all I/O
**      to insure all pages are correctly on disk.  (The generic description
**      notes that this is not necessary on VMS.)
**
**      In this implementation:
**        - a file opened with DI_O_OSYNC_MASK, or whose cached file
**          properties include FPROP_DIRECT, is left alone;
**        - otherwise, when xCL_010_FSYNC_EXISTS is defined, the file is
**          opened through DIlru_open, DI_force is called on it, and the
**          descriptor is released again;
**        - otherwise the global sync() is used.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system
**                           errors.
**    Returns:
**        OK
**        DI_BADFILE         Bad file context.
**        DI_BADWRITE        Error forcing pages to disk.
**        Any status returned by DIlru_open, DI_force or DIlru_release.
**    Exceptions:
**        none
**
** Side Effects:
**        Increments f->io_stat.force.
**
** History:
**    26-mar-87 (mmm)
**        Created new for 6.0.
**    21-jan-89 (mikem)
**        DI_SYNC_MASK support (O_SYNC, fsync()).
**    06-feb-89 (mikem)
**        Added better support for DI CL_ERR_DESC, including initializing to
**        zero and passing back DIlru_open() err_code info.  And ifdef'd
**        variables only used by "FSYNC" case to shut up lint.
**    21-Apr-89 (GordonW)
**        change "#ifdef FSYNC_EXISTS" -> it's correct xCL_xx define.
**    2-Feb-90 (anton)
**        Don't always copy CL_ERR_DESC
**    25-sep-1991 (mikem) integrated following change: 27-jul-91 (mikem)
**        DIopen() now sets the flag DI_O_FSYNC_MASK if fsync() should be
**        used to sync a DIforce of a file.  Change the code to use
**        DI_O_FSYNC_MASK rather than DI_SYNC_MASK.
**    30-nov-1992 (rmuth)
**        - Prototype.
**        - Add error checking.
**    10-mar-1993 (mikem)
**        Changed the type of the first parameter to DI_send_ev_to_slave() and
**        the 2nd parameter to DI_slave_send(), so that DI_send_ev_to_slave()
**        could access the slave control block's status.
**        This routine will now initialize the status to DI_INPROGRESS, before
**        making the request and the slave will change the status once the
**        operation is complete.
**    18-apr-1994 (jnash)
**        fsync project.  DIforce() now calls fsync unconditionally
**        (assuming that it exists).
**    14-Oct-2005 (jenjo02)
**        Chris's file descriptor properties now cached in io_fprop
**        (file properties) and established on the first open,
**        not every open.
**    11-Jan-2008 (kschendel) b122122
**        Force has long been a no-op on unix, but that's incorrect.
**        It should fsync or fdatasync the file unless the file is
**        already open in sync mode.
*/
STATUS
DIforce(
    DI_IO          *f,
    CL_ERR_DESC    *err_code)
{
    STATUS      status = OK;
#ifdef xCL_010_FSYNC_EXISTS
    DI_OP       diop;
    CL_ERR_DESC scratch_err;    /* absorbs release errors after a failed force */
#endif /* FSYNC_EXISTS */

    /* default return values */
    CL_CLEAR_ERR( err_code );

    /* Reject anything that is not an initialised DI file block. */
    if (f->io_type != DI_IO_ASCII_ID)
        return(DI_BADFILE);

    /* Count a force */
    f->io_stat.force++;

    /*
    ** A file opened O_SYNC or with direct IO needs nothing further;
    ** the request still counts in the statistics above.
    */
    if ( (f->io_open_flags & DI_O_OSYNC_MASK) != 0
      || (f->io_fprop & FPROP_DIRECT) != 0 )
    {
        return (status);
    }

#ifdef xCL_010_FSYNC_EXISTS

    /* get file descriptor for this file */
    status = DIlru_open(f, FALSE, &diop, err_code);
    if ( status != OK )
        return (status);

    status = DI_force( f, &diop, err_code );
    if ( status == OK )
    {
        /* The force worked, so the release decides the final status. */
        status = DIlru_release(&diop, err_code);
    }
    else
    {
        /* Keep the force failure in err_code; release quietly. */
        (VOID) DIlru_release(&diop, &scratch_err);
    }

#else /* ! FSYNC_EXISTS */

    /*
    ** Yarggh!  no fsync and file has no osync.  This must be some
    ** obsolete or improperly ported platform.  Use global sync.
    */
    sync();

#endif /* FSYNC_EXISTS */

    return (status);
}
/*{
** Name: ERsend - Send a message to the error log, audit queue or operator.
**
** Description:
**      Dispatches on 'flag', which must be exactly one of ER_ERROR_MSG,
**      ER_AUDIT_MSG or ER_OPER_MSG:
**
**      ER_AUDIT_MSG (builds without NT_GENERIC only)
**          The message is sent on the System V message queue whose key is
**          read from II_AUDIT_IPC.  The queue id is looked up on the first
**          call and cached.  A call with message == 0 and msg_length == 0
**          only establishes the connection.  This path returns without
**          calling ERlog.
**
**      ER_OPER_MSG
**          The message is NUL-terminated in place (message[msg_length] is
**          written, so the caller's buffer must have room for that byte),
**          traced with TRdisplay, and handed to the platform's operator
**          channel: ReportEvent plus, when netapi32 is usable, the
**          messenger service on NT; syslog elsewhere.  It is then prefixed
**          with a host/"INGSYSLOG" header and passed to ERlog.
**
**      ER_ERROR_MSG
**          The message is passed to ERlog unchanged.
**
** Inputs:
**      flag                 ER_ERROR_MSG, ER_AUDIT_MSG or ER_OPER_MSG.
**      message              Message text.
**      msg_length           Length of message in bytes.
**
** Outputs:
**      err_code             Operating system error, when one is available.
**    Returns:
**        OK
**        ER_BADPARAM        Bad flag, missing or negative-length message,
**                           or an audit message longer than ER_MAX_LEN.
**        ER_NO_AUDIT        II_AUDIT_IPC not set or queue not reachable.
**        ER_BADSEND         msgsnd failed.
**        Otherwise the status returned by ERlog.
**
** Side Effects:
**        For ER_OPER_MSG, writes EOS at message[msg_length].
*/
STATUS
ERsend(i4 flag, char *message, i4 msg_length, CL_ERR_DESC *err_code)
{
# ifdef NT_GENERIC
    static bool         er_init = FALSE;
    static bool         is_w95 = FALSE;
# else /* !NT_GENERIC */
    static int          er_ifi = -2;
    static int          ar_ifi = -2;
# endif /* !NT_GENERIC */
    STATUS              status;
    char                tmp_buf[ER_MAX_LEN];
    char*               logmsg = message;

    /* Check for bad paramters. */
    CL_CLEAR_ERR( err_code );

    if ((message == 0 || msg_length == 0) && flag != ER_AUDIT_MSG)
        return (ER_BADPARAM);

    /* A negative length can never describe a message. */
    if (msg_length < 0)
        return (ER_BADPARAM);

    if ((flag != ER_ERROR_MSG) && (flag != ER_AUDIT_MSG) &&
        ( flag != ER_OPER_MSG))
        return (ER_BADPARAM);

# ifndef NT_GENERIC
    if (flag & ER_AUDIT_MSG)
    {
        key_t msg_key;
        char *ipc_number;
        struct
        {
            long    mtype;
            char    mtext[ER_MAX_LEN];
        }   msg;

        /*
        ** The message is copied into msg.mtext below, so it must fit,
        ** and a non-zero length needs a buffer to copy from.
        */
        if (msg_length > ER_MAX_LEN || (message == 0 && msg_length != 0))
            return (ER_BADPARAM);

        if (ar_ifi == -2)
        {
            NMgtAt("II_AUDIT_IPC", &ipc_number);
            if (ipc_number && ipc_number[0])
            {
                CVal(ipc_number, &msg_key);
                ar_ifi = msgget(msg_key, 0);
                if (ar_ifi == -1)
                {
                    SETCLERR(err_code, 0, ER_open);
                    return(ER_NO_AUDIT);
                }
            }
            else
            {
                SETCLERR(err_code, 0, ER_open);
                return(ER_NO_AUDIT);
            }
        }

        /* Handle special case to connect only but not send message. */
        if (msg_length == 0 && message == 0)
            return (OK);

        MEcopy(message, msg_length, msg.mtext);
        msg.mtype = 1;
        if (msgsnd(ar_ifi, &msg, msg_length, 0))
        {
            SETCLERR(err_code, 0, ER_open);
            return(ER_BADSEND);
        }
        return (OK);
    }
    else
# endif /* ! NT_GENERIC */

    if (flag & ER_OPER_MSG)
    {
        char    hostname[GL_MAXNAME];

        message[msg_length] = EOS;
        TRdisplay("ER Operator:\"%s\"\n",message);
        if (!ERsysinit)
            ERinitsyslog();
# ifdef NT_GENERIC
        {
            wchar_t *wmessage = NULL;

            /*
            ** Update the ReportEvent to report information in the event log.
            */
            if ( ReportEvent( EventLog,
                              (WORD) EVENTLOG_INFORMATION_TYPE,
                              (WORD) 0,             /* event category */
                              (DWORD) I_ING_INFO,   /* event identifier */
                              (PSID) NULL,
                              (WORD) 1,             /* number of strings */
                              (DWORD) 0,
                              &message,
                              NULL ) == FALSE)
                status = GetLastError();

            if ( !er_init )
            {
                char            VersionString[256];
                FUNC_EXTERN BOOL GVosvers(char *OSVersionString);

                GVosvers(VersionString);
                is_w95 = ( STstrindex(VersionString, "Microsoft Windows 9",
                                      0, FALSE) != NULL ) ? TRUE : FALSE;

                if ( !is_w95 ) /* netapi32 only on NT */
                {
                    HANDLE hDll;

                    if ((hDll = LoadLibrary(TEXT("netapi32.dll"))) != NULL)
                    {
                        pNetMessageNameAdd =
                            (NET_API_STATUS (*)(LPCWSTR,LPCWSTR))
                            GetProcAddress(hDll, TEXT("NetMessageNameAdd"));
                        pNetMessageNameDel =
                            (NET_API_STATUS (*)(LPCWSTR,LPCWSTR))
                            GetProcAddress(hDll, TEXT("NetMessageNameDel"));
                        pNetMessageBufferSend =
                            (NET_API_STATUS (*)(LPCWSTR,LPCWSTR,LPCWSTR,LPBYTE,DWORD))
                            GetProcAddress(hDll, TEXT("NetMessageBufferSend"));
                    }
                    /* if any problem, pretend we don't support it */
                    if ( pNetMessageNameAdd == NULL ||
                         pNetMessageNameDel == NULL ||
                         pNetMessageBufferSend == NULL )
                        is_w95 = TRUE;
                }

                /*
                ** One-time setup is done.  Without this the library was
                ** reloaded and re-resolved on every operator message.
                */
                er_init = TRUE;
            }

            if ( !is_w95 )
            {
                /*
                ** Now, send the message to the server console,
                ** putting up a message box (if the messenger service
                ** is running.  Everything must be in Unicode.
                */
                if ( whostname[0] == 0 )
                {
                    unsigned int len = sizeof(hostname);

                    /*
                    ** get the hostname in Unicode format for use
                    ** by messenger service
                    */
                    GetComputerName( (char *)hostname, &len );
                    /* destination size is a count of wide chars, not bytes */
                    MultiByteToWideChar( GetACP(), 0,
                                         hostname, sizeof(hostname),
                                         whostname,
                                         sizeof(whostname) / sizeof(whostname[0]) );
                }

                /* initialize the messenger service */
                status = (*pNetMessageNameAdd)( whostname, msgname );
                if ( status != NERR_Success )
                    status = GetLastError();

                /* Allocate a buffer for the Unicode */
                wmessage = (wchar_t *) MEreqmem( 0,
                                                 msg_length * sizeof(wchar_t),
                                                 TRUE, &status );
                if ( wmessage )
                {
                    /*
                    ** copy the message to the Unicode buffer; the buffer
                    ** holds msg_length wide characters
                    */
                    MultiByteToWideChar( GetACP(), 0,
                                         message, msg_length,
                                         wmessage, msg_length );
                    status = (*pNetMessageBufferSend)( whostname,
                                                       msgname,
                                                       NULL,
                                                       (LPBYTE) wmessage,
                                                       msg_length*sizeof(wchar_t) );
                    if ( status != NERR_Success )
                        status = GetLastError();
                    MEfree( (PTR)wmessage );
                }

                /* re-initialize the messenger service */
                status = (*pNetMessageNameDel)( whostname, msgname );
                if ( status != NERR_Success )
                    status = GetLastError();
            }
        }
# elif defined(OS_THREADS_USED) && defined(any_aix)
        /*
        ** NOTE(review): message is passed directly as the second argument;
        ** confirm against the platform's syslog_r prototype whether a
        ** format string (and "%s") is expected here.
        */
        syslog_r( LOG_ALERT|LOG_ERR, message );
# else
        /* Never let message text be interpreted as a format string. */
        syslog( LOG_ALERT|LOG_ERR, "%s", message );
# endif /* NT_GENERIC */
    }

    if (flag & ER_OPER_MSG)
    {
        i4      msglen = 0;     /* length of the generated prefix */
        i4      body_len;       /* bytes of message that fit after it */
        i4      room;
        char*   host = PMhost();

        MEfill( ER_MAX_LEN, 0, tmp_buf );

        /*
        ** Format the message string for the event log.  As the source is
        ** not known a fixed string of INGSYSLOG is used.
        */
        TRformat( NULL, 0, tmp_buf, ER_MAX_LEN - 1,
                  "%8.8t::[INGSYSLOG , 00000000]: %@ ", STlength(host),
                  host );
        msglen = STlength(tmp_buf);

        /*
        ** Append the original message, truncating it if prefix plus
        ** message would not fit.  tmp_buf was zero-filled above and the
        ** last byte is never written, so the result stays NUL-terminated.
        */
        body_len = STlength(message);
        room = (ER_MAX_LEN - 1) - msglen;
        if (body_len > room)
            body_len = room;
        if (body_len > 0)
            MEcopy(message, body_len, tmp_buf + msglen);

        msg_length += msglen;
        if (msg_length > ER_MAX_LEN - 1)
            msg_length = ER_MAX_LEN - 1;
        logmsg = tmp_buf;
    }

    status = ERlog( logmsg, msg_length, err_code );
    return( status );
}
/*{ ** Name: LGadd - Add Database. ** ** Description: ** Add database to logging system for a process. ** ** This routine adds a database to the logging system. This service ** is used to inform the logging system that records recorded in the log ** file should be associated with this database. A database can be ** marked as journaled by setting the LG_JOURNAL flag. The fact that a ** database is journaled is used by the logging system to recognize the ** need to copy log records from the log file to a journal file. ** ** NOTE on adding databases that are being recovered: ** When a database requires REDO recovery, the LDB for that database ** is marked LDB_RECOVER. This routine will return LG_DB_INCONSISTENT ** (signifying that the database is inconsistent) if anyone tries ** to add the db while it is being recovered. This is not a very ** on-line solution. ** ** A better solution is to make sure that servers that want to open ** a database that is currently being recovered are forced to wait ** until the db is fully recovered, then they should be able to ** proceed. ** ** Inputs: ** lg_id Log identifier. ** flag Zero or ** LG_JOURNAL: if a journaled DB. ** LG_NOTDB: not a DB; administrative ** LG_PRETEND_CONSISTENT: used by verifydb ** LG_FCT: fast commit ** LG_READONLY: a readonly database ** buffer Database information buffer. ** l_buffer Length of buffer. ** ** Outputs: ** db_id Database identifier. Unique ** identifier associated with this ** instantiation of the logging/locking ** server. After logging/locking ** restarted, a database can have ** a different id. ** sys_err Reason for error return status. ** Returns: ** OK Success. ** LG_BADPARAM Bad parameters to call. ** LG_DB_INCONSISTENT Inconsistent database. ** LG_EXCEED_LIMIT Out of LDB's. ** LG_SHUTTING_DOWN Shutdown has occured (or pending). ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** Summer, 1992 (bryanp) ** Working on the new portable logging and locking system. 
** 18-jan-1993 (rogerk) ** Removed LG_WILLING_COMMIT flag - now only LG_ADDONLY is used ** during recovery processing. Add ldb_j_last_la, ldb_d_last_la ** fields. ** 15-mar-1993 (rogerk) ** Reduced Logging - Phase IV: ** Removed LG_ADDONLY flag. Recovery processing now adds db with ** a normal LGadd call and alters it via LG_A_DBCONTEXT to ** reestablish its context. ** 26-apr-1993 (bryanp) ** 6.5 Cluster Support: ** Add ldb_sback_lsn field to the LDB. ** Make sure that lpd_type is set so that LPDs can be deallocated ** properly upon error. ** 26-jul-1993 (bryanp) ** When adding a database which is associated with a remote log file, ** do not signal a local opening of the database. This occurs ** when the CSP process on one node is recovering the work ** performed by another node; in this case we do NOT wish to ** signal to the RCP that a local open is being performed, since ** in fact no local access is implied by adding this database. ** When adding the notdb again, increment the ldb_lpd_count even if ** the ldb_buffer info doesn't match. The notdb is always the notdb ** 26-jul-1993 (rogerk) ** Changed journal and dump window tracking in the logging system. ** Use new journal and dump log address fields. ** 12-oct-1993 (tad) ** Bug #56449 ** Changed %x to %p for pointer values. ** 30-Jan-1996 (jenjo02) ** Reorganized LG_add() such that if NOTDB is wanted, ** the search of the ldb queue is bypassed; after all, ** we know it's buried in the LGD and easy to find. ** 11-Sep-1996 (jenjo02) ** Fix a bug in LG_add() search of lgd_ldb_q which was looping ** if more that 2 LDBs were extant. ** 13-jun-1997 (wonst02) ** Added LG_READONLY and LDB_READONLY for readonly databases. ** 12-nov-1998 (kitch01) ** Bug 90140. If the database is currently pending a close then ** mark the open as in CLOSE_WAIT. 
This will ensure that the close ** is processed before this open and prevent locking errors on the journals ** 7-oct-2004 (thaju02) ** Use SIZE_TYPE to allow memory pools > 2Gig. 21-Jun-2006 (hanal04) Bug 116272 ** Take the lgd_mutex before the ldb_mutex in order to ensure ** the acquisition order is consistent with LG_archive_complete() ** and LG_event(). Flag LG_signal_event() that we already have the ** lgd_mutex. ** 01-Nov-2006 (jonj) ** Use consistent ldb_q_mutex, ldb_mutex ordering thoughout the code. ** Don't put LDB on queue until it's completely initialized. ** 15-Jan-2010 (jonj) ** SIR 121619 MVCC: Initialize new ldb_active_lxbq. ** 09-aug-2010 (maspa05) b123189, b123960 ** Pass flag to indicate a readonly database LDB_RODB, so that it ** gets picked up by LGshow */ STATUS LGadd( LG_LGID external_lg_id, i4 flag, char *buffer, i4 l_buffer, LG_DBID *external_db_id, CL_ERR_DESC *sys_err) { register LGD *lgd = (LGD *)LGK_base.lgk_lgd_ptr; register LPB *lpb; register LDB *ldb; register LFB *lfb; register LPD *lpd; LDB *next_ldb; LPD *next_lpd; SIZE_TYPE end_offset; SIZE_TYPE ldb_offset; SIZE_TYPE *lpbb_table; SIZE_TYPE *ldbb_table; i4 err_code; bool initialize_ldb = FALSE; STATUS status; LG_I4ID_TO_ID lg_id; LG_ID *db_id = (LG_ID*)external_db_id; LFB *cur_db_lfb; i4 SignalEvent = 0; /* ** If the logging system is already in "shutdown" mode, then no new ** LGadd calls are permitted */ LG_WHERE("LGadd") CL_CLEAR_ERR(sys_err); if ((lgd->lgd_status & (LGD_START_SHUTDOWN | LGD_IMM_SHUTDOWN)) != 0) return (LG_SHUTTING_DOWN); if (l_buffer == 0 || l_buffer > sizeof(ldb->ldb_buffer)) { uleFormat(NULL, E_DMA411_LGADD_BAD_LEN, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, NULL, 0, NULL, &err_code, 2, 0, l_buffer, 0, sizeof(ldb->ldb_buffer)); return (LG_BADPARAM); } /* Check the lg_id. 
*/ lg_id.id_i4id = external_lg_id; if (lg_id.id_lgid.id_id == 0 || (i4)lg_id.id_lgid.id_id > lgd->lgd_lpbb_count) { uleFormat(NULL, E_DMA40F_LGADD_BAD_ID, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, NULL, 0, NULL, &err_code, 2, 0, lg_id.id_lgid.id_id, 0, lgd->lgd_lpbb_count); return (LG_BADPARAM); } lpbb_table = (SIZE_TYPE *)LGK_PTR_FROM_OFFSET(lgd->lgd_lpbb_table); lpb = (LPB *)LGK_PTR_FROM_OFFSET(lpbb_table[lg_id.id_lgid.id_id]); if (status = LG_mutex(SEM_EXCL, &lpb->lpb_mutex)) return(status); if (lpb->lpb_type != LPB_TYPE || lpb->lpb_id.id_instance != lg_id.id_lgid.id_instance) { (VOID)LG_unmutex(&lpb->lpb_mutex); uleFormat(NULL, E_DMA410_LGADD_BAD_PROC, (CL_ERR_DESC *)NULL, ULE_LOG, NULL, NULL, 0, NULL, &err_code, 3, 0, lpb->lpb_type, 0, lpb->lpb_id.id_instance, 0, lg_id.id_lgid.id_instance); return (LG_BADPARAM); } /* ** Allocate an LPD, causing lpd_type to be set to LPD_TYPE. */ if ((lpd = (LPD *)LG_allocate_cb(LPD_TYPE)) == 0) { (VOID)LG_unmutex(&lpb->lpb_mutex); return (LG_EXCEED_LIMIT); } /* ** CLEANUP: error returns after this point must free the lpd before ** returning! */ lfb = (LFB *)LGK_PTR_FROM_OFFSET(lpb->lpb_lfb_offset); /* ** If this isn't a real user database, but is instead the "NOTDB" ** database which is used by system processes such as the DMFRCP and ** DMFACP daemons, then it has a special reserved LDB slot and does not ** get located by its database information buffer, therefore we ** can skip locking and scanning the ldb queue. */ end_offset = LGK_OFFSET_FROM_PTR(&lgd->lgd_ldb_next); /* ** When both the lgd_ldb_q and ldb must be mutexed, always take ** the lgd_ldb_q_mutex, then ldb_mutex. 
*/ /* Lock and hold the ldb queue mutex */ if (status = LG_mutex(SEM_EXCL, &lgd->lgd_ldb_q_mutex)) return(status); if (flag & LG_NOTDB) { ldbb_table = (SIZE_TYPE *)LGK_PTR_FROM_OFFSET(lgd->lgd_ldbb_table); ldb = (LDB *)LGK_PTR_FROM_OFFSET(ldbb_table[1]); if (status = LG_mutex(SEM_EXCL, &ldb->ldb_mutex)) return(status); /* ** IF the notdb has already been initialized, then we have some ** caller who is adding the notdb with a different buffer, thus ** we didn't match when we searched the database list for a ** matching ldb_buffer field. Since we really don't care about the ** ldb_buffer for the notdb (the notdb is the notdb, after all), ** we'll treat this case as though the ldb buffer fields matched. */ if (ldb->ldb_type == LDB_TYPE) { /* Count new reference to LDB. */ ldb->ldb_lpd_count++; } else { /* ** first use of NOTDB; initialize it. */ lgd->lgd_ldb_inuse++; initialize_ldb = TRUE; } } else { /* ** Scan database list to see if this database is already known. Each ** database is identified by a "database information buffer", which DMF ** passes in. This buffer contains items such as the database name, owner ** name, etc. If the database information buffer passed to LGadd exactly ** matches the database information buffer of an existing LDB, then this ** database is already known (has already been added by another logging ** system process). 
*/ for (ldb_offset = lgd->lgd_ldb_next; ldb_offset != end_offset;) { ldb = (LDB *)LGK_PTR_FROM_OFFSET(ldb_offset); if (ldb->ldb_l_buffer != l_buffer || MEcmp(ldb->ldb_buffer, buffer, l_buffer)) { ldb_offset = ldb->ldb_next; continue; } if ( CXcluster_enabled() ) { /* ** Node recovery must use distinct ldb context per node log file */ cur_db_lfb = (LFB *)LGK_PTR_FROM_OFFSET(ldb->ldb_lfb_offset); if ((lfb->lfb_l_nodename || cur_db_lfb->lfb_l_nodename) && (lfb->lfb_l_nodename != cur_db_lfb->lfb_l_nodename || MEcmp(lfb->lfb_nodename, cur_db_lfb->lfb_nodename, lfb->lfb_l_nodename))) { ldb_offset = ldb->ldb_next; #ifdef xDEBUG TRdisplay("%@ RCP-P1: Recovering %~t, ignore ldb for %~t %x\n", lfb->lfb_l_nodename, lfb->lfb_nodename, cur_db_lfb->lfb_l_nodename, cur_db_lfb->lfb_nodename, flag & LG_CSP_RECOVER); #endif continue; } } if (status = LG_mutex(SEM_EXCL, &ldb->ldb_mutex)) return(status); /* ** Check again after semaphore wait. ** If LDB is no longer a match (it was in the ** process of being eradicated while we waited for ** the ldb_mutex), and start the search again from ** the top of the queue. */ if (ldb->ldb_type != LDB_TYPE || ldb->ldb_l_buffer != l_buffer || MEcmp(ldb->ldb_buffer, buffer, l_buffer)) { (VOID)LG_unmutex(&ldb->ldb_mutex); ldb_offset = lgd->lgd_ldb_next; continue; } break; } if (ldb_offset != end_offset) { /* ** LDB exists. If the database is already known to be inconsistent, ** then no new adds of the database are permitted, unless the caller ** acknowledges that it "knows" that the database is inconsistent by ** passing the "pretend consistent" flag (used by verifydb). 
*/ if (ldb->ldb_status & LDB_INVALID) { if ( (flag & LG_PRETEND_CONSISTENT) == 0 ) { (VOID)LG_unmutex(&ldb->ldb_mutex); (VOID)LG_unmutex(&lgd->lgd_ldb_q_mutex); LG_deallocate_cb(LPD_TYPE, (PTR)lpd); (VOID)LG_unmutex(&lpb->lpb_mutex); return (LG_DB_INCONSISTENT); } } /* ** If the database reference count is zero, then the database ** must be opened by the RCP before the server can use it. ** Mark the status opendb_pending - this will suspend any thread ** making an LGwrite call on this database (note that the first ** thing a server does after opening a database is to write an ** OPENDB log record) until the RCP has finished opening it. ** ** If the database reference count is not zero, but the database ** is undergoing REDO recovery, then we cannot allow new servers ** to access the database until recovery is complete. Set the ** database status to opendb_pending and opn_wait. ** ** NOTE that if we begin to support READ-ONLY databases and servers ** are able to open databases without writing an OPENDB record, then ** we must come up with a new method of suspending database openers ** until recovery is complete. */ /* Bug 90140. If the database is currently pending a close then ** mark the open as in CLOSE_WAIT. This will ensure that the close ** is processed before this open and prevent locking errors on the journals */ if (ldb->ldb_lpd_count == 0) { if ((ldb->ldb_status & LDB_PURGE) == 0) { if ((ldb->ldb_status & LDB_OPENDB_PEND) == 0) { ldb->ldb_status |= LDB_OPENDB_PEND; if (ldb->ldb_status & LDB_CLOSEDB_PEND) ldb->ldb_status |= LDB_CLOSE_WAIT; if (flag & LG_PRETEND_CONSISTENT) ldb->ldb_status |= LDB_PRETEND_CONSISTENT; if (flag & LG_READONLY) ldb->ldb_status |= LDB_READONLY; if (flag & LG_RODB) ldb->ldb_status |= LDB_RODB; SignalEvent = LGD_OPENDB; } } else ldb->ldb_status &= ~(LDB_PURGE); } else if (ldb->ldb_status & LDB_RECOVER) { /* ** The database is open, but is being recovered. 
** Set the opendb_pending and opn_wait flags - this will ** prevent any new transactions from proceeding on this ** database until recovery is complete. Marking this ** database as OPENDB_PEND will not cause the database to ** be processed in count_opens because of the opn_wait flag. */ ldb->ldb_status |= (LDB_OPENDB_PEND | LDB_OPN_WAIT); } /* Count new reference to LDB. */ ldb->ldb_lpd_count++; } else { /* ** This database is NOT known. ** ** If the caller has passed special flags indicating that they ** require that the newly-added database must have a particular DB_ID ** assigned to it, then ensure that the new LDB gets the right ID. ** ** Otherwise, just pick the next LDB off the free list. */ /* ** Allocate a new LDB ** returning with the ldb_mutex held ** and lgd_ldb_inuse incremented. */ if ((ldb = (LDB *)LG_allocate_cb(LDB_TYPE)) == 0) { LG_deallocate_cb(LPD_TYPE, (PTR)lpd); (VOID)LG_unmutex(&lgd->lgd_ldb_q_mutex); (VOID)LG_unmutex(&lpb->lpb_mutex); return (LG_EXCEED_LIMIT); } initialize_ldb = TRUE; } } #ifdef xDEBUG /* ** For a while, we were having problems with corruption of the LFB/LDB ** large block queues, and this debugging code helped to track those ** problems down. */ if (ldb->ldb_id.id_id == 0) { TRdisplay("%@ LGadd: args were:(%d,%d).%x.%p.%x.%p\n", lg_id.id_lgid.id_id, lg_id.id_lgid.id_instance, flag, buffer, l_buffer, db_id); LG_debug_wacky_ldb_found(lgd, ldb); return (LG_BADPARAM); } #endif /* ** NOTE: Be careful about adding error returns after this point, ** because any such error return must first free up BOTH the LPD AND ** the LDB, if an LDB was actually allocated. */ /* ** Initialize the LDB, if one was allocated ** or if first use of NOTDB LDB. 
*/ if (initialize_ldb) { MEcopy((PTR)buffer, l_buffer, (PTR)ldb->ldb_buffer); ldb->ldb_l_buffer = l_buffer; ldb->ldb_type = LDB_TYPE; ldb->ldb_status = LDB_ACTIVE; ldb->ldb_stat.read = 0; ldb->ldb_stat.write = 0; ldb->ldb_stat.begin = 0; ldb->ldb_stat.wait = 0; ldb->ldb_stat.force = 0; ldb->ldb_stat.end = 0; ldb->ldb_lxbo_count = 0; ldb->ldb_lxb_count = 0; ldb->ldb_lpd_count = 1; ldb->ldb_lfb_offset = lpb->lpb_lfb_offset; ldb->ldb_j_first_la.la_sequence = 0; ldb->ldb_j_first_la.la_block = 0; ldb->ldb_j_first_la.la_offset = 0; ldb->ldb_j_last_la.la_sequence = 0; ldb->ldb_j_last_la.la_block = 0; ldb->ldb_j_last_la.la_offset = 0; ldb->ldb_d_first_la.la_sequence = 0; ldb->ldb_d_first_la.la_block = 0; ldb->ldb_d_first_la.la_offset = 0; ldb->ldb_d_last_la.la_sequence = 0; ldb->ldb_d_last_la.la_block = 0; ldb->ldb_d_last_la.la_offset = 0; ldb->ldb_sbackup.la_sequence = 0; ldb->ldb_sbackup.la_block = 0; ldb->ldb_sbackup.la_offset = 0; ldb->ldb_sback_lsn.lsn_high = 0; ldb->ldb_sback_lsn.lsn_low = 0; ldb->ldb_eback_lsn.lsn_high = 0; ldb->ldb_eback_lsn.lsn_low = 0; /* ** Assume no simulated MVCC journal writes. ** ** This may be changed by LGalter(LG_A_JFIB) */ MEfill(sizeof(ldb->ldb_jfib), 0, &ldb->ldb_jfib); /* ** Set last_commit, last_lsn, and first_la to ** the current values from the header. */ ldb->ldb_last_commit = lfb->lfb_header.lgh_last_lsn; ldb->ldb_last_lsn = lfb->lfb_header.lgh_last_lsn; ldb->ldb_first_la = lfb->lfb_header.lgh_end; /* ** Initialize active transaction queue to empty. */ ldb->ldb_active_lxbq.lxbq_next = ldb->ldb_active_lxbq.lxbq_prev = LGK_OFFSET_FROM_PTR(&ldb->ldb_active_lxbq.lxbq_next); ldb->ldb_lgid_low = 0; ldb->ldb_lgid_high = 0; /* ** Extract the external Database Id from the info buffer to ** put in an accessable place of the ldb. 
*/ I4ASSIGN_MACRO(ldb->ldb_buffer[DB_DB_MAXNAME+DB_OWN_MAXNAME], ldb->ldb_database_id); if (flag & LG_NOTDB) { ldb->ldb_status |= LDB_NOTDB; } else { if (flag & LG_JOURNAL) ldb->ldb_status |= LDB_JOURNAL; if (flag & LG_PRETEND_CONSISTENT) ldb->ldb_status |= LDB_PRETEND_CONSISTENT; if (flag & LG_READONLY) ldb->ldb_status |= LDB_READONLY; if (flag & LG_RODB) ldb->ldb_status |= LDB_RODB; } if ((ldb->ldb_status & LDB_NOTDB) == 0) { if ((lfb->lfb_status & LFB_USE_DIIO) == 0) { /* ** signal to the RCP that local use of this database is ** beginning. The database remains in pending-open state ** until the RCP acknowledges the open. */ ldb->ldb_status |= LDB_OPENDB_PEND; SignalEvent = LGD_OPENDB; } } } /* ** The LPD (Logging system Process-Database connection block) contains ** pointers to its associated database and process blocks, and contains ** a list of all transactions which this process has begun within this ** database: */ lpd->lpd_ldb = LGK_OFFSET_FROM_PTR(ldb); lpd->lpd_lpb = LGK_OFFSET_FROM_PTR(lpb); lpd->lpd_lxbq.lxbq_next = lpd->lpd_lxbq.lxbq_prev = LGK_OFFSET_FROM_PTR(&lpd->lpd_lxbq.lxbq_next); lpd->lpd_lxb_count = 0; /* Change various counters. */ lpb->lpb_lpd_count++; lgd->lgd_stat.add++; /* Queue LPD to the LPB. */ lpd->lpd_next = lpb->lpb_lpd_next; lpd->lpd_prev = LGK_OFFSET_FROM_PTR(&lpb->lpb_lpd_next); next_lpd = (LPD *)LGK_PTR_FROM_OFFSET(lpb->lpb_lpd_next); next_lpd->lpd_prev = lpb->lpb_lpd_next = LGK_OFFSET_FROM_PTR(lpd); /* ** If the adding process uses fast commit, then mark the database ** as open with FC protocols. Should a crash occur, all updates to ** this db since the last Consistency Point will need to be redone. */ if ((flag & LG_FCT) && (lpb->lpb_status & LPB_FCT)) ldb->ldb_status |= LDB_FAST_COMMIT; /* If opener wants MVCC, ensure that it is on, found or not */ if ( flag & LG_MVCC ) ldb->ldb_status |= LDB_MVCC; /* Return identifier. */ *db_id = lpd->lpd_id; if ( initialize_ldb ) { /* Lastly, insert LDB on the active queue. 
*/ ldb->ldb_next = lgd->lgd_ldb_next; ldb->ldb_prev = end_offset; next_ldb = (LDB *)LGK_PTR_FROM_OFFSET(lgd->lgd_ldb_next); next_ldb->ldb_prev = lgd->lgd_ldb_next = LGK_OFFSET_FROM_PTR(ldb); } /* ** Unwind the mutexes */ (VOID)LG_unmutex(&ldb->ldb_mutex); (VOID)LG_unmutex(&lgd->lgd_ldb_q_mutex); (VOID)LG_unmutex(&lpb->lpb_mutex); /* If any events to signal, do so */ if ( SignalEvent ) LG_signal_event(SignalEvent, 0, FALSE); return (OK); }
/*{ ** Name: SRopen - Opens a file. ** ** Description: ** The SRopen routine is used to open a direct access file. ** It will open the file for write access and will ** place no locks on the file. If the create_flag is set ** it will create it and allocate the amount specified. ** Additional space may be required to accomplish the sort. ** An unlimited number of extensions must be allowed. ** You can specify that nothing should be allocated at ** create time. ** ** SR files are allocated in SR_MIN_INCR-block chunks, ** i.e. (SR_MIN_INCR/2) Kb chunks. Ideally, one reads and ** writes SR files using a constant page size; but, it's ** allowable to read with a larger page size as long as both ** old and new page sizes are a power of 2 no larger than ** (SR_MIN_INCR/2) Kb. Likewise, it's allowable to read an ** existing SR file with a smaller page size as long as it ** divides evenly into the original page size. In general there ** is no burning need to flail around with SR file page sizes... ** ** Inputs: ** f Pointer to the SR file ** context needed to do I/O. ** path Pointer to the directory name ** for the file. ** pathlength Length of path name. ** filename Pointer to file name. ** filelength Length of file name. ** pagesize Value indicating size of page ** in bytes. Must be a multiple of ** the VMS page size of 512 bytes. ** create_flag Value indicating if creation needed. ** Must be SR_IO_CREATE. ** n Value indicating number of pages to ** pre-allocate. ** ** Outputs: ** f Updates the file control block. ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** SR_BADDIR Error in path specification. ** SR_BADOPEN Error opening file. ** SR_BADFILE Bad file context. ** SR_BADPARAM Parameter(s) in error. ** SR_EXCEED_LIMIT Too many open files, exceeding disk quota ** or exceeding available disk space. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 30-sep-85 (jennifer) ** Created new for 5.0. 
** 02-apr-86 (jennifer) ** Modified the system error returned from a i4 to ** the type CL_ERR_DESC per CL request. ** 27-jul-87 (rogerk) ** Added return code SR_EXCEED_LIMIT. ** 17-aug-87 (rogerk) ** Save local return status after CSsuspend call before acting on ** its value. We were losing the return status from the allocate ** call by seting the local return status from CSsuspend later on. ** 11-may-98 (kinte01) ** Added page size for 65536 */ STATUS SRopen( SR_IO *f, char *path, u_i4 pathlength, char *filename, u_i4 filelength, i4 pagesize, u_i4 create_flag, i4 n, CL_ERR_DESC *err_code) { IOSB local_iosb; /* Operation return status. */ i4 s; /* Request return status. */ FIBDEF *fib; struct { i4 count; char *pointer; } descriptor; CS_SID sid; CL_CLEAR_ERR(err_code); #ifdef SR_CHECK_RESUME if (SR_check_resume) { SR_check_init(); } #endif /* Page size has to be a multiple of 512. */ if ((pagesize & 511) != 0) return (SR_BADPARAM); /* Initialize file control block. */ MEfill (sizeof(SR_IO), NULL, (PTR)f); f->io_type = SR_IO_ASCII_ID; CSget_sid(&sid); /* Assign a channel for I/O. */ descriptor.count = pathlength; descriptor.pointer = path; s = sys$assign(&descriptor, &f->io_channel, 0, 0); if ((s & 1) == 0) { err_code->error = s; if ((s == SS$_EXQUOTA) || (s == SS$_EXDISKQUOTA) || (s == SS$_NOIOCHAN)) return (SR_EXCEED_LIMIT); return (SR_BADOPEN); } /* Access the file with no locking.*/ fib = (FIBDEF *)f->io_fib; fib->fib$l_acctl = FIB$M_NOLOCK | FIB$M_WRITE; fib->fib$w_nmctl = 0; fib->fib$w_exctl = 0; f->io_fib_desc.str_len = sizeof(f->io_fib); f->io_fib_desc.str_str = f->io_fib; /* Queue the Create call. */ s = sr_qio(EFN$C_ENF, f->io_channel, IO$_CREATE | IO$M_CREATE | IO$M_ACCESS | IO$M_DELETE, &local_iosb, sr_resume_fcn, sid, &f->io_fib_desc, 0, 0, 0, 0, 0); if (s & 1) { /* Successfully queued, wait for completion. 
*/ #ifdef SR_SUSPEND_THREAD SR_SUSPEND_THREAD; #endif s = local_iosb.iosb$w_status; if (local_iosb.iosb$w_status & 1) { /* Successful completion, setup the control block. */ fib->fib$l_acctl = 0; f->io_block_size = pagesize; f->io_blks = pagesize >> 9; /* Don't bother with io_log2_blk, nothing uses it... */ if (n != 0) { /* Preallocate space for the file. */ s = iiSRallocate(f, n, sid, err_code); if (s == OK) return (OK); /* ** If allocate returns an error, stuff the os errcode into ** 's' for processing below. */ s = err_code->error; } else { return (OK); } }
/*
** Name: SRclose - Closes an SR file.
**
** Description:
**      Queues an IO$_DEACCESS request on the file's channel, waits for it
**      to complete, and then deassigns the channel.  On success the
**      control block is marked closed (io_channel and io_type zeroed).
**      A control block whose channel is already zero is treated as
**      closed and OK is returned.
**
**      delete_flag is accepted but not referenced in this routine.
**
** Inputs:
**      f                    Pointer to the SR file context.
**      delete_flag          Not referenced here.
**
** Outputs:
**      err_code             Operating system status on failure.
**    Returns:
**        OK
**        SR_BADFILE         Bad file context.
**        SR_BADCLOSE        Error closing the file.
*/
STATUS
SRclose(
SR_IO           *f,
i4              delete_flag,
CL_ERR_DESC     *err_code)
{
    /*
    ** The local names are kept as they are because SR_SUSPEND_THREAD is
    ** defined outside this routine and may refer to them.
    */
    IOSB        local_iosb;     /* Operation return status. */
    int         s;              /* Request return status. */
    CS_SID      sid;
#ifdef xDEBUG
    long        device_class;
    long        dvi_length;
    ILE3        itmlst = {4, DVI$_DEVCLASS, &device_class, &dvi_length};
#endif

    CL_CLEAR_ERR(err_code);

    /* Only a valid SR control block may be closed. */
    if (f->io_type != SR_IO_ASCII_ID)
        return (SR_BADFILE);

    /* No channel assigned means there is nothing open to close. */
    if (f->io_channel == 0)
        return (OK);

    CSget_sid(&sid);

#ifdef xDEBUG
    /*
    ** Check that the channel we are deallocating is still connected
    ** to a disk device.  This will trap the errors where we are
    ** conflicting with GCF on IO channels.
    */
    s = sr_getdvi(EFN$C_ENF, f->io_channel, 0, &itmlst, &local_iosb,
                  sr_resume_fcn, sid, 0);
    if ((s & 1) != 0)
    {
#ifdef SR_SUSPEND_THREAD
        SR_SUSPEND_THREAD;
#endif
        s = local_iosb.iosb$w_status;
    }

    if ((s & 1) == 0 || device_class != DC$_DISK)
    {
        /*
        ** This io channel is no longer any good, or is not what we
        ** think it is.  Return BADCLOSE and either the system error
        ** (if one was returned), or SS$_NOTFILEDEV.
        */
        err_code->error = ((s & 1) != 0) ? SS$_NOTFILEDEV : s;
        return (SR_BADCLOSE);
    }
#endif

    /* Queue the deaccess request for the file. */
    s = sr_qio(EFN$C_ENF, f->io_channel, IO$_DEACCESS, &local_iosb,
               sr_resume_fcn, sid, 0, 0, 0, 0, 0, 0);

    if ((s & 1) != 0)
    {
        /* Successfully queued, wait for completion. */
#ifdef SR_SUSPEND_THREAD
        SR_SUSPEND_THREAD;
#endif

        if ((local_iosb.iosb$w_status & 1) == 0)
        {
            /* The request completed with an error; report that status. */
            s = local_iosb.iosb$w_status;
        }
        else
        {
            /* Deassign the channel. */
            s = sys$dassgn(f->io_channel);
            if ((s & 1) != 0)
            {
                /* Closed: invalidate the control block. */
                f->io_channel = 0;
                f->io_type = 0;
                return (OK);
            }
        }
    }

    /* Queueing, completion or deassign failed; s holds the status. */
    err_code->error = s;
    return (SR_BADCLOSE);
}
/*{ ** Name: DIgalloc - Allocates N pages to a direct access file. ** ** Description: ** The DIgalloc routine is used to add pages to a direct access ** file, the disc space for these pages is guaranteed to exist ** once the routine returns. The contents of the pages allocated ** are undefined until a DIwrite to the page has happened. ** ** This routine can add more than one page at a time by accepting ** a count of the number of pages to add. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** n The number of pages to allocate. ** ** Outputs: ** page Pointer to variable used to ** return the page number of the ** first page allocated. ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** DI_BADEXTEND Can't allocate disk space ** DI_BADFILE Bad file context. ** DI_EXCEED_LIMIT Too many open files. ** DI_BADLRU_RELEASE Problem releasing open file. ** ** If running with slaves : ** DI_GENERAL_ERR More info in the err_code.intern field. ** ** If running without slaves : ** DI_BADINFO Error finding current end-of-file. ** ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 30-October-1992 (rmuth) ** Created. ** 30-nov-1992 ** Call IIdio_get_file_eof instead of lseek as this can ** deal with both raw and ordinary files. Change some ** types accordingly. ** 10-mar-1993 (mikem) ** Changed the type of the first parameter to DI_send_ev_to_slave() and ** the 2nd parameter to DI_slave_send(), so that DI_send_ev_to_slave() ** could access the slave control block's status. ** This routine will now initialize the status to DI_INPROGRESS, before ** making the request and the slave will change the status once the ** operation is complete. ** 30-feb-1993 (rmuth) ** Use the global DIzero_buffer as opposed to the local ** zero_alloc. ** 23-jul-1996 (canor01) ** Semaphore protect the lseek/write combination when used with ** operating-system threads. 
** 06-Jan-2005 (jenjo02) ** Fix inproc code to use proper version of write/pwrite/pwrite64 ** instead of always IIdio_write. View bytes-to-be-written in ** page terms rather than bytes, expand DI_ZERO_BUFFER_SIZE. ** 30-Sep-2005 (jenjo02) ** Ok to use pwrite if FD_PER_THREAD. ** 14-Oct-2005 (jenjo02) ** Chris's file descriptor properties now cached in io_fprop ** (file properties) and established on the first open, ** not every open. ** ** Design Details: ** ** UNIX DESIGN: ** ** Since the only way that UNIX allows one to allocate space to a ** file is to write to the file. This routine will write zero ** filled data to file for the required number of pages. ** ** If we are running with slaves we will request the slave process ** to do the actual I/O otherwise it is done inline. ** ** For more information on DI see di.h */ STATUS DIgalloc( DI_IO *f, i4 n, i4 *page, CL_ERR_DESC *err_code ) { i4 end_of_file; STATUS status = OK; DI_OP diop; CL_CLEAR_ERR( err_code ); /* Check file control block pointer, return if bad. */ if (f->io_type != DI_IO_ASCII_ID) return(DI_BADFILE); /* Count a galloc */ f->io_stat.galloc++; do { /* ** get file descriptor for this file */ status = DIlru_open(f, FALSE, &diop, err_code); if ( status != OK ) break; status = DI_galloc( f, n,&diop, &end_of_file, err_code ); if ( status != OK ) { CL_ERR_DESC lerr_code; (VOID) DIlru_release(&diop, &lerr_code); } else { status = DIlru_release(&diop, err_code); } /* ** If we failed to extend the table then do not update ** the following */ if (status == OK ) { /* ** Return page number of first page allocated */ *page = end_of_file + 1; #ifdef xDEV_TST TRdisplay( "DIgalloc: file: %t/%t, alloc_eof: %d, first: %d, count: %d\n", f->io_l_pathname, f->io_pathname, f->io_l_filename, f->io_filename, f->io_alloc_eof, *page, n); #endif } } while (FALSE); return( status ); }
/*{ ** Name: DIwrite - Writes page(s) of a file to disk. ** ** Description: ** The DIwrite routine is used to write pages of a direct access ** file. This routine should be flexible enough to write multiple ** contiguous pages. The number of pages to write is indicated ** as an input parameter, This value is updated to indicate the ** actual number of pages written. A synchronous write is preferred ** but not required. ** ** The buffer address from which the data is to be written is examined ** to see if it is in shared memory. If so, we then instruct the slave ** to write the page(s) directly from the target buffer. Otherwise, we ** copy the page(s) from the buffer into the server segment, and then ** instruct the slave to write the page(s) from the server segment. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** n Pointer to value indicating number of pages to ** write. ** page Value indicating page(s) to write. ** buf Pointer to page(s) to write. ** ** Outputs: ** f Updates the file control block. ** n Pointer to value indicating number of pages ** written. ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** DI_BADFILE Bad file context. ** DI_BADWRITE Error writing. ** DI_BADPARAM Parameter(s) in error. ** DI_ENDFILE Write past end of file. ** DI_BADLRU_RELEASE Error releasing open file. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 26-mar-87 (mmm) ** Created new for 6.0. ** 06-feb-89 (mikem) ** Return CL_ERR_DESC from DIlru_open(). ** 23-mar-89 (mikem) ** update io_system_eof when necessary (bug 4854). ** 10-jul-89 (rogerk & mikem) ** When asked to write a page that is past our cached EOF marker, call ** DIsense to check the actual EOF before signalling an error. The ** page may have been allocated by a different server and our copy of ** the EOF has just not been updated yet. 
** ** This case can also come up in a single server case now, as we ** continue to cache pages in the buffer manager even when the table ** is closed. This can result in a DIwrite() being performed on a ** newly opened file without ever doing a DIread() (where eof info ** was previously obtained). ** 10-jul-89 (mikem) ** Return DI_EXCEED_LIMIT if out of disk space, rather than ** BAD_WRITE. Also add some debugging code to make it easier to ** test that the server handles out of disk space correctly (by ** returning out of disk space out of DIwrite based on a gloal set ** by DIalloc every N times called). ** 23-Jan-90 (anton) ** Call DI_sense instead of DIsense to prevent multiple DIlru_opens ** and use of two CSevcbs. ** 2-Feb-90 (anton) ** Don't always copy CL_ERR_DESC ** 6-Feb-90 (jkb) ** Change write to IIdio_write which combines the write and lseek ** commands and makes direct io available for Sequent ** 5-aug-1991 (bryanp) ** Added support for I/O directly from server shared memory, ** bypassing the copy through the server segment if possible. ** 03-mar-1992 (jnash) ** Fix LG slave problem noted when Sun mmap() support ** introduced, change slave logic to send to the slave the ** segment id "key" rather than "segid" (segid value not ** the same in the slave). ** 30-October-1992 (rmuth) ** Prototype and make sure we have opened the file before ** we close it. ** 30-nov-1992 (rmuth) ** - Include <cldio.h> ** - DIlru error checking, this was a major restructuring of the ** code. No Change in functionality. ** 10-dec-1993 (rmuth) ** If fail the past io_allocated_eof test then make sure that we ** unset the errno value in CL_ERR_DESC set by SETCLERR. This was ** causing confusion as we were logging random errno's to the ** errlog.log ** 31-jan-94 (mikem) ** sir #57671 ** The transfer size of slave I/O is now stored in ** Cs_srv_block.cs_size_io_buf, rather than a constant ** DI_FILE_BUF_SIZE. ** 18-apr-1994 (jnash) ** fsync project. 
Call DIforce() on systems where fsync() exists ** but O_SYNC does not (hopefully never). ** 20-jun-1995 (amo ICL) ** Added call on DI_async_write for async io ** 20-Apr-1998 (merja01) ** Move "#" to column 1 to correct compile errors on axp_osf. ** 01-oct-1998 (somsa01) ** Return DI_NODISKSPACE when we are out of disk space. ** 29-Oct-1998 (schte01) ** Move "#" to column 1 to correct compile errors on axp_osf. ** 14-Oct-2005 (jenjo02) ** Chris's file descriptor properties now cached in io_fprop ** (file properties) and established on the first open, ** not every open. */ STATUS DIwrite( DI_IO *f, i4 *n, i4 page, char *buf, CL_ERR_DESC *err_code) { STATUS big_status = OK, small_status = OK, r_status; i4 num_of_pages; i4 last_page_to_write; CL_ERR_DESC lerr_code; DI_OP diop; /* default returns */ CL_CLEAR_ERR( err_code ); num_of_pages = *n; *n = 0; if (num_of_pages <= 0) return (DI_BADPARAM); last_page_to_write = page + num_of_pages - 1; diop.di_flags = 0; if (f->io_type != DI_IO_ASCII_ID) return(DI_BADFILE); if (f->io_mode != DI_IO_WRITE) return(DI_BADWRITE); /* Count another write */ f->io_stat.write++; /* ** get open file descriptor for the file */ if (big_status = DIlru_open(f, FALSE, &diop, err_code)) return(big_status); /* ** now check for write within bounds of the file */ if (last_page_to_write > f->io_alloc_eof) { i4 real_eof; /* ** DI_sense updates f->io_alloc_eof with the protection ** of io_sem (OS_THREADS), so there's no need to ** duplicate that update here. */ big_status = DI_sense(f, &diop, &real_eof, err_code); if (big_status == OK) { if (last_page_to_write > f->io_alloc_eof) { small_status = DI_ENDFILE; SETCLERR(err_code, 0, ER_write); /* ** The above sets errno as errno will be left over from ** a previous call zero it out to avoid confusion. 
*/ err_code->errnum = 0; } } } if (big_status == OK && small_status == OK) { #ifdef xOUT_OF_DISK_SPACE_TEST if ((f->io_open_flags & DI_O_NODISKSPACE_DEBUG) && (last_page_to_write > f->io_logical_eof) && (last_page_to_write <= f->io_alloc_eof)) { f->io_open_flags &= ~DI_O_NODISKSPACE_DEBUG; small_status = DI_NODISKSPACE; SETCLERR(err_code, 0, ER_write); err_code->errnum = ENOSPC; TRdisplay( "DIwrite(): Returning false DI_NODISKSPACE, page %d\n", page); } else #endif /* xOUT_OF_DISK_SPACE_TEST */ { if (Di_slave) { big_status = DI_slave_write( f, &diop, buf, page, num_of_pages, err_code ); } else # if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO) if (Di_async_io) { big_status = DI_async_write( f, &diop, buf, page, num_of_pages, err_code ); } else # endif /* OS_THREADS_USED || xCL_ASYNC_IO */ { big_status = DI_inproc_write( f, &diop, buf, page, num_of_pages, err_code ); } if (big_status == OK && small_status == OK) # if defined(xCL_010_FSYNC_EXISTS) && !defined(O_SYNC) { /* ** Due to lru activity, this code assumes that a force on any ** file descriptor forces pages for all open files. If not ** the case, fsync() logic must be installed in the slave. */ big_status = DIforce( f, err_code ); } if (big_status == OK && small_status == OK) # endif *n = num_of_pages; } } r_status = DIlru_release(&diop, &lerr_code); if (big_status) return( big_status ); else if (small_status) return( small_status ); return(r_status); }
/*{ ** Name: DIgather_write - GatherWrite external interface. ** ** Description: ** This routine is called by any thread that wants to write multiple pages ** The caller indicates via the op parameter whether the write request ** should be batched up or/and the batch list should be written to disc. ** ** Inputs: ** i4 op - indicates what operation to perform: ** DI_QUEUE_LISTIO - adds request to threads ** gatherwrite queue. ** DI_FORCE_LISTIO - causes do_writev() to be ** called for queued GIOs. ** DI_CHECK_LISTIO - checks if this thread has ** any outstanding I/O requests. ** DI_IO *f - Pointer to the DI file context needed to do I/O. ** i4 *n - Pointer to value indicating number of pages to ** write. ** i4 page - Value indicating page(s) to write. ** char *buf - Pointer to page(s) to write. ** (evcomp)() - Ptr to callers completion handler. ** PTR closure - Ptr to closure details used by evcomp. ** ** Outputs: ** f - Updates the file control block. ** n - Pointer to value indicating number of pages written. ** err_code - Pointer to a variable used to return operating system ** errors. ** ** Returns: ** OK Function completed normally. ** non-OK status Function completed abnormally ** with a DI error number. ** ** Exceptions: ** none ** ** Side Effects: ** The completion handler (evcomp) will do unspecified work. ** ** History: ** 19-May-1999 (jenjo02) ** Created. ** 20-jul-1999 (popri01) ** On Unixware (usl_us5), IOV MAX can only be determined ** at run-time, so use a hard-coded value of 16 (which ** guarantees portability). ** Also, add explicit cast for iov_base arithmetic. ** 03-Apr-2002 (bonro01) ** SGI is also missing IOV_MAX. Re-wrote the routine to ** determine IOV_MAX at run-time for SGI and Unixware. ** 26-Nov-2002 (inifa01) ** Crossed in and Ammended above change to fix compile ** problems. ** 09-Jan-2003 (bonro01) ** Unixware compiler could not generate variable size local ** structure for IOV_MAX. 
** 25-Aug-2005 (schka24) ** Don't lru-open the file here, do it when we're going to ** really queue the request. ** 30-Sep-2005 (jenjo02) ** GWthreadsSem now a CS_SYNCH object. ** 15-Mar-2010 (smeke01) b119529 ** DM0P_BSTAT fields bs_gw_pages and bs_gw_io are now u_i8s. */ STATUS DIgather_write( i4 op, char *gio_p, DI_IO *f, i4 *n, i4 page, char *buf, VOID (*evcomp)(), PTR closure, i4 *uqueued, u_i8 *gw_pages, u_i8 *io_count, CL_ERR_DESC *err_code) { STATUS big_status = OK, small_status = OK; #ifdef OS_THREADS_USED DI_GIO *gio = (DI_GIO *)gio_p; i4 num_of_pages; i4 last_page_to_write; if ( GWSemsInit == FALSE ) { CS_synch_init( &GWthreadsSem ); GWSemsInit = TRUE; } /* default returns */ if (err_code) CL_CLEAR_ERR( err_code ); if ( op & DI_QUEUE_LISTIO ) { num_of_pages = *n; *n = 0; last_page_to_write = page + num_of_pages - 1; if (f->io_type != DI_IO_ASCII_ID) return(DI_BADFILE); if (f->io_mode != DI_IO_WRITE) return(DI_BADWRITE); /* ** now check for write within bounds of the file */ if ( last_page_to_write > f->io_alloc_eof ) { i4 real_eof; /* ** There may be pending writes which haven't been forced ** which would extend the file to a new eof. ** Before sensing, make sure they have been forced. */ if ( check_list() ) big_status = force_list( err_code ); /* ** DIsense updates f->io_alloc_eof with the protection ** of io_sem (OS_THREADS), so there's no need to ** duplicate that update here. */ if ( big_status == OK ) big_status = DIsense(f, &real_eof, err_code); if ( big_status == OK && last_page_to_write > f->io_alloc_eof ) { small_status = DI_ENDFILE; SETCLERR(err_code, 0, ER_write); /* ** The above sets errno as errno will be left over from ** a previous call zero it out to avoid confusion. 
*/ err_code->errnum = 0; } } if ( big_status == OK && small_status == OK ) { /* Record another write */ f->io_stat.write++; gio->gio_evcomp = evcomp; gio->gio_data = closure; gio->gio_f = f; gio->gio_buf = buf; gio->gio_offset = (OFFSET_TYPE)f->io_bytes_per_page * (OFFSET_TYPE)page; gio->gio_len = f->io_bytes_per_page * num_of_pages; gio->gio_gw_pages = gw_pages; gio->gio_io_count = io_count; big_status = gather_list( gio, uqueued, err_code ); } if ( big_status == OK && small_status == OK ) *n = num_of_pages; } else if (op & DI_FORCE_LISTIO ) { return(force_list( err_code )); /* Won't return here until all requests have completed */ } else if (op & DI_CHECK_LISTIO ) { return(check_list()); } #endif /* OS_THREADS_USED */ return( big_status ? big_status : small_status ); }