/*{
** Name: IIME_atAddTag - Add a node of allocated memory to a tag.
**
** Description:
**	This routine is called when a new block of dynamic memory is being
**	allocated under a tag.  It is called by MEdoAlloc.  The job
**	of this routine is to store the allocated memory so that it
**	will be freed with the other blocks allocated under this tag when
**	MEtfree is called.
**
**	It works by checking the hash table for an METAGNODE for this tag.
**	If none is found, a new METAGNODE is allocated for this tag.
**	Then the block of memory is put on the QUEUE for the METAGNODE.
**
** Inputs:
**	tag		The tag under which this block of memory is
**			being allocated.
**
**	node		The block of memory being allocated.
**
** Side Effects:
**	This will take a node off freelist, and if necessary will allocate
**	dynamic memory.
**
** History:
**	5-dec-1989 (Joe)
**	    First Written
*/
VOID
IIME_atAddTag(
	i4	tag,
	ME_NODE	*node)
{
    register METAGNODE	**first;

# ifdef OS_THREADS_USED
    CS_synch_lock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */

    /*
    ** Note that first is a pointer to a pointer.
    ** The loop will cause a return from the routine if the tag already
    ** has an METAGNODE in the hash table.
    ** If the loop finishes, then first will point to the (NULL) pointer
    ** that must contain the new METAGNODE.
    */
    for (first = &(htab[tag%256]); *first != NULL; first = &((*first)->met_hash))
    {
	if ((*first)->met_tag == tag)
	{
	    /* Tag already registered: just append the block to its queue. */
	    (void)QUinsert((QUEUE *) node, (QUEUE *) (*first)->met_list.MElast);
# ifdef OS_THREADS_USED
	    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
	    return;
	}
    }

    /* No METAGNODE for this tag yet; replenish the freelist if empty. */
    if (freelist == NULL)
    {
	register METAGNODE	*next;
	register int		i;

	/*
	** Carve 50 METAGNODEs out of a single untagged (tag 0) allocation
	** and thread them together through met_hash to form the freelist.
	**
	** NOTE(review): the MEreqmem return value is not checked; if it
	** ever returned NULL the chaining loop below would dereference
	** NULL.  Presumably allocation failure is considered fatal here --
	** confirm against the ME error-handling policy.
	*/
	freelist = (METAGNODE *) MEreqmem(0, sizeof(METAGNODE)*50, TRUE, NULL);
	for (i = 0, next = freelist; i < 49; i++)
	{
	    next->met_hash = next + 1;
	    next = next->met_hash;
	}
	next->met_hash = NULL;
    }

    /* Pop a METAGNODE off the freelist and install it in the hash chain. */
    *first = freelist;
    freelist = freelist->met_hash;
    (*first)->met_hash = NULL;
    (*first)->met_tag = tag;
    QUinit((QUEUE *)&((*first)->met_list));
    (void)QUinsert((QUEUE *) node, (QUEUE *) (*first)->met_list.MElast);
# ifdef OS_THREADS_USED
    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
    return;
}
/*
**
** Name: PCisthread_alive - is a thread alive?
**
** Description:
**	This function grabs a thread id's handle off of the PID queue and
**	tests to see if it is alive.  If it cannot be found, then it
**	executes PCis_alive().
**
** Inputs:
**	pid		thread id to test.
**
** Returns:
**	TRUE		thread is still running.
**	FALSE		thread has exited (its exit status is recorded in
**			the queue entry and the handle is closed).
**
** History:
**	10-nov-1999 (somsa01)
**	    Created.
**	    (review fix) GetExitCodeThread's return value was ignored; on
**	    failure 'status' was read uninitialized.  Now falls back to
**	    PCis_alive() when the call fails.
*/
bool
PCisthread_alive(PID pid)
{
    PIDQUE	*qp;
    bool	pid_found = FALSE;
    HANDLE	hPid;

    /* No queue means nothing has been started */
    if (!Pidq_init)
	return(PCis_alive(pid));

    /*
    ** Find the thread id in the PID queue.
    */
    CS_synch_lock(&Pidq_mutex);
    for (qp = (PIDQUE *)Pidq.q_next;
	 qp != (PIDQUE *)&Pidq;
	 qp = (PIDQUE *)qp->pidq.q_next)
    {
	if ((qp->pid == pid) && qp->hPid)
	{
	    pid_found = TRUE;
	    hPid = qp->hPid;
	    break;
	}
    }
    CS_synch_unlock(&Pidq_mutex);

    if (!pid_found)
	return(PCis_alive(pid));
    else
    {
	DWORD	status;

	/*
	** Let's see if this thread is still alive.
	** Fix: if GetExitCodeThread() itself fails (e.g. stale handle),
	** 'status' would be uninitialized -- fall back to PCis_alive().
	*/
	if (!GetExitCodeThread(hPid, &status))
	    return(PCis_alive(pid));

	if (status == STILL_ACTIVE)
	    return(TRUE);
	else
	{
	    /*
	    ** Set the thread's exit status in the queue.
	    ** NOTE(review): 'qp' was captured under Pidq_mutex but is
	    ** re-dereferenced here after the mutex was dropped; this
	    ** assumes queue entries are never freed concurrently --
	    ** confirm against the queue's lifetime rules.
	    */
	    CS_synch_lock(&Pidq_mutex);
	    qp->stat = status;
	    qp->hPid = NULL;
	    CS_synch_unlock(&Pidq_mutex);
	    CloseHandle(hPid);
	    return(FALSE);
	}
    }
}
/*{
** Name: CS_scb_attach - make an SCB known to MO/IMA
**
** Description:
**	Inserts the given SCB into the server's known-thread tree,
**	keyed by its session id (cs_self).  If an entry with that key
**	already exists the SCB is not re-installed and a diagnostic is
**	written to the server log (it shouldn't already be there).
**
** Re-entrancy:
**	no.  Called with inkernel set, presumabely.
**	(OS-threads: yes, locks global tree mutex)
**
** Inputs:
**	scb		the thread to link in.
**
** Outputs:
**	scb		cs_spblk is updated.
**
** Returns:
**	none.
**
** Side Effects:
**	May TRdisplay debug information.
*/
void
CS_scb_attach( CS_SCB *scb )
{
    SPBLK	probe;
    SPBLK	*existing;

# ifdef OS_THREADS_USED
    CS_synch_lock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */

    /* Probe the tree for an entry already keyed by this session id. */
    probe.key = (PTR) scb->cs_self;
    existing = SPlookup( &probe, Cs_srv_block.cs_scb_ptree );

    if( existing == NULL )
    {
	/* Not present: reset the SCB's tree block and install it. */
	scb->cs_spblk.uplink = NULL;
	scb->cs_spblk.leftlink = NULL;
	scb->cs_spblk.rightlink = NULL;
	scb->cs_spblk.key = (PTR) scb->cs_self;
	SPinstall( &scb->cs_spblk, Cs_srv_block.cs_scb_ptree );
    }
    else
    {
	TRdisplay("CS_scb_attach: attempt to attach existing SCB %p!!!\n",
		  scb );
    }

# ifdef OS_THREADS_USED
    CS_synch_unlock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
}
/*{
** Name: TMhrnow - current system time at high (nanosecond) resolution.
**
** Description:
**	Fills in *stime with the current wall-clock time.  If called
**	twice within the same clock interval, the returned time is bumped
**	by one nanosecond past the previously returned value, so callers
**	always see strictly increasing timestamps.
**
** Inputs:
**	none
**
** Outputs:
**	stime		current time (tv_sec / tv_nsec).
**
** Returns:
**	OK on success; the clock_gettime() result where TMhrnow is a thin
**	wrapper; FAIL in the (previously undefined) WRAPPED configuration.
**
** History:
**	(review fix) When WRAPPED was defined and TMHRNOW_WRAPPED_CLOCKGETTIME
**	was not, the function body was empty and control fell off the end of
**	a non-void function (undefined behavior).  Now returns FAIL there.
*/
STATUS
TMhrnow(HRSYSTIME *stime)
{
#if defined(sqs_ptx)
    struct timespec	cur_syst;
#else
    SYSTIME		cur_syst;
#endif /* sqs_ptx */

#ifdef TMHRNOW_WRAPPED_CLOCKGETTIME
    return clock_gettime( CLOCK_REALTIME, stime );
#else

#ifndef WRAPPED
    /*
    ** Lazily create the mutex protecting 'lasttime'.
    ** NOTE(review): this check-then-set is not itself protected, so two
    ** threads racing here could both run CS_synch_init -- presumably the
    ** first call happens before threading starts; confirm.
    */
    if ( !initialized )
    {
	initialized = TRUE;
#ifdef OS_THREADS_USED
	CS_synch_init(&nanomutex);
#endif /* OS_THREADS_USED */
    }

#ifdef sqs_ptx
    getclock(TIMEOFDAY, &cur_syst);
    stime->tv_sec = cur_syst.tv_sec;
    stime->tv_nsec = cur_syst.tv_nsec;
#else
    /* Millisecond-resolution clock: scale milliseconds to nanoseconds. */
    TMet(&cur_syst);
    stime->tv_sec = cur_syst.TM_secs;
    stime->tv_nsec = cur_syst.TM_msecs * NANO_PER_MILLI;
#endif /* sqs_ptx */

    /*
    ** if we have been called twice within the same
    ** interval, increment the time by one nanosecond.
    */
#ifdef OS_THREADS_USED
    CS_synch_lock(&nanomutex);
#endif /* OS_THREADS_USED */
    if ( stime->tv_sec == lasttime.tv_sec &&
	 stime->tv_nsec <= lasttime.tv_nsec )
    {
	stime->tv_nsec = lasttime.tv_nsec + 1;
    }
    lasttime.tv_sec = stime->tv_sec;
    lasttime.tv_nsec = stime->tv_nsec;
#ifdef OS_THREADS_USED
    CS_synch_unlock(&nanomutex);
#endif /* OS_THREADS_USED */

    return OK;
#else
    /* Fix: explicit failure instead of falling off the end (UB). */
    return FAIL;
#endif /* WRAPPED */

#endif /* TMHRNOW_WRAPPED_CLOCKGETTIME */
}
/*{
** Name: CS_scb_index - MO index handler for the SCB tree.
**
** Description:
**	Services MO_GET and MO_GETNEXT index requests against the
**	server's known-thread tree (cs_scb_ptree), returning the SCB
**	pointer for the located instance.  For MO_GETNEXT the instance
**	string is rewritten to name the block that was found.
**
** Inputs:
**	msg		MO_GET or MO_GETNEXT.
**	cdata		unused class data.
**	linstance	length of the instance buffer.
**	instance	instance key string.
**
** Outputs:
**	instance	(MO_GETNEXT) updated to the located block.
**	instdata	receives the SCB pointer for the instance.
**
** Returns:
**	OK, MO_BAD_MSG, or the status from the tree/format helpers.
*/
STATUS
CS_scb_index(i4 msg,
	     PTR cdata,
	     i4 linstance,
	     char *instance,
	     PTR *instdata )
{
    STATUS	rval = OK;
    PTR		key;

# ifdef OS_THREADS_USED
    CS_synch_lock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */

    if( msg == MO_GET )
    {
	rval = CS_get_block( instance, Cs_srv_block.cs_scb_ptree, &key );
	if( rval == OK )
	    *instdata = (PTR) CS_find_scb((CS_SID) key);
    }
    else if( msg == MO_GETNEXT )
    {
	rval = CS_nxt_block( instance, Cs_srv_block.cs_scb_ptree, &key );
	if( rval == OK )
	{
	    *instdata = (PTR) CS_find_scb((CS_SID) key);
	    /* Rewrite the instance string to name the block we found. */
	    rval = MOptrout( MO_INSTANCE_TRUNCATED, key,
			     linstance, instance );
	}
    }
    else
    {
	rval = MO_BAD_MSG;
    }

# ifdef OS_THREADS_USED
    CS_synch_unlock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */

    return( rval );
}
/****************************************************************************** ** ** Name: DIalloc - Allocates a page to a direct access file. ** ** Description: ** The DIalloc routine is used to add pages to a direct ** access file. This routine can add more than one page ** at a time by accepting a count of the number of pages to add. ** ** The end of file and allocated are not updated on disk until a DIflush ** call is issued. This insures that pages are not considered valid ** until after they are formatted. The allocation can be ignored if ** the file is closed or the system crashes before the DIflush. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** n The number of pages to allocate. ** ** Outputs: ** page Pointer to variable used to ** return the page number of the ** first page allocated. ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** DI_BADEXTEND Can't allocate disk space ** DI_BADFILE Bad file context. ** DI_EXCEED_LIMIT Too many open files. ** Exceptions: ** none ** ** Side Effects: ** none ** History: ** 09-feb-1996 (canor01) ** Get exclusive semaphore on DI_IO before updating it in DI_sense ** 08-dec-1997 (canor01) ** Implement LRU for open files (initial copy from Unix). ** 28-jan-1998 (canor01) ** Optimize LRU--only call DIlru_open if file has been closed. ** 06-aug-1999 (mcgem01) ** Replace nat and longnat with i4. ** 13-Nov-2009 (kschendel) SIR 122757 ** Make io-sem a SYNCH. ** ******************************************************************************/ STATUS DIalloc(DI_IO *f, i4 n, i4 *page, CL_SYS_ERR *err_code) { STATUS status = OK; CLEAR_ERR(err_code); /* * Check file control block pointer, return if bad. 
*/ if (f->io_type != DI_IO_ASCII_ID) return (DI_BADFILE); CS_synch_lock( &f->io_sem ); /* get file descriptor for this file */ do { if ( f->io_nt_fh == INVALID_HANDLE_VALUE ) status = DIlru_open( f, FALSE, err_code); if ( status != OK ) break; status = DI_sense( f, page, err_code ); if ( status != OK ) break; *page = (i4) (f->io_system_eof + 1); f->io_system_eof += n; } while (FALSE); CS_synch_unlock( &f->io_sem ); return( status ); }
/****************************************************************************** ** Name: ** MEfree.c ** ** Function: ** MEfree ** ** Arguments: ** void * block; ** ** Result: ** Frees the block of memory pointed to by 'block'. ** ** Removes the block from the tag queue if appropriate. ** ** Returns: ** STATUS: OK, ME_00_FREE, ME_FREE_FIRST ** ** Side Effects: ** None ** ** History: ** 21-mar-1996 (canor01) ** Free memory from calling process's heap. Compact the ** heap after every several frees. ** 03-jun-1996 (canor01) ** Internally, store the tag as an i4 instead of an i2. This makes ** for more efficient code on byte-aligned platforms that do fixups. ** 08-aug-1999 (mcgem01) ** Changed longnat to i4. ** 08-feb-2001 (somsa01) ** Changed types of i_meactual and i_meuser to be SIZE_TYPE. ** 21-jun-2002 (somsa01) ** Sync'ed up with UNIX. Rely on ME_NODE rather than a ptr UNION. ** Also, removed call to HeapCompact() logic. ** 05-Jul-2005 (drivi01) ** Replaced HeapFree call with free. ** 11-May-2009 (kschendel) b122041 ** Change pointer arg to void *, more appropriate. ** 23-Sep-2009 (wonst02) Bug 122427 ** Fix possibly trashing memory (alloc'd by tag) by using taglist mutex ** ******************************************************************************/ STATUS MEfree(void *block) { register ME_NODE *this; STATUS rv = OK; if ( block == NULL ) rv = ME_00_FREE; else { this = (ME_NODE *)((char *)block - sizeof(ME_NODE)); /* ** assume block is legit if the node looks like it points to an ** allocated block. */ if (this->MEaskedfor == 0) rv = ME_NO_FREE; if (rv == OK) { i_meactual -= this->MEsize; i_meuser -= this->MEaskedfor; if (this->MEtag) { CS_synch_lock(&MEtaglist_mutex); QUremove((QUEUE *)this); CS_synch_unlock(&MEtaglist_mutex); } free((char *)this); } } return(rv); }
/*{
** Name: CS_detach_scb - remove an SCB from the MO/IMA known-thread tree.
**
** Description:
**	Looks up the SCB by its session id (cs_self) in the server's
**	known-thread tree and deletes it.  Logs a diagnostic if the SCB
**	is not found (it should be there).
**
** Inputs:
**	scb		the thread to unlink.
**
** Outputs:
**	none.
**
** Side Effects:
**	May TRdisplay debug information.
*/
void
CS_detach_scb( CS_SCB *scb )
{
    SPBLK	probe;
    SPBLK	*found;

# ifdef OS_THREADS_USED
    CS_synch_lock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */

    /* Locate the tree entry keyed by this session id. */
    probe.key = (PTR) scb->cs_self;
    found = SPlookup( &probe, Cs_srv_block.cs_scb_ptree );

    if( found != NULL )
    {
	SPdelete( &scb->cs_spblk, Cs_srv_block.cs_scb_ptree );
    }
    else
    {
	TRdisplay("CS_detach_scb: attempt to detach unknown SCB %p\n",
		  scb );
    }

# ifdef OS_THREADS_USED
    CS_synch_unlock( &Cs_srv_block.cs_scb_mutex );
# endif /* OS_THREADS_USED */
}
/*{
** Name: MEreqmem - allocate sized, optionally tagged/zeroed memory.
**
** Description:
**	Allocates 'size' bytes and returns a pointer to the usable area
**	(which follows a hidden ME_NODE header).  Under ME_USER_ALLOC the
**	block comes straight from malloc(); otherwise it is carved from
**	the process free list, which is extended via MEget_pages when no
**	node is large enough.  A nonzero tag registers the block with
**	IIME_atAddTag so MEtfree can release it; tag 0 blocks go on the
**	global MElist.
**
** Inputs:
**	tag		tag to allocate under (0 = untagged).
**	size		number of bytes requested.
**	zero		if TRUE, zero-fill the returned area.
**
** Outputs:
**	status		if non-NULL, receives OK or the failure status
**			(ME_NO_ALLOC, ME_GONE, ME_CORRUPTED, ...).
**
** Returns:
**	Pointer to the usable area, or NULL on failure.
*/
PTR
MEreqmem(
	u_i2		tag,
	SIZE_TYPE	size,
	bool		zero,
	STATUS		*status)
{
    PTR			block=NULL;
    register ME_NODE	*node;		/* the node to return */
    register ME_NODE	*start;		/* for searching free list */
    register ME_NODE	*this;		/* block to get node from */
    register ME_NODE	*frag;		/* fragment block */
    ME_NODE		*tmp;		/* not register for MEadd */
    SIZE_TYPE		nsize;		/* nsize node to obtain */
    SIZE_TYPE		fsize;		/* size of 'this' fragment block */
    SIZE_TYPE		newstuff;	/* size to add to process, or 0 */
    SIZE_TYPE		prev_actual;	/* rescan free list? */
    SIZE_TYPE		alloc_pages;
    CL_ERR_DESC		err_code;
    STATUS		MEstatus = OK;

    i_meuser += size;

    if (!size)
	MEstatus = ME_NO_ALLOC;

    if( !MEsetup )
	MEinitLists();

    /*
    ** Try to do the allocation.
    */
    if( MEstatus == OK )
    {
	/* Round up to header + alignment granule. */
	nsize = SIZE_ROUND( size );

	/*
	** Get memory with malloc().
	*/
	if( MEadvice == ME_USER_ALLOC )
	{
	    if( (node = (ME_NODE *)malloc( nsize )) == NULL )
		MEstatus = ME_GONE;
	}
	/*
	** Get block from private free list.
	*/
	else
	{
# ifdef OS_THREADS_USED
	    CS_synch_lock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
	    /*
	    ** Look on free list for 1st block big enough
	    ** to hold request.  This linear search can be slow.
	    */
	    start = (ME_NODE *)&MEfreelist;
	    this = MEfreelist.MEfirst;
	    while ( this != NULL && this != start && this->MEsize < nsize )
		this = this->MEnext;
	    if( this == NULL )
		MEstatus = ME_CORRUPTED;

	    /*
	    ** At this point, we are in one of three states:
	    **	1) Corrupted memory; MEstatus != OK
	    **	2) this is good node, this != start
	    **	3) No good node; this == start;
	    */
	    if ( MEstatus == OK )
	    {
		/*
		** If nothing on free list is big enough
		** get one or more standard blocks from system,
		** take what is needed and add remainder
		** to free list.
		*/
		if (this != start)
		{
		    /* take right off the free list */
		    newstuff = 0;
		}
		else	/* this == start */
		{
		    /*
		    ** Expand the free list by calling getpages;
		    ** newstuff holds the number of pages needed.
		    */
		    newstuff = (nsize + ME_MPAGESIZE-1)/ME_MPAGESIZE;
		    /* if first time allocation, get enough for MO overhead */
		    if ( (prev_actual = i_meactual) == (SIZE_TYPE) 0 )
			newstuff += 4;
# ifdef OS_THREADS_USED
		    /* Must drop the mutex across the (possibly slow) page get. */
		    CS_synch_unlock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
		    MEstatus = MEget_pages(ME_SPARSE_MASK, newstuff,
				NULL, (PTR *)&tmp, &alloc_pages, &err_code);
# ifdef OS_THREADS_USED
		    CS_synch_lock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
		    if (MEstatus == OK)
		    {
			/*
			** Now we need to find where to put this new memory
			** on the sorted free list - we search in reverse.
			*/
			tmp->MEsize = newstuff * ME_MPAGESIZE;
			this = MEfreelist.MElast;
			while (start != this && this != NULL && this > tmp)
			    this = this->MEprev;
			/* Coalesce with the left neighbor if it abuts 'tmp'. */
			if (this != start && NEXT_NODE(this) == tmp)
			{
			    this->MEsize += tmp->MEsize;
			}
			else
			{
			    (void)QUinsert( (QUEUE *) tmp, (QUEUE *)this );
			    this = tmp;
			}
			/* Coalesce with the right neighbor if contiguous. */
			if (this->MEnext != start &&
			    NEXT_NODE(this) == this->MEnext)
			{
			    this->MEsize += this->MEnext->MEsize;
			    (void)QUremove( (QUEUE *) this->MEnext);
			}
			/*
			** While the free list mutex was released, another
			** thread may have freed up a big enough piece of
			** memory for our needs, or may have extended the
			** free list.
			** If that's the case, research the free list;
			** we'll find either a right-sized node or
			** the new memory we just added to the free list.
			*/
			if ( prev_actual != i_meactual )
			{
			    this = MEfreelist.MEfirst;
			    while ( this != NULL && this != start &&
				    this->MEsize < nsize )
				this = this->MEnext;
			    if( this == NULL )
				MEstatus = ME_CORRUPTED;
			}
		    }
		    else if (MEstatus == ME_OUT_OF_MEM)
			MEstatus = ME_GONE;
		}

		/*
		** At this point, we can be in two states.
		**	1) Corrupted memory, MEstatus != OK
		**	2) 'this' is an OK node from the free list.
		*/
		if ( MEstatus == OK )
		{
		    node = this;
		    /*
		    ** if this is correct size or would
		    **    leave useless block in chain
		    **	  just move block to allocated list
		    ** else
		    **	  grab what is needed from 'this'
		    **	  block and then update 'this'
		    */
		    fsize = node->MEsize - nsize;
		    if ( fsize <= sizeof(ME_NODE) )
		    {
			(void)QUremove( (QUEUE *) node );
			/* fudge size in node to eat leftover amount. */
			fsize = 0;
			nsize = node->MEsize;
		    }
		    else	/* make fragment block */
		    {
			/*
			** Make a leftover block after the
			** allocated space in node, in 'this'
			*/
			frag = (ME_NODE *)((char *) node + nsize );
			frag->MEsize = fsize;
			frag->MEtag = 0;

			/* remove node, add fragment to free list */
			(void)QUremove( (QUEUE *) node );
			MEstatus = MEfadd( frag, FALSE );
		    }	/* fragment left over */

		    /* Increment meactual while mutex held */
		    i_meactual += nsize;
		}	/* Got a node */
	    }	/* free list search OK */
# ifdef OS_THREADS_USED
	    CS_synch_unlock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
	}	/* ME_USER_ALLOC */

	/*
	** At this point we are in one of two states:
	**	1. Corrupted, MEstatus != OK.
	**	2. Have a 'node' to use, from freelist or malloc.
	** The freelist is consistant, but the allocated list is
	** not setup for the node.  "nsize" is the actual size of "node".
	*/
	if( MEstatus == OK )
	{
	    /* splice into allocated object queue */
	    if (0 == tag)
	    {
# ifdef OS_THREADS_USED
		CS_synch_lock( &MElist_mutex );
# endif /* OS_THREADS_USED */
		(void)QUinsert( (QUEUE *) node, (QUEUE *) MElist.MElast );
# ifdef OS_THREADS_USED
		CS_synch_unlock( &MElist_mutex );
# endif /* OS_THREADS_USED */
	    }
	    else
	    {
		IIME_atAddTag(tag, node);
	    }

	    /* Set values in block to be returned */
	    node->MEtag = tag;
	    node->MEsize = nsize;
	    node->MEaskedfor = size;

	    /* Fill in the returned pointer */
	    block = (PTR)((char *)node + sizeof(ME_NODE));

	    if (zero)
		MEfill( (nsize - sizeof(ME_NODE)), 0, block);
	}	/* got node OK */
    }
    if (status != NULL)
	*status = MEstatus;
    if (MEstatus != OK)
	return((PTR)NULL);
    else
	return(block);
}
/*{ ** Name: DI_inproc_write - writes page(s) to a file on disk. ** ** Description: ** This routine was created to make DIwrite more readable once ** error checking had been added. See DIwrite for comments. ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** diop Pointer to dilru file context. ** buf Pointer to page(s) to write. ** page Value indicating page(s) to write. ** num_of_pages number of pages to write ** ** Outputs: ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** other errors. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 30-nov-1992 (rmuth) ** Created. ** 03-jun-1996 (canor01) ** Note in the scb that this is a DI wait. ** 05-May-1997 (merja01) ** Changed preprocessor stmt for pwrite. Not all platforms ** using OS_THREADS have a pwrite function. This function ** seems to only be available on Solaris 2.4 where async IO ** is not yet supported. ** 14-July-1997 (schte01) ** For those platforms that do direct i/o (where the ** seek and the write are separate functions), do not release and ** reaquire the semaphore on the DI_IO block. This will protect ** against i/o being done by a different thread in between the ** lseek and the write. ** 14-Aug-1997 (schte01) ** Add xCL_DIRECT_IO as a condition to the 14-July-1997 change ** instead of the test for !xCL_ASYNCH_IO. ** 22-Dec-1998 (jenjo02) ** If DI_FD_PER_THREAD is defined, call IIdio_write() instead of ** pwrite(). ** 01-oct-1998 (somsa01) ** Return DI_NODISKSPACE when we are out of disk space. ** 01-Apr-2004 (fanch01) ** Add O_DIRECT support on Linux depending on the filesystem ** properties, pagesize. Fixups for misaligned buffers on read() ** and write() operations. ** 13-apr-04 (toumi01) ** Move stack variable declaration to support "standard" C compilers. ** 29-Jan-2005 (schka24) ** Ditch attempt to gather diow timing stats, not useful in ** the real world and generates excess syscalls on some platforms. 
** 15-Mar-2006 (jenjo02) ** io_sem is not needed with thread affinity. ** 6-Nov-2009 (kschendel) SIR 122757 ** Make io-sem a SYNCH, avoid entirely if PRIVATE. ** Delete copy-to-align, caller is supposed to do it now. ** Don't attempt SCB updating if not backend. */ static STATUS DI_inproc_write( DI_IO *f, DI_OP *diop, char *buf, i4 page, i4 num_of_pages, CL_ERR_DESC *err_code ) { STATUS status = OK; CS_SCB *scb; i4 saved_state; /* unix variables */ int bytes_written; int bytes_to_write; OFFSET_TYPE lseek_offset; /* ** seek to place to write */ lseek_offset = (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)page; bytes_to_write = (f->io_bytes_per_page * (num_of_pages)); if (Di_backend) { CSget_scb(&scb); if ( scb ) { saved_state = scb->cs_state; scb->cs_state = CS_EVENT_WAIT; if (f->io_open_flags & DI_O_LOG_FILE_MASK) { scb->cs_memory = CS_LIOW_MASK; scb->cs_liow++; Cs_srv_block.cs_wtstatistics.cs_liow_done++; Cs_srv_block.cs_wtstatistics.cs_liow_waits++; Cs_srv_block.cs_wtstatistics.cs_liow_kbytes += bytes_to_write / 1024; } else { scb->cs_memory = CS_DIOW_MASK; scb->cs_diow++; Cs_srv_block.cs_wtstatistics.cs_diow_done++; Cs_srv_block.cs_wtstatistics.cs_diow_waits++; Cs_srv_block.cs_wtstatistics.cs_diow_kbytes += bytes_to_write / 1024; } } } # if defined(OS_THREADS_USED) && defined(xCL_NO_ATOMIC_READ_WRITE_IO) if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0) CS_synch_lock( &f->io_sem ); # endif /* OS_THREADS_USED && !xCL_NO_ATOMIC_READ_WRITE_IO */ # if defined(OS_THREADS_USED) && !defined(xCL_NO_ATOMIC_READ_WRITE_IO) bytes_written = #ifdef LARGEFILE64 pwrite64( (int)diop->di_fd, buf, bytes_to_write, lseek_offset ); #else /* LARGEFILE64 */ pwrite( (int)diop->di_fd, buf, bytes_to_write, lseek_offset ); #endif /* LARGEFILE64 */ # else /* OS_THREADS_USED !xCL_NO_ATOMIC_READ_WRITE_IO */ bytes_written = IIdio_write( (int)diop->di_fd, buf, bytes_to_write, lseek_offset, 0, f->io_fprop, err_code ); # endif /* OS_THREADS_USED */ if ( bytes_written != 
bytes_to_write ) { SETCLERR(err_code, 0, ER_write); switch( err_code->errnum ) { case EFBIG: status = DI_BADEXTEND; break; case ENOSPC: status = DI_NODISKSPACE; break; #ifdef EDQUOTA case EDQUOT: status = DI_EXCEED_LIMIT; break; #endif default: if (err_code->errnum == 0) status = DI_ENDFILE; else status = DI_BADWRITE; break; } } # if defined(OS_THREADS_USED) && defined(xCL_NO_ATOMIC_READ_WRITE_IO) if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0) CS_synch_unlock( &f->io_sem ); # endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */ if ( Di_backend && scb ) { scb->cs_memory &= ~(CS_DIOW_MASK | CS_LIOW_MASK); scb->cs_state = saved_state; } return( status ); }
/*{
** Name: DI_galloc - guaranteed (zero-filled) allocation at end of file.
**
** Description:
**	Extends the file by 'n' pages of zeros, either by asking a DI
**	slave process to do it (Di_slave) or by writing from the shared
**	zero buffer directly.  On filesystems with a "reserve" allocation
**	strategy, space is pre-reserved (fallocate-style) before zeroing
**	to help keep the file contiguous.  On success the file's cached
**	allocated end-of-file (io_alloc_eof) is advanced.
**
** Inputs:
**	f		Pointer to the DI file context.
**	n		number of pages to allocate.
**	diop		Pointer to dilru file context.
**
** Outputs:
**	end_of_file	receives the last page number BEFORE the new
**			allocation (i.e. old EOF page).
**	err_code	receives operating system errors.
**
** Returns:
**	OK or a DI_* error status (via DIlru_set_di_error on failure).
*/
static STATUS
DI_galloc(
    DI_IO	*f,
    i4		n,
    DI_OP	*diop,
    i4		*end_of_file,
    CL_ERR_DESC	*err_code)
{
    STATUS	big_status = OK, small_status = OK;
    STATUS	intern_status = OK;
    register DI_SLAVE_CB	*disl;
    i4		last_page;
    OFFSET_TYPE	lseek_ret;

    do
    {
# ifdef OS_THREADS_USED
	/* Seek/write must be semaphore protected */
	if ((f->io_fprop & FPROP_PRIVATE) == 0)
	    CS_synch_lock( &f->io_sem );
# endif /* OS_THREADS_USED */

	if (Di_slave)
	{
	    /* Ship the zero-fill request to the slave process. */
	    disl = diop->di_evcb;
	    disl->file_op = DI_SL_ZALLOC;
	    disl->length = n * f->io_bytes_per_page;

	    /* Pass file properties to slave */
	    FPROP_COPY(f->io_fprop,disl->io_fprop);

	    DI_slave_send( disl->dest_slave_no, diop,
			   &big_status, &small_status, &intern_status );

	    if (( big_status != OK ) || ( small_status != OK ))
		break;

	    if ( disl->status != OK )
	    {
		STRUCT_ASSIGN_MACRO(disl->errcode, *err_code);
		small_status = DI_BADEXTEND;
		break;
	    }
	    else
	    {
		/* Slave returns the new file length in disl->length. */
		lseek_ret = disl->length;
	    }
	}
	else
	{
	    /*
	    ** Running without slaves
	    */
	    OFFSET_TYPE	lseek_offset;
	    i8		reservation;
	    i4		buf_size;
	    i4		bytes_written;
	    i4		pages_remaining = n;
	    i4		pages_at_a_time = Di_zero_bufsize /
					  f->io_bytes_per_page;

	    /* find current end-of-file */
	    lseek_ret = IIdio_get_file_eof(diop->di_fd, f->io_fprop);
	    if ( lseek_ret == (OFFSET_TYPE)-1L )
	    {
		SETCLERR(err_code, 0, ER_lseek);
		small_status = DI_BADINFO;
		break;
	    }
	    else
	    {
		lseek_offset = lseek_ret;

		/* If this filesystem can do reservations, see if we
		** should reserve more space.
		** Even though we have to write the zeros anyway, the
		** reservation may well be larger than the zeroing
		** buffer, and this way helps maintain contiguity.
		** Not worth it for tiny writes.
		*/
		if (pages_remaining > 2
		  && FPROP_ALLOCSTRATEGY_GET(f->io_fprop) == FPROP_ALLOCSTRATEGY_RESV)
		{
		    reservation = lseek_offset +
				  (pages_remaining * f->io_bytes_per_page);
		    if (reservation > f->io_reserved_bytes)
		    {
			/* Re-check in case some other server reserved */
			small_status = IIdio_get_reserved(diop->di_fd,
					&f->io_reserved_bytes, err_code);
			if (small_status == OK
			  && reservation > f->io_reserved_bytes)
			{
			    small_status = IIdio_reserve(diop->di_fd,
					f->io_reserved_bytes,
					reservation - f->io_reserved_bytes,
					err_code);
			    if (small_status == OK)
			    {
				f->io_reserved_bytes = reservation;
			    }
			    else
			    {
				if (small_status != DI_BADFILE)
				    break;
				/* Fallocate not supported, turn off
				** "reserve" strategy, continue without.
				*/
				small_status = OK;
				FPROP_ALLOCSTRATEGY_SET(f->io_fprop,
					FPROP_ALLOCSTRATEGY_VIRT);
			    }
			}
		    }
		} /* end reservations */

		/* Zero-fill the new pages, one zero-buffer at a time. */
		while ( pages_remaining > 0 )
		{
		    if ( pages_remaining < pages_at_a_time )
			buf_size = pages_remaining * f->io_bytes_per_page;
		    else
			buf_size = Di_zero_bufsize;

# if defined(OS_THREADS_USED) && !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
		    bytes_written =
#ifdef LARGEFILE64
			pwrite64( diop->di_fd, Di_zero_buffer, buf_size,
				  lseek_offset );
#else /* LARGEFILE64 */
			pwrite( diop->di_fd, Di_zero_buffer, buf_size,
				lseek_offset );
#endif /* LARGEFILE64 */
# else /* OS_THREADS_USED && !xCL_NO_ATOMIC_READ_WRITE_IO */
		    bytes_written =
			IIdio_write( diop->di_fd, Di_zero_buffer, buf_size,
				     lseek_offset, &lseek_offset,
				     f->io_fprop, err_code );
# endif /* OS_THREADS_USED */

		    if ( bytes_written != buf_size )
		    {
			SETCLERR(err_code, 0, ER_write);
			small_status = DI_BADEXTEND;
			break;
		    }

		    lseek_offset += buf_size;
		    pages_remaining -= pages_at_a_time;
		}

		if ( small_status != OK )
		    break;
	    }
	}

	/* lseek_ret is the pre-extension file length; convert to the
	** last pre-existing page number.  (Only reached on success --
	** all failure paths break out before this assignment.)
	*/
	*end_of_file = ( lseek_ret / f->io_bytes_per_page) - 1;

    } while (FALSE);

    if (big_status == OK && small_status == OK)
    {
	/*
	** Update the current allocated end-of-file under mutex protection
	*/
	last_page = *end_of_file + n;
	if (last_page > f->io_alloc_eof)
	    f->io_alloc_eof = last_page;
    }

# ifdef OS_THREADS_USED
    if ((f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED */

    if ( big_status != OK )
	small_status = big_status;

    if ( small_status != OK )
	DIlru_set_di_error( &small_status, err_code, intern_status,
			    DI_GENERAL_ERR);

    return(small_status);
}
/*{
** Name: DIrename - Renames a file.
**
** Description:
**	The DIrename will change the name of a file.
**	The file MUST be closed.  The file can be renamed
**	but the path cannot be changed.  A fully qualified
**	filename must be provided for old and new names.
**	This includes the type qualifier extension.
**
** Inputs:
**	di_io_unused	 UNUSED DI_IO pointer (always set to 0 by caller)
**	path		 Pointer to the path name.
**	pathlength	 Length of path name.
**	oldfilename	 Pointer to old file name.
**	oldlength	 Length of old file name.
**	newfilename	 Pointer to new file name.
**	newlength	 Length of new file name.
** Outputs:
**	err_code	 Pointer to a variable used
**			 to return operating system
**			 errors.
** Returns:
**	OK
**	DI_BADRNAME	 Any i/o error during rename.
**	DI_BADPARAM	 Parameter(s) in error.
**	DI_DIRNOTFOUND	 Path not found.
** Exceptions:
**	none
**
** Side Effects:
**	On a rename failure, any LRU-cached open file descriptor for the
**	old name is force-closed and the rename is retried (helps
**	filesystems which don't accomodate renaming open files).
**
** History:
**	26-mar-87 (mmm)	    Created new for 6.0.
**	(later history retained in revision control; see 15-Apr-2004,
**	26-Jul-2005, 30-Sep-2005, 15-Nov-2010 entries for the retry and
**	fd-scan behavior documented below.)
*/
STATUS
DIrename(
    DI_IO	*di_io_unused,
    char        *path,
    u_i4	pathlength,
    char	*oldfilename,
    u_i4	oldlength,
    char	*newfilename,
    u_i4	newlength,
    CL_ERR_DESC	*err_code)
{
    char	oldfile[DI_FULL_PATH_MAX];
    char	newfile[DI_FULL_PATH_MAX];
    STATUS	ret_val;
    CL_ERR_DESC	local_err;

    /* unix variables */
    int		os_ret;

    /* retry variables */
    i4		retry = 0, failflag = 0;

    /* default returns */
    ret_val = OK;

    if ((pathlength > DI_PATH_MAX)	||
	(pathlength == 0)		||
	(oldlength > DI_FILENAME_MAX)	||
	(oldlength == 0)		||
	(newlength > DI_FILENAME_MAX)	||
	(newlength == 0))
	return (DI_BADPARAM);

    /* get null terminated path and filename for old file */
    MEcopy((PTR) path, pathlength, (PTR) oldfile);
    oldfile[pathlength] = '/';
    MEcopy((PTR) oldfilename, oldlength, (PTR) &oldfile[pathlength + 1]);
    oldfile[pathlength + oldlength + 1] = '\0';

    /* get null terminated path and filename for new file */
    MEcopy((PTR) path, pathlength, (PTR) newfile);
    newfile[pathlength] = '/';
    MEcopy((PTR) newfilename, newlength, (PTR) &newfile[pathlength + 1]);
    newfile[pathlength + newlength + 1] = '\0';

    do
    {
	/* Log only on the first retry pass. */
	if (retry > 0 && failflag++ == 0)
	    TRdisplay("%@ DIrename: retry on %t/%t\n",
		      pathlength, path, oldlength, oldfilename);
	retry = 0;
	CL_CLEAR_ERR( err_code );

#ifdef xCL_035_RENAME_EXISTS
	/* Now rename the file, retrying interrupted calls (EINTR). */
	while ((os_ret = rename(oldfile, newfile)) == -1)
	{
	    SETCLERR(err_code, 0, ER_rename);
	    if (err_code->errnum != EINTR)
		break;
	}
#else /* xCL_035_RENAME_EXISTS */
	/* No rename(): emulate with link() + unlink(). */
	while ((os_ret = link(oldfile, newfile)) == -1)
	{
	    SETCLERR(err_code, 0, ER_rename);
	    if (err_code->errnum != EINTR)
		break;
	}
	if (os_ret != -1)
	{
	    /*
	    ** NOTE(review): this loop tests err_code->errnum without a
	    ** SETCLERR after the failing unlink(), so it examines the
	    ** errno captured by an EARLIER call (or none) -- presumably
	    ** a latent bug in this rarely-compiled path; confirm and add
	    ** SETCLERR inside the loop if so.
	    */
	    while ((os_ret = unlink(oldfile)) == -1)
	    {
		if (err_code->errnum != EINTR)
		    break;
	    }
	}
#endif /* xCL_035_RENAME_EXISTS */

	/* if the rename failed, see if we're holding the file open */
	if (os_ret == -1 && htb_initialized)
	{
	    QUEUE *p, *q, *next;

	    CS_synch_lock(&htb->htb_fd_list_mutex);

	    /* Walk the open-FD list backwards looking for the old name. */
	    q = &htb->htb_fd_list;
	    for (p = q->q_prev; p != q; p = next)
	    {
		DI_FILE_DESC *di_file = (DI_FILE_DESC *) p;
		DI_IO *di_io = (DI_IO *) di_file->fd_uniq.uniq_di_file;

		next = p->q_prev;

		/* Cheap screen first (lengths only), under the list mutex. */
		if (di_io != NULL && di_file->fd_state == FD_IN_USE &&
		    di_io->io_type == DI_IO_ASCII_ID &&
		    pathlength == di_io->io_l_pathname &&
		    oldlength == di_io->io_l_filename)
		{
		    /* Swap list mutex for the fd mutex before verifying. */
		    CS_synch_unlock(&htb->htb_fd_list_mutex);
		    CS_synch_lock(&di_file->fd_mutex);

		    /* Make sure it's still the right
		    ** DI_IO and compare the filename
		    */
		    if ((DI_IO *) di_file->fd_uniq.uniq_di_file == di_io &&
			di_file->fd_state == FD_IN_USE &&
			di_file->fd_unix_fd != -1 &&
			!(di_io->io_open_flags & DI_O_NOT_LRU_MASK) &&
			di_io->io_type == DI_IO_ASCII_ID &&
			pathlength == di_io->io_l_pathname &&
			MEcmp((PTR) di_io->io_pathname, path,
			      pathlength) == 0 &&
			oldlength == di_io->io_l_filename &&
			MEcmp((PTR) di_io->io_filename, oldfilename,
			      oldlength) == 0)
		    {
			/* have a match; try to close it and retry rename */
			CS_synch_unlock(&di_file->fd_mutex);
			DIlru_close(di_io, &local_err);
			retry++;
		    }
		    else
			CS_synch_unlock(&di_file->fd_mutex);

		    CS_synch_lock(&htb->htb_fd_list_mutex);
		}
	    }
	    CS_synch_unlock(&htb->htb_fd_list_mutex);
	}
    } while (retry);

    if (os_ret == -1)
    {
	if ((err_code->errnum == ENOTDIR) || (err_code->errnum == EACCES))
	{
	    ret_val = DI_DIRNOTFOUND;
	}
	else
	{
	    ret_val = DI_BADRNAME;
	}
    }
    else
	CL_CLEAR_ERR( err_code );

    return(ret_val);
}
/*{
** Name: IIME_ftFreeTag	- Free all allocated memory for a tag.
**
** Description:
**	This routine is called by MEtfree to free all the allocated
**	memory for a tag.
**
**	It works by finding the METAGNODE for the tag in the hash table
**	and then traversing the QUEUE of allocated blocks freeing
**	each block.
**
** Inputs:
**	tag		The tag whose memory is to be freed.
**
** Outputs:
**	Returns:
**	    OK		if all the allocated memory for the tag was freed.
**	    ME_NO_TFREE	if the tag does not have a record in the hash table.
**	    other	failure status if the nodes can't be freed.
**
** Side Effects:
**	Will return the METAGNODE for the tag to freelist.
**
** History:
**	5-dec-1989 (Joe)
**	    First Written
**	30-May-96 (stial01)
**	    New advice ME_TUXEDO_ALLOC should behave like ME_INGRES_ALLOC
**	12-feb-1997 (canor01)
**	    Initialize local MEstatus.
**	27-Jan-1999 (fanra01)
**	    Add thread alloc case for tag free.  Otherwise our memory is
**	    returned to the system heap causing wonderfully esoteric execution.
*/
STATUS
IIME_ftFreeTag(
	i4	tag )
{
    register METAGNODE	**first;
    STATUS		MEstatus = OK;

# ifdef OS_THREADS_USED
    CS_synch_lock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */

    /* Walk the hash chain for this tag's bucket (chained via met_hash). */
    for (first = &(htab[tag%256]); *first != NULL;
	 first = &((*first)->met_hash))
    {
	if ((*first)->met_tag == tag)
	{
	    register ME_NODE	*this;
	    register ME_NODE	*next;
	    register METAGNODE	*freenode;

	    /*
	    ** Free every block queued on this tag.  The list is circular:
	    ** it ends when we come back around to the list header itself.
	    */
	    for (this = (*first)->met_list.MEfirst;
		 this != NULL && this != (ME_NODE *) &((*first)->met_list);)
	    {
		next = this->MEnext;
		/*
		** After the first failure, keep walking but stop freeing
		** (MEstatus is checked before each free and returned).
		*/
		if ( MEstatus == OK )
		{
		    /* Keep the allocation accounting in step. */
		    i_meactual -= this->MEsize;
		    i_meuser -= this->MEaskedfor;

		    (void)QUremove( (QUEUE *) this );

		    /*
		    ** Ingres-managed advice: return the block to the ME
		    ** free list (under its own mutex); otherwise it came
		    ** from malloc and goes back to the system heap.
		    */
		    if( (MEadvice == ME_INGRES_ALLOC ) ||
			(MEadvice == ME_INGRES_THREAD_ALLOC) ||
			(MEadvice == ME_TUXEDO_ALLOC) )
		    {
# ifdef OS_THREADS_USED
			CS_synch_lock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
			MEstatus = MEfadd(this, TRUE);
# ifdef OS_THREADS_USED
			CS_synch_unlock( &MEfreelist_mutex );
# endif /* OS_THREADS_USED */
		    }
		    else
			free( (char *)this );
		}
		if (MEstatus == OK)
		    this = next;
		else
		    break;
	    }

	    /* Unhook the METAGNODE and return it to the tag freelist. */
	    freenode = *first;
	    *first = freenode->met_hash;
	    freenode->met_hash = freelist;
	    freelist = freenode;
# ifdef OS_THREADS_USED
	    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
	    return MEstatus;
	}
    }

    /* Tag was never registered. */
# ifdef OS_THREADS_USED
    CS_synch_unlock( &MEtaglist_mutex );
# endif /* OS_THREADS_USED */
    return ME_NO_TFREE;
}
/*{
** Name: do_writev - Perform writev() call.
**
** Description:
**	This function collects the queued write requests,
**	chooses the optimum function to perform the write(s),
**	and invokes the completion handler for each request.
**
** Inputs:
**	DI_TGIO * tgio	- Control block for current thread.
**
** Outputs:
**	None.
**
** Returns:
**	OK
**	FAIL	- One of more of the write requests failed.
**
** Exceptions:
**	none
**
** Side Effects:
**	The completion handler for each I/O request is invoked.
**
** History:
**	19-May-1999 (jenjo02)
**	    Created.
**	09-Jul-1999 (jenjo02)
**	    If queued list is ordered, skip the quicksort.
**	09-Apr-2001 (jenjo02)
**	    Increment first gio's io_count stat for each physical I/O,
**	    gw_pages for multi-page writes.
**	05-Nov-2002 (jenjo02)
**	    Cleaned up use of io_sem: only write() and writev() need
**	    the mutex to protect the (separate) seek. pwrite(64)
**	    atomically seeks and does not need the mutex.
**	25-Aug-2005 (schka24)
**	    Don't bother with IO timing, too slow on some platforms (Linux)
**	    and the results aren't all that interesting.
**	14-Oct-2005 (jenjo02)
**	    Chris's file descriptor properties now cached in io_fprop
**	    (file properties) and established on the first open,
**	    not every open.
**	24-Jan-2006 (jenjo02)
**	    Break on change in file ("f"), then lru-open to get an
**	    FD, do the write(v), and lru_release the FD. This keeps
**	    gather_write from hogging FDs while waiting for the
**	    signal to actually do something.
**	15-Mar-2006 (jenjo02)
**	    f->io_sem not needed if running with thread affinity,
**	    the fd is not shared by multiple threads.
*/
static STATUS
do_writev( DI_TGIO * tgio, CL_ERR_DESC *err_code )
{
    CL_ERR_DESC	lerr_code;
    STATUS 	big_status = OK, small_status = OK;
    i4		i, j, k;
    DI_GIO	*gio, *first_gio;
    DI_IO	*f;
    DI_OP	*diop;
    OFFSET_TYPE next_offset, lseek_offset;
    i4		bytes_to_write, bytes_written;
    i4		saved_state;
    i4		num_writev = 0, num_write = 0;
    i4		num_writev_gio = 0, num_write_gio = 0;

#if defined(sgi_us5)
    /* Establish iov_max lazily from the OS; clamp to a sane range. */
    if( iov_max == 0 )
    {
	iov_max = sysconf(_SC_IOV_MAX);
	if( iov_max <= 0 )
	{
	    iov_max = 16;	/* arbitrary minimum value */
#ifdef DEBUG_THIS_PUPPY
	    TRdisplay("%@ %x do_writev: %t ERROR sysconf failed with %d\n",
			tgio->tgio_scb->cs_self,
			f->io_l_filename, f->io_filename,
			iov_max);
#endif /* DEBUG_THIS_PUPPY */
	}
	else if( iov_max > 2048 )
	{
	    iov_max = 2048;	/* arbitrary maximum value */
	}
    }
#else
    iov_max = IOV_MAX;
#endif

    /* If unordered, sort the queued list into file,offset order */
    if ( tgio->tgio_state & TGIO_UNORDERED )
    {
	gio_sort( tgio->tgio_queue, 0, tgio->tgio_queued-1 );
	tgio->tgio_state &= ~(TGIO_UNORDERED);
    }

    /*
    ** Method:
    **
    ** Collect requests by file/offset into an iovec until
    ** the next file offset becomes discontiguous. Additionally, if
    ** the buffer addresses are contiguous, colaesce those requests.
    **
    ** Up to IOV_MAX iovecs can be written by a single writev().
    **
    ** If but a single iovec results, the probably-more-efficient
    ** function (p)write() is called instead of writev().
    */
    k = 0;
    while ( (j = k) < tgio->tgio_queued )
    {
#if defined(sgi_us5)
	struct iovec	iov[iov_max];
#else
	struct iovec	iov[IOV_MAX];
#endif
	/*
	** "i" indexes the current iovec element
	** "j" is the first GIO used in this iovec array
	** "k" is the current GIO in the queue
	*/
	i = 0;
	gio = first_gio = tgio->tgio_queue[j];
	f = gio->gio_f;
	lseek_offset = next_offset = gio->gio_offset;
	small_status = OK;
	iov[0].iov_base = gio->gio_buf;
	iov[0].iov_len = 0;

	do
	{
	    /* If this buffer is contiguous with previous, coalesce it */
	    if ( (char *)iov[i].iov_base + iov[i].iov_len == gio->gio_buf )
	    {
		iov[i].iov_len += gio->gio_len;
	    }
	    /* Initialize next iovec if any remain */
	    else if ( i < iov_max - 1 )
	    {
		i++;
		iov[i].iov_base = gio->gio_buf;
		iov[i].iov_len = gio->gio_len;
	    }
	    else
		break;
	    next_offset += gio->gio_len;
	} while ( ++k < tgio->tgio_queued &&
		 (gio = tgio->tgio_queue[k]) &&
		  gio->gio_f == f &&
		  gio->gio_offset == next_offset );

	/* "k" indexes the next, unprocessed GIO */
	bytes_to_write = next_offset - lseek_offset;

	/* Mark this session as waiting on a disk write. */
	saved_state = tgio->tgio_scb->cs_state;
	tgio->tgio_scb->cs_state = CS_EVENT_WAIT;
	tgio->tgio_scb->cs_memory = CS_DIOW_MASK;

	/* Accumulate multi-page write stats */
	if ( k - j > 1 )
	{
	    /*
	    ** Using the first gio, count
	    ** the number of multi-pages written (k-j)
	    ** and a single I/O.
	    */
	    if ( first_gio->gio_io_count )
		++*first_gio->gio_io_count;
	    if ( first_gio->gio_gw_pages )
		*first_gio->gio_gw_pages += k - j;
	}

	/* Count a single I/O write for server */
	tgio->tgio_scb->cs_diow++;
	Cs_srv_block.cs_wtstatistics.cs_diow_done++;
	/* Count a single I/O wait for server */
	Cs_srv_block.cs_wtstatistics.cs_diow_waits++;
	/* Accumulate number of KB written by this I/O */
	Cs_srv_block.cs_wtstatistics.cs_diow_kbytes += bytes_to_write / 1024;

	/* Now get an FD to do the write(v) */
	diop = (DI_OP*)&first_gio->gio_diop;
	if ( big_status = DIlru_open(f, FALSE, diop, err_code) )
	{
	    /*
	    ** Fix: restore the session state before bailing out.
	    ** Previously this path returned with cs_state stuck at
	    ** CS_EVENT_WAIT and cs_memory at CS_DIOW_MASK; every other
	    ** exit path restores them.
	    */
	    tgio->tgio_scb->cs_memory &= ~(CS_DIOW_MASK);
	    tgio->tgio_scb->cs_state = saved_state;
	    return(big_status);
	}

#ifdef DEBUG_THIS_PUPPY
	{
	    i4	x;
	    i8	offset = lseek_offset;

	    TRdisplay("%@ %p do_writev: %~t doing %d todo %d fd %d lseek from %ld\n",
		tgio->tgio_scb->cs_self, f->io_l_filename, f->io_filename,
		i+1, tgio->tgio_queued - j, diop->di_fd, offset);
	    for (x = 0; x <= i; x++)
	    {
		TRdisplay("%@ do_writev: iovec[%d] base %p bytes %d (page %d for %d)\n",
		    x, iov[x].iov_base, iov[x].iov_len,
		    (i4)(offset/f->io_bytes_per_page),
		    iov[x].iov_len/f->io_bytes_per_page);
		offset += iov[x].iov_len;
	    }
	}
#endif /* DEBUG_THIS_PUPPY */

	/* If more than one iovec, seek and use writev */
	if ( i++ )
	{
	    /* writev needs seek mutex protection */
	    if ( !Di_thread_affinity )
		CS_synch_lock( &f->io_sem );
	    num_writev++;
	    num_writev_gio += k - j;
	    bytes_written = IIdio_writev( diop->di_fd,
					  (char *)iov,
					  i,
					  lseek_offset, 0,
					  f->io_fprop,
					  err_code);
	    if ( !Di_thread_affinity )
		CS_synch_unlock( &f->io_sem );
	}
	else
	{
	    num_write++;
	    num_write_gio += k - j;
# if !defined(xCL_NO_ATOMIC_READ_WRITE_IO)
	    /* pwrite(64) needs no seek mutex protection */
	    bytes_written =
#ifdef LARGEFILE64
		pwrite64( diop->di_fd, iov[0].iov_base,
			  bytes_to_write, lseek_offset );
#else /* LARGEFILE64 */
		pwrite( diop->di_fd, iov[0].iov_base,
			bytes_to_write, lseek_offset );
#endif /* LARGEFILE64 */
	    if (bytes_written != bytes_to_write)
		SETCLERR(err_code, 0, ER_write);
# else /* !xCL_NO_ATOMIC_READ_WRITE_IO */
	    /* write() needs seek mutex protection */
	    if ( !Di_thread_affinity )
		CS_synch_lock( &f->io_sem );
	    bytes_written = IIdio_write( diop->di_fd,
					 iov[0].iov_base,
					 bytes_to_write,
					 lseek_offset, 0,
					 f->io_fprop,
					 err_code );
	    if ( !Di_thread_affinity )
		CS_synch_unlock( &f->io_sem );
# endif /* !xCL_NO_ATOMIC_READ_WRITE_IO */
	}

	/* Release the FD */
	(VOID)DIlru_release( diop, &lerr_code );

	tgio->tgio_scb->cs_memory &= ~(CS_DIOW_MASK);
	tgio->tgio_scb->cs_state = saved_state;

	if (bytes_written != bytes_to_write)
	{
	    /* Map the OS error to a DI status. */
	    switch ( err_code->errnum )
	    {
		case EFBIG:
		    small_status = DI_BADEXTEND;
		    break;
		case ENOSPC:
		    small_status = DI_EXCEED_LIMIT;
		    break;
/*
** Fix: the guard previously tested "EDQUOTA", a symbol that is never
** defined; the errno macro used in the case label is EDQUOT, so the
** quota-exceeded mapping was dead code on every platform.  Guard with
** the same symbol that is used.
*/
#ifdef EDQUOT
		case EDQUOT:
		    small_status = DI_EXCEED_LIMIT;
		    break;
#endif
		default:
		    if (err_code->errnum == 0)
			small_status = DI_ENDFILE;
		    else
			small_status = DI_BADWRITE;
		    break;
	    }
	    /* Preserve the worst status from all the writes */
	    big_status = (big_status) ? big_status : small_status;
	}

	/* Invoke completion handler for each GIO written */
	do
	{
	    gio = tgio->tgio_queue[j];
	    (gio->gio_evcomp)( gio->gio_data, small_status, err_code );
	} while ( ++j < k );
    }

#ifdef DEBUG_THIS_PUPPY
    TRdisplay("%@ %p do_writev: %d write requests completed using %d(%d) writev, %d(%d) write\n",
	tgio->tgio_scb->cs_self, tgio->tgio_queued,
	num_writev, num_writev_gio, num_write, num_write_gio);
#endif /* DEBUG_THIS_PUPPY */

    /* Clear the queued count(s) */
    tgio->tgio_queued = *tgio->tgio_uqueued = 0;

    return( big_status );
}
/*{ ** Name: gather_list - Gather write requests together. ** ** Description: ** This routine batches up write requests for later submission via ** the writev() routine. ** ** Inputs: ** DI_GIO * gio - gio Control block for write operation. ** ** Outputs: ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** DI_MEREQMEN_ERR - MEreqmem failed. ** Exceptions: ** none ** ** Side Effects: ** Will call do_writev if number of write requests has reached ** GIO_MAX_QUEUED. ** ** History: ** 19-May-1999 (jenjo02) ** Created. ** 09-Jul-1999 (jenjo02) ** Watch I/O queue as it's being constructed. If pre-ordered, ** skip the quicksort. ** 25-Aug-2005 (schka24) ** Don't blindly lru-open each request; instead, see if the ** file (DI_IO) is already on the queue, and share its fd with the ** queued request. This is essential when doing fd-per-thread, ** as each call to lru-open would allocate a new fd, thus negating ** the ability to do a writev! When not doing fd-per-thread, ** this change is effectively a no-op. ** Also, return without queueing if queue-flush fails. ** 25-Jan-2006 (jenjo02) ** Defer lru-open until do_writev to prevent hogging FDs ** while waiting for futhur writes. */ static STATUS gather_list( DI_GIO *gio, i4 *uqueued, CL_ERR_DESC *err_code) { DI_GIO *qgio; /* a GIO on the queue already */ DI_TGIO *tgio; STATUS status = OK; CS_SCB *scb; CSget_scb(&scb); if ( (tgio = (DI_TGIO *)scb->cs_ditgiop) == (DI_TGIO *)NULL || tgio->tgio_scb != scb ) { /* ** No TGIO for this thread, so reuse an inactive one ** or allocate a new one. 
*/ CS_synch_lock( &GWthreadsSem ); for ( tgio = GWthreads; tgio && tgio->tgio_state != TGIO_INACTIVE; tgio = tgio->tgio_next ); if (tgio == NULL) { tgio = (DI_TGIO *)MEreqmem(0, sizeof( DI_TGIO ), TRUE, NULL); if (tgio == NULL) { CS_synch_unlock( &GWthreadsSem ); return( DI_MEREQMEM_ERR); } tgio->tgio_next = GWthreads; GWthreads = tgio; } scb->cs_ditgiop = (PTR)tgio; tgio->tgio_scb = scb; tgio->tgio_uqueued = uqueued; *tgio->tgio_uqueued = tgio->tgio_queued = 0; tgio->tgio_state = TGIO_ACTIVE; CS_synch_unlock( &GWthreadsSem ); } /* If the queue is full, force the writes. ** If this fails, we get blamed, but someone has to report it. */ if ( tgio->tgio_queued == GIO_MAX_QUEUED ) { status = do_writev( tgio, err_code ); if (status != OK) return (status); } /* ** Check for out of sequence GIO. ** If all I/O's are presented in file/offset order, ** a sort won't be needed. */ if ( (tgio->tgio_state & TGIO_UNORDERED) == 0 && tgio->tgio_queued ) { qgio = tgio->tgio_queue[tgio->tgio_queued - 1]; if ( gio->gio_f < qgio->gio_f || (gio->gio_f == qgio->gio_f && gio->gio_offset < qgio->gio_offset) ) { tgio->tgio_state |= TGIO_UNORDERED; } } /* Add this request to the queue */ tgio->tgio_queue[tgio->tgio_queued++] = gio; /* Update caller's queued count */ *tgio->tgio_uqueued = tgio->tgio_queued; return( status ); }
/*{
** Name: DI_inproc_read - read page(s) from a file on disk.
**
** Description:
**	This routine was created to make DIread more readable once
**	error checking had been added. See DIread for comments.
**
** Inputs:
**	f		Pointer to the DI file
**			context needed to do I/O.
**	diop		Pointer to dilru file context.
**	buf		Pointer to page(s) to read.
**	page		Value indicating page(s) to read.
**	num_of_pages	number of pages to read
**
** Outputs:
**	err_code	Pointer to a variable used
**			to return operating system
**			errors.
**	Returns:
**	    OK
**	    other errors.
**	Exceptions:
**	    none
**
** Side Effects:
**	none
**
** History:
**	30-nov-1992 (rmuth)
**	    Created.
**	03-jun-1996 (canor01)
**	    Note in the scb that this is a DI wait.
**	14-July-1997 (schte01)
**	    For those platforms that do direct i/o (where the
**	    seek and the read are separate functions), do not release and
**	    reaquire the semaphore on the DI_IO block. This will protect
**	    against i/o being done by a different thread in between the
**	    lseek and the read.
**	14-Aug-1997 (schte01)
**	    Add xCL_DIRECT_IO as a condition to the 14-July-1997 change
**	    instead of the test for !xCL_ASYNCH_IO.
**	22-Dec-1998 (jenjo02)
**	    If DI_FD_PER_THREAD is defined, call IIdio_read() instead of
**	    pread().
**	01-Apr-2004 (fanch01)
**	    Add O_DIRECT support on Linux depending on the filesystem
**	    properties, pagesize.  Fixups for misaligned buffers on read()
**	    and write() operations.
**	13-apr-04 (toumi01)
**	    Move stack variable declaration to support "standard" C compilers.
**	29-Jan-2005 (schka24)
**	    Ditch attempt to gather dior timing stats, not useful in
**	    the real world and generates excess syscalls on some platforms.
**	15-Mar-2006 (jenjo02)
**	    io_sem is not needed with thread affinity.
**	6-Nov-2009 (kschendel) SIR 122757
**	    Remove copy to aligned buffer, caller is supposed to do it.
*/
static STATUS
DI_inproc_read(
    DI_IO	*f,
    DI_OP	*diop,
    char	*buf,
    i4		page,
    i4		num_of_pages,
    i4		*n,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    i4		saved_state;

    /* unix variables */
    int		unix_fd;
    int		bytes_read = 0;
    int		bytes_to_read;
    OFFSET_TYPE	lseek_offset;

    /*
    ** Seek to place to read
    */
    lseek_offset = (OFFSET_TYPE)f->io_bytes_per_page * (OFFSET_TYPE)page;
    bytes_to_read = f->io_bytes_per_page * num_of_pages;
    unix_fd = diop->di_fd;

    if (Di_backend)
    {
	/*
	** Mark the session as waiting on a read and bump the server's
	** log-I/O or disk-I/O counters as appropriate.
	** NOTE(review): scb and saved_state are only assigned under
	** Di_backend and only consumed under Di_backend below -- this
	** is safe only as long as Di_backend cannot change mid-call;
	** presumably it is fixed at server startup, confirm.
	*/
	CSget_scb(&scb);
	if ( scb )
	{
	    saved_state = scb->cs_state;
	    scb->cs_state = CS_EVENT_WAIT;

	    if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    {
		scb->cs_memory = CS_LIOR_MASK;
		scb->cs_lior++;
		Cs_srv_block.cs_wtstatistics.cs_lior_done++;
		Cs_srv_block.cs_wtstatistics.cs_lior_waits++;
		Cs_srv_block.cs_wtstatistics.cs_lior_kbytes
		    += bytes_to_read / 1024;
	    }
	    else
	    {
		scb->cs_memory = CS_DIOR_MASK;
		scb->cs_dior++;
		Cs_srv_block.cs_wtstatistics.cs_dior_done++;
		Cs_srv_block.cs_wtstatistics.cs_dior_waits++;
		Cs_srv_block.cs_wtstatistics.cs_dior_kbytes
		    += bytes_to_read / 1024;
	    }
	}
    }

    /*
    ** When the platform lacks atomic seek+read, the separate seek and
    ** read must be protected by the file's mutex unless each thread has
    ** its own fd (thread affinity) or the fd is private to this file.
    */
# if defined( OS_THREADS_USED ) && (defined (xCL_NO_ATOMIC_READ_WRITE_IO))
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
    {
	CS_synch_lock( &f->io_sem );
    }
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

    /*
    ** NOTE: the "if ( bytes_read != bytes_to_read ) {" below is opened
    ** inside BOTH preprocessor branches and closed once after the
    ** #endif -- take care when editing this region.
    */
# if defined( OS_THREADS_USED ) && (! defined (xCL_NO_ATOMIC_READ_WRITE_IO))
#ifdef LARGEFILE64
    bytes_read = pread64( unix_fd, buf, bytes_to_read, lseek_offset );
#else /* LARGEFILE64 */
    bytes_read = pread( unix_fd, buf, bytes_to_read, lseek_offset );
#endif /* LARGEFILE64 */
    if ( bytes_read != bytes_to_read )
    {
	SETCLERR(err_code, 0, ER_read);
# else /* OS_THREADS_USED */
    bytes_read = IIdio_read( unix_fd, buf,
			     bytes_to_read, lseek_offset, 0,
			     f->io_fprop,
			     err_code );
    if ( bytes_read != bytes_to_read )
    {
# endif /* OS_THREADS_USED && ! xCL_NO_ATOMIC_READ_WRITE_IO */
	/* Short or failed read: -1 is an OS error, else premature EOF. */
	if (bytes_read == -1)
	{
	    status = DI_BADREAD;
	}
	else
	{
	    status = DI_ENDFILE;
	}
    }

# if defined( OS_THREADS_USED ) && (defined (xCL_NO_ATOMIC_READ_WRITE_IO) )
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

    if (Di_backend)
    {
	/* Restore the session state saved before the read. */
	if ( scb )
	{
	    scb->cs_memory &= ~(CS_DIOR_MASK | CS_LIOR_MASK);
	    scb->cs_state = saved_state;
	}
    }

    /* Report the number of whole pages actually read. */
    if ( bytes_read > 0 )
	*n = bytes_read / f->io_bytes_per_page;

    return(status);
}

# if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO)
/*{
** Name: DI_async_read - read page(s) asynchronously from a file on disk.
**
** Description:
**	This routine was created to interface with async io routines
**	where such routines are available.
**
** Inputs:
**	f		Pointer to the DI file
**			context needed to do I/O.
**	diop		Pointer to dilru file context.
**	buf		Pointer to page(s) to read.
**	page		Value indicating page(s) to read.
**	num_of_pages	number of pages to read
**
** Outputs:
**	err_code	Pointer to a variable used
**			to return operating system
**			errors.
**	Returns:
**	    OK
**	    other errors.
**	Exceptions:
**	    none
**
** Side Effects:
**	none
**
** History:
**	20-jun-1995 (amo ICL)
**	    Created.
*/
static STATUS
DI_async_read(
    DI_IO	*f,
    DI_OP	*diop,
    char	*buf,
    i4		page,
    i4		num_of_pages,
    i4		*n,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    int		saved_state;
    i4		start_time;

    /* unix variables */
    int		bytes_read = 0;
    int		bytes_to_read;
    OFFSET_TYPE	lseek_offset;

    /*
    ** Seek to place to read
    */
    lseek_offset = (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page);
    bytes_to_read = f->io_bytes_per_page * num_of_pages;

    CSget_scb(&scb);
    if ( scb )
    {
	/*
	** Mark the session as waiting on a read and bump the server's
	** log-I/O or disk-I/O counters.  saved_state and start_time are
	** assigned only here and consumed only under the matching
	** "if ( scb )" below.
	*/
	saved_state = scb->cs_state;
	scb->cs_state = CS_EVENT_WAIT;

	if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	{
	    scb->cs_memory = CS_LIOR_MASK;
	    scb->cs_lior++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_done++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_waits++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_kbytes
		+= bytes_to_read / 1024;
	}
	else
	{
	    scb->cs_memory = CS_DIOR_MASK;
	    scb->cs_dior++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_done++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_waits++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_kbytes
		+= bytes_to_read / 1024;
	}
	/* Clock the read */
	start_time = CS_checktime();
    }

    /*
    ** Issue the read through the platform's async mechanism: a worker
    ** thread (DI_thread_rw) when threads are available without native
    ** async I/O, otherwise the aio interface (DI_aio_rw).
    */
# if defined(OS_THREADS_USED) && !defined(xCL_ASYNC_IO)
    bytes_read = DI_thread_rw( O_RDONLY, diop, buf, bytes_to_read,
				lseek_offset, NULL, err_code);
# else /* OS_THREADS_USED */
    bytes_read = DI_aio_rw( O_RDONLY, diop, buf, bytes_to_read,
				lseek_offset, NULL, err_code);
# endif /* OS_THREADS_USED */

    if ( bytes_read != bytes_to_read )
    {
	/* Short or failed read: -1 is an OS error, else premature EOF. */
	SETCLERR(err_code, 0, ER_read);
	if (bytes_read == -1)
	{
	    status = DI_BADREAD;
	}
	else
	{
	    status = DI_ENDFILE;
	}
    }

    if ( scb )
    {
	/* Restore session state and account the elapsed read time. */
	scb->cs_memory &= ~(CS_DIOR_MASK | CS_LIOR_MASK);
	scb->cs_state = saved_state;

	if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    Cs_srv_block.cs_wtstatistics.cs_lior_time
		+= CS_checktime() - start_time;
	else
	    Cs_srv_block.cs_wtstatistics.cs_dior_time
		+= CS_checktime() - start_time;
    }

    /* Report the number of whole pages actually read. */
    if ( bytes_read > 0 )
	*n = bytes_read / f->io_bytes_per_page;

    return(status);
}