/*{ ** Name: DI_async_write - writes page(s) to a file on disk. ** ** Description: ** This routine was created to interface with async io routines ** where such routines are available ** ** Inputs: ** f Pointer to the DI file ** context needed to do I/O. ** diop Pointer to dilru file context. ** buf Pointer to page(s) to write. ** page Value indicating page(s) to write. ** num_of_pages number of pages to write ** ** Outputs: ** err_code Pointer to a variable used ** to return operating system ** errors. ** Returns: ** OK ** other errors. ** Exceptions: ** none ** ** Side Effects: ** none ** ** History: ** 20-jun-1995 (amo ICL) ** Created. ** 01-oct-1998 (somsa01) ** Return DI_NODISKSPACE when we are out of disk space. */ static STATUS DI_async_write( DI_IO *f, DI_OP *diop, char *buf, i4 page, i4 num_of_pages, CL_ERR_DESC *err_code ) { STATUS status = OK; int errnum; CS_SCB *scb; int saved_state; i4 start_time, elapsed; /* unix variables */ OFFSET_TYPE lseek_offset; int bytes_written; int bytes_to_write; /* ** seek to place to write */ lseek_offset = (OFFSET_TYPE)f->io_bytes_per_page * (OFFSET_TYPE)page; bytes_to_write = (f->io_bytes_per_page * (num_of_pages)); CSget_scb(&scb); if ( scb ) { saved_state = scb->cs_state; scb->cs_state = CS_EVENT_WAIT; if (f->io_open_flags & DI_O_LOG_FILE_MASK) { scb->cs_memory = CS_LIOW_MASK; scb->cs_liow++; Cs_srv_block.cs_wtstatistics.cs_liow_done++; Cs_srv_block.cs_wtstatistics.cs_liow_waits++; Cs_srv_block.cs_wtstatistics.cs_liow_kbytes += bytes_to_write / 1024; } else { scb->cs_memory = CS_DIOW_MASK; scb->cs_diow++; Cs_srv_block.cs_wtstatistics.cs_diow_done++; Cs_srv_block.cs_wtstatistics.cs_diow_waits++; Cs_srv_block.cs_wtstatistics.cs_diow_kbytes += bytes_to_write / 1024; } start_time = CS_checktime(); } # if defined(OS_THREADS_USED) && !defined(xCL_ASYNC_IO) bytes_written = DI_thread_rw( O_WRONLY, diop, buf, bytes_to_write, lseek_offset, (long*)0, err_code); # else /* OS_THREADS_USED */ bytes_written = DI_aio_rw( O_WRONLY, 
diop, buf, bytes_to_write, lseek_offset, (long*)0, err_code); # endif /* OS_THREADS_USED */ if ( bytes_written != bytes_to_write ) { SETCLERR(err_code, 0, ER_write); switch( err_code->errnum ) { case EFBIG: status = DI_BADEXTEND; break; case ENOSPC: status = DI_NODISKSPACE; break; #ifdef EDQUOTA case EDQUOT: status = DI_EXCEED_LIMIT; break; #endif default: if (err_code->errnum == 0) status = DI_ENDFILE; else status = DI_BADWRITE; break; } } if ( scb ) { elapsed = CS_checktime() - start_time; scb->cs_memory &= ~(CS_DIOW_MASK | CS_LIOW_MASK); scb->cs_state = saved_state; if (f->io_open_flags & DI_O_LOG_FILE_MASK) Cs_srv_block.cs_wtstatistics.cs_liow_time += elapsed; else Cs_srv_block.cs_wtstatistics.cs_diow_time += elapsed; } return( status ); }
/*
** CS_sampler - monitor thread that periodically samples the state of
** every session in the server, accumulating per-thread-type state,
** facility, event-wait, lock and mutex statistics in the shared
** CsSamplerBlkPtr block, plus per-second I/O rates.
**
** Runs as a dedicated thread (Win32: ExitThread/Sleep/CloseHandle are
** used directly).  Exits normally when the sampler block is flagged
** for shutdown; exits abnormally if the block cannot be locked.
*/
VOID
CS_sampler(void)
{
    CS_SCB	*an_scb;
    i4	sleeptime, elapsed, seconds, event;
    i4	starttime, stoptime;
    i4	cs_thread_type;
    i4	cs_state;
    bool	attached = FALSE;	/* MO object attached yet? */
    u_i4	bior, biow;		/* baseline BIO counters */
    u_i4	dior, diork, diow, diowk; /* baseline DIO counters */
    u_i4	lior, liork, liow, liowk; /* baseline LIO counters */

    /*
    ** This thread goes into a loop:
    **	1. Lock the sampler block
    **	2. Do sampling
    **	3. Sleep for the specified interval
    ** The thread will exit normally when the sampler block pointer is NULL.
    ** The thread exits abnormally if it cannot lock the block.
    */

    starttime = CS_checktime();
    elapsed = 0;

    /* Prime the local I/O, Transaction rate counters */
    bior  = Cs_srv_block.cs_wtstatistics.cs_bior_done;
    biow  = Cs_srv_block.cs_wtstatistics.cs_biow_done;
    dior  = Cs_srv_block.cs_wtstatistics.cs_dior_done;
    diork = Cs_srv_block.cs_wtstatistics.cs_dior_kbytes;
    diow  = Cs_srv_block.cs_wtstatistics.cs_diow_done;
    diowk = Cs_srv_block.cs_wtstatistics.cs_diow_kbytes;
    lior  = Cs_srv_block.cs_wtstatistics.cs_lior_done;
    liork = Cs_srv_block.cs_wtstatistics.cs_lior_kbytes;
    liow  = Cs_srv_block.cs_wtstatistics.cs_liow_done;
    liowk = Cs_srv_block.cs_wtstatistics.cs_liow_kbytes;

    /* Transaction rates cannot be determined */
    CsSamplerBlkPtr->txn[CURR] = 0;
    CsSamplerBlkPtr->txn[PEAK] = 0;

    for (;;)
    {
	if (LockSamplerBlk(&hCsSamplerSem) != OK)
	{
	    /* Cannot lock the block: abnormal thread exit. */
	    ExitThread((DWORD)-1);
	}

	if (CsSamplerBlkPtr->shutdown)
	{
	    /*
	    ** Detach the sampler block Managed Object, release all
	    ** sampler resources, and exit this thread normally.
	    */
	    if (attached)
		MOdetach(CSsamp_index_name, "CsSamplerBlkPtr");
	    MEfree((PTR)CsSamplerBlkPtr);
	    CsSamplerBlkPtr = NULL;
	    CSsamp_stopping = TRUE;
	    UnlockSamplerBlk(hCsSamplerSem);
	    CloseHandle(hCsSamplerSem);
	    hCsSamplerSem = NULL;
	    ExitThread(0);
	}

	if (!attached)
	{
	    /*
	    ** Attach the sampler block Managed Object
	    */
	    MOattach(MO_INSTANCE_VAR, CSsamp_index_name,
		     "CsSamplerBlkPtr", (PTR) CsSamplerBlkPtr);
	    attached = TRUE;
	}

	++CsSamplerBlkPtr->numsamples; /* Count the number of times we sample */

	/* Loop thru all the SCBs in the server (circular known list). */
	for (an_scb = Cs_srv_block.cs_known_list->cs_next;
	     an_scb && an_scb != Cs_srv_block.cs_known_list;
	     an_scb = an_scb->cs_next)
	{
	    /*
	    ** NOTE(review): -1 passes this range check and is then used
	    ** directly as an index into Thread[] below — confirm that
	    ** Thread[] has a slot for type -1 (or that -1 cannot occur).
	    */
	    if (an_scb->cs_thread_type >= -1 &&
		an_scb->cs_thread_type <= MAXSAMPTHREADS - 1)
		cs_thread_type = an_scb->cs_thread_type;
	    else
		cs_thread_type = MAXSAMPTHREADS - 1; /* use the <invalid> thread */

	    /* If Factotum thread, try to isolate which kind */
	    if ( cs_thread_type == CS_FACTOTUM )
	    {
		/* Factotum kind is encoded in the leading session name. */
		if ( MEcmp((char *)&an_scb->cs_username,
			   " <WriteBehind", 13) == 0 )
		    cs_thread_type = CS_WRITE_BEHIND;
		else if ( MEcmp((char *)&an_scb->cs_username,
				" <Sort", 6) == 0 )
		    cs_thread_type = CS_SORT;
	    }

	    if (an_scb->cs_state >= 0 &&
		an_scb->cs_state <= MAXSTATES - 1)
		cs_state = an_scb->cs_state;
	    else
		cs_state = MAXSTATES - 1; /* use the <invalid> state */

	    ++CsSamplerBlkPtr->Thread[cs_thread_type].numthreadsamples;

	    if ( cs_thread_type == CS_NORMAL )
		++CsSamplerBlkPtr->totusersamples;
	    else
		++CsSamplerBlkPtr->totsyssamples;

	    switch (cs_state)
	    {
	    case CS_COMPUTABLE:
		/* Count current facility */
		{
		    i4  facility;

		    ++CsSamplerBlkPtr->Thread[cs_thread_type].state[cs_state];
		    facility = (*Cs_srv_block.cs_facility)(an_scb);
		    if (facility >= MAXFACS || facility < 0)
			facility = MAXFACS - 1;	/* clamp to <invalid> */
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].facility[facility];
		    break;
		}

	    case CS_EVENT_WAIT:
		/* Count event types — first matching mask wins. */
		if ( an_scb->cs_memory & CS_BIO_MASK )
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].evwait[EV_BIO];
		else if ( an_scb->cs_memory & CS_DIO_MASK )
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].evwait[EV_DIO];
		else if ( an_scb->cs_memory & CS_LIO_MASK )
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].evwait[EV_LIO];
		else if ( an_scb->cs_memory & CS_LOG_MASK )
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].evwait[EV_LOG];
		else if (an_scb->cs_memory & CS_LOCK_MASK)
		{
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].evwait[EV_LOCK];
		    /* Record which lock the session is waiting on. */
		    AddLock( an_scb->cs_sync_obj ?
				*((LK_LOCK_KEY *)an_scb->cs_sync_obj) :
				dummy_lock,
			     cs_thread_type );
		}
		else
		    ++CsSamplerBlkPtr->Thread[cs_thread_type].state[cs_state];

		/*
		** Classify the wait into one of eleven event buckets:
		** 0/1 DIO r/w, 2/3 LIO r/w, 4/5 BIO r/w, 6 log, 7 lock,
		** 8 log event, 9 lock event, 10 unknown.
		*/
		event =
		    (an_scb->cs_memory & CS_DIO_MASK ?
			an_scb->cs_memory & CS_IOR_MASK ? 0 : 1 :
		     an_scb->cs_memory & CS_LIO_MASK ?
			an_scb->cs_memory & CS_IOR_MASK ? 2 : 3 :
		     an_scb->cs_memory & CS_BIO_MASK ?
			an_scb->cs_memory & CS_IOR_MASK ? 4 : 5 :
		     an_scb->cs_memory & CS_LOG_MASK ? 6 :
		     an_scb->cs_memory & CS_LOCK_MASK ? 7 :
		     an_scb->cs_memory & CS_LGEVENT_MASK ? 8 :
		     an_scb->cs_memory & CS_LKEVENT_MASK ? 9 :
		     /* else it is ... unknown */ 10);

		switch (cs_thread_type)
		{
		case CS_USER_THREAD:
		    ++CsSamplerBlkPtr->numusereventsamples;
		    ++CsSamplerBlkPtr->userevent[event]; /* count event type */
		    break;
		default:
		    ++CsSamplerBlkPtr->numsyseventsamples;
		    ++CsSamplerBlkPtr->sysevent[event];  /* count event type */
		    break;
		} /* switch (cs_thread_type) */

		break;

	    case CS_MUTEX:
		++CsSamplerBlkPtr->Thread[cs_thread_type].state[cs_state];
		/* Record which mutex the session is blocked on. */
		AddMutex( ((CS_SEMAPHORE *)an_scb->cs_sync_obj),
			  cs_thread_type );
		break;

	    /* Uninteresting states */
	    default:
		++CsSamplerBlkPtr->Thread[cs_thread_type].state[cs_state];
		break;
	    } /* switch (cs_state) */
	} /* for */

	/*
	** If a second or more worth of intervals appear to have elapsed,
	** compute current and peak per-second I/O, Transaction rates.
	*/
	if ( (elapsed += CsSamplerBlkPtr->interval) >= 1000 )
	{
	    /* Get the current time; the interval is not reliable!
	    */
	    stoptime = CS_checktime();

	    /* Guard against a zero-second delta before dividing. */
	    if ( (seconds = stoptime - starttime) )
	    {
		if ( (CsSamplerBlkPtr->bior[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_bior_done
			    - bior) / seconds)
			> CsSamplerBlkPtr->bior[PEAK] )
		    CsSamplerBlkPtr->bior[PEAK] =
			CsSamplerBlkPtr->bior[CURR];
		if ( (CsSamplerBlkPtr->biow[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_biow_done
			    - biow) / seconds)
			> CsSamplerBlkPtr->biow[PEAK] )
		    CsSamplerBlkPtr->biow[PEAK] =
			CsSamplerBlkPtr->biow[CURR];
		if ( (CsSamplerBlkPtr->dior[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_dior_done
			    - dior) / seconds)
			> CsSamplerBlkPtr->dior[PEAK] )
		    CsSamplerBlkPtr->dior[PEAK] =
			CsSamplerBlkPtr->dior[CURR];
		if ( (CsSamplerBlkPtr->diork[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_dior_kbytes
			    - diork) / seconds)
			> CsSamplerBlkPtr->diork[PEAK] )
		    CsSamplerBlkPtr->diork[PEAK] =
			CsSamplerBlkPtr->diork[CURR];
		if ( (CsSamplerBlkPtr->diow[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_diow_done
			    - diow) / seconds)
			> CsSamplerBlkPtr->diow[PEAK] )
		    CsSamplerBlkPtr->diow[PEAK] =
			CsSamplerBlkPtr->diow[CURR];
		if ( (CsSamplerBlkPtr->diowk[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_diow_kbytes
			    - diowk) / seconds)
			> CsSamplerBlkPtr->diowk[PEAK] )
		    CsSamplerBlkPtr->diowk[PEAK] =
			CsSamplerBlkPtr->diowk[CURR];
		if ( (CsSamplerBlkPtr->lior[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_lior_done
			    - lior) / seconds)
			> CsSamplerBlkPtr->lior[PEAK] )
		    CsSamplerBlkPtr->lior[PEAK] =
			CsSamplerBlkPtr->lior[CURR];
		if ( (CsSamplerBlkPtr->liork[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_lior_kbytes
			    - liork) / seconds)
			> CsSamplerBlkPtr->liork[PEAK] )
		    CsSamplerBlkPtr->liork[PEAK] =
			CsSamplerBlkPtr->liork[CURR];
		if ( (CsSamplerBlkPtr->liow[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_liow_done
			    - liow) / seconds)
			> CsSamplerBlkPtr->liow[PEAK] )
		    CsSamplerBlkPtr->liow[PEAK] =
			CsSamplerBlkPtr->liow[CURR];
		if ( (CsSamplerBlkPtr->liowk[CURR] =
			(Cs_srv_block.cs_wtstatistics.cs_liow_kbytes
			    - liowk) / seconds)
			> CsSamplerBlkPtr->liowk[PEAK] )
		    CsSamplerBlkPtr->liowk[PEAK] =
			CsSamplerBlkPtr->liowk[CURR];

		/*
		** Transaction rate cannot be determined
		*/
	    }

	    /* Re-prime the baselines for the next measurement window. */
	    starttime = CS_checktime();
	    elapsed = 0;

	    bior  = Cs_srv_block.cs_wtstatistics.cs_bior_done;
	    biow  = Cs_srv_block.cs_wtstatistics.cs_biow_done;
	    dior  = Cs_srv_block.cs_wtstatistics.cs_dior_done;
	    diork = Cs_srv_block.cs_wtstatistics.cs_dior_kbytes;
	    diow  = Cs_srv_block.cs_wtstatistics.cs_diow_done;
	    diowk = Cs_srv_block.cs_wtstatistics.cs_diow_kbytes;
	    lior  = Cs_srv_block.cs_wtstatistics.cs_lior_done;
	    liork = Cs_srv_block.cs_wtstatistics.cs_lior_kbytes;
	    liow  = Cs_srv_block.cs_wtstatistics.cs_liow_done;
	    liowk = Cs_srv_block.cs_wtstatistics.cs_liow_kbytes;
	}

	sleeptime = CsSamplerBlkPtr->interval;

	UnlockSamplerBlk(hCsSamplerSem);

	Sleep (sleeptime);
    } /* for (;;) */
} /* CS_sampler */
/*{
** Name: DI_inproc_read -  read page(s) from a file on disk.
**
** Description:
**	This routine was created to make DIread more readable once
**	error checking had been added. See DIread for comments.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**      diop                 Pointer to dilru file context.
**      buf                  Pointer to page(s) to read.
**      page                 Value indicating page(s) to read.
**      num_of_pages         number of pages to read
**
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system
**                           errors.
** Returns:
**      OK
**      other errors.
** Exceptions:
**      none
**
** Side Effects:
**      none
**
** History:
**	30-nov-1992 (rmuth)
**	    Created.
**	03-jun-1996 (canor01)
**	    Note in the scb that this is a DI wait.
**	14-July-1997 (schte01)
**	    For those platforms that do direct i/o (where the
**	    seek and the read are separate functions), do not release and
**	    reaquire the semaphore on the DI_IO block. This will protect
**	    against i/o being done by a different thread in between the
**	    lseek and the read.
**	14-Aug-1997 (schte01)
**	    Add xCL_DIRECT_IO as a condition to the 14-July-1997 change
**	    instead of the test for !xCL_ASYNCH_IO.
**	22-Dec-1998 (jenjo02)
**	    If DI_FD_PER_THREAD is defined, call IIdio_read() instead of
**	    pread().
**	01-Apr-2004 (fanch01)
**	    Add O_DIRECT support on Linux depending on the filesystem
**	    properties, pagesize.  Fixups for misaligned buffers on read()
**	    and write() operations.
**	13-apr-04 (toumi01)
**	    Move stack variable declaration to support "standard" C compilers.
**	29-Jan-2005 (schka24)
**	    Ditch attempt to gather dior timing stats, not useful in
**	    the real world and generates excess syscalls on some platforms.
**	15-Mar-2006 (jenjo02)
**	    io_sem is not needed with thread affinity.
**	6-Nov-2009 (kschendel) SIR 122757
**	    Remove copy to aligned buffer, caller is supposed to do it.
*/
static STATUS
DI_inproc_read(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    i4	*n,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    i4	saved_state;
    /* unix variables */
    int		unix_fd;
    int		bytes_read = 0;
    int		bytes_to_read;
    OFFSET_TYPE	lseek_offset;

    /*
    ** Seek to place to read
    */
    lseek_offset = (OFFSET_TYPE)f->io_bytes_per_page * (OFFSET_TYPE)page;
    bytes_to_read = f->io_bytes_per_page * num_of_pages;

    unix_fd = diop->di_fd;

    /*
    ** Only DBMS-style servers maintain session wait-state and I/O
    ** statistics; skip the bookkeeping entirely otherwise.
    */
    if (Di_backend)
    {
	CSget_scb(&scb);
	if ( scb )
	{
	    /* Mark session as waiting on a direct or log read. */
	    saved_state = scb->cs_state;
	    scb->cs_state = CS_EVENT_WAIT;

	    if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    {
		scb->cs_memory = CS_LIOR_MASK;
		scb->cs_lior++;
		Cs_srv_block.cs_wtstatistics.cs_lior_done++;
		Cs_srv_block.cs_wtstatistics.cs_lior_waits++;
		Cs_srv_block.cs_wtstatistics.cs_lior_kbytes
		    += bytes_to_read / 1024;
	    }
	    else
	    {
		scb->cs_memory = CS_DIOR_MASK;
		scb->cs_dior++;
		Cs_srv_block.cs_wtstatistics.cs_dior_done++;
		Cs_srv_block.cs_wtstatistics.cs_dior_waits++;
		Cs_srv_block.cs_wtstatistics.cs_dior_kbytes
		    += bytes_to_read / 1024;
	    }
	}
    }

    /*
    ** Without atomic positioned read/write, serialize seek+read on the
    ** file's semaphore — unless each thread has its own fd (thread
    ** affinity) or the file is private to this thread.
    */
# if defined( OS_THREADS_USED ) && (defined (xCL_NO_ATOMIC_READ_WRITE_IO))
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
    {
	CS_synch_lock( &f->io_sem );
    }
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

    /*
    ** NB: both branches below deliberately open the short-read "if"
    ** block; the closing brace after #endif terminates whichever one
    ** was compiled.  IIdio_read() fills in err_code itself, so only
    ** the pread path calls SETCLERR here.
    */
# if defined( OS_THREADS_USED ) && (! defined (xCL_NO_ATOMIC_READ_WRITE_IO))
#ifdef LARGEFILE64
    bytes_read = pread64( unix_fd, buf, bytes_to_read, lseek_offset );
#else /*  LARGEFILE64 */
    bytes_read = pread( unix_fd, buf, bytes_to_read, lseek_offset );
#endif /* LARGEFILE64 */
    if ( bytes_read != bytes_to_read )
    {
	SETCLERR(err_code, 0, ER_read);
# else /* OS_THREADS_USED */
    bytes_read = IIdio_read( unix_fd, buf, bytes_to_read,
			     lseek_offset, 0,
			     f->io_fprop,
			     err_code );
    if ( bytes_read != bytes_to_read )
    {
# endif /* OS_THREADS_USED && ! xCL_NO_ATOMIC_READ_WRITE_IO */
	if (bytes_read == -1)
	{
	    /* the read failed outright */
	    status = DI_BADREAD;
	}
	else
	{
	    /* short read: ran off the end of the file */
	    status = DI_ENDFILE;
	}
    }

# if defined( OS_THREADS_USED ) && (defined (xCL_NO_ATOMIC_READ_WRITE_IO) )
    if ( !Di_thread_affinity && (f->io_fprop & FPROP_PRIVATE) == 0)
	CS_synch_unlock( &f->io_sem );
# endif /* OS_THREADS_USED && xCL_NO_ATOMIC_READ_WRITE_IO */

    if (Di_backend)
    {
	if ( scb )
	{
	    /* Restore the session state saved before the I/O. */
	    scb->cs_memory &= ~(CS_DIOR_MASK | CS_LIOR_MASK);
	    scb->cs_state = saved_state;
	}
    }

    /* Report the number of whole pages actually transferred. */
    if ( bytes_read > 0 )
	*n = bytes_read / f->io_bytes_per_page;

    return(status);
}

# if defined(OS_THREADS_USED) || defined(xCL_ASYNC_IO)
/*{
** Name: DI_async_read -  read page(s) asynchronously from a file on disk.
**
** Description:
**	This routine was created to interface with async io routines
**	where such routines are available.
**
** Inputs:
**      f                    Pointer to the DI file
**                           context needed to do I/O.
**      diop                 Pointer to dilru file context.
**      buf                  Pointer to page(s) to read.
**      page                 Value indicating page(s) to read.
**      num_of_pages         number of pages to read
**
** Outputs:
**      err_code             Pointer to a variable used
**                           to return operating system
**                           errors.
** Returns:
**      OK
**      other errors.
** Exceptions:
**      none
**
** Side Effects:
**      none
**
** History:
**	20-jun-1995 (amo ICL)
**	    Created.
*/
static STATUS
DI_async_read(
    DI_IO	*f,
    DI_OP	*diop,
    char        *buf,
    i4	page,
    i4	num_of_pages,
    i4	*n,
    CL_ERR_DESC *err_code )
{
    STATUS	status = OK;
    CS_SCB	*scb;
    int		prev_state;
    i4	io_start;
    /* unix variables */
    int		nread = 0;
    int		nwanted;
    OFFSET_TYPE	file_offset;

    /*
    ** Compute the byte offset and transfer size for the requested
    ** page range.
    */
    file_offset = (OFFSET_TYPE)(f->io_bytes_per_page) * (OFFSET_TYPE)(page);
    nwanted = f->io_bytes_per_page * num_of_pages;

    /*
    ** If running on a session thread, mark it as waiting on a direct
    ** or log read and bump the server-wide read statistics; the state
    ** is restored once the I/O completes below.
    */
    CSget_scb(&scb);
    if ( scb )
    {
	prev_state = scb->cs_state;
	scb->cs_state = CS_EVENT_WAIT;

	if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	{
	    scb->cs_memory = CS_LIOR_MASK;
	    scb->cs_lior++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_done++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_waits++;
	    Cs_srv_block.cs_wtstatistics.cs_lior_kbytes += nwanted / 1024;
	}
	else
	{
	    scb->cs_memory = CS_DIOR_MASK;
	    scb->cs_dior++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_done++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_waits++;
	    Cs_srv_block.cs_wtstatistics.cs_dior_kbytes += nwanted / 1024;
	}

	/* Clock the read */
	io_start = CS_checktime();
    }

    /*
    ** Hand the read to the IO-thread pool when OS threads are in use
    ** without true async I/O; otherwise use the aio interface.
    */
# if defined(OS_THREADS_USED) && !defined(xCL_ASYNC_IO)
    nread = DI_thread_rw( O_RDONLY, diop, buf, nwanted,
			  file_offset, NULL, err_code);
# else /* OS_THREADS_USED */
    nread = DI_aio_rw( O_RDONLY, diop, buf, nwanted,
		       file_offset, NULL, err_code);
# endif /* OS_THREADS_USED */

    if ( nread != nwanted )
    {
	SETCLERR(err_code, 0, ER_read);

	/* -1 is an outright failure; anything else is a short read. */
	status = (nread == -1) ? DI_BADREAD : DI_ENDFILE;
    }

    if ( scb )
    {
	i4 spent = CS_checktime() - io_start;

	/* Restore the session state saved before the I/O. */
	scb->cs_memory &= ~(CS_DIOR_MASK | CS_LIOR_MASK);
	scb->cs_state = prev_state;

	if (f->io_open_flags & DI_O_LOG_FILE_MASK)
	    Cs_srv_block.cs_wtstatistics.cs_lior_time += spent;
	else
	    Cs_srv_block.cs_wtstatistics.cs_dior_time += spent;
    }

    /* Report the number of whole pages actually transferred. */
    if ( nread > 0 )
	*n = nread / f->io_bytes_per_page;

    return(status);
}