/**
 * Read the next event from an XML-format user log.
 *
 * @param event  Out-parameter.  On ULOG_OK it points at a newly
 *               instantiated event initialised from the parsed ClassAd
 *               (the caller takes ownership).  On every other outcome it
 *               is set to NULL.
 * @return ULOG_OK        an event was read and instantiated;
 *         ULOG_NO_EVENT  no complete ClassAd could be parsed (the file
 *                        position is rewound), or the ad carries no
 *                        integer "EventTypeNumber" attribute;
 *         ULOG_UNK_ERROR m_fp is not usable, ftell()/fseek() failed, or
 *                        the event number could not be instantiated.
 */
ULogEventOutcome
ReadUserLog::readEventXML( ULogEvent *& event )
{
    classad::ClassAdXMLParser xmlp;

    // Make sure the caller never sees a stale pointer, whichever way we
    // leave this function.
    event = NULL;

    // we obtain a write lock here not because we want to write
    // anything, but because we want to ensure we don't read
    // mid-way through someone else's write
    Lock( true );

    // store file position so that if we are unable to read the event, we can
    // rewind to this location
    long filepos;
    if ( !m_fp || ((filepos = ftell(m_fp)) == -1L) ) {
        Unlock( true );
        return ULOG_UNK_ERROR;
    }

    // The ad is only needed for the duration of this call, so it lives on
    // the stack and is cleaned up on every return path.
    ClassAd eventad;
    const bool parsed = xmlp.ParseClassAd( m_fp, eventad );

    Unlock( true );

    if ( !parsed ) {
        // we don't have the full event in the stream yet; restore file
        // position and return
        if ( fseek( m_fp, filepos, SEEK_SET ) ) {
            dprintf(D_ALWAYS, "fseek() failed in ReadUserLog::readEvent");
            return ULOG_UNK_ERROR;
        }
        clearerr( m_fp );
        return ULOG_NO_EVENT;
    }

    int enmbr;
    if ( !eventad.LookupInteger( "EventTypeNumber", enmbr ) ) {
        return ULOG_NO_EVENT;
    }

    event = instantiateEvent( (ULogEventNumber) enmbr );
    if ( !event ) {
        return ULOG_UNK_ERROR;
    }

    event->initFromClassAd( &eventad );
    return ULOG_OK;
}
/**
 * Read the next event from an old-style (non-XML) user log.
 *
 * The event number is read with fscanf(), the matching event object is
 * instantiated, and the event body is read with getEvent().  If the first
 * attempt fails, the lock is dropped for a second, the file is rewound to
 * where we started, and the read is retried once after synchronize().
 *
 * @param event  Out-parameter.  On ULOG_OK it points at the event that was
 *               read (the caller takes ownership).  On every other outcome
 *               it is NULL.
 * @return ULOG_OK        an event was read and the log was synchronized;
 *         ULOG_NO_EVENT  end of file, or the event is not (yet) completely
 *                        present in the file;
 *         ULOG_RD_ERROR  the event could not be parsed on the second try;
 *         ULOG_UNK_ERROR m_fp is not usable, ftell()/fseek() failed, or the
 *                        event number could not be instantiated.
 */
ULogEventOutcome
ReadUserLog::readEventOld( ULogEvent *& event )
{
    long filepos;
    int  eventnumber;
    int  retval1, retval2;

    // we obtain a write lock here not because we want to write
    // anything, but because we want to ensure we don't read
    // mid-way through someone else's write
    if ( m_lock->isUnlocked() ) {
        m_lock->obtain( WRITE_LOCK );
    }

    // store file position so that if we are unable to read the event, we can
    // rewind to this location
    if ( !m_fp || ((filepos = ftell(m_fp)) == -1L) ) {
        dprintf( D_FULLDEBUG,
                 "ReadUserLog: invalid m_fp, or ftell() failed\n" );
        if ( m_lock->isLocked() ) {
            m_lock->release();
        }
        event = NULL;   // never hand back a stale pointer with an error
        return ULOG_UNK_ERROR;
    }

    retval1 = fscanf( m_fp, "%d", &eventnumber );

    // so we don't dump core if the above fscanf failed
    if ( retval1 != 1 ) {
        eventnumber = 1;
        // check for end of file -- why this is needed has been
        // lost, but it was removed once and everything went to
        // hell, so don't touch it...
        // Note: this is needed because if this method is called and
        // you're at the end of the file, fscanf returns EOF (-1) and
        // you get here.  If you're at EOF you had better bail out...
        // (This is not uncommon -- any time you try to read an event
        // and there aren't any events to read you get here.)
        // If fscanf returns 0, you're probably *really* in trouble.
        // wenger 2004-10-07.
        if ( feof( m_fp ) ) {
            event = NULL;  // To prevent FMR: Free memory read
            clearerr( m_fp );
            if ( m_lock->isLocked() ) {
                m_lock->release();
            }
            return ULOG_NO_EVENT;
        }
        dprintf( D_FULLDEBUG, "ReadUserLog: error (not EOF) reading "
                 "event number\n" );
    }

    // allocate event object; check if allocated successfully
    event = instantiateEvent( (ULogEventNumber) eventnumber );
    if ( !event ) {
        dprintf( D_FULLDEBUG, "ReadUserLog: unable to instantiate event\n" );
        if ( m_lock->isLocked() ) {
            m_lock->release();
        }
        return ULOG_UNK_ERROR;
    }

    // read event from file; check for result
    retval2 = event->getEvent( m_fp );

    // check if error in reading event
    if ( !retval1 || !retval2 ) {
        dprintf( D_FULLDEBUG,
                 "ReadUserLog: error reading event; re-trying\n" );

        // we could end up here if file locking did not work for
        // whatever reason (usual NFS bugs, whatever).  so here
        // try to wait a second until the current partially-written
        // event has been completely written.  the algorithm is
        // wait a second, rewind to our initial position (in case a
        // buggy getEvent() slurped up more than one event), then
        // again try to synchronize the log
        //
        // NOTE: this code is important, so don't remove or "fix"
        // it unless you *really* know what you're doing and test it
        // extremely well
        if ( m_lock->isLocked() ) {
            m_lock->release();
        }
        sleep( 1 );
        if ( m_lock->isUnlocked() ) {
            m_lock->obtain( WRITE_LOCK );
        }
        if ( fseek( m_fp, filepos, SEEK_SET ) ) {
            dprintf( D_ALWAYS, "fseek() failed in %s:%d", __FILE__, __LINE__ );
            // Don't leak the event allocated above, and don't return a
            // live pointer together with an error code.
            delete event;
            event = NULL;
            if ( m_lock->isLocked() ) {
                m_lock->release();
            }
            return ULOG_UNK_ERROR;
        }

        if ( synchronize() ) {
            // if synchronization was successful, reset file position and ...
            if ( fseek( m_fp, filepos, SEEK_SET ) ) {
                dprintf(D_ALWAYS, "fseek() failed in ReadUserLog::readEvent");
                delete event;
                event = NULL;
                if ( m_lock->isLocked() ) {
                    m_lock->release();
                }
                return ULOG_UNK_ERROR;
            }

            // ... attempt to read the event again
            clearerr( m_fp );
            int oldeventnumber = eventnumber;
            eventnumber = -1;
            retval1 = fscanf( m_fp, "%d", &eventnumber );
            if ( retval1 == 1 ) {
                if ( eventnumber != oldeventnumber ) {
                    if ( event ) {
                        delete event;
                    }

                    // allocate event object; check if allocated
                    // successfully
                    event = instantiateEvent( (ULogEventNumber)eventnumber );
                    if ( !event ) {
                        dprintf( D_FULLDEBUG, "ReadUserLog: unable to "
                                 "instantiate event\n" );
                        if ( m_lock->isLocked() ) {
                            m_lock->release();
                        }
                        return ULOG_UNK_ERROR;
                    }
                }
                retval2 = event->getEvent( m_fp );
            }

            // if failed again, we have a parse error.
            // The retry only counts as a success when fscanf() converted
            // exactly one item AND getEvent() succeeded.  (This used to
            // read "!retval1 != 1", which is true for every non-zero
            // retval1 -- including the success value 1 -- and which let a
            // stale retval2 from the first attempt through when the
            // re-scan returned 0.)
            if ( retval1 != 1 || !retval2 ) {
                dprintf( D_FULLDEBUG, "ReadUserLog: error reading event "
                         "on second try\n");
                delete event;
                event = NULL;  // To prevent FMR: Free memory read
                synchronize();
                if ( m_lock->isLocked() ) {
                    m_lock->release();
                }
                return ULOG_RD_ERROR;
            }
            else {
                // finally got the event successfully --
                // synchronize the log
                if ( synchronize() ) {
                    if ( m_lock->isLocked() ) {
                        m_lock->release();
                    }
                    return ULOG_OK;
                }
                else {
                    // got the event, but could not synchronize!!
                    // treat as incomplete event
                    dprintf( D_FULLDEBUG,
                             "ReadUserLog: got event on second try "
                             "but synchronize() failed\n");
                    delete event;
                    event = NULL;  // To prevent FMR: Free memory read
                    clearerr( m_fp );
                    if ( m_lock->isLocked() ) {
                        m_lock->release();
                    }
                    return ULOG_NO_EVENT;
                }
            }
        }
        else {
            // if we could not synchronize the log, we don't have the full
            // event in the stream yet; restore file position and return
            dprintf( D_FULLDEBUG, "ReadUserLog: syncronize() failed\n");
            if ( fseek( m_fp, filepos, SEEK_SET ) ) {
                dprintf(D_ALWAYS, "fseek() failed in ReadUserLog::readEvent");
                delete event;
                event = NULL;
                if ( m_lock->isLocked() ) {
                    m_lock->release();
                }
                return ULOG_UNK_ERROR;
            }
            clearerr( m_fp );
            delete event;
            event = NULL;  // To prevent FMR: Free memory read
            if ( m_lock->isLocked() ) {
                m_lock->release();
            }
            return ULOG_NO_EVENT;
        }
    }
    else {
        // got the event successfully -- synchronize the log
        if ( synchronize() ) {
            if ( m_lock->isLocked() ) {
                m_lock->release();
            }
            return ULOG_OK;
        }
        else {
            // got the event, but could not synchronize!!  treat as incomplete
            // event
            dprintf( D_FULLDEBUG, "ReadUserLog: got event on first try "
                     "but synchronize() failed\n");
            delete event;
            event = NULL;  // To prevent FMR: Free memory read
            clearerr( m_fp );
            if ( m_lock->isLocked() ) {
                m_lock->release();
            }
            return ULOG_NO_EVENT;
        }
    }

    // will not reach here
    if ( m_lock->isLocked() ) {
        m_lock->release();
    }
    dprintf( D_ALWAYS, "Error: got to the end of "
             "ReadUserLog::readEventOld()\n");
    return ULOG_UNK_ERROR;
}
/**
 * Write the event described by a ClassAd to the job's user log.
 *
 * If the ad carries ATTR_HOLD_REASON_CODE the job is put on hold via
 * Shadow->holdJobAndExit() after the event has been logged.  A
 * ULOG_REMOTE_ERROR event flagged as critical is logged through
 * BaseShadow::log_except() instead of being written as-is, and (unless the
 * job is being put on hold) ends in EXCEPT().
 *
 * @param ad  ClassAd describing the event; must be convertible by
 *            instantiateEvent().
 * @return 0 on success, -1 if the ad does not describe a valid event or
 *         the event could not be written to the user log.
 */
int
pseudo_ulog( ClassAd *ad )
{
    ULogEvent *event = instantiateEvent(ad);
    int result = 0;
    char const *critical_error = NULL;
    MyString CriticalErrorBuf;
    bool event_already_logged = false;
    bool put_job_on_hold = false;
    char const *hold_reason = NULL;
    char *hold_reason_buf = NULL;
    int hold_reason_code = 0;
    int hold_reason_sub_code = 0;

    if(!event) {
        MyString add_str;
        sPrintAd(add_str, *ad);
        dprintf(
          D_ALWAYS,
          "invalid event ClassAd in pseudo_ulog: %s\n",
          add_str.Value());
        return -1;
    }

    // The presence of a hold reason code is what requests a hold; the
    // sub-code and the textual reason are optional extras.
    if(ad->LookupInteger(ATTR_HOLD_REASON_CODE,hold_reason_code)) {
        put_job_on_hold = true;
        ad->LookupInteger(ATTR_HOLD_REASON_SUBCODE,hold_reason_sub_code);
        ad->LookupString(ATTR_HOLD_REASON,&hold_reason_buf);
        if(hold_reason_buf) {
            hold_reason = hold_reason_buf;
        }
    }

    if( event->eventNumber == ULOG_REMOTE_ERROR ) {
        RemoteErrorEvent *err = (RemoteErrorEvent *)event;

        if(!err->getExecuteHost() || !*err->getExecuteHost()) {
            //Insert remote host information.
            char *execute_host = NULL;
            thisRemoteResource->getMachineName(execute_host);
            err->setExecuteHost(execute_host);
            delete[] execute_host;
        }

        if(err->isCriticalError()) {
            CriticalErrorBuf.formatstr(
              "Error from %s: %s",
              err->getExecuteHost(),
              err->getErrorText());

            critical_error = CriticalErrorBuf.Value();

            //Temporary: the following causes critical remote errors
            //to be logged as ShadowExceptionEvents, rather than
            //RemoteErrorEvents.  The result is ugly, but guaranteed to
            //be compatible with other user-log reading tools.
            BaseShadow::log_except(critical_error);
            event_already_logged = true;
        }
    }

    if( !event_already_logged && !Shadow->uLog.writeEvent( event, ad ) ) {
        MyString add_str;
        sPrintAd(add_str, *ad);
        dprintf(
          D_ALWAYS,
          "unable to log event in pseudo_ulog: %s\n",
          add_str.Value());
        result = -1;
    }

    if(put_job_on_hold) {
        // Pick the hold reason: the critical error text when there is one,
        // otherwise the reason supplied in the ad, otherwise a generic
        // message.  (Previously the ad-supplied reason was overwritten
        // unconditionally and therefore never used.)
        if(critical_error) {
            hold_reason = critical_error;
        }
        if(!hold_reason) {
            hold_reason = "Job put on hold by remote host.";
        }
        Shadow->holdJobAndExit(hold_reason,hold_reason_code,hold_reason_sub_code);
        //should never get here, because holdJobAndExit() exits.
    }

    if( critical_error ) {
        //Suppress ugly "Shadow exception!"
        Shadow->exception_already_logged = true;

        //lame: at the time of this writing, EXCEPT does not want const:
        EXCEPT("%s", critical_error);
    }

    delete event;
    return result;
}