/**
 * Handle a reverse-connect request delivered by the CCB server.
 *
 * The request ad must carry ATTR_MY_ADDRESS, ATTR_CLAIM_ID and
 * ATTR_REQUEST_ID; if any of them is missing the daemon EXCEPTs.
 * ATTR_NAME is optional and only used to build the log message.
 *
 * @param msg  request ad received from the CCB server
 * @return the result of DoReversedCCBConnect()
 */
bool
CCBListener::HandleCCBRequest( ClassAd &msg )
{
    MyString return_address;
    MyString connect_cookie;
    MyString req_id;

    const bool well_formed =
        msg.LookupString( ATTR_MY_ADDRESS, return_address ) &&
        msg.LookupString( ATTR_CLAIM_ID, connect_cookie ) &&
        msg.LookupString( ATTR_REQUEST_ID, req_id );

    if( !well_formed ) {
        MyString ad_text;
        msg.sPrint(ad_text);
        EXCEPT("CCBListener: invalid CCB request from %s: %s\n",
               m_ccb_address.Value(),
               ad_text.Value() );
    }

    // The peer name is optional; make sure the description we log always
    // mentions the address we are about to connect to.
    MyString peer_name;
    msg.LookupString( ATTR_NAME, peer_name );
    const bool name_mentions_address = ( peer_name.find(return_address.Value()) >= 0 );
    if( !name_mentions_address ) {
        peer_name.formatstr_cat(" with reverse connect address %s",return_address.Value());
    }

    dprintf(D_FULLDEBUG|D_NETWORK,
            "CCBListener: received request to connect to %s, request id %s.\n",
            peer_name.Value(), req_id.Value());

    return DoReversedCCBConnect( return_address.Value(),
                                 connect_cookie.Value(),
                                 req_id.Value(),
                                 peer_name.Value() );
}
int TransferQueueManager::HandleRequest(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == TRANSFER_QUEUE_REQUEST ); ClassAd msg; sock->decode(); if( !getClassAd( sock, msg ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "TransferQueueManager: failed to receive transfer request " "from %s.\n", sock->peer_description() ); return FALSE; } bool downloading = false; MyString fname; MyString jobid; MyString queue_user; filesize_t sandbox_size; if( !msg.LookupBool(ATTR_DOWNLOADING,downloading) || !msg.LookupString(ATTR_FILE_NAME,fname) || !msg.LookupString(ATTR_JOB_ID,jobid) || !msg.LookupString(ATTR_USER,queue_user) || !msg.LookupInteger(ATTR_SANDBOX_SIZE,sandbox_size)) { MyString msg_str; sPrintAd(msg_str, msg); dprintf(D_ALWAYS,"TransferQueueManager: invalid request from %s: %s\n", sock->peer_description(), msg_str.Value()); return FALSE; } // Currently, we just create the client with the default max queue // age. If it becomes necessary to customize the maximum age // on a case-by-case basis, it should be easy to adjust. TransferQueueRequest *client = new TransferQueueRequest( sock, sandbox_size, fname.Value(), jobid.Value(), queue_user.Value(), downloading, m_default_max_queue_age); if( !AddRequest( client ) ) { delete client; return KEEP_STREAM; // we have already closed this socket } return KEEP_STREAM; }
bool DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr) { ReliSock sock; if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n", getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) { error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } ClassAd input; input.Assign(ATTR_CLAIM_ID,job_claim_id); input.Assign(ATTR_SESSION_INFO,session_info); sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } sock.decode(); ClassAd reply; if( !getClassAd(&sock, reply) || !sock.end_of_message() ) { error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter"; return false; } bool success = false; reply.LookupBool(ATTR_RESULT,success); if( !success ) { reply.LookupString(ATTR_ERROR_STRING,error_msg); return false; } reply.LookupString(ATTR_CLAIM_ID,owner_claim_id); reply.LookupString(ATTR_VERSION,starter_version); // get the full starter address from the starter in case it contains // extra CCB info that we don't already know about reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); return true; }
bool JobServerObject::getSummary(const char* key, JobSummaryFields& _summary, AviaryStatus &_status) { Job* job = NULL; if (!(job = getValidKnownJob(key,_status))) { return false; } ClassAd classAd; job->getSummary ( classAd ); // little cheat for ad problems with history lookups string str; if ( classAd.LookupString("JOB_AD_ERROR", str) ) { aviUtilFmt(_status.text,"Error obtaining ClassAd for job '%s'; ",key); _status.text += str; dprintf(D_ALWAYS,"%s\n",_status.text.c_str()); return false; } // return the limited attributes classAd.LookupString(ATTR_JOB_CMD,_summary.cmd); classAd.LookupString(ATTR_JOB_ARGUMENTS1,_summary.args1); classAd.LookupString(ATTR_JOB_ARGUMENTS2,_summary.args2); classAd.LookupString(ATTR_HOLD_REASON,_summary.hold_reason); classAd.LookupString(ATTR_RELEASE_REASON,_summary.release_reason); classAd.LookupString(ATTR_REMOVE_REASON,_summary.remove_reason); classAd.LookupString(ATTR_JOB_SUBMISSION,_summary.submission_id); classAd.LookupString(ATTR_OWNER,_summary.owner); classAd.LookupInteger(ATTR_Q_DATE,_summary.queued); classAd.LookupInteger(ATTR_ENTERED_CURRENT_STATUS,_summary.last_update); _summary.status = job->getStatus(); _status.type = AviaryStatus::A_OK; return true; }
void cp_compute_consumption(ClassAd& job, ClassAd& resource, consumption_map_t& consumption) { consumption.clear(); string mrv; if (!resource.LookupString(ATTR_MACHINE_RESOURCES, mrv)) { EXCEPT("Resource ad missing %s attribute", ATTR_MACHINE_RESOURCES); } StringList alist(mrv.c_str()); alist.rewind(); while (char* asset = alist.next()) { if (MATCH == strcasecmp(asset, "swap")) continue; string ra; string coa; formatstr(ra, "%s%s", ATTR_REQUEST_PREFIX, asset); formatstr(coa, "_condor_%s", ra.c_str()); bool override = false; double ov=0; if (job.EvalFloat(coa.c_str(), NULL, ov)) { // Allow _condor_RequestedXXX to override RequestedXXX // this case is intended to be operative when a scheduler has set // such values and sent them on to the startd that owns this resource // (e.g. I'd not expect this case to arise elsewhere, like the negotiator) string ta; formatstr(ta, "_cp_temp_%s", ra.c_str()); job.CopyAttribute(ta.c_str(), ra.c_str()); job.Assign(ra.c_str(), ov); override = true; }
/**
 * Decide whether a resource ad can support a consumption policy.
 *
 * @param resource  the resource ad to inspect
 * @param strict    when true, the ad must also be marked partitionable
 * @return true if the ad has ATTR_MACHINE_RESOURCES and defines a
 *         consumption attribute for every listed resource except "swap"
 */
bool
cp_supports_policy(ClassAd& resource, bool strict)
{
    // Currently only p-slots can support a functional consumption policy.
    if (strict) {
        bool partitionable = false;
        const bool found = resource.LookupBool(ATTR_SLOT_PARTITIONABLE, partitionable);
        if (!found || !partitionable) {
            return false;
        }
    }

    // The ad must advertise its list of machine resources.
    string resource_names;
    if (!resource.LookupString(ATTR_MACHINE_RESOURCES, resource_names)) {
        return false;
    }

    // Every resource Xxx (extensible ones included) needs a ConsumptionXxx.
    StringList assets(resource_names.c_str());
    assets.rewind();
    for (char* asset = assets.next(); asset; asset = assets.next()) {
        if (MATCH == strcasecmp(asset, "swap")) {
            continue;
        }
        string consumption_attr;
        formatstr(consumption_attr, "%s%s", ATTR_CONSUMPTION_PREFIX, asset);
        if (resource.find(consumption_attr) == resource.end()) {
            return false;
        }
    }

    return true;
}
void SubmitterObject::update(const ClassAd &ad) { MGMT_DECLARATIONS; INTEGER(HeldJobs); INTEGER(IdleJobs); TIME_INTEGER(JobQueueBirthdate); STRING(Machine); // STRING(Name); INTEGER(RunningJobs); STRING(ScheddName); if (ad.LookupString("Name", &str)) { mgmtObject->set_Name(str); } else { dprintf(D_FULLDEBUG, "Warning: Could not find Name from ad\n"); } // infer the Owner from the local-part of Name if (str) { string _owner(str); mgmtObject->set_Owner(_owner.substr(0,_owner.find('@'))); free(str); } // debug if (IsFulldebug(D_FULLDEBUG)) { const_cast<ClassAd*>(&ad)->dPrint(D_FULLDEBUG|D_NOHEADER); } }
/**
 * Record a new VM for the given starter.
 *
 * @param s_pid        pid of the starter that owns the VM
 * @param ad           job ad; supplies memory, vcpus and checkpoint MAC
 * @param execute_dir  execute directory stored with the VM record
 * @return false if the VM cannot be created or the ad has no memory
 *         request, true once the VM has been appended to the list
 */
bool
VMUniverseMgr::allocVM(pid_t s_pid, ClassAd &ad, char const *execute_dir)
{
    if( !canCreateVM(&ad) ) {
        return false;
    }

    // Memory: the VM-specific attribute wins, RequestMemory is the fallback.
    int vm_mem = 0;
    const bool have_memory =
        (ad.LookupInteger(ATTR_JOB_VM_MEMORY, vm_mem) == 1) ||
        (ad.LookupInteger(ATTR_REQUEST_MEMORY, vm_mem) == 1);
    if( !have_memory ) {
        dprintf(D_ALWAYS, "Can't find VM memory in Job ClassAd\n");
        return false;
    }

    // CPUs: same lookup order, but a missing value just means one CPU.
    int vcpus = 0;
    const bool have_vcpus =
        (ad.LookupInteger(ATTR_JOB_VM_VCPUS, vcpus) == 1) ||
        (ad.LookupInteger(ATTR_REQUEST_CPUS, vcpus) == 1);
    if( !have_vcpus ) {
        dprintf(D_FULLDEBUG, "Defaulting to one CPU\n");
        vcpus = 1;
    }

    // If this starter pid is already known, release its old record first.
    if( findVMStarterInfoWithStarterPid(s_pid) ) {
        freeVM(s_pid);
    }

    VMStarterInfo *info = new VMStarterInfo;
    ASSERT(info);

    m_vm_used_memory += vm_mem;

    info->m_pid = s_pid;
    info->m_memory = vm_mem;
    info->m_job_ad = ad;
    info->m_execute_dir = execute_dir;
    info->m_vcpus = vcpus;

    // A checkpointed VM may already have a MAC address; start from it.
    // (Restoring ATTR_VM_CKPT_IP the same way is disabled in the original.)
    MyString ckpt_mac;
    if( ad.LookupString(ATTR_VM_CKPT_MAC, ckpt_mac) == 1 ) {
        info->m_vm_mac = ckpt_mac;
    }

    m_vm_starter_list.Append(info);
    return true;
}
/**
 * Fetch ATTR_NAME from the ad and sanitize it.
 *
 * @param ad    ad to read from
 * @param name  [out] sanitized submitter name on success
 * @return true if the ad has a name, false otherwise
 */
bool
GetSubmitterNameFromAd(ClassAd &ad, MyString &name)
{
    const bool has_name = ad.LookupString(ATTR_NAME, name);
    if (has_name) {
        SanitizeSubmitterName(name);
    }
    return has_name;
}
int do_command_upload_sandbox(void *arg, Stream*) { dprintf(D_ALWAYS, "FTGAHP: upload sandbox\n"); Gahp_Args args; parse_gahp_command ((char*)arg, &args); // first two args: result id and sandbox id: std::string rid = args.argv[1]; std::string sid = args.argv[2]; // third arg: job ad ClassAd ad; classad::ClassAdParser my_parser; if (!(my_parser.ParseClassAd(args.argv[3], ad))) { // FAIL write_to_pipe( ChildErrorPipe, "Failed to parse job ad" ); return 1; } // rewrite the IWD to the actual sandbox dir std::string iwd; define_sandbox_path(sid, iwd); ad.Assign(ATTR_JOB_IWD, iwd.c_str()); char ATTR_SANDBOX_ID[] = "SandboxId"; ad.Assign(ATTR_SANDBOX_ID, sid.c_str()); // directory was created, let's set up the FileTransfer object FileTransfer ft; if (!ft.Init(&ad)) { // FAIL write_to_pipe( ChildErrorPipe, "Failed to initialize FileTransfer" ); return 1; } // lookup ATTR_VERSION and set it. this changes the wire // protocol and it is important that this happens before // calling UploadFiles. char* peer_version = NULL; ad.LookupString(ATTR_VERSION, &peer_version); ft.setPeerVersion(peer_version); free (peer_version); dprintf(D_ALWAYS, "BOSCO: calling upload files\n"); // the "true" param to UploadFiles here means blocking (i.e. "in the foreground") if (!ft.UploadFiles(true)) { // FAIL write_to_pipe( ChildErrorPipe, ft.GetInfo().error_desc.Value() ); return 1; } // SUCCEED return 0; }
void test_user_policy_on_exit_remove_yes(void) { int val; int action; char buf[4096]; ClassAd *result; ClassAd *jad = new ClassAd; if (jad == NULL) { printf("Out of memory!\n"); exit(EXIT_FAILURE); } printf("==========================================\n"); printf("Testing User Policy on On Exit Remove: YES\n"); /* set up the classad */ sprintf(buf, "%s = %d", ATTR_ON_EXIT_CODE, 0); jad->Insert(buf); sprintf(buf, "%s = 42", ATTR_TOTAL_SUSPENSIONS); jad->Insert(buf); sprintf(buf, "%s = FALSE", ATTR_PERIODIC_HOLD_CHECK); jad->Insert(buf); sprintf(buf, "%s = FALSE", ATTR_PERIODIC_REMOVE_CHECK); jad->Insert(buf); sprintf(buf, "%s = FALSE", ATTR_ON_EXIT_HOLD_CHECK); jad->Insert(buf); sprintf(buf, "%s = TotalSuspensions == 42", ATTR_ON_EXIT_REMOVE_CHECK); jad->Insert(buf); result = user_job_policy(jad); result->EvalBool(ATTR_USER_POLICY_ERROR, result, val); if(val == true) { printf("An error happened\n"); delete result; return; } result->EvalBool(ATTR_TAKE_ACTION, result, val); if (val == true) { printf("%s was true.\n", ATTR_TAKE_ACTION); result->LookupInteger(ATTR_USER_POLICY_ACTION, action); printf("Action is: %s\n", action==REMOVE_JOB?"REMOVE_JOB":action==HOLD_JOB?"HOLD_JOB": "UNKNOWN"); result->LookupString(ATTR_USER_POLICY_FIRING_EXPR, buf); printf("Reason for action: %s\n", buf); } else { printf("Something went wrong. I should have had an action to take.\n"); } }
bool Defrag::drain(const ClassAd &startd_ad) { std::string name; startd_ad.LookupString(ATTR_NAME,name); dprintf(D_ALWAYS,"Initiating %s draining of %s.\n", m_draining_schedule_str.c_str(),name.c_str()); DCStartd startd( &startd_ad ); int graceful_completion = 0; startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_COMPLETION,graceful_completion); int quick_completion = 0; startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_COMPLETION,quick_completion); int graceful_badput = 0; startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_BADPUT,graceful_badput); int quick_badput = 0; startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_BADPUT,quick_badput); time_t now = time(NULL); std::string draining_check_expr; double badput_growth_tolerance = 1.25; // for now, this is hard-coded int negligible_badput = 1200; int negligible_deadline_slippage = 1200; if( m_draining_schedule <= DRAIN_GRACEFUL ) { dprintf(D_ALWAYS,"Expected draining completion time is %ds; expected draining badput is %d cpu-seconds\n", (int)(graceful_completion-now),graceful_badput); sprintf(draining_check_expr,"%s <= %d && %s <= %d", ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_COMPLETION, graceful_completion + negligible_deadline_slippage, ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_BADPUT, (int)(badput_growth_tolerance*graceful_badput) + negligible_badput); } else { // DRAIN_FAST and DRAIN_QUICK are effectively equivalent here dprintf(D_ALWAYS,"Expected draining completion time is %ds; expected draining badput is %d cpu-seconds\n", (int)(quick_completion-now),quick_badput); sprintf(draining_check_expr,"%s <= %d && %s <= %d", ATTR_EXPECTED_MACHINE_QUICK_DRAINING_COMPLETION, quick_completion + negligible_deadline_slippage, ATTR_EXPECTED_MACHINE_QUICK_DRAINING_BADPUT, (int)(badput_growth_tolerance*quick_badput) + negligible_badput); } std::string request_id; bool resume_on_completion = true; bool rval = startd.drainJobs( m_draining_schedule, resume_on_completion, 
draining_check_expr.c_str(), request_id ); if( !rval ) { dprintf(D_ALWAYS,"Failed to send request to drain %s: %s\n",startd.name(),startd.error()); m_stats.DrainFailures += 1; return false; } m_stats.DrainSuccesses += 1; return true; }
void callCreateOneAnnex() { Stream * s = NULL; ClassAd * reply = new ClassAd(); // Otherwise, reply-and-clean will take care of user notification. if( createOneAnnex( command, s, reply ) != KEEP_STREAM ) { std::string resultString; reply->LookupString( ATTR_RESULT, resultString ); CAResult result = getCAResultNum( resultString.c_str() ); ASSERT( result != CA_SUCCESS ); std::string errorString; reply->LookupString( ATTR_ERROR_STRING, errorString ); ASSERT(! errorString.empty()); fprintf( stderr, "%s\n", errorString.c_str() ); delete reply; DC_Exit( 6 ); } }
/**
 * Process the CCB server's answer to our registration.
 *
 * The reply must contain ATTR_CCBID (the daemon EXCEPTs otherwise);
 * ATTR_CLAIM_ID, if present, is kept as the reconnect cookie.
 *
 * @param msg  the registration reply ad
 * @return always true
 */
bool
CCBListener::HandleCCBRegistrationReply( ClassAd &msg )
{
    const bool has_ccbid = msg.LookupString(ATTR_CCBID,m_ccbid);
    if( !has_ccbid ) {
        MyString reply_text;
        msg.sPrint(reply_text);
        EXCEPT("CCBListener: no ccbid in registration reply: %s\n",
               reply_text.Value() );
    }

    msg.LookupString(ATTR_CLAIM_ID,m_reconnect_cookie);

    dprintf(D_ALWAYS,
            "CCBListener: registered with CCB server %s as ccbid %s\n",
            m_ccb_address.Value(),
            m_ccbid.Value() );

    // We are registered now; let daemonCore know our contact info changed.
    m_registered = true;
    m_waiting_for_registration = false;
    daemonCore->daemonContactInfoChanged();

    return true;
}
/**
 * Build the shared-executable hash string for a job ad.
 *
 * Only the executable's MD5 (ATTR_JOB_CMD_MD5) is considered for now.
 *
 * @param ad  the job ad
 * @return "<escaped attribute name>-<escaped md5>", or "" if the ad has
 *         no MD5 attribute
 */
std::string
ickpt_share_get_hash(ClassAd& ad)
{
    MyString md5;
    const bool has_md5 = ad.LookupString(ATTR_JOB_CMD_MD5, md5);
    if (!has_md5) {
        return "";
    }

    return escape_for_filename(ATTR_JOB_CMD_MD5) +
           "-" +
           escape_for_filename(md5.Value());
}
void test_oldstyle_with_exit(void) { int val; int action; char buf[4096]; ClassAd *result; ClassAd *jad = new ClassAd; if (jad == NULL) { printf("Out of memory!\n"); exit(EXIT_FAILURE); } printf("==================================================\n"); printf("Testing OldStyle job where it is marked as exited.\n"); /* An oldstyle classad would have this */ sprintf(buf, "%s = %d", ATTR_COMPLETION_DATE, 10); /* non zero */ jad->Insert(buf); result = user_job_policy(jad); result->EvalBool(ATTR_USER_POLICY_ERROR, result, val); if(val == true) { printf("An error happened\n"); delete result; return; } result->EvalBool(ATTR_TAKE_ACTION, result, val); if (val == true) { printf("%s was true.\n", ATTR_TAKE_ACTION); result->LookupInteger(ATTR_USER_POLICY_ACTION, action); printf("Action is: %s\n", action==REMOVE_JOB?"REMOVE_JOB":action==HOLD_JOB?"HOLD_JOB": "UNKNOWN"); result->LookupString(ATTR_USER_POLICY_FIRING_EXPR, buf); printf("Reason for action: %s\n", buf); } else { printf("Something went wrong. I should have had an action to take.\n"); } }
void update(int command, const ClassAd &ad) { string public_addr; string cmd_str(getCollectorCommandString(command)); string param_ignore_str("AVIARY_IGNORE_"); param_ignore_str.append(cmd_str); switch (command) { case UPDATE_COLLECTOR_AD: dprintf(D_FULLDEBUG, "AviaryCollectorPlugin: Received UPDATE_COLLECTOR_AD\n"); // We could receive collector ads from many // collectors, but we only maintain our own. So, // ignore all others. if (ad.LookupString(ATTR_MY_ADDRESS, public_addr)) { if (collector->getMyAddress() == public_addr) { if(!collector->update(command,ad)) { dprintf(D_ALWAYS,"AviaryCollectorPlugin: Error on UPDATE_COLLECTOR_AD\n"); } } } else { dprintf(D_ALWAYS,"AviaryCollectorPlugin: Unable to get attribute '%s' from collector ad\n",ATTR_MY_ADDRESS); } break; case UPDATE_MASTER_AD: case UPDATE_NEGOTIATOR_AD: case UPDATE_SCHEDD_AD: case UPDATE_STARTD_AD: case UPDATE_SUBMITTOR_AD: dprintf(D_FULLDEBUG, "AviaryCollectorPlugin: Received %s\n",cmd_str.c_str()); if (param_boolean(param_ignore_str.c_str(), false)) { dprintf(D_FULLDEBUG, "AviaryCollectorPlugin: Configured to ignore %s\n",cmd_str.c_str()); break; } if(!collector->update(command,ad)) { dprintf(D_ALWAYS,"AviaryCollectorPlugin: Error on %s\n",cmd_str.c_str()); } break; case UPDATE_GRID_AD: // TODO: ignore Grid ads? default: dprintf(D_FULLDEBUG, "AviaryCollectorPlugin: Unsupported command: %s\n",cmd_str.c_str()); } }
void invalidate(int command, const ClassAd &ad) { string generic_target_name; switch (command) { case INVALIDATE_ADS_GENERIC: dprintf(D_FULLDEBUG, "AviaryLocatorPlugin: Received INVALIDATE_ADS_GENERIC\n"); if (ad.LookupString(ATTR_TARGET_TYPE,generic_target_name)) { if (generic_target_name != ENDPOINT) { return; } locator.invalidate(ad); } break; default: dprintf(D_FULLDEBUG, "AviaryLocatorPlugin: Unsupported command: %s\n", getCollectorCommandString(command)); } }
/**
 * Ask a startd to cancel draining.
 *
 * @param startd_ad  ad of the machine whose draining should be cancelled
 * @return true if the cancel request was sent, false otherwise
 */
bool
Defrag::cancel_drain(const ClassAd &startd_ad)
{
    std::string name;
    startd_ad.LookupString(ATTR_NAME,name);

    // This used to reuse drain()'s "Initiating %s draining" message,
    // which made a cancellation look like a new drain in the log.
    dprintf(D_ALWAYS,"Cancel draining of %s.\n",name.c_str());

    DCStartd startd( &startd_ad );

    // NULL request id, as in the original call.
    bool rval = startd.cancelDrainJobs( NULL );
    if ( rval ) {
        dprintf(D_FULLDEBUG, "Sent request to cancel draining on %s\n", startd.name());
    } else {
        dprintf(D_ALWAYS, "Unable to cancel draining on %s: %s\n", startd.name(), startd.error());
    }
    return rval;
}
bool JobServerObject::getJobAd ( const char* key, AttributeMapType& _map, AviaryStatus &_status) { Job* job = NULL; if (!(job = getValidKnownJob(key,_status))) { return false; } // call Job::getFullAd and use utils to populate the map ClassAd classAd; job->getFullAd ( classAd ); // little cheat for ad problems with history lookups string str; if ( classAd.LookupString("JOB_AD_ERROR", str) ) { aviUtilFmt(_status.text,"Error obtaining ClassAd for job '%s'; ",key); _status.text += str; dprintf(D_ALWAYS,"%s\n",_status.text.c_str()); } // return all the attributes in the ClassAd if ( !m_codec->classAdToMap ( classAd, _map ) ) { aviUtilFmt(_status.text,"Error mapping info for job '%s'; ",key); dprintf(D_ALWAYS,"%s\n",_status.text.c_str()); return false; } // debug // if (IsFulldebug(D_FULLDEBUG)) { // dPrintAd(D_FULLDEBUG|D_NOHEADER, classAd); // std::ostringstream oss; // oss << _map; // dprintf(D_FULLDEBUG|D_NOHEADER, oss.str().c_str()); // } _status.type = AviaryStatus::A_OK; return true; }
// This handler is called when a client wishes to write files from the // transferd's storage. int TransferD::write_files_handler(int cmd, Stream *sock) { ReliSock *rsock = (ReliSock*)sock; MyString capability; int protocol = FTP_UNKNOWN; TransferRequest *treq = NULL; MyString fquser; static int transfer_reaper_id = -1; ThreadArg *thread_arg; int tid; ClassAd reqad; ClassAd respad; cmd = cmd; // quiet the compiler. dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n"); ///////////////////////////////////////////////////////////////////////// // make sure we are authenticated ///////////////////////////////////////////////////////////////////////// if( ! rsock->triedAuthentication() ) { CondorError errstack; if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) { // we failed to authenticate, we should bail out now // since we don't know what user is trying to perform // this action. // TODO: it'd be nice to print out what failed, but we // need better error propagation for that... errstack.push( "TransferD::setup_transfer_request_handler()", 42, "Failure to register transferd - Authentication failed" ); dprintf( D_ALWAYS, "setup_transfer_request_handler() " "aborting: %s\n", errstack.getFullText() ); refuse( rsock ); return CLOSE_STREAM; } } fquser = rsock->getFullyQualifiedUser(); ///////////////////////////////////////////////////////////////////////// // Check to see if the capability the client tells us is something that // we have knowledge of. We ONLY check the capability and not the // identity of the person in question. This allows people of different // identities to write files here as long as they had the right // capability. While this might not sound secure, they STILL had to have // authenticated as someone this daemon trusts. // Similarly, check the protocol it wants to use as well as ensure that // the direction the transfer request was supposed to be is being honored. 
///////////////////////////////////////////////////////////////////////// rsock->decode(); // soak the request ad from the client about what it wants to transfer reqad.initFromStream(*rsock); rsock->end_of_message(); reqad.LookupString(ATTR_TREQ_CAPABILITY, capability); rsock->encode(); // do I know of such a capability? if (m_treqs.lookup(capability, treq) != 0) { // didn't find it. Log it and tell them to leave and close up shop respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using capability '%s', but there was no such capability. " "Access denied.\n", fquser.Value(), capability.Value()); return CLOSE_STREAM; } reqad.LookupInteger(ATTR_TREQ_FTP, protocol); // am I willing to use this protocol? switch(protocol) { case FTP_CFTP: // FileTrans protocol, I'm happy. break; default: respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid file transfer protocol!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using protocol '%d', but I don't support that protocol. " "Access denied.\n", fquser.Value(), protocol); return CLOSE_STREAM; } // nsure that this transfer request was of the uploading variety if (treq->get_direction() != FTPD_UPLOAD) { respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Transfer Request was not an uploading request!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "to a transfer request that wasn't expecting to be written. " "Access denied.\n", fquser.Value()); } ///////////////////////////////////////////////////////////////////////// // Tell the client everything was ok. 
///////////////////////////////////////////////////////////////////////// respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); respad.put(*rsock); rsock->end_of_message(); ///////////////////////////////////////////////////////////////////////// // Set up a thread (a process under unix) to read ALL of the job files // for all of the ads in the TransferRequest. ///////////////////////////////////////////////////////////////////////// // now create a thread, passing in the sock, which uses the file transfer // object to accept the files. if (transfer_reaper_id == -1) { // only set this up ONCE so each and every thread gets one. transfer_reaper_id = daemonCore->Register_Reaper( "write_files_reaper", (ReaperHandlercpp) &TransferD::write_files_reaper, "write_files_reaper", this ); } thread_arg = new ThreadArg(protocol, treq); // Start a new thread (process on Unix) to do the work tid = daemonCore->Create_Thread( (ThreadStartFunc)&TransferD::write_files_thread, (void *)thread_arg, rsock, transfer_reaper_id ); if (tid == FALSE) { // XXX How do I handle this failure? } // associate the tid with the request so I can deal with it propery in // the reaper m_client_to_transferd_threads.insert(tid, treq); // The stream is inherited to the thread, who does the transfer and // finishes the protocol, but in the parent, I'm closing it. return CLOSE_STREAM; }
/* After the job exits, look into the classad to see if certain things are true or not */ void static_policy(void) { ClassAd *result; int val; int action; char buf[4096]; char buf2[4096]; /* See what the user job policy has in store for me. */ result = user_job_policy(JobAd); result->EvalBool(ATTR_USER_POLICY_ERROR, result, val); if (val == 1) { dprintf(D_ALWAYS, "There was an error in the static policy\n"); delete result; return; } result->EvalBool(ATTR_TAKE_ACTION, result, val); if (val == 1) { result->LookupString(ATTR_USER_POLICY_FIRING_EXPR, buf, sizeof(buf)); result->LookupInteger(ATTR_USER_POLICY_ACTION, action); switch(action) { case REMOVE_JOB: dprintf(D_ALWAYS, "Static Policy: removing job because %s has " "become true\n", buf); /* do nothing, the nasty old shadow logic takes it from here. */ delete result; return; break; case HOLD_JOB: dprintf(D_ALWAYS, "Static Policy: holding job because %s has " "become true\n", buf); delete result; sprintf(buf, "Your job has been held because %s has become " "true\n", ATTR_PERIODIC_HOLD_CHECK); sprintf(buf2, "Your job has been held because %s has become " "true", ATTR_PERIODIC_HOLD_CHECK); /* This exits */ HoldJob(buf, buf2, CONDOR_HOLD_CODE_JobPolicy, 0); return; break; default: dprintf(D_ALWAYS, "WARNING! Ignoring unknown action type in " "periodic_policy()\n"); delete result; return; break; } } delete result; dprintf( D_ALWAYS, "Static policy: don't remove on exit\n" ); EXCEPT( "Job didn't exit under conditions specifed in %s", ATTR_ON_EXIT_REMOVE_CHECK ); }
/* evaluate various periodic checks during the running of the shadow and perform actions based upon what special attributes evaluate to. */ bool periodic_policy(void) { ClassAd *result; int val; int action; char buf[4096]; char buf2[4096]; /* See what the user job policy has in store for me. */ result = user_job_policy(JobAd); result->EvalBool(ATTR_USER_POLICY_ERROR, result, val); if (val == 1) { dprintf(D_ALWAYS, "There was an error in the periodic policy\n"); delete result; return false; } result->EvalBool(ATTR_TAKE_ACTION, result, val); if (val == 1) { result->LookupString(ATTR_USER_POLICY_FIRING_EXPR, buf, sizeof(buf)); result->LookupInteger(ATTR_USER_POLICY_ACTION, action); switch(action) { case REMOVE_JOB: dprintf(D_ALWAYS, "Periodic Policy: removing job because %s " "has become true\n", buf); /* set some yucky global variables */ JobStatus = 0; JobExitStatus = 0; delete result; return true; break; case HOLD_JOB: sprintf(buf, "Periodic Policy: holding job because %s has " "become true\n", buf); sprintf(buf, "Your job has been held because %s has become " "true\n", ATTR_PERIODIC_HOLD_CHECK); sprintf(buf2, "Your job has been held because %s has become " "true", ATTR_PERIODIC_HOLD_CHECK); delete result; /* This exits */ HoldJob(buf, buf2, CONDOR_HOLD_CODE_JobPolicy, 0); break; default: dprintf(D_ALWAYS, "WARNING! Ignoring unknown action type in " "periodic_policy()\n"); delete result; return false; break; } } delete result; return false; }
int CCBServer::HandleRequest(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REQUEST ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive request " "from %s.\n", sock->peer_description() ); return FALSE; } MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // client name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } MyString target_ccbid_str; MyString return_addr; MyString connect_id; // id target daemon should present to requester CCBID target_ccbid; // NOTE: using ATTR_CLAIM_ID for connect id so that it is // automatically treated as a secret over the network. // It must be presented by the target daemon when connecting // to the requesting client, so the client can confirm that // the connection is in response to its request. 
if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) || !msg.LookupString(ATTR_MY_ADDRESS,return_addr) || !msg.LookupString(ATTR_CLAIM_ID,connect_id) ) { MyString ad_str; msg.sPrint(ad_str); dprintf(D_ALWAYS, "CCB: invalid request from %s: %s\n", sock->peer_description(), ad_str.Value() ); return FALSE; } if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) { dprintf(D_ALWAYS, "CCB: request from %s contains invalid CCBID %s\n", sock->peer_description(), target_ccbid_str.Value() ); return FALSE; } CCBTarget *target = GetTarget( target_ccbid ); if( !target ) { dprintf(D_ALWAYS, "CCB: rejecting request from %s for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).\n", sock->peer_description(), target_ccbid_str.Value()); MyString error_msg; error_msg.formatstr( "CCB server rejecting request for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).", target_ccbid_str.Value()); RequestReply( sock, false, error_msg.Value(), 0, target_ccbid ); return FALSE; } SetSmallBuffers(sock); CCBServerRequest *request = new CCBServerRequest( sock, target_ccbid, return_addr.Value(), connect_id.Value() ); AddRequest( request, target ); dprintf(D_FULLDEBUG, "CCB: received request id %lu from %s for target ccbid %s " "(registered as %s)\n", request->getRequestID(), request->getSock()->peer_description(), target_ccbid_str.Value(), target->getSock()->peer_description()); ForwardRequestToTarget( request, target ); return KEEP_STREAM; }
// Dispatch an incoming ad to the table that matches `command`.
//
//   command   - the UPDATE_* / MERGE_* / QUERY_* / INVALIDATE_* command.
//   clientAd  - the ad received from the client; handed to
//               updateClassAd()/mergeClassAd() for the selected table.
//   from      - address of the sender, passed through to the table update.
//   insert    - out: set to -3 when no hash key could be built for the ad,
//               -2 for query/invalidate commands (not implemented here),
//               -1 for an unrecognized command; otherwise whatever
//               updateClassAd()/mergeClassAd() stores in it.
//   sock      - for startd updates, the socket from which the private ad
//               is read; may be NULL.
//
// Returns the value produced by updateClassAd()/mergeClassAd(), or NULL/0
// when the ad failed validation, no hash key could be made, or the command
// is not handled here.
ClassAd *CollectorEngine::
collect (int command,ClassAd *clientAd,const condor_sockaddr& from,int &insert,Sock *sock)
{
    ClassAd *retVal;
    ClassAd *pvtAd;
    int insPvt;
    AdNameHashKey hk;
    HashString hashString;
    static int repeatStartdAds = -1;    // for debugging
    ClassAd *clientAdToRepeat = NULL;
    _condor_auto_accum_runtime<collector_runtime_probe> rt(CollectorEngine_rucc_runtime);
    double rt_last = rt.begin;

    if (repeatStartdAds == -1) {
        repeatStartdAds = param_integer("COLLECTOR_REPEAT_STARTD_ADS",0);
    }

    if( !ValidateClassAd(command,clientAd,sock) ) {
        return NULL;
    }

    CollectorEngine_rucc_validateAd_runtime.Add(rt.tick(rt_last));

    // mux on command
    switch (command)
    {
      case UPDATE_STARTD_AD:
      case UPDATE_STARTD_AD_WITH_ACK:
        // Copy the ad before it is handed to updateClassAd(); the copy is
        // the template for the fake duplicates created further down.
        if ( repeatStartdAds > 0 ) {
            clientAdToRepeat = new ClassAd(*clientAd);
        }
        if (!makeStartdAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );

        CollectorEngine_rucc_makeHashKey_runtime.Add(rt.tick(rt_last));

        retVal=updateClassAd (StartdAds, "StartdAd ", "Start",
                              clientAd, hk, hashString, insert, from );

        if (last_updateClassAd_was_insert) {
            CollectorEngine_rucc_insertAd_runtime.Add(rt.tick(rt_last));
        } else {
            CollectorEngine_rucc_updateAd_runtime.Add(rt.tick(rt_last));
        }

        // if we want to store private ads
        if (!sock)
        {
            dprintf (D_ALWAYS, "Want private ads, but no socket given!\n");
            break;
        }
        else
        {
            if (!(pvtAd = new ClassAd))
            {
                EXCEPT ("Memory error!");
            }
            if( !getClassAd(sock, *pvtAd) )
            {
                dprintf(D_FULLDEBUG,"\t(Could not get startd's private ad)\n");
                delete pvtAd;
                break;
            }

            // Fix up some stuff in the private ad that we depend on.
            // We started doing this in 7.2.0, so once we no longer
            // care about compatibility with stuff from before then,
            // the startd could stop bothering to send these attributes.

            // Queries of private ads depend on the following:
            SetMyTypeName( *pvtAd, STARTD_ADTYPE );

            // Negotiator matches up private ad with public ad by
            // using the following.
            if( retVal ) {
                CopyAttribute( ATTR_MY_ADDRESS, *pvtAd, *retVal );
                CopyAttribute( ATTR_NAME, *pvtAd, *retVal );
            }

            CollectorEngine_rucc_getPvtAd_runtime.Add(rt.tick(rt_last));

            // insert the private ad into its hashtable --- use the same
            // hash key as the public ad
            (void) updateClassAd (StartdPrivateAds, "StartdPvtAd ",
                                  "StartdPvt", pvtAd, hk, hashString, insPvt,
                                  from );

            if (last_updateClassAd_was_insert) {
                CollectorEngine_rucc_insertPvtAd_runtime.Add(rt.tick(rt_last));
            } else {
                CollectorEngine_rucc_updatePvtAd_runtime.Add(rt.tick(rt_last));
            }
        }

        // create fake duplicates of this ad, each with a different name, if
        // we are told to do so.  this feature exists for developer
        // scalability testing.
        if ( repeatStartdAds > 0 && clientAdToRepeat ) {
            ClassAd *fakeAd;
            int n;
            char newname[150],oldname[130];
            oldname[0] = '\0';
            clientAdToRepeat->LookupString("Name",oldname,sizeof(oldname));
            for (n=0;n<repeatStartdAds;n++) {
                fakeAd = new ClassAd(*clientAdToRepeat);
                snprintf(newname,sizeof(newname),
                         "Name=\"fake%d-%s\"",n,oldname);
                fakeAd->Insert(newname);
                makeStartdAdHashKey (hk, fakeAd);
                hashString.Build( hk );
                if (! updateClassAd (StartdAds, "StartdAd ", "Start",
                                     fakeAd, hk, hashString, insert, from ) )
                {
                    // don't leak memory if there is some failure
                    delete fakeAd;
                }
            }
            delete clientAdToRepeat;
            clientAdToRepeat = NULL;
            CollectorEngine_rucc_repeatAd_runtime.Add(rt.tick(rt_last));
        }
        break;

      case MERGE_STARTD_AD:
        if (!makeStartdAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=mergeClassAd (StartdAds, "StartdAd ", "Start",
                             clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_SCHEDD_AD:
        if (!makeScheddAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (ScheddAds, "ScheddAd ", "Schedd",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_SUBMITTOR_AD:
        // use the same hashkey function as a schedd ad
        if (!makeScheddAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        // since submittor ads always follow a schedd ad, and a master check is
        // performed for schedd ads, we don't need a master check in here
        hashString.Build( hk );
        retVal=updateClassAd (SubmittorAds, "SubmittorAd ", "Submittor",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_LICENSE_AD:
        // license ads have their own hashkey function
        if (!makeLicenseAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        // no master check is performed here
        hashString.Build( hk );
        retVal=updateClassAd (LicenseAds, "LicenseAd ", "License",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_MASTER_AD:
        if (!makeMasterAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (MasterAds, "MasterAd ", "Master",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_CKPT_SRVR_AD:
        if (!makeCkptSrvrAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (CkptServerAds, "CkptSrvrAd ", "CkptSrvr",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_COLLECTOR_AD:
        if (!makeCollectorAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (CollectorAds, "CollectorAd ", "Collector",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_STORAGE_AD:
        if (!makeStorageAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (StorageAds, "StorageAd ", "Storage",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_ACCOUNTING_AD:
        if (!makeAccountingAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (AccountingAds, "AccountingAd ", "Accouting",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_NEGOTIATOR_AD:
        if (!makeNegotiatorAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        if (m_allowOnlyOneNegotiator) {
            // first, purge all the existing negotiator ads, since we
            // want to enforce that *ONLY* 1 negotiator is in the
            // collector any given time.
            purgeHashTable( NegotiatorAds );
        }
        retVal=updateClassAd (NegotiatorAds, "NegotiatorAd ", "Negotiator",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_HAD_AD:
        if (!makeHadAdHashKey (hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (HadAds, "HadAd ", "HAD",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_GRID_AD:
        if (!makeGridAdHashKey(hk, clientAd))
        {
            dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build( hk );
        retVal=updateClassAd (GridAds, "GridAd ", "Grid",
                              clientAd, hk, hashString, insert, from );
        break;

      case UPDATE_AD_GENERIC:
      {
        // Generic ads are stored in a per-type table, created on demand.
        const char *type_str = GetMyTypeName(*clientAd);
        if (type_str == NULL) {
            dprintf(D_ALWAYS, "collect: UPDATE_AD_GENERIC: ad has no type\n");
            insert = -3;
            retVal = 0;
            break;
        }
        MyString type(type_str);
        CollectorHashTable *cht = findOrCreateTable(type);
        if (cht == NULL) {
            dprintf(D_ALWAYS, "collect: findOrCreateTable failed\n");
            insert = -3;
            retVal = 0;
            break;
        }
        if (!makeGenericAdHashKey (hk, clientAd))
        {
            dprintf(D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
            insert = -3;
            retVal = 0;
            break;
        }
        hashString.Build(hk);
        retVal = updateClassAd(*cht, type_str, type_str, clientAd,
                               hk, hashString, insert, from);
        break;
      }

      case QUERY_STARTD_ADS:
      case QUERY_SCHEDD_ADS:
      case QUERY_MASTER_ADS:
      case QUERY_SUBMITTOR_ADS:
      case QUERY_CKPT_SRVR_ADS:
      case QUERY_STARTD_PVT_ADS:
      case QUERY_COLLECTOR_ADS:
      case QUERY_NEGOTIATOR_ADS:
      case QUERY_HAD_ADS:
      case QUERY_GENERIC_ADS:
      case INVALIDATE_STARTD_ADS:
      case INVALIDATE_SCHEDD_ADS:
      case INVALIDATE_MASTER_ADS:
      case INVALIDATE_CKPT_SRVR_ADS:
      case INVALIDATE_SUBMITTOR_ADS:
      case INVALIDATE_COLLECTOR_ADS:
      case INVALIDATE_NEGOTIATOR_ADS:
      case INVALIDATE_HAD_ADS:
      case INVALIDATE_ADS_GENERIC:
        // these are not implemented in the engine, but we allow another
        // daemon to detect that these commands have been given
        insert = -2;
        retVal = 0;
        break;

      default:
        dprintf (D_ALWAYS, "Received illegal command: %d\n", command);
        insert = -1;
        retVal = 0;
    }

    // The startd case can break out early (no hash key, no socket, private
    // ad unreadable) after the repeat template was copied but before the
    // duplication code released it.  Free it here so those paths do not
    // leak; on every other path the pointer is already NULL.
    delete clientAdToRepeat;
    clientAdToRepeat = NULL;

    if (command != UPDATE_STARTD_AD && command != UPDATE_STARTD_AD_WITH_ACK) {
        CollectorEngine_rucc_other_runtime.Add(rt.tick(rt_last));
    }

    // return the updated ad
    return retVal;
}
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible) { retry_is_sensible = false; #ifndef HAVE_SSH_TO_JOB error_msg = "This version of Condor does not support ssh key exchange."; return false; #else if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) { error_msg = "Failed to send START_SSHD to starter"; return false; } ClassAd input; if( preferred_shells && *preferred_shells ) { input.Assign(ATTR_SHELL,preferred_shells); } if( slot_name && *slot_name ) { // This is a little silly. // We are telling the remote side the name of the slot so // that it can put it in the welcome message. input.Assign(ATTR_NAME,slot_name); } if( ssh_keygen_args && *ssh_keygen_args ) { input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args); } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send START_SSHD request to starter"; return false; } ClassAd result; sock.decode(); if( !getClassAd(&sock, result) || !sock.end_of_message() ) { error_msg = "Failed to read response to START_SSHD from starter"; return false; } bool success = false; result.LookupBool(ATTR_RESULT,success); if( !success ) { std::string remote_error_msg; result.LookupString(ATTR_ERROR_STRING,remote_error_msg); error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str()); retry_is_sensible = false; result.LookupBool(ATTR_RETRY,retry_is_sensible); return false; } result.LookupString(ATTR_REMOTE_USER,remote_user); std::string public_server_key; if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) { error_msg = "No public ssh server key received in reply to START_SSHD"; return false; } std::string 
private_client_key; if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) { error_msg = "No ssh client key received in reply to START_SSHD"; return false; } // store the private client key unsigned char *decode_buf = NULL; int length = -1; condor_base64_decode(private_client_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh client key."; return false; } FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", private_client_key_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; // store the public server key in the known_hosts file length = -1; condor_base64_decode(public_server_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh server key."; return false; } fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } // prepend a host name pattern (*) to the public key to make a valid // record in the known_hosts file fprintf(fp,"* "); if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", known_hosts_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; return true; #endif }
bool DockerProc::JobReaper( int pid, int status ) { TemporaryPrivSentry sentry(PRIV_ROOT); dprintf( D_ALWAYS, "DockerProc::JobReaper()\n" ); // // This should mean that the container has terminated. // if( pid == JobPid ) { // // Even running Docker in attached mode, we have a race condition // is exiting when the container exits, not when the docker daemon // notices that the container has exited. // int rv = -1; bool running = false; ClassAd dockerAd; CondorError error; // Years of careful research. for( int i = 0; i < 20; ++i ) { rv = DockerAPI::inspect( containerName, & dockerAd, error ); if( rv < 0 ) { dprintf( D_FULLDEBUG, "Failed to inspect (for removal) container '%s'; sleeping a second (%d already slept) to give Docker a chance to catch up.\n", containerName.c_str(), i ); sleep( 1 ); continue; } if( ! dockerAd.LookupBool( "Running", running ) ) { dprintf( D_FULLDEBUG, "Inspection of container '%s' failed to reveal its running state; sleeping a second (%d already slept) to give Docke a chance to catch up.\n", containerName.c_str(), i ); sleep( 1 ); continue; } if( running ) { dprintf( D_FULLDEBUG, "Inspection reveals that container '%s' is still running; sleeping a second (%d already slept) to give Docker a chance to catch up.\n", containerName.c_str(), i ); sleep( 1 ); continue; } break; } // FIXME: Move all this shared conditional-checking into a function. if( rv < 0 ) { dprintf( D_ALWAYS | D_FAILURE, "Failed to inspect (for removal) container '%s'.\n", containerName.c_str() ); std::string imageName; if( ! JobAd->LookupString( ATTR_DOCKER_IMAGE, imageName ) ) { dprintf( D_ALWAYS | D_FAILURE, "%s not defined in job ad.\n", ATTR_DOCKER_IMAGE ); imageName = "Unknown"; // shouldn't ever happen } std::string message; formatstr(message, "Cannot start container: invalid image name: %s", imageName.c_str()); Starter->jic->holdJob(message.c_str(), CONDOR_HOLD_CODE_InvalidDockerImage, 0); return VanillaProc::JobReaper( pid, status ); } if( ! 
dockerAd.LookupBool( "Running", running ) ) { dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal its running state.\n", containerName.c_str() ); return VanillaProc::JobReaper( pid, status ); } if( running ) { dprintf( D_ALWAYS | D_FAILURE, "Inspection reveals that container '%s' is still running.\n", containerName.c_str() ); return VanillaProc::JobReaper( pid, status ); } // FIXME: Rethink returning a classad. Having to check for missing // attributes blows. // TODO: Set status appropriately (as if it were from waitpid()). std::string oomkilled; if (! dockerAd.LookupString( "OOMKilled", oomkilled)) { dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal whether it was OOM killed. Assuming it was not.\n", containerName.c_str() ); } if (oomkilled.find("true") == 0) { ClassAd *machineAd = Starter->jic->machClassAd(); int memory; machineAd->LookupInteger(ATTR_MEMORY, memory); std::string message; formatstr(message, "Docker job exhaused %d Mb memory", memory); dprintf(D_ALWAYS, "%s, going on hold\n", message.c_str()); Starter->jic->holdJob(message.c_str(), CONDOR_HOLD_CODE_JobOutOfResources, 0); DockerAPI::rm( containerName, error ); if ( Starter->Hold( ) ) { Starter->allJobsDone(); this->JobExit(); } Starter->ShutdownFast(); return 0; } // See if docker could not run the job // most likely invalid executable std::string dockerError; if (! 
dockerAd.LookupString( "DockerError", dockerError)) { dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal whether there was an internal docker error.\n", containerName.c_str() ); } if (dockerError.length() > 0) { std::string message; formatstr(message, "Error running docker job: %s", dockerError.c_str()); dprintf(D_ALWAYS, "%s, going on hold\n", message.c_str()); Starter->jic->holdJob(message.c_str(), CONDOR_HOLD_CODE_FailedToCreateProcess, 0); DockerAPI::rm( containerName, error ); if ( Starter->Hold( ) ) { Starter->allJobsDone(); this->JobExit(); } Starter->ShutdownFast(); return 0; } int dockerStatus; if( ! dockerAd.LookupInteger( "ExitCode", dockerStatus ) ) { dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal its exit code.\n", containerName.c_str() ); return VanillaProc::JobReaper( pid, status ); } dprintf( D_FULLDEBUG, "Setting status of Docker job to %d.\n", dockerStatus ); status = dockerStatus; // TODO: Record final job usage. // We don't have to do any process clean-up, because container. // We'll do the disk clean-up after we've transferred files. } // This helps to make ssh-to-job more plausible. return VanillaProc::JobReaper( pid, status ); }
// Handle a message arriving on a registered target daemon's socket.
//
// The message is either a heartbeat (ATTR_COMMAND == ALIVE), which is
// answered with SendHeartbeatResponse(), or the target's report on whether
// it succeeded in connecting to the client that made a request.  A report
// is matched to its pending request by ATTR_REQUEST_ID and must carry the
// request's connect id; the outcome is then passed to RequestFinished().
// The target is removed if the message cannot be read, has no valid
// request id, or presents the wrong connect id.
void
CCBServer::HandleRequestResultsMsg( CCBTarget *target )
{
    // Reply from target daemon about whether it succeeded in
    // connecting to the requested client.

    Sock *sock = target->getSock();

    ClassAd msg;
    sock->decode();
    if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) {
        // disconnect
        dprintf(D_FULLDEBUG,
                "CCB: received disconnect from target daemon %s "
                "with ccbid %lu.\n",
                sock->peer_description(), target->getCCBID() );
        RemoveTarget( target );
        return;
    }

    int command = 0;
    if( msg.LookupInteger( ATTR_COMMAND, command ) && command == ALIVE ) {
        SendHeartbeatResponse( target );
        return;
    }

    target->decPendingRequestResults();

    bool success = false;
    MyString error_msg;
    MyString reqid_str;
    CCBID reqid;
    MyString connect_id;
    msg.LookupBool( ATTR_RESULT, success );
    msg.LookupString( ATTR_ERROR_STRING, error_msg );
    msg.LookupString( ATTR_REQUEST_ID, reqid_str );
    msg.LookupString( ATTR_CLAIM_ID, connect_id );

    if( !CCBIDFromString( reqid, reqid_str.Value() ) ) {
        MyString msg_str;
        msg.sPrint(msg_str);
        dprintf(D_ALWAYS,
                "CCB: received reply from target daemon %s with ccbid %lu "
                "without a valid request id: %s\n",
                sock->peer_description(),
                target->getCCBID(),
                msg_str.Value());
        RemoveTarget( target );
        return;
    }

    CCBServerRequest *request = GetRequest( reqid );
    if( request && request->getSock()->readReady() ) {
        // Request socket must have just closed.  To avoid noise in
        // logs when we fail to write to it, delete the request now.
        RemoveRequest( request );
        request = NULL;
    }

    char const *request_desc = "(client which has gone away)";
    if( request ) {
        request_desc = request->getSock()->peer_description();
    }

    if( success ) {
        dprintf(D_FULLDEBUG,"CCB: received 'success' from target daemon %s "
                "with ccbid %lu for "
                "request %s from %s.\n",
                sock->peer_description(),
                target->getCCBID(),
                reqid_str.Value(),
                request_desc);
    }
    else {
        dprintf(D_FULLDEBUG,"CCB: received error from target daemon %s "
                "with ccbid %lu for "
                "request %s from %s: %s\n",
                sock->peer_description(),
                target->getCCBID(),
                reqid_str.Value(),
                request_desc,
                error_msg.Value());
    }

    if( !request ) {
        if( success ) {
            // expected: the client has gone away; it got what it wanted
            return;
        }
        dprintf( D_FULLDEBUG,
                 "CCB: client for request %s to target daemon %s with ccbid "
                 "%lu disappeared before receiving error details.\n",
                 reqid_str.Value(),
                 sock->peer_description(),
                 target->getCCBID());
        return;
    }

    if( connect_id != request->getConnectID() ) {
        // The whole ad used to be serialized here into a string that was
        // never logged; that wasted work has been dropped.
        dprintf( D_FULLDEBUG,
                 "CCB: received wrong connect id (%s) from target daemon %s "
                 "with ccbid %lu for "
                 "request %s\n",
                 connect_id.Value(),
                 sock->peer_description(),
                 target->getCCBID(),
                 reqid_str.Value());
        RemoveTarget( target );
        return;
    }

    RequestFinished( request, success, error_msg.Value() );
}
int main(int argc, char *argv[]) { char *arg; int nArgs = 0; // number of args int i, result; char* pool = NULL; char* scheddName = NULL; char* scheddAddr = NULL; MyString method; char *tmp; myDistro->Init( argc, argv ); MyName = condor_basename(argv[0]); config(); #if !defined(WIN32) install_sig_handler(SIGPIPE, SIG_IGN ); #endif // dig around in the config file looking for what the config file says // about getting files from Condor. This defaults with the global variable // initialization. tmp = param( "SANDBOX_TRANSFER_METHOD" ); if ( tmp != NULL ) { method = tmp; free( tmp ); string_to_stm( method, st_method ); } char **args = (char **)malloc(sizeof(char *) * argc); // args if ( ! args) exit(2); // parse the arguments. for( argv++; (arg = *argv); argv++ ) { if( arg[0] == '-' ) { if( ! arg[1] ) { usage(); } switch( arg[1] ) { case 'd': // dprintf to console dprintf_set_tool_debug("TOOL", 0); break; case 'c': args[nArgs] = arg; nArgs++; argv++; if( ! *argv ) { fprintf( stderr, "%s: -constraint requires another argument\n", MyName); exit(1); } args[nArgs] = *argv; nArgs++; break; case 'a': if( arg[2] && arg[2] == 'd' ) { argv++; if( ! *argv ) { fprintf( stderr, "%s: -addr requires another argument\n", MyName); exit(1); } if( is_valid_sinful(*argv) ) { scheddAddr = strdup(*argv); if( ! scheddAddr ) { fprintf( stderr, "Out of Memory!\n" ); exit(1); } } else { fprintf( stderr, "%s: \"%s\" is not a valid address\n", MyName, *argv ); fprintf( stderr, "Should be of the form " "<ip.address.here:port>\n" ); fprintf( stderr, "For example: <123.456.789.123:6789>\n" ); exit( 1 ); } break; } All = true; break; case 'n': // use the given name as the schedd name to connect to argv++; if( ! *argv ) { fprintf( stderr, "%s: -name requires another argument\n", MyName); exit(1); } if ( scheddName ) free(scheddName); scheddName = strdup(*argv); break; case 'p': // use the given name as the central manager to query argv++; if( ! 
*argv ) { fprintf( stderr, "%s: -pool requires another argument\n", MyName); exit(1); } if( pool ) { free( pool ); } pool = strdup( *argv ); break; case 's': argv++; if( ! *argv ) { fprintf( stderr, "%s: -stm requires another argument\n", MyName); exit(1); } method = *argv; string_to_stm(method, st_method); break; case 'v': version(); break; case 'h': usage(0); break; default: fprintf( stderr, "Unrecognized option: %s\n", arg ); usage(); break; } } else { if( All ) { // If -all is set, there should be no other // constraint arguments. usage(); } args[nArgs] = arg; nArgs++; } } // Check to make sure we have a valid sandbox transfer mechanism. if (st_method == STM_UNKNOWN) { fprintf( stderr, "%s: Unknown sandbox transfer method: %s\n", MyName, method.Value()); usage(); exit(1); } if( ! (All || nArgs) ) { // We got no indication of what to act on fprintf( stderr, "You did not specify any jobs\n" ); usage(); } // We're done parsing args, now make sure we know how to // contact the schedd. if( ! scheddAddr ) { // This will always do the right thing, even if either or // both of scheddName or pool are NULL. schedd = new DCSchedd( scheddName, pool ); } else { schedd = new DCSchedd( scheddAddr ); } if( ! schedd->locate() ) { fprintf( stderr, "%s: %s\n", MyName, schedd->error() ); exit( 1 ); } // Process the args. if( All ) { handleAll(); } else { for(i = 0; i < nArgs; i++) { if( match_prefix( args[i], "-constraint" ) ) { i++; addConstraint( args[i] ); } else { procArg(args[i]); } } } // Sanity check: make certain we now have a constraint if ( global_constraint.Length() <= 0 ) { fprintf( stderr, "Unable to create a job constraint!\n"); exit(1); } fprintf(stdout,"Fetching data files...\n"); switch(st_method) { case STM_USE_SCHEDD_ONLY: { // start block // Get the sandbox directly from the schedd. // And now, do the work. 
CondorError errstack; result = schedd->receiveJobSandbox(global_constraint.Value(), &errstack); if ( !result ) { fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() ); fprintf( stderr, "ERROR: Failed to spool job files.\n" ); exit(1); } // All done return 0; } //end block break; case STM_USE_TRANSFERD: { // start block // NEW METHOD where we ask the schedd for a transferd, then get the // files from the transferd CondorError errstack; ClassAd respad; int invalid; MyString reason; MyString td_sinful; MyString td_cap; result = schedd->requestSandboxLocation(FTPD_DOWNLOAD, global_constraint, FTP_CFTP, &respad, &errstack); if ( !result ) { fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() ); fprintf( stderr, "ERROR: Failed to spool job files.\n" ); exit(1); } respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { fprintf( stderr, "ERROR: Failed to spool job files.\n" ); respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); fprintf( stderr, "%s\n", reason.Value()); exit(EXIT_FAILURE); } respad.LookupString(ATTR_TREQ_TD_SINFUL, td_sinful); respad.LookupString(ATTR_TREQ_CAPABILITY, td_cap); dprintf(D_ALWAYS, "td: %s, cap: %s\n", td_sinful.Value(), td_cap.Value()); DCTransferD dctd(td_sinful.Value()); result = dctd.download_job_files(&respad, &errstack); if ( !result ) { fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() ); fprintf( stderr, "ERROR: Failed to spool job files.\n" ); exit(1); } } // end block break; default: EXCEPT("PROGRAMMER ERROR: st_method must be known."); break; } // All done return 0; }
void test_user_policy_on_exit_hold_no(void) { int val; int action; char buf[4096]; ClassAd *result; ClassAd *jad = new ClassAd; if (jad == NULL) { printf("Out of memory!\n"); exit(EXIT_FAILURE); } printf("=======================================\n"); printf("Testing User Policy on On Exit Hold: NO\n"); /* set up the classad */ sprintf(buf, "%s = %d", ATTR_ON_EXIT_CODE, 0); jad->Insert(buf); sprintf(buf, "%s = 40", ATTR_TOTAL_SUSPENSIONS); jad->Insert(buf); sprintf(buf, "%s = FALSE", ATTR_PERIODIC_HOLD_CHECK); jad->Insert(buf); sprintf(buf, "%s = FALSE", ATTR_PERIODIC_REMOVE_CHECK); jad->Insert(buf); sprintf(buf, "%s = TotalSuspensions == 42", ATTR_ON_EXIT_HOLD_CHECK); jad->Insert(buf); sprintf(buf, "%s = TRUE", ATTR_ON_EXIT_REMOVE_CHECK); jad->Insert(buf); result = user_job_policy(jad); result->EvalBool(ATTR_USER_POLICY_ERROR, result, val); if(val == true) { printf("An error happened\n"); delete result; return; } result->EvalBool(ATTR_TAKE_ACTION, result, val); if (val == true) { printf("%s was true.\n", ATTR_TAKE_ACTION); result->LookupInteger(ATTR_USER_POLICY_ACTION, action); printf("Action is: %s\n", action==REMOVE_JOB?"REMOVE_JOB":action==HOLD_JOB?"HOLD_JOB": "UNKNOWN"); result->LookupString(ATTR_USER_POLICY_FIRING_EXPR, buf); printf("Reason for action: %s\n", buf); if (strcmp(ATTR_USER_POLICY_FIRING_EXPR, ATTR_ON_EXIT_HOLD_CHECK) == 0) { printf("Failed. I got removed because exit_hold was true.\n"); } else { printf("Success. I was removed because of %s, not because of %s\n", ATTR_ON_EXIT_REMOVE_CHECK, ATTR_ON_EXIT_HOLD_CHECK); } } else { printf("Ignoring correctly.\n"); } }