Ejemplo n.º 1
0
void
makeAndDisplayRegular( char* name, char* pool )
{
	Daemon startd( DT_STARTD, name, pool );
	Daemon schedd( DT_SCHEDD, name, pool );
	Daemon master( DT_MASTER, name, pool );

	if( ! startd.locate() ) {
		dprintf( dflag, "%s\n", startd.error() );
	} else { 
		startd.display( dflag );
	} 
	dprintf( dflag, "\n" );
	if( ! schedd.locate() ) {
		dprintf( dflag, "%s\n", schedd.error() );
	} else { 
		schedd.display( dflag );
	} 
	dprintf( dflag, "\n" );
	if( ! master.locate() ) {
		dprintf( dflag, "%s\n", master.error() );
	} else { 
		master.display( dflag );
	} 
	dprintf( dflag, "\n" );
}
Ejemplo n.º 2
0
    // Delegate an X509 proxy to the startd at m_addr on behalf of the claim
    // stored in m_claim.
    //
    // fname: Python object naming the proxy file.  When it is None, the path
    //        returned by get_x509_proxy_filename() is used instead.
    //
    // Raises (through THROW_EX) ValueError when no claim is set and
    // RuntimeError when delegateX509Proxy() returns anything other than OK.
    void
    delegateGSI(boost::python::object fname)
    {
        if (m_claim.empty())
        {
            THROW_EX(ValueError, "No claim set for object.");
        }

        const bool caller_gave_file = (fname.ptr() != Py_None);
        std::string proxy_file;
        if (caller_gave_file)
        {
            proxy_file = boost::python::extract<std::string>(fname);
        }
        else
        {
            // NOTE(review): if get_x509_proxy_filename() hands back a
            // heap-allocated char*, it is never freed here, and a NULL result
            // would be invalid input for std::string -- confirm its contract.
            proxy_file = get_x509_proxy_filename();
        }

        DCStartd startd(m_addr.c_str());
        startd.setClaimId(m_claim);
        compat_classad::ClassAd reply;

        int delegate_status;
        {
            // The lock object lives only for the duration of the startd call.
            condor::ModuleLock ml;
            delegate_status = startd.delegateX509Proxy(proxy_file.c_str(), 0, NULL);
        }

        if (delegate_status != OK)
        {
            THROW_EX(RuntimeError, "Startd failed to delegate GSI proxy.");
        }
    }
Ejemplo n.º 3
0
// Exercise the accessor methods of a Daemon object for the startd named
// my_name and log what each one returns.
//
// my_name:  name handed to the Daemon constructor.
// do_socks: when true, testSocks() is also run against the daemon.
//
// A failed locate() is logged but does not stop the test: the accessors are
// still queried and any NULL string is printed as "(null)".
void
testAPI( char* my_name, bool do_socks )
{
	Daemon startd( DT_STARTD, my_name );
	if( ! startd.locate() ) {
		dprintf( dflag, "%s\n", startd.error() );
	}

	char *name = startd.name();
	char *addr = startd.addr();
	char *fullhost = startd.fullHostname();
	char *host = startd.hostname();
	char *pool = startd.pool();
	// Keep the error string const rather than casting the qualifier away.
	const char *error = startd.error();
	char *id = startd.idStr();

	dprintf( dflag, "Type: %d (%s), Name: %s, Addr: %s\n", 
			 (int)startd.type(), daemonString(startd.type()), 
			 name ? name : "(null)", 
			 addr ? addr : "(null)" );
	dprintf( dflag, "FullHost: %s, Host: %s, Pool: %s, Port: %d\n", 
			 fullhost ? fullhost : "(null)",
			 host ? host : "(null)", 
			 pool ? pool : "(null)", startd.port() );
	// isLocal/isFound must actually be called; naming the member without
	// parentheses never evaluates the daemon's state.
	dprintf( dflag, "IsLocal: %s, IsFound: %s, IdStr: %s, Error: %s\n", 
			 startd.isLocal() ? "Y" : "N",
			 startd.isFound() ? "Y" : "N",
			 id ? id : "(null)",
			 error ? error : "(null)" );
	if( do_socks ) {
		testSocks( &startd );
	}
}
Ejemplo n.º 4
0
bool 
VMProc::reportVMInfoToStartd(int cmd, const char *value)
{
	Daemon startd(DT_STARTD, NULL);

	if( !startd.locate() ) {
		dprintf(D_ALWAYS,"ERROR: %s\n", startd.error());
		return false;
	}

	char* addr = startd.addr();
	if( !addr ) {
		dprintf(D_ALWAYS,"Can't find the address of local startd\n");
		return false;
	}

	// Using udp packet
	SafeSock ssock;

	ssock.timeout( 5 ); // 5 seconds timeout
	ssock.encode();

	if( !ssock.connect(addr) ) {
		dprintf( D_ALWAYS, "Failed to connect to local startd(%s)\n", addr);
		return false;
	}

	if( !startd.startCommand(cmd, &ssock) ) {
		dprintf( D_ALWAYS, "Failed to send UDP command(%s) "
					"to local startd %s\n", getCommandString(cmd), addr);
		return false;
	}

	// Send the pid of this starter
	MyString s_pid;
	s_pid += (int)daemonCore->getpid();

	char *starter_pid = strdup(s_pid.Value());
	ASSERT(starter_pid);
	ssock.code(starter_pid);

	// Send vm info 
	char *vm_value = strdup(value);
	ASSERT(vm_value);
	ssock.code(vm_value);

	if( !ssock.end_of_message() ) {
		dprintf( D_FULLDEBUG, "Failed to send EOM to local startd %s\n", addr);
		free(starter_pid);
		free(vm_value);
		return false;
	}
	free(starter_pid);
	free(vm_value);

	sleep(1);
	return true;
}
Ejemplo n.º 5
0
bool
Defrag::drain(const ClassAd &startd_ad)
{
	std::string name;
	startd_ad.LookupString(ATTR_NAME,name);

	dprintf(D_ALWAYS,"Initiating %s draining of %s.\n",
			m_draining_schedule_str.c_str(),name.c_str());

	DCStartd startd( &startd_ad );

	int graceful_completion = 0;
	startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_COMPLETION,graceful_completion);
	int quick_completion = 0;
	startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_COMPLETION,quick_completion);
	int graceful_badput = 0;
	startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_BADPUT,graceful_badput);
	int quick_badput = 0;
	startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_BADPUT,quick_badput);

	time_t now = time(NULL);
	std::string draining_check_expr;
	double badput_growth_tolerance = 1.25; // for now, this is hard-coded
	int negligible_badput = 1200;
	int negligible_deadline_slippage = 1200;
	if( m_draining_schedule <= DRAIN_GRACEFUL ) {
		dprintf(D_ALWAYS,"Expected draining completion time is %ds; expected draining badput is %d cpu-seconds\n",
				(int)(graceful_completion-now),graceful_badput);
		sprintf(draining_check_expr,"%s <= %d && %s <= %d",
				ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_COMPLETION,
				graceful_completion + negligible_deadline_slippage,
				ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_BADPUT,
				(int)(badput_growth_tolerance*graceful_badput) + negligible_badput);
	}
	else { // DRAIN_FAST and DRAIN_QUICK are effectively equivalent here
		dprintf(D_ALWAYS,"Expected draining completion time is %ds; expected draining badput is %d cpu-seconds\n",
				(int)(quick_completion-now),quick_badput);
		sprintf(draining_check_expr,"%s <= %d && %s <= %d",
				ATTR_EXPECTED_MACHINE_QUICK_DRAINING_COMPLETION,
				quick_completion + negligible_deadline_slippage,
				ATTR_EXPECTED_MACHINE_QUICK_DRAINING_BADPUT,
				(int)(badput_growth_tolerance*quick_badput) + negligible_badput);
	}

	std::string request_id;
	bool resume_on_completion = true;
	bool rval = startd.drainJobs( m_draining_schedule, resume_on_completion, draining_check_expr.c_str(), request_id );
	if( !rval ) {
		dprintf(D_ALWAYS,"Failed to send request to drain %s: %s\n",startd.name(),startd.error());
		m_stats.DrainFailures += 1;
		return false;
	}
	m_stats.DrainSuccesses += 1;

	return true;
}
Ejemplo n.º 6
0
// Locate the startd identified by name/pool and send it `update` through
// updateMachineAd(), passing `reply` for the response.
// Returns 0 on success; on either failure prints the startd's error string to
// stderr and returns 1.
int updateMachineAdAt( const char * const name, const char * const pool, const ClassAd & update, ClassAd & reply  ) {
	DCStartd startd( name, pool );

	if( startd.locate() ) {
		if( startd.updateMachineAd( & update, & reply ) ) {
			return 0;
		}
		fprintf( stderr, "Unable to update machine ad: %s\n", startd.error() );
	} else {
		fprintf( stderr, "Unable to locate startd: %s\n", startd.error() );
	}

	return 1;
}
Ejemplo n.º 7
0
    // Ask the startd at m_addr to resume the claim stored in m_claim.
    //
    // Raises (through THROW_EX) ValueError when no claim is set and
    // RuntimeError when resumeClaim() reports failure.
    void
    resume()
    {
        if (m_claim.empty()) {THROW_EX(ValueError, "No claim set for object.");}

        DCStartd startd(m_addr.c_str());
        startd.setClaimId(m_claim);
        compat_classad::ClassAd reply;
        bool rval;
        {
            // The lock object lives only for the duration of the startd call.
            condor::ModuleLock ml;
            rval = startd.resumeClaim(&reply, 20);
        }
        // Message previously read "Sartd"; spelled like the sibling methods now.
        if (!rval) {THROW_EX(RuntimeError, "Startd failed to resume claim.");}
    }
Ejemplo n.º 8
0
    // Ask the startd at m_addr to deactivate the claim stored in m_claim.
    //
    // vacate_type: forwarded unchanged to deactivateClaim().
    //
    // Raises (through THROW_EX) ValueError when no claim is set and
    // RuntimeError when deactivateClaim() reports failure.
    void
    deactivate(VacateType vacate_type)
    {
        if (m_claim.empty())
        {
            THROW_EX(ValueError, "No claim set for object.");
        }

        DCStartd startd(m_addr.c_str());
        startd.setClaimId(m_claim);

        compat_classad::ClassAd reply;
        bool deactivated;
        {
            // The lock object lives only for the duration of the startd call.
            condor::ModuleLock ml;
            deactivated = startd.deactivateClaim(vacate_type, &reply, 20);
        }

        if (!deactivated)
        {
            THROW_EX(RuntimeError, "Startd failed to deactivate claim.");
        }
    }
Ejemplo n.º 9
0
// Entry point of the drain tool: parses the command line, locates the target
// startd and sends it either a drain or a cancel-drain request.
// Returns 0 when the request was sent, 1 when sending failed; exits with 1 if
// the startd cannot be located.
int
main( int argc, char *argv[] )
{
	myDistro->Init( argc, argv );

	config();
	dprintf_config_tool_on_error(0);
	dprintf_OnExitDumpOnErrorBuffer(stderr);

	parseArgv( argc, argv );

	DCStartd startd( target, pool );
	if( ! startd.locate() ) {
		fprintf( stderr, "ERROR: %s\n", startd.error() );
		exit( 1 );
	}

	// Stays false when cmd is neither of the two commands handled below.
	bool request_sent = false;

	if( cmd == DRAIN_JOBS ) {
		std::string request_id;
		request_sent = startd.drainJobs( how_fast, resume_on_completion, draining_check_expr, request_id );
		if( request_sent ) {
			printf("Sent request to drain %s\n",startd.name());
			if (dash_verbose && ! request_id.empty()) {
				printf("\tRequest id: %s\n", request_id.c_str());
			}
		}
	}
	else if( cmd == CANCEL_DRAIN_JOBS ) {
		request_sent = startd.cancelDrainJobs( cancel_request_id );
		if( request_sent ) {
			printf("Sent request to cancel draining on %s\n",startd.name());
		}
	}

	if( request_sent ) {
		dprintf_SetExitCode(0);
		return 0;
	}

	fprintf( stderr, "Attempt to send %s to startd %s failed\n%s\n",
			 getCommandString(cmd), startd.addr(), startd.error() ); 
	return 1;
}
Ejemplo n.º 10
0
// Ask the startd described by startd_ad to cancel draining.
//
// cancelDrainJobs() is called with a NULL request id.  Success is logged at
// D_FULLDEBUG and failure, with the startd's error string, at D_ALWAYS.
// Returns the result of cancelDrainJobs().
bool
Defrag::cancel_drain(const ClassAd &startd_ad)
{

	std::string name;
	startd_ad.LookupString(ATTR_NAME,name);

	// This used to log "Initiating ... draining", copied from drain(), which
	// described the opposite of what happens here.
	dprintf(D_ALWAYS,"Cancelling draining of %s.\n",name.c_str());

	DCStartd startd( &startd_ad );

	bool rval = startd.cancelDrainJobs( NULL );
	if ( rval ) {
		dprintf(D_FULLDEBUG, "Sent request to cancel draining on %s\n", startd.name());
	} else {
		dprintf(D_ALWAYS, "Unable to cancel draining on %s: %s\n", startd.name(), startd.error());
	}
	return rval;
}
Ejemplo n.º 11
0
    // Activate the claim stored in m_claim on the startd at m_addr using the
    // given job ad.
    //
    // ad_obj: Python object extracted as a ClassAdWrapper and copied; the copy
    //         gets ATTR_HAS_JOB_AD = true when it has no ATTR_JOB_KEYWORD.
    //
    // Raises (through THROW_EX) ValueError when no claim is set and
    // RuntimeError when activateClaim() returns anything other than OK.
    void
    activate(boost::python::object ad_obj)
    {
        if (m_claim.empty())
        {
            THROW_EX(ValueError, "No claim set for object.");
        }

        compat_classad::ClassAd ad = boost::python::extract<ClassAdWrapper>(ad_obj)();
        const bool has_job_keyword = (ad.find(ATTR_JOB_KEYWORD) != ad.end());
        if (!has_job_keyword)
        {
            ad.InsertAttr(ATTR_HAS_JOB_AD, true);
        }

        DCStartd startd(m_addr.c_str());
        startd.setClaimId(m_claim);

        compat_classad::ClassAd reply;
        int activate_status;
        {
            // The lock object lives only for the duration of the startd call.
            condor::ModuleLock ml;
            activate_status = startd.activateClaim(&ad, &reply, 20);
        }

        if (activate_status != OK)
        {
            THROW_EX(RuntimeError, "Startd failed to activate claim.");
        }
    }
Ejemplo n.º 12
0
    // Request a COD claim from the startd at m_addr and store the returned
    // claim id in m_claim.
    //
    // constraint_obj: None (no requirements), a string parsed as a ClassAd
    //                 expression, or any other object handed to
    //                 convert_python_to_exprtree().
    // lease_duration: inserted into the request ad as ATTR_JOB_LEASE_DURATION.
    //
    // Raises (through THROW_EX) ValueError when the string cannot be parsed,
    // and RuntimeError when the request fails or the reply has no ATTR_CLAIM_ID.
    void
    requestCOD(boost::python::object constraint_obj, int lease_duration)
    {
        classad_shared_ptr<classad::ExprTree> constraint;
        boost::python::extract<std::string> constraint_extract(constraint_obj);

        if (constraint_obj.ptr() != Py_None)
        {
            if (constraint_extract.check())
            {
                // A plain string: parse it into an expression tree.
                classad::ClassAdParser parser;
                std::string constraint_str = constraint_extract();
                classad::ExprTree *parsed = NULL;
                if (!parser.ParseExpression(constraint_str, parsed))
                {
                    THROW_EX(ValueError, "Failed to parse request requirements expression");
                }
                constraint.reset(parsed);
            }
            else
            {
                constraint.reset(convert_python_to_exprtree(constraint_obj));
            }
        }

        compat_classad::ClassAd ad, reply;
        if (constraint.get())
        {
            // The ad receives its own copy; `constraint` keeps the original.
            classad::ExprTree *requirements = constraint->Copy();
            ad.Insert(ATTR_REQUIREMENTS, requirements);
        }
        ad.InsertAttr(ATTR_JOB_LEASE_DURATION, lease_duration);

        DCStartd startd(m_addr.c_str());
        bool claimed;
        {
            // The lock object lives only for the duration of the startd call.
            condor::ModuleLock ml;
            claimed = startd.requestClaim(CLAIM_COD, &ad, &reply, 20);
        }
        if (!claimed)
        {
            THROW_EX(RuntimeError, "Failed to request claim from startd.");
        }

        if (!reply.EvaluateAttrString(ATTR_CLAIM_ID, m_claim))
        {
            THROW_EX(RuntimeError, "Startd did not return a ClaimId.");
        }
    }
Ejemplo n.º 13
0
// Entry point of the COD command-line tool: works out which claim command was
// requested, locates the target startd, sends the command and prints the
// reply.  Returns 0 on success and 1 when the command fails; exits with 1 if
// the startd cannot be located.
int
main( int argc, char *argv[] )
{

#ifndef WIN32
	// With SIGPIPE ignored, a failed connection to a daemon does not kill
	// the tool with signal 13.
	install_sig_handler(SIGPIPE, SIG_IGN );
#endif

	myDistro->Init( argc, argv );

	config();

	cmd = getCommandFromArgv( argc, argv );

	parseArgv( argc, argv );

	DCStartd startd( target, pool ? pool->addr() : NULL );

	if( needs_id ) {
		assert( claim_id );
		startd.setClaimId( claim_id );
	}

	if( ! startd.locate() ) {
		fprintf( stderr, "ERROR: %s\n", startd.error() );
		exit( 1 );
	}

	ClassAd reply;
	ClassAd ad;

	// Remains false for any command not listed in the switch.
	bool succeeded = false;

	switch( cmd ) {
	case CA_REQUEST_CLAIM:
		fillRequestAd( &ad );
		succeeded = startd.requestClaim( CLAIM_COD, &ad, &reply, timeout );
		break;
	case CA_ACTIVATE_CLAIM:
		fillActivateAd( &ad );
		// activateClaim() reports an int status rather than a bool.
		succeeded = ( startd.activateClaim( &ad, &reply, timeout ) == OK );
		break;
	case CA_SUSPEND_CLAIM:
		succeeded = startd.suspendClaim( &reply, timeout );
		break;
	case CA_RESUME_CLAIM:
		succeeded = startd.resumeClaim( &reply, timeout );
		break;
	case CA_DEACTIVATE_CLAIM:
		succeeded = startd.deactivateClaim( vacate_type, &reply, timeout );
		break;
	case CA_RELEASE_CLAIM:
		succeeded = startd.releaseClaim( vacate_type, &reply, timeout );
		break;
	case CA_RENEW_LEASE_FOR_CLAIM:
		succeeded = startd.renewLeaseForClaim( &reply, timeout );
		break;
	case DELEGATE_GSI_CRED_STARTD:
		// delegateX509Proxy() also reports an int status.
		succeeded = ( startd.delegateX509Proxy( proxy_file, 0, NULL ) == OK );
		break;
	}

	if( succeeded ) {
		printOutput( &reply, &startd );
		return 0;
	}

	fprintf( stderr, "Attempt to send %s to startd %s failed\n%s\n",
			 getCommandString(cmd), startd.addr(), startd.error() ); 
	return 1;
}
Ejemplo n.º 14
0
// Activate a claim on the startd at `host`, send it the job ad, and connect
// to the two ports the startd hands back.
//
// test_starter: starter number sent to the startd (0 is logged as the
//               primary starter, anything else as an alternate starter).
// host:         startd host handed to the Daemon constructor.
// reason:       set to JOB_NOT_STARTED on every failure path.
// capability:   claim id, sent with put_secret().
// proc:         supplies the cluster/proc ids used to load the job ad.
// sd1, sd2:     receive the descriptors connected to ports.port1/port2.
// name:         optional; when non-NULL and the reply carries an ip_addr, it
//               receives a strdup()ed copy of the server name.
//
// The ACTIVATE_CLAIM exchange is repeated (3 seconds apart, at most 20
// retries) while the startd answers CONDOR_TRY_AGAIN.
// Returns 0 on success and -1 on failure.
int
part_send_job(
	      int test_starter,
	      char *host,
	      int &reason,
	      char *capability,
	      char * /*schedd*/,
	      PROC *proc,
	      int &sd1,
	      int &sd2,
	      char **name)
{
  int reply;
  ReliSock *sock = NULL;
  StartdRec stRec;
  PORTS ports;
  bool done = false;
  int retry_delay = 3;
  int num_retries = 0;

  // Clear the record before any failure exit can be taken, so the cleanup at
  // returnfailure can tell whether a server_name was ever received.
  memset( &stRec, '\0', sizeof(stRec) );

  // make sure we have the job classad
  InitJobAd(proc->id.cluster, proc->id.proc);

  while( !done ) {

	  Daemon startd(DT_STARTD, host, NULL);
	  if (!(sock = (ReliSock*)startd.startCommand ( ACTIVATE_CLAIM, Stream::reli_sock, 90))) {
		  dprintf( D_ALWAYS, "startCommand(ACTIVATE_CLAIM) to startd failed\n");
		  goto returnfailure;
	  }

		  // Send the capability; only its public part is ever logged.
	  ClaimIdParser idp( capability );
	  dprintf(D_FULLDEBUG, "send capability %s\n", idp.publicClaimId() );
	  if( !sock->put_secret(capability) ) {
		  dprintf( D_ALWAYS, "sock->put(\"%s\") failed\n",idp.publicClaimId());
		  goto returnfailure;
	  }

	  // Send the starter number
	  if( test_starter ) {
		  dprintf( D_ALWAYS, "Requesting Alternate Starter %d\n", test_starter );
	  } else {
		  dprintf( D_ALWAYS, "Requesting Primary Starter\n" );
	  }
	  if( !sock->code(test_starter) ) {
		  dprintf( D_ALWAYS, "sock->code(%d) failed\n", test_starter );
		  goto returnfailure;
	  }

		  // Send the job info 
	  if( !JobAd->put(*sock) ) {
		  dprintf( D_ALWAYS, "failed to send job ad\n" );
		  goto returnfailure;
	  }	

	  if( !sock->end_of_message() ) {
		  dprintf( D_ALWAYS, "failed to send message to startd\n" );
		  goto returnfailure;
	  }

		  // We're done sending.  Now, get the reply.
	  sock->decode();
	  if( !sock->code(reply) || !sock->end_of_message() ) {
		  dprintf( D_ALWAYS, "failed to receive reply from startd\n" );
		  goto returnfailure;
	  }
	  
	  switch( reply ) {
	  case OK:
		  dprintf( D_ALWAYS, "Shadow: Request to run a job was ACCEPTED\n" );
		  done = true;
		  break;

	  case NOT_OK:
		  dprintf( D_ALWAYS, "Shadow: Request to run a job was REFUSED\n");
		  goto returnfailure;
		  break;

	  case CONDOR_TRY_AGAIN:
		  num_retries++;
		  dprintf( D_ALWAYS,
				   "Shadow: Request to run a job was TEMPORARILY REFUSED\n" );
		  if( num_retries > 20 ) {
			  dprintf( D_ALWAYS, "Shadow: Too many retries, giving up.\n" );
			  goto returnfailure;
		  }			  
		  // Drop this connection; the next pass opens a fresh one.  Null
		  // the pointer so it never dangles between iterations.
		  delete sock;
		  sock = NULL;
		  dprintf( D_ALWAYS,
				   "Shadow: will try again in %d seconds\n", retry_delay );
		  sleep( retry_delay );
		  break;

	  default:
		  dprintf(D_ALWAYS,"Unknown reply from startd for command ACTIVATE_CLAIM\n");
		  dprintf(D_ALWAYS,"Shadow: Request to run a job was REFUSED\n");
		  goto returnfailure;
		  break;
	  }
  }

  /* start flock : dhruba */
  // The accepted connection next carries a StartdRec with the ports to use.
  sock->decode();
  if( !sock->code(stRec) || !sock->end_of_message() ) {
	  dprintf(D_ALWAYS, "Can't read reply from startd.\n");
	  goto returnfailure;
  }
  ports = stRec.ports;
  if( stRec.ip_addr ) {
	host = stRec.server_name;
	if(name) {
		*name = strdup(stRec.server_name);
	}
    dprintf(D_FULLDEBUG,
	    "host = %s inet_addr = 0x%x port1 = %d port2 = %d\n",
	    host, stRec.ip_addr,ports.port1, ports.port2
	    );
  } else {
    dprintf(D_FULLDEBUG,
	    "host = %s port1 = %d port2 = %d\n",
	    host, ports.port1, ports.port2
	    );
  }    

  if( ports.port1 == 0 ) {
    dprintf( D_ALWAYS, "Shadow: Request to run a job on %s was REFUSED\n",
	     host );
	goto returnfailure;
  }
  /* end  flock ; dhruba */

	  // We don't use the server_name in the StartdRec, because our
	  // DNS query may fail or may give us the wrong IP address
	  // (either because it's stale or because we're talking to a
	  // machine with multiple network interfaces).  Sadly, we can't
	  // use the ip_addr either, because the startd doesn't send it in
	  // the correct byte ordering on little-endian machines.  So, we
	  // grab the IP address from the ReliSock, since we know the
	  // startd always uses the same IP address for all of its
	  // communication.
  char sinfulstring[SINFUL_STRING_BUF_SIZE];

  generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port1);
  if( (sd1 = do_connect(sinfulstring, (char *)0, (u_short)ports.port1)) < 0 ) {
    dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring );
	goto returnfailure;
  }

  generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port2);
  if( (sd2 = do_connect(sinfulstring, (char *)0, (u_short)ports.port2)) < 0 ) {
    dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring );
	// The first connection is of no use without the second.
	close(sd1);
	goto returnfailure;
  }

  delete sock;
  sock = NULL;

  if ( stRec.server_name ) {
	  free( stRec.server_name );
  }

  return 0;

returnfailure:
  reason = JOB_NOT_STARTED;
  // Release the server name on failure too; it used to be freed only on the
  // success path and leaked on every failure after the record was read.
  if ( stRec.server_name ) {
	  free( stRec.server_name );
	  stRec.server_name = NULL;
  }
  delete sock;
  return -1;
}