/*
** Build the spool path for job <cluster>.<proc> and store it in spool_path.
**
** The directory comes from the SPOOL configuration parameter and the path
** itself from gen_ckpt_name(); the function ASSERTs that both lookups
** yield a non-null string. Both C strings are released with free() before
** returning, so the caller only ever owns the std::string copy.
*/
void
SpooledJobFiles::getJobSpoolPath(int cluster,int proc,std::string &spool_path)
{
	char * const spool_dir = param("SPOOL");
	ASSERT( spool_dir );

	char * const ckpt_path = gen_ckpt_name(spool_dir, cluster, proc, 0);
	ASSERT( ckpt_path );

	// Copy into the caller's string before the C buffers go away.
	spool_path.assign(ckpt_path);

	free(ckpt_path);
	free(spool_dir);
}
Example #2
0
/*
** Remove checkpoint *directories* left in SPOOL for job <cluster>.<proc>.
**
** cluster, proc: job ids as decimal strings (converted with atoi()).
**
** Two candidate paths are examined: the name produced by gen_ckpt_name()
** and that same name with ".tmp" appended. For each one that exists and
** is a directory, its contents and then the directory itself are removed,
** and the outcome is logged with dprintf(D_ALWAYS, ...). Plain files are
** left alone.
**
** The function EXCEPTs if SPOOL is not configured. It logs and returns
** without touching the filesystem if either id is negative, if no
** checkpoint name could be generated, or if the generated name does not
** fit in the local path buffers.
*/
void RemoveNewShadowDroppings(char *cluster, char *proc)
{
	char names[2][1024];
	int j;
	char *ckpt_name;
	char *myspool;
	struct stat buf;
	int clusternum, procnum;

	memset(names, 0, sizeof(names));

	/* XXX I'm sorry.
		There are some incompatibilities between the new
		shadow and the old shadow. The new shadow now makes a
		_directory_ with the usual checkpoint name because there
		might eventually be more than one file that has to get
		checkpointed with a job. The old shadow is dumb, and it
		only makes a _file_ named the usual checkpoint name. So a
		contention happens when we are using opsys/arch to choose
		an executable name for both NT and UNIX between vanilla
		only jobs and standard universe jobs. What happens is
		that the old shadow gets back a correct stat() on the new
		shadow created directory but misinterprets it as a file
		and hilarity ensues. So, my nasty hack is to make the
		old shadow determine if the file it found is actually
		a directory and if so, then remove it and everything
		underneath it.	I somehow feel that this might bite us
		in the ass in the future, so each time the shadow does
		this, it logs it so a human can figure out what happened.
		I don't have to worry about the converse issue of a new
		shadow starting up with an old file-based checkpoint
		because whomever adds standard universe support to
		the new shadow will have to do something intelligent,
		and our submit program places expressions into the
		requirements attribute in the job forcing a checkpointed
		job to always run on the architecture it checkpointed
		on. 

		-psilord 7/30/01
	*/

	myspool = param("SPOOL");
	if (myspool == NULL)
	{
		EXCEPT ("RemoveNewShadowDroppings(): No Spool directory!?!\n");
	}
	clusternum = atoi(cluster);
	procnum = atoi(proc);
	if (clusternum < 0 || procnum < 0) /* sanity checks */
	{
		dprintf(D_ALWAYS, "RemoveNewShadowDroppings(): Asked to deal with "
			"negative cluster or proc numbers. Ignoring.\n");
		free(myspool);
		return;
	}
	ckpt_name = gen_ckpt_name( myspool, clusternum, procnum, 0 );
	if (ckpt_name == NULL)
	{
		/* nothing to look for; strcpy() from NULL would be undefined */
		dprintf(D_ALWAYS, "RemoveNewShadowDroppings(): Could not generate "
			"a checkpoint name. Ignoring.\n");
		free(myspool);
		return;
	}

	/* names[1] has to hold ckpt_name, the ".tmp" suffix and the
		terminating NUL (sizeof(".tmp") already counts the NUL). Refuse
		to copy anything longer instead of overrunning the buffers. */
	if (strlen(ckpt_name) + sizeof(".tmp") > sizeof(names[1]))
	{
		dprintf(D_ALWAYS, "RemoveNewShadowDroppings(): Checkpoint name is "
			"too long. Ignoring: %s\n", ckpt_name);
		free(ckpt_name);
		free(myspool);
		return;
	}

	strcpy(names[0], ckpt_name);
	strcpy(names[1], ckpt_name);
	strcat(names[1], ".tmp");
	free(ckpt_name); ckpt_name = NULL;

	for (j = 0; j < 2; j++)
	{
		if (stat(names[j], &buf) == 0) {
			/* ok, we have a hit, let's see if it is a directory... */
			if (IsDirectory(names[j]) == true) {
				/* it is, so blow away everything inside it */
				{
					/* scoped so the Directory object is destroyed
						before rmdir() runs on the same path */
					Directory todd_droppings(names[j]);
					if (todd_droppings.Remove_Entire_Directory() == false) {
						dprintf(D_ALWAYS, "RemoveNewShadowDroppings(): Old "
							"shadow failed to remove new shadow ckpt directory "
							"contents: %s\n", names[j]);
						}
				}
				/* now delete the directory itself; an already-missing
					directory (ENOENT) is not treated as a failure */
				if (rmdir(names[j]) < 0 && errno != ENOENT) {
					dprintf(D_ALWAYS, "RemoveNewShadowDroppings(): Old shadow "
						"failed to remove new shadow ckpt directory: %s (%s)\n",
						names[j], strerror(errno));
				} else {
					dprintf(D_ALWAYS, "RemoveNewShadowDroppings(): Old shadow "
						"removed new shadow ckpt directory: %s\n", names[j]);
				}
			}
		}
	}

	free(myspool);
}
Example #3
0
/*
** Set up the shadow's global state for job <cluster_id>.<proc_id>.
**
** cluster_id, proc_id: job ids as decimal strings (converted with atoi()).
**
** Side effects, all on globals:
**   - Proc->id is set and Proc is filled in by MakeProc() from JobAd,
**     after InitJobAd() has been called for the job.
**   - Transfer and checkpoint counters (TotalBytesSent, TotalBytesRecvd,
**     RSCBytesSent, RSCBytesRecvd, NumRestarts, NumCkpts) are looked up
**     in JobAd.
**   - GlobalCap is replaced with the ATTR_CLAIM_ID string from JobAd.
**   - LocalUsage, RemoteUsage and ImageSize are copied from Proc.
**   - CkptName, TmpCkptName, ICkptName and RCkptName are filled in.
**
** EXCEPTs if MakeProc() fails, if the ad has no ATTR_CLAIM_ID, if
** gen_ckpt_name() returns NULL, or if the temporary checkpoint name
** would not fit in MAXPATHLEN bytes.
*/
void
start_job( char *cluster_id, char *proc_id )
{
	int		cluster_num;
	int		proc_num;
	char	*tmp;

	// Parse each id once and reuse the result.
	cluster_num = atoi( cluster_id );
	proc_num = atoi( proc_id );

	Proc->id.cluster = cluster_num;
	Proc->id.proc = proc_num;

	InitJobAd(cluster_num, proc_num); // make sure we have the job classad

	if (MakeProc(JobAd, Proc) < 0) {
		EXCEPT("MakeProc()");
	}

	JobAd->LookupFloat(ATTR_BYTES_SENT, TotalBytesSent);
	JobAd->LookupFloat(ATTR_BYTES_RECVD, TotalBytesRecvd);
	JobAd->LookupFloat(ATTR_RSC_BYTES_SENT, RSCBytesSent);
	JobAd->LookupFloat(ATTR_RSC_BYTES_RECVD, RSCBytesRecvd);
	JobAd->LookupInteger(ATTR_NUM_RESTARTS, NumRestarts);
	// by default, we round ATTR_NUM_CKPTS, so fetch the raw value
	// here (if available) for us to increment later.
	if ( !JobAd->LookupInteger(ATTR_NUM_CKPTS_RAW, NumCkpts)  ) {
		JobAd->LookupInteger(ATTR_NUM_CKPTS, NumCkpts);
	}

		// Grab the ClaimID (a.k.a. "capability") from the job classad
		// and put it in our global variable for use everywhere else.
	if (GlobalCap) {
		free(GlobalCap);
		GlobalCap = NULL;
	}
    JobAd->LookupString(ATTR_CLAIM_ID, &GlobalCap);
    if (! GlobalCap) {
        EXCEPT("ad does not include %s!", ATTR_CLAIM_ID);
    }

	// NOTE: TESTING is defined unconditionally right here, so the status
	// check below is always compiled out.
#define TESTING
#if !defined(HPUX) && !defined(TESTING)
	if( Proc->status != RUNNING ) {
		dprintf( D_ALWAYS, "Shadow: Asked to run proc %d.%d, but status = %d\n",
							Proc->id.cluster, Proc->id.proc, Proc->status );
		dprintf(D_ALWAYS, "********** Shadow Exiting(%d) **********\n",
			JOB_BAD_STATUS);
		exit( JOB_BAD_STATUS );	/* don't cleanup here */
	}
#endif

	LocalUsage = Proc->local_usage;
	RemoteUsage = Proc->remote_usage[0];
	ImageSize = Proc->image_size;

	if (Proc->universe != CONDOR_UNIVERSE_STANDARD) {
		// Only standard universe jobs get per-proc checkpoint names.
		strcpy( CkptName, "" );
		strcpy( TmpCkptName, "" );
	} else {
		tmp = gen_ckpt_name( Spool, Proc->id.cluster, Proc->id.proc, 0 );
		if( tmp == NULL ) {
			EXCEPT("gen_ckpt_name() failed for %d.%d",
				   Proc->id.cluster, Proc->id.proc);
		}
		snprintf( CkptName, MAXPATHLEN, "%s", tmp );
		free(tmp); tmp = NULL;

		// Bounded like the other name buffers (assumes TmpCkptName holds
		// at least MAXPATHLEN bytes, as CkptName is treated above --
		// TODO confirm against its declaration). A truncated name could
		// end up identical to CkptName, so overflow is fatal rather than
		// silently clipped.
		int tmp_len = snprintf( TmpCkptName, MAXPATHLEN, "%s.tmp", CkptName );
		if( tmp_len < 0 || tmp_len >= MAXPATHLEN ) {
			EXCEPT("Checkpoint file name too long: %s.tmp", CkptName);
		}
	}

	// Name generated with ICKPT in place of the proc number; built for
	// every universe.
	tmp = gen_ckpt_name( Spool, Proc->id.cluster, ICKPT, 0 );
	if( tmp == NULL ) {
		EXCEPT("gen_ckpt_name() failed for %d.ICKPT", Proc->id.cluster);
	}
	snprintf( ICkptName, MAXPATHLEN, "%s", tmp );
	free(tmp); tmp = NULL;

	strcpy( RCkptName, CkptName );
}