// Note: caller must deallocate return value w/ delete [] char * GridUniverseLogic::scratchFilePath(gman_node_t *gman_node) { MyString filename; filename.formatstr("%s%p.%d",scratch_prefix, gman_node,daemonCore->getpid()); char *prefix = temp_dir_path(); ASSERT(prefix); // note: dircat allocates with new char[] char *finalpath = dircat(prefix,filename.Value()); free(prefix); return finalpath; }
// create a temporary lock path // return value must be freed via delete[] by caller. char * FileLock::GetTempPath() { const char *suffix = ""; char *result = NULL; char *path = param("LOCAL_DISK_LOCK_DIR"); if (!path) { path = temp_dir_path(); suffix = "condorLocks"; } result = dirscat(path, suffix); free(path); return result; }
// Return the gridmanager node serving the given owner (and optional
// attribute value / job id), starting a new condor_gridmanager process
// if none is registered yet.
//
// Parameters:
//   owner      - user the gridmanager runs on behalf of; must not be NULL
//                when a new gridmanager has to be started.
//   domain     - optional domain; when non-empty it is appended to the
//                owner as "@domain" for the -o argument.
//   attr_value - optional attribute value; an empty string is treated
//                as NULL (and attr_name is then ignored as well).
//   attr_name  - name of the attribute that attr_value belongs to.
//   cluster,
//   proc       - job id; when cluster is non-zero, "-cluster.proc" is
//                appended to the hash table key.
//
// Returns the existing or newly created node, or NULL on any failure
// (shutdown in progress, missing owner, owner is root on non-Windows,
// bad configuration, init_user_ids/mkdir/Create_Process failure).
GridUniverseLogic::gman_node_t *
GridUniverseLogic::StartOrFindGManager(const char* owner, const char* domain,
	   	const char* attr_value, const char* attr_name, int cluster, int proc)
{
	gman_node_t* gman_node;

	// If attr_value is an empty string, convert to NULL since code
	// after this point expects that.
	if ( attr_value && strlen(attr_value)==0 ) {
		attr_value = NULL;
		attr_name = NULL;
	}

	if ( (gman_node=lookupGmanByOwner(owner, attr_value, cluster, proc)) ) {
		// found it
		return gman_node;
	}

	// not found.  fire one up!  we want to run the GManager as the user.

	// but first, make certain we are not shutting down...
	if (!gman_pid_table) {
		// destructor has already been called; we are probably
		// closing down.
		return NULL;
	}

	// Everything below needs a real owner. Check it here, before owner
	// is handed to any "%s" conversion or string function: passing a
	// NULL pointer for %s is undefined behaviour.
	if ( !owner ) {
		dprintf(D_ALWAYS,"ERROR - missing owner field\n");
		return NULL;
	}

#ifndef WIN32
	if ( strcasecmp(owner, "root") == 0 ) {
		dprintf(D_ALWAYS, "Tried to start condor_gmanager as root.\n");
		return NULL;
	}
#endif

	dprintf( D_FULLDEBUG, "Starting condor_gmanager for owner %s (%d.%d)\n",
			owner, cluster, proc);

	// gman_binary comes from param() and is released with free() on
	// every exit path below.
	char *gman_binary = param("GRIDMANAGER");
	if ( !gman_binary ) {
		dprintf(D_ALWAYS,"ERROR - GRIDMANAGER not defined in config file\n");
		return NULL;
	}

	ArgList args;
	MyString error_msg;

	args.AppendArg("condor_gridmanager");
	args.AppendArg("-f");

	char *gman_args = param("GRIDMANAGER_ARGS");

	if(!args.AppendArgsV1RawOrV2Quoted(gman_args,&error_msg)) {
		dprintf( D_ALWAYS, "ERROR: failed to parse gridmanager args: %s\n",
				 error_msg.Value());
		free(gman_binary);
		free(gman_args);
		return NULL;
	}
	free(gman_args);

	// build a constraint
	MyString constraint;
	if ( !attr_name ) {
		constraint.formatstr("(%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);
	} else {
		constraint.formatstr("(%s=?=\"%s\"&&%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   attr_name,attr_value,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);

		args.AppendArg("-A");
		args.AppendArg(attr_value);
	}
	args.AppendArg("-C");
	args.AppendArg(constraint.Value());

	MyString full_owner_name(owner);
	if ( domain && *domain ) {
		full_owner_name.formatstr_cat( "@%s", domain );
	}
	args.AppendArg("-o");
	args.AppendArg(full_owner_name.Value());

	if (!init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS,"ERROR - init_user_ids() failed in GRIDMANAGER\n");
		free(gman_binary);
		return NULL;
	}

	static bool first_time_through = true;
	if ( first_time_through ) {
		// Note: Because first_time_through is static, this block runs only
		// once per schedd invocation.
		first_time_through = false;

		// Clean up any old / abandoned scratch dirs.
		dprintf(D_FULLDEBUG,"Checking for old gridmanager scratch dirs\n");
		char *prefix = temp_dir_path();
		ASSERT(prefix);
		Directory tmp( prefix, PRIV_USER );
		const char *f;
		char const *dot;
		int fname_pid;
		int mypid = daemonCore->getpid();
		int scratch_pre_len = strlen(scratch_prefix);
		while ( (f=tmp.Next()) ) {
				// skip regular files -- we only need to inspect subdirs
			if ( !tmp.IsDirectory() ) {
				continue;
			}
				// skip if it does not start with our prefix
			if ( strncmp(scratch_prefix,f,scratch_pre_len) ) {
				continue;
			}
				// skip if does not end w/ a pid
			dot = strrchr(f,'.');
			if ( !dot ) {
				continue;
			}
				// skip if this pid is still alive and not ours
			dot++;	// skip over period
			fname_pid = atoi(dot);
			if ( fname_pid != mypid && daemonCore->Is_Pid_Alive(fname_pid) ) {
					continue;
			}
				// if we made it here, blow away this subdir
			if ( tmp.Remove_Current_File() ) {
				dprintf(D_ALWAYS,"Removed old scratch dir %s\n",
				tmp.GetFullPath());
			}
		}	// end of while for cleanup of old scratch dirs

		dprintf(D_FULLDEBUG,"Done checking for old scratch dirs\n");

		// prefix is known to be non-NULL (ASSERT above).
		free(prefix);
		prefix = NULL;

	}	// end of once-per-schedd invocation block

	// Create a temp dir for the gridmanager and append proper
	// command-line arguments to tell where it is.
	// The node is allocated first because its address is part of the
	// scratch directory name (see scratchFilePath()).
	bool failed = false;
	gman_node = new gman_node_t;
	char *finalpath = scratchFilePath(gman_node);
	priv_state saved_priv = set_user_priv();
	if ( (mkdir(finalpath,0700)) < 0 ) {
		// mkdir failed.
		dprintf(D_ALWAYS,"ERROR - mkdir(%s,0700) failed in GRIDMANAGER, errno=%d (%s)\n",
				finalpath, errno, strerror(errno));
		failed = true;
	}
	set_priv(saved_priv);
	uninit_user_ids();
	args.AppendArg("-S");	// -S = "ScratchDir" argument
	args.AppendArg(finalpath);
	delete [] finalpath;	// scratchFilePath() allocates with new char[]
	if ( failed ) {
		// we already did dprintf reason to the log...
		free(gman_binary);
		delete gman_node;
		return NULL;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string);
		dprintf(D_FULLDEBUG,"Really Execing %s\n",args_string.Value());
	}

	int pid = daemonCore->Create_Process(
		gman_binary,	// Program to exec
		args,			// Command-line args
		PRIV_ROOT,		// Run as root, so it can switch to
						//   PRIV_CONDOR
		rid				// Reaper ID
		);

	free(gman_binary);

	if ( pid <= 0 ) {
		dprintf ( D_ALWAYS, "StartOrFindGManager: Create_Process problems!\n" );
		// gman_node was allocated above and is never NULL here.
		delete gman_node;
		return NULL;
	}

	// If we made it here, we happily started up a new gridmanager process

	dprintf( D_ALWAYS, "Started condor_gmanager for owner %s pid=%d\n",
			owner,pid);

	// Fill in the node and insert it into our hashtable.
	gman_node->pid = pid;
	gman_node->owner[0] = '\0';
	gman_node->domain[0] = '\0';
	// NOTE(review): these copies are unbounded; the capacity of
	// gman_node_t::owner / ::domain is not visible here -- confirm that
	// callers cannot pass longer strings, or switch to a bounded copy.
	strcpy(gman_node->owner,owner);
	if ( domain ) {
		strcpy(gman_node->domain,domain);
	}

	// Key: owner, then attr_value (if any), then "-cluster.proc" when
	// cluster is non-zero.
	MyString owner_key(owner);
	if(attr_value){
		owner_key += attr_value;
	}
	if (cluster) {
		owner_key.formatstr_cat( "-%d.%d", cluster, proc );
	}

	ASSERT( gman_pid_table->insert(owner_key,gman_node) == 0 );

	// start timer to signal gridmanager if we haven't already
	if ( gman_node->add_timer_id == -1 ) {  // == -1 means no timer set
		gman_node->add_timer_id = daemonCore->Register_Timer(job_added_delay,
			GridUniverseLogic::SendAddSignal,
			"GridUniverseLogic::SendAddSignal");
		daemonCore->Register_DataPtr(gman_node);
	}

	// All done
	return gman_node;
}