Example #1
0
/*
 * Configure a small (sub-midplane) block through the bridge API.
 *
 * IN bg_record - block to configure.  It must carry an ionode_bitmap
 *                and describe exactly one midplane (mp_count == 1).
 * RET SLURM_SUCCESS, or SLURM_ERROR if the request is invalid or a
 *     bridge call fails.  Failures while setting the basic partition
 *     data (small flag, nodecard count, first MP) are fatal().
 *
 * Without HAVE_BG_FILES only the argument checks are performed.
 */
extern int configure_small_block(bg_record_t *bg_record)
{
	int rc = SLURM_SUCCESS;
#if defined HAVE_BG_FILES
	bool small = true;
	ba_mp_t* ba_node = NULL;
	rm_BP_t *curr_mp = NULL;
	rm_bp_id_t mp_id = NULL;
#ifndef HAVE_BGL
	rm_nodecard_id_t nc_char = NULL;
#endif
	int nc_id = 0;
	int num_ncards = 0, sub_nodecard = 0, ionode_card = 0, nc_count = 0;
	rm_nodecard_t *ncard;
	rm_nodecard_list_t *ncard_list = NULL;
	int num, i;
	int free_rc;
	int use_nc[bg_conf->mp_nodecard_cnt];
	double nc_pos = 0;
#endif
	xassert(bg_record->ionode_bitmap);
	if (bg_record->mp_count != 1) {
		error("Requesting small block with %d mps, needs to be 1.",
		      bg_record->mp_count);
		return SLURM_ERROR;
	}
/* 	info("configuring small block on ionodes %s out of %d ncs",  */
/* 	     bg_record->ionodes, bg_conf->mp_nodecard_cnt); */
#if defined HAVE_BG_FILES
	/* set that we are doing a small block */
	if ((rc = bridge_set_data(bg_record->bg_block, RM_PartitionSmall,
				  &small)) != SLURM_SUCCESS) {

		fatal("bridge_set_data(RM_PartitionSmall): %s",
		      bg_err_str(rc));
	}

	/* a block smaller than one nodecard still needs one nodecard,
	 * but only a part (one ionode) of it */
	num_ncards = bg_record->cnode_cnt/bg_conf->nodecard_cnode_cnt;
	if (num_ncards < 1) {
		num_ncards = 1;
		sub_nodecard = 1;
	}
	memset(use_nc, 0, sizeof(use_nc));

	/* find out how many nodecards to get for each ionode */

	for(i = 0; i<bg_conf->ionodes_per_mp; i++) {
		if (bit_test(bg_record->ionode_bitmap, i)) {
			if (bg_conf->nc_ratio > 1) {
				int j=0;
				for(j=0; j<bg_conf->nc_ratio; j++)
					use_nc[(int)nc_pos+j] = 1;
			} else {
				use_nc[(int)nc_pos] = 1;
				/* odd ionodes select the second
				 * ionode ("J01") of the nodecard */
				if (i%2)
					ionode_card = 1;
			}
		}
		nc_pos += bg_conf->nc_ratio;
	}

	if ((rc = bridge_set_data(bg_record->bg_block,
				  RM_PartitionNodeCardNum,
				  &num_ncards))
	    != SLURM_SUCCESS) {

		fatal("bridge_set_data: RM_PartitionNodeCardNum: %s",
		      bg_err_str(rc));
	}

	ba_node = list_peek(bg_record->ba_mp_list);

	if (_get_mp_by_location(bg, ba_node->coord, &curr_mp)
	    == SLURM_ERROR) {
		fatal("_get_mp_by_location()");
	}

	/* Set the one MP */

	if ((rc = bridge_set_data(bg_record->bg_block,
				  RM_PartitionBPNum,
				  &bg_record->mp_count))
	    != SLURM_SUCCESS) {

		fatal("bridge_set_data: RM_PartitionBPNum: %s",
		      bg_err_str(rc));
		return SLURM_ERROR;
	}
	if ((rc = bridge_set_data(bg_record->bg_block,
				  RM_PartitionFirstBP,
				  curr_mp))
	    != SLURM_SUCCESS) {

		fatal("bridge_set_data("
		      "BRIDGE_PartitionFirstBP): %s",
		      bg_err_str(rc));
		return SLURM_ERROR;
	}


	/* find the mp_id of the mp to get the small32 */
	if ((rc = bridge_get_data(curr_mp, RM_BPID, &mp_id))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(): %d", rc);
		return SLURM_ERROR;
	}


	if (!mp_id) {
		error("No MP ID was returned from database");
		return SLURM_ERROR;
	}

	if ((rc = bridge_get_nodecards(mp_id, &ncard_list))
	    != SLURM_SUCCESS) {
		error("bridge_get_nodecards(%s): %d",
		      mp_id, rc);
		free(mp_id);
		return SLURM_ERROR;
	}
	free(mp_id);


	if ((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_NodeCardListSize): %s",
		      bg_err_str(rc));
		/* the list was already fetched, release it on the way out */
		rc = SLURM_ERROR;
		goto cleanup;
	}
	if (num_ncards > num) {
		error("You requested more (%d > %d) nodecards "
		      "than are available on this block %s",
		      num_ncards, num, bg_record->mp_str);
	}

	for(i=0; i<num; i++) {
		if (i) {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListNext,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListNext): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		} else {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListFirst,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListFirst): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}

#ifdef HAVE_BGL
		/* on BG/L we assume the order never changes when the
		   system is up.  This could change when a reboot of
		   the system happens, but that should be rare.
		*/
		nc_id = i;
		/* never index use_nc past the end of the array */
		if ((i >= bg_conf->mp_nodecard_cnt) || !use_nc[i])
			continue;
#else
		if ((rc = bridge_get_data(ncard,
					  RM_NodeCardID,
					  &nc_char)) != SLURM_SUCCESS) {
			error("bridge_get_data(RM_NodeCardID): %s",
			      bg_err_str(rc));
			rc = SLURM_ERROR;
			goto cleanup;
		}

		if (!nc_char) {
			error("No NodeCard ID was returned from database");
			rc = SLURM_ERROR;
			goto cleanup;
		}

		/* the numeric part of the id follows its first character */
		nc_id = atoi((char*)nc_char+1);

		/* skip nodecards we do not want, and never index
		 * use_nc with an id outside of the array */
		if ((nc_id < 0) || (nc_id >= bg_conf->mp_nodecard_cnt)
		    || !use_nc[nc_id]) {
			free(nc_char);
			nc_char = NULL;
			continue;
		}

		if (sub_nodecard) {
			rm_ionode_t *ionode;
			char *ionode_id = "J00";

			/* Build a fresh nodecard holding only the one
			 * ionode we want; it replaces the list entry
			 * in ncard for the rest of this iteration.
			 * NOTE(review): the nodecard/ionode created
			 * here are not released on the error paths
			 * below.
			 */
			if ((rc = bridge_new_nodecard(&ncard))
			    != SLURM_SUCCESS) {
				error("bridge_new_nodecard(): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_set_data(ncard,
						  RM_NodeCardID,
						  nc_char))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_NodeCardID): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_set_data(ncard,
						  RM_NodeCardIONodeNum,
						  &sub_nodecard))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_NodeCardIONodeNum): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_new_ionode(&ionode))
			    != SLURM_SUCCESS) {
				error("bridge_new_ionode(): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if (ionode_card)
				ionode_id = "J01";

			if ((rc = bridge_set_data(ionode,
						  RM_IONodeID,
						  ionode_id))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_IONodeID): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_set_data(ncard,
						  RM_NodeCardFirstIONode,
						  ionode))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_NodeCardFirstIONode): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_free_ionode(ionode))
			    != SLURM_SUCCESS) {
				error("bridge_free_ionode(): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}
		free(nc_char);
		/* cleared so the free() at cleanup never frees it twice */
		nc_char = NULL;
#endif

		if (nc_count) {
			if ((rc = bridge_set_data(bg_record->bg_block,
						  RM_PartitionNextNodeCard,
						  ncard))
			    != SLURM_SUCCESS) {

				error("bridge_set_data("
				      "RM_PartitionNextNodeCard): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		} else {
			if ((rc = bridge_set_data(bg_record->bg_block,
						  RM_PartitionFirstNodeCard,
						  ncard))
			    != SLURM_SUCCESS) {

				error("bridge_set_data("
				      "RM_PartitionFirstNodeCard): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}

		nc_count++;
#ifndef HAVE_BGL
		if (sub_nodecard) {
			if ((rc = bridge_free_nodecard(ncard))
			    != SLURM_SUCCESS) {
				error("bridge_free_nodecard(): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}
#endif
		if (nc_count == num_ncards)
			break;
	}
cleanup:
#ifndef HAVE_BGL
	/* non-NULL only when we jumped here with an id still held */
	free(nc_char);
#endif
	/* Use a separate status here: rc may already hold SLURM_ERROR
	 * from the code above and must survive a successful free.
	 */
	if ((free_rc = bridge_free_nodecard_list(ncard_list))
	    != SLURM_SUCCESS) {
		error("bridge_free_nodecard_list(): %s",
		      bg_err_str(free_rc));
		return SLURM_ERROR;
	}

#endif
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES)
		info("making the small block");
	/* anything that is not an explicit error counts as success */
	if (rc != SLURM_ERROR)
		rc = SLURM_SUCCESS;
	return rc;
}
/*
 * Walk every nodecard of a base partition and test its state with
 * _test_nodecard_state().
 *
 * This could potentially lock the node lock in the slurmctld with
 * slurm_drain_node, so slurmctld_locked is handed through to
 * _test_nodecard_state() to let the draining function run without
 * locking the lock again when the caller already holds it.
 *
 * IN bp_ptr - base partition whose nodecards are tested
 * IN slurmctld_locked - passed unchanged to _test_nodecard_state()
 * RET SLURM_SUCCESS if every bridge call succeeded and no nodecard
 *     test failed, otherwise SLURM_ERROR.
 */
static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked)
{
	rm_bp_id_t bp_id = NULL;
	int num = 0;
	int marked_down = 0;
	int i=0;
	int rc = SLURM_SUCCESS;
	rm_nodecard_list_t *ncard_list = NULL;
	rm_nodecard_t *ncard = NULL;
	//bitstr_t *ionode_bitmap = NULL;
	//bg_record_t *bg_record = NULL;
	char *node_name = NULL;
	//int bp_bit = 0;
	//int io_cnt = 1;

	/* Translate 1 nodecard count to ionode count */
/* 	if ((io_cnt *= bg_conf->io_ratio)) */
/* 		io_cnt--; */

	if ((rc = bridge_get_data(bp_ptr, RM_BPID, &bp_id))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_BPID): %s",
		      bg_err_str(rc));
		return SLURM_ERROR;
	}

	/* a successful call may still hand back no id; do not pass a
	   NULL id on to the bridge or to a %s format below
	*/
	if (!bp_id) {
		error("No BP ID was returned from database");
		return SLURM_ERROR;
	}

	if ((rc = bridge_get_nodecards(bp_id, &ncard_list))
	    != SLURM_SUCCESS) {
		error("bridge_get_nodecards(%s): %d",
		      bp_id, rc);
		rc = SLURM_ERROR;
		goto clean_up;
	}

	/* The node_name will only be NULL if this system doesn't
	   really have the node.
	*/
	if (!(node_name = _get_bp_node_name(bp_ptr))) {
		rc = SLURM_ERROR;
		goto clean_up;
	}

	if ((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_NodeCardListSize): %s",
		      bg_err_str(rc));
		rc = SLURM_ERROR;
		goto clean_up;
	}

	for(i=0; i<num; i++) {
		if (i) {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListNext,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListNext): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto clean_up;
			}
		} else {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListFirst,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListFirst): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto clean_up;
			}
		}

		/* keep going after a failed test so every nodecard
		   gets looked at; the failures are only counted here
		*/
		if (_test_nodecard_state(ncard, i, node_name, slurmctld_locked)
		    != SLURM_SUCCESS)
			marked_down++;
	}

	/* this code is here to bring up a block after it is in an
	   error state.  It is commented out because it hasn't been
	   tested very well yet.  If you ever want to use this code
	   there should probably be a configurable option in the
	   bluegene.conf file that gives you an option as to have this
	   happen or not automatically.
	*/
/* 	if (ionode_bitmap) { */
/* 		info("got ionode_bitmap"); */

/* 		bit_not(ionode_bitmap); */
/* 		up_nodecard(node_name, ionode_bitmap); */
/* 	} else { */
/* 		int ret = 0; */
/* 		info("no ionode_bitmap"); */
/* 		ListIterator itr = NULL; */
/* 		slurm_mutex_lock(&block_state_mutex); */
/* 		itr = list_iterator_create(bg_lists->main); */
/* 		while ((bg_record = list_next(itr))) { */
/* 			if (bg_record->job_running != BLOCK_ERROR_STATE) */
/* 				continue; */

/* 			if (!bit_test(bg_record->mp_bitmap, bp_bit)) */
/* 				continue; */
/* 			info("bringing %s back to service", */
/* 			     bg_record->bg_block_id); */
/* 			bg_record->job_running = NO_JOB_RUNNING; */
/* 			bg_record->state = BG_BLOCK_FREE; */
/* 			last_bg_update = time(NULL); */
/* 		} */
/* 		list_iterator_destroy(itr); */
/* 		slurm_mutex_unlock(&block_state_mutex); */

/* 		/\* FIX ME: This needs to call the opposite of */
/* 		   slurm_drain_nodes which does not yet exist. */
/* 		*\/ */
/* 		if ((ret = node_already_down(node_name))) { */
/* 			/\* means it was drained *\/ */
/* 			if (ret == 2) { */
/* 				/\* debug("node %s put back into
 * 				service after " *\/ */
/* /\* 				      "being in an error state", *\/ */
/* /\* 				      node_name); *\/ */
/* 			} */
/* 		} */
/* 	} */

clean_up:
	if (ncard_list)
		bridge_free_nodecard_list(ncard_list);
	xfree(node_name);
/* 	if (ionode_bitmap) */
/* 		FREE_NULL_BITMAP(ionode_bitmap); */
	free(bp_id);

	/* If we marked any nodecard down we need to state it here */
	if ((rc == SLURM_SUCCESS) && marked_down)
		rc = SLURM_ERROR;

	return rc;
}