Ejemplo n.º 1
0
/*
 * Lua binding: write a 64-bit integer field into a pbc write-message.
 *
 * Lua arguments:
 *   1 - the pbc_wmessage userdata (obtained through checkuserdata)
 *   2 - field name (string)
 *   3 - the value, either
 *         - an 8-byte string holding the raw value: the first four bytes
 *           are passed as the low word and the last four as the high
 *           word, both read in host byte order, or
 *         - a lightuserdata whose pointer bits carry the integer.
 *
 * Raises a Lua error when the string is not exactly 8 bytes long or when
 * argument 3 has any other type. Returns 0 (no Lua results).
 */
static int
_wmessage_int64(lua_State *L) {
	struct pbc_wmessage * m = (struct pbc_wmessage *)checkuserdata(L,1);
	const char * key = luaL_checkstring(L,2);
	switch (lua_type(L,3)) {
	case LUA_TSTRING : {
		size_t len = 0;
		const char * number = lua_tolstring(L,3,&len);
		uint32_t v[2];
		unsigned char * dst = (unsigned char *)v;
		size_t i;
		if (len !=8 ) {
			return luaL_error(L,"Need an 8 length string for int64");
		}
		/* Copy byte by byte rather than reading the string through a
		 * uint32_t pointer: the string storage is char data with no
		 * alignment guarantee, and the cast breaks strict aliasing. */
		for (i = 0; i < sizeof(v); i++) {
			dst[i] = (unsigned char)number[i];
		}
		pbc_wmessage_integer(m, key, v[0] , v[1]);
		break;
	}
	case LUA_TLIGHTUSERDATA : {
		void * v = lua_touserdata(L,3);
		/* Where pointers are 32 bits wide the high word is always 0. */
		uint64_t v64 = (uintptr_t)v;
		pbc_wmessage_integer(m, key, (uint32_t)v64 , (uint32_t)(v64>>32));
		break;
	}
	default :
		return luaL_error(L, "Need an int64 type");
	}
	return 0;
}
Ejemplo n.º 2
0
static void
test(struct pbc_env *env) {
	// int i;
	// for(i=0; i<COUNT; i++)
	// {
	// 		struct pbc_wmessage* w_msg = pbc_wmessage_new(env, "at");
	// 		struct pbc_rmessage* r_msg = NULL;
	// 		struct pbc_slice sl;
	// 		char buffer[1024];
	// 		sl.buffer = buffer, sl.len = 1024;
	// 		pbc_wmessage_integer(w_msg, "aa", 123, 0);
	// 		pbc_wmessage_integer(w_msg, "bb", 456, 0);
	// 		pbc_wmessage_string(w_msg, "cc", "test string!", 0);
	// 		pbc_wmessage_buffer(w_msg, &sl);

	// 		r_msg = pbc_rmessage_new(env, "at", &sl);

	// 		//===============
	// 		printf("aa = %d\n", pbc_rmessage_integer(r_msg, "aa", 0, NULL));
	// 		printf("bb = %d\n", pbc_rmessage_integer(r_msg, "bb", 0, NULL));
	// 		printf("cc = %s\n", pbc_rmessage_string(r_msg, "cc", 0, NULL));
	// 		//===============

	// 		pbc_rmessage_delete(r_msg);
	// 		pbc_wmessage_delete(w_msg);
	// }

	struct pbc_wmessage *w_msg = pbc_wmessage_new(env, "at");
	pbc_wmessage_integer(w_msg, "aa", 123, 0);
	pbc_wmessage_integer(w_msg, "bb", 456, 0); 
	pbc_wmessage_string(w_msg, "cc", "test string!", 0);

	struct pbc_slice slice;
	char buffer[1024];
	slice.len = 1024;
	slice.buffer = buffer;
	pbc_wmessage_buffer(w_msg, &slice);
	// pbc_wmessage_delete(w_msg);  //!!!!!!!! should not delete here

	struct pbc_rmessage *r_msg = pbc_rmessage_new(env, "at", &slice);
	printf("aa = %d\n", pbc_rmessage_integer(r_msg, "aa", 0, NULL));
	printf("bb = %d\n", pbc_rmessage_integer(r_msg, "bb", 0, NULL));
	printf("cc = %s\n", pbc_rmessage_string(r_msg, "cc", 0, NULL));

	pbc_rmessage_delete(r_msg);
	pbc_wmessage_delete(w_msg);
}
Ejemplo n.º 3
0
/*
 * Lua binding: write a 32-bit integer field into a pbc write-message.
 *
 * Lua arguments:
 *   1 - the pbc_wmessage userdata
 *   2 - field name (string)
 *   3 - a userdata whose pointer bits carry the integer value
 *
 * No argument validation is performed here. Returns 0 (no Lua results).
 */
static int
_wmessage_int32(lua_State *L) {
	struct pbc_wmessage * m = lua_touserdata(L,1);
	const char * key = lua_tostring(L,2);
	void *number = lua_touserdata(L,3);
	/* Convert through uintptr_t first: casting a pointer straight to a
	 * narrower integer type is diagnosed on 64-bit targets and its result
	 * is implementation-defined. The low 32 bits are what gets written. */
	pbc_wmessage_integer(m, key, (uint32_t)(uintptr_t)number , 0);
	return 0;
}
/*
 * Generate a "query container state" request.
 *
 * Builds a GetContainerStatusRequestProto whose container_id sub-message
 * carries the container id, the application attempt id and the application
 * id, then hands the serialised bytes to generate_hadoop_request(), which
 * fills *buffer / *size.
 *
 * Returns 0 on success and -1 on any failure (a diagnostic is logged via
 * opal_output). The intermediate request message is always released.
 */
static int generate_query_container_state_request(
    char** buffer,
    int* size,
    hadoop_rpc_proxy_t* proxy,
    int container_id) {

    int rc;
    int result = -1;
    struct pbc_slice slice;
    struct pbc_wmessage* id_proto;
    struct pbc_wmessage* req = pbc_wmessage_new(env, "GetContainerStatusRequestProto");
    if (!req) {
        opal_output(0, "get GetContainerStatusRequestProto message failed.\n");
        return -1;
    }

    // set container_id
    id_proto = pbc_wmessage_message(req, "container_id");
    if (!id_proto) {
        opal_output(0, "get ContainerIdProto from GetContainerStatusRequestProto failed.\n");
        goto cleanup;
    }
    /* the last argument is the high 32 bits of the value: an integer, not a pointer */
    rc = pbc_wmessage_integer(id_proto, "id", container_id, 0);
    if (0 != rc) {
        opal_output(0, "pack container-id failed.\n");
        goto cleanup;
    }
    rc = set_app_attempt_id(id_proto, "app_attempt_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_attempt_id failed.\n");
        goto cleanup;
    }
    rc = set_app_id(id_proto, "app_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_id failed.\n");
        goto cleanup;
    }

    pbc_wmessage_buffer(req, &slice);

    /* try to create HadoopRpcRequestProto */
    rc = generate_hadoop_request((const char*)(slice.buffer),
        slice.len,
        CONTAINER_MANAGER_PROTOCOL_NAME,
        GET_CONTAINER_STATUS_METHOD_NAME,
        buffer,
        size);
    if (0 != rc) {
        opal_output(0, "create HadoopRpcRequestProto failed.\n");
        goto cleanup;
    }

    result = 0;

cleanup:
    /* single exit path: the request is released exactly once */
    pbc_wmessage_delete(req);
    return result;
}
Ejemplo n.º 5
0
/*
 * Lua binding: write a 64-bit integer field into a pbc write-message.
 *
 * Lua arguments:
 *   1 - the pbc_wmessage userdata
 *   2 - field name (string)
 *   3 - an 8-byte string holding the raw value: the first four bytes are
 *       passed as the low word and the last four as the high word, both
 *       read in host byte order.
 *
 * Raises a Lua error when argument 3 is not a string of exactly 8 bytes.
 * Returns 0 (no Lua results).
 */
static int
_wmessage_int64(lua_State *L) {
	struct pbc_wmessage * m = lua_touserdata(L,1);
	const char * key = lua_tostring(L,2);
	size_t len = 0;
	const char * number = lua_tolstring(L,3,&len);
	uint32_t v[2];
	unsigned char * dst = (unsigned char *)v;
	size_t i;

	/* Without this check a missing or short argument made the code read
	 * 8 bytes from a NULL pointer or past the end of the string. */
	if (!number || len != 8) {
		lua_pushstring(L, "Need an 8 length string for int64");
		return lua_error(L);
	}
	/* Byte-wise copy: the string is char data, so reading it through a
	 * uint32_t pointer would break strict aliasing and may be misaligned. */
	for (i = 0; i < sizeof(v); i++) {
		dst[i] = (unsigned char)number[i];
	}
	pbc_wmessage_integer(m, key, v[0] , v[1]);
	return 0;
}
Ejemplo n.º 6
0
/*
 * Lua binding: write an integer given as a Lua number into a pbc
 * write-message field.
 *
 * Lua arguments: (1) pbc_wmessage userdata, (2) field name, (3) number.
 * The number is converted to int64_t and split into low/high 32-bit words.
 * Returns 0 (no Lua results).
 */
static int
_wmessage_int52(lua_State *L) {
	struct pbc_wmessage * msg = (struct pbc_wmessage *)checkuserdata(L,1);
	const char * field = luaL_checkstring(L,2);
	int64_t value = (int64_t)(luaL_checknumber(L,3));
	uint32_t low = (uint32_t)value;
	uint32_t high = (uint32_t)(value >> 32);

	pbc_wmessage_integer(msg, field, low, high);
	return 0;
}
Ejemplo n.º 7
0
/*
 * Lua binding: write a 32-bit integer field into a pbc write-message.
 *
 * Lua arguments: (1) pbc_wmessage userdata, (2) field name,
 * (3) lightuserdata whose pointer bits carry the value.
 * Raises a Lua error when argument 3 is not a lightuserdata.
 * Returns 0 (no Lua results).
 */
static int
_wmessage_int32(lua_State *L) {
	struct pbc_wmessage * msg = (struct pbc_wmessage *)checkuserdata(L,1);
	const char * field = luaL_checkstring(L,2);

	if (lua_islightuserdata(L,3)) {
		/* only the low 32 bits of the pointer value are written */
		intptr_t raw = (intptr_t)lua_touserdata(L,3);
		pbc_wmessage_integer(msg, field, (uint32_t)raw, 0);
		return 0;
	}
	return luaL_error(L,"Need a lightuserdata for int32");
}
Ejemplo n.º 8
0
Archivo: test.c Proyecto: zhangjinde/z
static void
test(struct pbc_env *env) {
	int i;
	for(i=0; i<COUNT; i++)
	{
			struct pbc_wmessage* w_msg = pbc_wmessage_new(env, "at");
			struct pbc_rmessage* r_msg = NULL;
			struct pbc_slice sl;
			char buffer[1024];
			sl.buffer = buffer, sl.len = 1024;
			pbc_wmessage_integer(w_msg, "aa", 123, 0);
			pbc_wmessage_integer(w_msg, "bb", 456, 0);
			pbc_wmessage_string(w_msg, "cc", "test string!", -1);
			pbc_wmessage_buffer(w_msg, &sl);
					
			r_msg = pbc_rmessage_new(env, "at", &sl);
			pbc_rmessage_delete(r_msg);
			pbc_wmessage_delete(w_msg);
	} 
}
Ejemplo n.º 9
0
/*
 * Lua binding: write a Lua integer (narrowed to int) into a pbc
 * write-message field.
 *
 * Lua arguments: (1) pbc_wmessage userdata, (2) field name, (3) integer.
 * The high word is all ones for negative values and zero otherwise.
 * Returns 0 (no Lua results).
 */
static int
_wmessage_integer(lua_State *L) {
	struct pbc_wmessage * msg = (struct pbc_wmessage *)checkuserdata(L,1);
	const char * field = luaL_checkstring(L,2);
	int value = (int)luaL_checkinteger(L,3);
	/* sign-extend into the upper 32 bits */
	uint32_t high = (value < 0) ? ~0 : 0;

	pbc_wmessage_integer(msg, field, value, high);
	return 0;
}
Ejemplo n.º 10
0
/*
 * Builds a sample "tutorial.Person" write-message: scalar fields, two
 * entries added under "phone", three values added under "test" and one
 * value for "tutorial.Ext.test".
 *
 * Returns the new message; this function does not delete it.
 */
static struct pbc_wmessage *
test_wmessage(struct pbc_env * env)
{
	struct pbc_wmessage * person = pbc_wmessage_new(env, "tutorial.Person");
	struct pbc_wmessage * first_phone;
	struct pbc_wmessage * second_phone;

	/* scalar fields */
	pbc_wmessage_string(person, "name", "Alice", -1);
	pbc_wmessage_integer(person, "id" , 12345, 0);
	pbc_wmessage_string(person, "email", "alice@unkown", -1);
	pbc_wmessage_integer(person, "testAdd", 7777, 0);

	/* first "phone" entry: number only */
	first_phone = pbc_wmessage_message(person , "phone");
	pbc_wmessage_string(first_phone , "number", "87654321" , -1);

	/* second "phone" entry: number and type */
	second_phone = pbc_wmessage_message(person , "phone");
	pbc_wmessage_string(second_phone , "number", "13901234567" , -1);
	pbc_wmessage_string(second_phone , "type" , "MOBILE" , -1);

	/* three values written to the same "test" field */
	pbc_wmessage_integer(person, "test", -123,0);
	pbc_wmessage_integer(person, "test", 12345,0);
	pbc_wmessage_integer(person, "test", 1234567,0);

	pbc_wmessage_integer(person, "tutorial.Ext.test", 54321 , 0);

	return person;
}
Ejemplo n.º 11
0
/*
 * Lua binding: write a non-negative Lua number into an unsigned pbc
 * write-message field.
 *
 * Lua arguments: (1) pbc_wmessage userdata, (2) field name, (3) number.
 * Raises a Lua error for negative numbers; otherwise the number is
 * converted to uint64_t and split into low/high 32-bit words.
 * Returns 0 (no Lua results).
 */
static int
_wmessage_uint52(lua_State *L) {
	struct pbc_wmessage * msg = (struct pbc_wmessage *)checkuserdata(L,1);
	const char * field = luaL_checkstring(L,2);
	lua_Number raw = (luaL_checknumber(L,3));
	uint64_t value;
	uint32_t low;
	uint32_t high;

	if (raw < 0) {
		return luaL_error(L, "negative number : %f passed to unsigned field",raw);
	}

	value = (uint64_t)raw;
	low = (uint32_t)value;
	high = (uint32_t)(value >> 32);
	pbc_wmessage_integer(msg, field, low, high);

	return 0;
}
Ejemplo n.º 12
0
/*
 * Lua binding: write an integer into a pbc write-message field.
 *
 * Lua arguments: (1) pbc_wmessage userdata, (2) field name, (3) value.
 * A Lua integer is used as is; anything else is read with lua_tonumber and
 * truncated to int64_t (floats are accepted for historical compatibility).
 * Returns 0 (no Lua results).
 */
static int
_wmessage_int(lua_State *L) {
	struct pbc_wmessage * msg = (struct pbc_wmessage *)checkuserdata(L,1);
	const char * field = luaL_checkstring(L,2);
	/* both arms are cast so the conditional itself has type int64_t */
	int64_t value = lua_isinteger(L, 3)
		? (int64_t)lua_tointeger(L,3)
		: (int64_t)lua_tonumber(L,3);
	uint32_t low = (uint32_t)value;
	uint32_t high = (uint32_t)(value >> 32);

	pbc_wmessage_integer(msg, field, low, high);

	return 0;
}
Ejemplo n.º 13
0
/*
 * Builds a sample "fgame.C2ServerMsg" write-message containing a "Mov_Req"
 * sub-message with a Uin, a status and zeroed "position" / "velocity"
 * sub-messages.
 *
 * Returns the new root message; this function does not delete it.
 */
static struct pbc_wmessage *
test_ClientSevermessage(struct pbc_env * env)
{
	struct pbc_wmessage * root = pbc_wmessage_new(env, "fgame.C2ServerMsg");
	struct pbc_wmessage * move;
	struct pbc_wmessage * pos;
	struct pbc_wmessage * vel;

	move = pbc_wmessage_message(root, "Mov_Req");
	pbc_wmessage_integer(move, "Uin", 1000005, 0);

	pos = pbc_wmessage_message(move, "position");
	pbc_wmessage_integer(pos, "y", 0, 0);
	pbc_wmessage_integer(pos, "x", 0, 0);

	pbc_wmessage_integer(move, "status", 1, 0);

	vel = pbc_wmessage_message(move, "velocity");
	pbc_wmessage_integer(vel, "y", 0, 0);
	pbc_wmessage_integer(vel, "x", 0, 0);

	return root;
}
Ejemplo n.º 14
0
/*
 * Ask the application master for `np` slots and append one orte_node_t per
 * returned "node_resources" entry to `nodes`.
 *
 * Sends an AllocateRequestProto carrying resource_count = np, waits for an
 * AllocateResponseProto and, for every node resource in it, creates a node
 * with the reported host name and slot count.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERROR on any failure (a diagnostic is
 * logged). Nodes appended before a failure stay in `nodes`. The response
 * message is released on every path once it has been received.
 */
static int orte_ras_yarn_allocate_internal(int np, opal_list_t* nodes) {
    int rc, i, n;
    int result = ORTE_ERROR;
    struct pbc_rmessage* response = NULL;

    // create and send allocate message
    struct pbc_wmessage* msg = pbc_wmessage_new(orte_hdclient_pb_env, "AllocateRequestProto");
    if (!msg) {
        opal_output(0, "%s ras:yarn failed to create AllocateRequestProto", 
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }
    pbc_wmessage_integer(msg, "resource_count", np, 0);
    rc = orte_hdclient_send_message_and_delete(msg, HAMSTER_MSG_ALLOCATE);
    if (rc != 0) {
        opal_output(0, "%s ras:yarn error happened when send allocate msg to AM",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    // read rmessage out
    response = orte_hdclient_recv_message("AllocateResponseProto");
    if (!response) {
        opal_output(0, "%s ras:yarn error happened when recv allocate response from AM",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    n = pbc_rmessage_size(response, "node_resources");
    if (n <= 0) {
        opal_output(0, "%s ras:yarn got n(=%d) <= 0, please check",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        goto cleanup;
    }

    // read node resources
    for (i = 0; i < n; i++) {
        struct pbc_rmessage* node_res = pbc_rmessage_message(response, "node_resources", i);
        if (!node_res) {
            opal_output(0, "%s ras:yarn error when parse returned resource from AM", 
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }
        
        // parse host, slot
        const char* host = pbc_rmessage_string(node_res, "host_name", 0, NULL);
        if (!host) {
            opal_output(0, "%s ras:yarn error when parse host from returned resource from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }
        int slot = pbc_rmessage_integer(node_res, "slot", 0, NULL);

        // make node_t and add it to nodes
        orte_node_t* node = OBJ_NEW(orte_node_t);
        if (!node) {
            opal_output(0, "%s ras:yarn failed to create orte_node_t obj", 
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        /* the host name is duplicated, so the node does not point into the response */
        node->name = strdup(host);
        node->state = ORTE_NODE_STATE_UP;
        node->slots_inuse = 0;
        node->slots_max = 0;
        node->slots = slot;
        opal_list_append(nodes, &node->super);

        OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                     "%s ras:yarn: adding node %s with %d slot",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                     host, slot));
    }

    // All done
    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                         "%s ras:yarn:allocate: success",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    result = ORTE_SUCCESS;

cleanup:
    /* the response used to be leaked on every path, success included */
    pbc_rmessage_delete(response);
    return result;
}
Ejemplo n.º 15
0
/*
 * Builds a sample "test" write-message with five "el" sub-messages. In the
 * k-th element (k = 1..5) every numeric field holds a distinct multiple of
 * k: the integer fields use factors 1..16 and the two double fields use 17
 * and 18.
 *
 * Returns the new message; this function does not delete it.
 */
static struct pbc_wmessage *
test_wmessage(struct pbc_env * env)
{
	struct pbc_wmessage * root = pbc_wmessage_new(env, "test");
	int k;

	/* k plays the role of (i+1) for i = 0..4 */
	for (k = 1; k <= 5; k++)
	{
		struct pbc_wmessage * item = pbc_wmessage_message(root , "el");

		pbc_wmessage_string(item , "str", "abcedf" , -1);

		pbc_wmessage_integer(item, "int8_min", 1*k,0);
		pbc_wmessage_integer(item, "int8_max", 2*k,0);
		pbc_wmessage_integer(item, "uint8_min", 3*k,0);
		pbc_wmessage_integer(item, "uint8_max", 4*k,0);

		pbc_wmessage_integer(item, "int16_min", 5*k,0);
		pbc_wmessage_integer(item, "int16_max", 6*k,0);
		pbc_wmessage_integer(item, "uint16_min", 7*k,0);
		pbc_wmessage_integer(item, "uint16_max", 8*k,0);

		pbc_wmessage_integer(item, "int32_min", 9*k,0);
		pbc_wmessage_integer(item, "int32_max", 10*k,0);
		pbc_wmessage_integer(item, "uint32_min", 11*k,0);
		pbc_wmessage_integer(item, "uint32_max", 12*k,0);

		pbc_wmessage_integer(item, "int64_min", 13*k,0);
		pbc_wmessage_integer(item, "int64_max", 14*k,0);
		pbc_wmessage_integer(item, "uint64_min", 15*k,0);
		pbc_wmessage_integer(item, "uint64_max", 16*k,0);

		pbc_wmessage_real(item, "double_min", 17*k);
		pbc_wmessage_real(item, "double_max", 18*k);
	}

	return root;
}
/**
 * generate launch container PB request
     message ContainerLaunchContextProto {
      optional ContainerIdProto container_id = 1;
      optional string user = 2;
      optional ResourceProto resource = 3;
      repeated StringLocalResourceMapProto localResources = 4;
      optional bytes container_tokens = 5;
      repeated StringBytesMapProto service_data = 6;
      repeated StringStringMapProto environment = 7;
      repeated string command = 8;
      repeated ApplicationACLMapProto application_ACLs = 9;
    }

    message StartContainerRequestProto {
      optional ContainerLaunchContextProto container_launch_context = 1;
    }

 * Fills a StartContainerRequestProto from `proxy`, `container_id` and
 * `launch_context` (user, memory, local resources, environment, command),
 * then wraps the serialised bytes with generate_hadoop_request(), which
 * fills *buffer / *size.
 *
 * Returns 0 on success and -1 on any failure (a diagnostic is logged via
 * opal_output). The intermediate request message is released on every
 * path.
 */
static int generate_launch_container_request(
        char** buffer,
        int* size,
        hadoop_rpc_proxy_t* proxy,
        int container_id,
        containers_launch_context_t* launch_context) {
    int rc;
    int result = -1;
    int offset = 0;
    const char* user = NULL;
    char* command = NULL;
    struct pbc_slice slice;
    struct pbc_wmessage* ctx = NULL;
    struct pbc_wmessage* id_proto = NULL;
    struct pbc_wmessage* res_msg = NULL;
    struct pbc_wmessage* req = pbc_wmessage_new(env, "StartContainerRequestProto");
    if (!req) {
        opal_output(0, "get StartContainerRequestProto message failed.\n");
        return -1;
    }

    ctx = pbc_wmessage_message(req, "container_launch_context");
    if (!ctx) {
        opal_output(0, "get container_launch_context from StartContainerRequestProto failed.\n");
        goto cleanup;
    }

    // set container_id
    id_proto = pbc_wmessage_message(ctx, "container_id");
    if (!id_proto) {
        opal_output(0, "get ContainerIdProto from ContainerLaunchContextProto failed.\n");
        goto cleanup;
    }
    /* the last argument is the high 32 bits of the value: an integer, not a pointer */
    rc = pbc_wmessage_integer(id_proto, "id", container_id, 0);
    if (0 != rc) {
        opal_output(0, "pack container-id failed.\n");
        goto cleanup;
    }
    rc = set_app_attempt_id(id_proto, "app_attempt_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_attempt_id failed.\n");
        goto cleanup;
    }
    rc = set_app_id(id_proto, "app_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_id failed.\n");
        goto cleanup;
    }

    // pack user
    /* getlogin() returns NULL when the login name cannot be determined */
    user = getlogin();
    if (!user) {
        opal_output(0, "get login user name failed.\n");
        goto cleanup;
    }
    rc = pbc_wmessage_string(ctx, "user", user, 0);
    if (rc != 0) {
        opal_output(0, "pack user name failed.\n");
        goto cleanup;   /* this path used to leak req */
    }

    // pack resource
    res_msg = pbc_wmessage_message(ctx, "resource");
    if (!res_msg) {
        opal_output(0, "get resource_proto from context failed.\n");
        goto cleanup;   /* this path used to leak req */
    }
    rc = pbc_wmessage_integer(res_msg, "memory", launch_context->resource.memory_per_slot, 0);
    if (rc != 0) {
        opal_output(0, "pack memory to resource failed.\n");
        goto cleanup;
    }
    // TODO, in 2.0.3, need pack cpu

    // pack localResources
    rc = set_local_resources(ctx, "localResources");
    if (rc != 0) {
        opal_output(0, "pack local resources failed.\n");
        goto cleanup;
    }

    // pack env
    if (launch_context->env) {
        while (launch_context->env[offset]) {
            char* key;
            char* val;
            struct pbc_wmessage* env_msg = pbc_wmessage_message(ctx, "environment");
            if (!env_msg) {
                opal_output(0, "get env message from context failed.\n");
                goto cleanup;
            }

            key = get_env_key(launch_context->env[offset]);
            val = get_env_val(launch_context->env[offset]);
            if ((!key) || (!val)) {
                /* free(NULL) is a no-op, so no guards are needed */
                free(key);
                free(val);
                opal_output(0, "get env key or value failed, env=%s.\n", launch_context->env[offset]);
                goto cleanup;
            }

            // pack key
            rc = pbc_wmessage_string(env_msg, "key", key, 0);
            free(key);
            if (rc != 0) {
                free(val);
                opal_output(0, "set key to environment failed.\n");
                goto cleanup;
            }
            // pack val
            rc = pbc_wmessage_string(env_msg, "value", val, 0);
            free(val);
            if (rc != 0) {
                opal_output(0, "set value to environment failed.\n");
                goto cleanup;
            }
            offset++;
        }
    }

    // pack $PATH, $LD_LIBRARY_PATH, $DYLD_LIBRARY_PATH, $CLASSPATH to env
    if (0 != (rc = pack_env_to_launch_ctx("PATH", ctx))) {
        goto cleanup;
    }
    if (0 != (rc = pack_env_to_launch_ctx("LD_LIBRARY_PATH", ctx))) {
        goto cleanup;
    }
    if (0 != (rc = pack_env_to_launch_ctx("DYLD_LIBRARY_PATH", ctx))) {
        goto cleanup;
    }
    if (0 != (rc = pack_env_to_launch_ctx("CLASSPATH", ctx))) {
        goto cleanup;
    }

    // pack command
    command = concat_argv_to_cmd(launch_context->argv);
    if (!command) {
        opal_output(0, "concat argv to command to command failed. argv[0]:%s.\n", launch_context->argv[0]);
        goto cleanup;
    }
    rc = pbc_wmessage_string(ctx, "command", command, 0);
    free(command);
    if (rc != 0) {
        opal_output(0, "pack command to context failed.\n");
        goto cleanup;
    }

    pbc_wmessage_buffer(req, &slice);

    /* try to create HadoopRpcRequestProto */
    rc = generate_hadoop_request((const char*)(slice.buffer),
        slice.len,
        CONTAINER_MANAGER_PROTOCOL_NAME,
        START_CONTAINER_METHOD_NAME,
        buffer,
        size);
    if (0 != rc) {
        opal_output(0, "create HadoopRpcRequestProto failed.\n");
        goto cleanup;
    }

    result = 0;

cleanup:
    /* single exit path: the request is released exactly once */
    pbc_wmessage_delete(req);
    return result;
}
Ejemplo n.º 17
0
/*
 * Ask the application master to launch the processes of `jdata`.
 *
 * For every proc in jdata->procs (starting at index 1 when launch_daemon is
 * true, because the HNP itself is not launched) a launch context with
 * environment, joined argv, host name and process name is packed into a
 * LaunchRequestProto. The request is sent, the LaunchResponseProto is read
 * and each proc reported as started is marked ORTE_PROC_STATE_RUNNING.
 *
 * On success *launched_proc_num receives the number of procs reported as
 * started and ORTE_SUCCESS is returned. On any failure ORTE_ERROR is
 * returned and *launched_proc_num is left untouched.
 */
static int common_launch_process(orte_job_t *jdata, bool launch_daemon, int *launched_proc_num)
{
	int i, rc;
	int n;
	orte_proc_t* proc = NULL;
	char **argv;
	int argc;
	char **env;
	char **tmp_env;
	/* Declared up here so that every `goto cleanup` sees an initialised
	 * pointer; it used to be declared after the first gotos and was
	 * freed while still indeterminate. */
	char *join_argv;
	bool error_flag = false;
	int launched_num = 0;
	struct pbc_wmessage *launch_contexts_msg;
	struct pbc_wmessage *proccess_name_msg;
	struct pbc_rmessage* response_msg = NULL;

	/* 1. create launch message */
	/*
	 message LaunchRequestProto {
	 repeated LaunchContextProto launch_contexts = 1;
	 }

	 message LaunchContextProto {
	 repeated string envars = 1;
	 optional string args = 2;
	 optional string host_name = 3;
	 optional ProcessNameProto name = 4;
	 }

	 message ProcessNameProto {
	 optional int32 jobid = 1;
	 optional int32 vpid = 2;
	 }
	 */
	struct pbc_wmessage* request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "LaunchRequestProto");
	if (!request_msg) {
		opal_output(0, "%s plm:yarn:common_process_launch: failed to create LaunchRequestProto",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
		return ORTE_ERROR;
	}

	/* when launch_daemon, start from 1 because we don't need launch HNP process */
	i = launch_daemon ? 1 : 0;

	for (; i < jdata->num_procs; i++) {
		argv = NULL;
		argc = 0;
		env = NULL;
		join_argv = NULL;
		/* setup env/argv  */
		proc = opal_pointer_array_get_item(jdata->procs, i);
		if (!proc) {
			opal_output(0, "%s plm:yarn:common_launch_process: proc[%d] is NULL",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
			ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
			/* a NULL proc used to be logged and then dereferenced below */
			error_flag = true;
			goto cleanup;
		}

		if (launch_daemon) {
			rc = setup_daemon_proc_env_and_argv(proc, &argv, &argc, &env);
		} else {
			orte_app_context_t* app = (orte_app_context_t*) opal_pointer_array_get_item(jdata->apps, proc->app_idx);
			rc = setup_proc_env_and_argv(jdata, app, proc, &argv, &env);
		}
		if (0 != rc) {
			opal_output(0,
					"%s plm:yarn:common_launch_process: failed to setup env/argv of proc[%d]",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
			ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
			error_flag = true;
			goto cleanup;
		}

		 /* print launch commandline and env when this env is specified */
		if (getenv("HAMSTER_VERBOSE")) {

			char* verbose_argv = opal_argv_join(argv, ' ');

			OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:yarn:common_launch_process: launch argv=%s",
							ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), verbose_argv));
			free(verbose_argv);
		}

		OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
									"%s plm:yarn:common_launch_process: after setup env and argv for proc=%d.",
									ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i));

		/* now start packing request_msg */
		launch_contexts_msg = pbc_wmessage_message(request_msg, "launch_contexts");
		if (!launch_contexts_msg) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: create launch_contexts_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

		/* env may legitimately still be NULL if the setup helper produced none */
		for (tmp_env = env; tmp_env && *tmp_env; tmp_env++) {
			pbc_wmessage_string(launch_contexts_msg, "envars", *tmp_env, strlen(*tmp_env));
		}

		join_argv = opal_argv_join(argv, ' ');
		if (!join_argv) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: join argv of proc[%d] failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
			error_flag = true;
			goto cleanup;
		}
		pbc_wmessage_string(launch_contexts_msg, "args", join_argv, strlen(join_argv));

		pbc_wmessage_string(launch_contexts_msg, "host_name", proc->node->name, strlen(proc->node->name));

		proccess_name_msg = pbc_wmessage_message(launch_contexts_msg, "name");
		if (!proccess_name_msg) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: create proccess_name_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

		rc = pbc_wmessage_integer(proccess_name_msg, "jobid", ORTE_LOCAL_JOBID(proc->name.jobid), 0);
		if (0 != rc) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: pack jobid in proccess_name_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

		rc = pbc_wmessage_integer(proccess_name_msg, "vpid", proc->name.vpid,
				0);
		if (0 != rc) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: pack vpid in proccess_name_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

cleanup:
		/* free argv and env for this proc */
		if (argv) {
			opal_argv_free(argv);
		}
		if (env) {
			opal_argv_free(env);
		}
		free(join_argv);
		if (error_flag) {
			pbc_wmessage_delete(request_msg);
			ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
			return ORTE_ERROR;
		}
	}

	/* 2. send launch deamon procs request msg */
	rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_LAUNCH);
	if (rc != 0) {
		opal_output(0,
				"%s plm:yarn:common_process_launch: error happened when send launch proc request to AM",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
		/* NOTE(review): kept from the original. If
		 * orte_hdclient_send_message_and_delete() already releases the
		 * message on failure this is a double free - confirm against
		 * its implementation. */
		pbc_wmessage_delete(request_msg);
		return ORTE_ERROR;
	}

	/* 3. recv response and parse the msg*/
	/*
	 message LaunchResponseProto {
	 repeated LaunchResultProto results = 1;
	 }

	 message LaunchResultProto {
	 optional ProcessNameProto name = 1;
	 optional bool success = 2;
	 }

	 message ProcessNameProto {
	 optional int32 jobid = 1;
	 optional int32 vpid = 2;
	 }
	 */
	response_msg = orte_hdclient_recv_message("LaunchResponseProto");
	if (!response_msg) {
		opal_output(0,
				"%s plm:yarn:common_process_launch: error happened when recv launch response msg from AM",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
		goto launch_failed;
	}

	n = pbc_rmessage_size(response_msg, "results");
	if (n < 0) {
		opal_output(0,
				"%s plm:yarn:common_process_launch: got n(=%d) < 0, please check",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
		goto launch_failed;
	}

	for (i = 0; i < n; i++) {
		struct pbc_rmessage* results_msg = pbc_rmessage_message(response_msg, "results", i);
		if (!results_msg) {
			opal_output(0,
					"%s plm:yarn:launch_daemons: error when parse returned launch results from AM",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			goto launch_failed;
		}

		struct pbc_rmessage* proc_name_msg = pbc_rmessage_message(results_msg, "name", 0);
		if (!proc_name_msg) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: error when parse returned proc_name_msg from AM",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			goto launch_failed;
		}

		orte_jobid_t local_jobid = pbc_rmessage_integer(proc_name_msg, "jobid", 0, NULL);
		orte_vpid_t vpid = pbc_rmessage_integer(proc_name_msg, "vpid", 0, NULL);

		bool success = pbc_rmessage_integer(results_msg, "success", 0, NULL);

		/* the vpid comes from the peer, so the lookup may fail */
		orte_proc_t* launched = (orte_proc_t*) opal_pointer_array_get_item(jdata->procs, vpid);
		if (!launched) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: no proc found for jobid = %u, vpid = %u",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_jobid, vpid);
			goto launch_failed;
		}
		if (success) {
			launched->state = ORTE_PROC_STATE_RUNNING;
			launched_num++;
		} else {
			opal_output(0,
					"%s plm:yarn:common_process_launch: launch proc failed when jobid = %u, vpid = %u",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_jobid, vpid);
			launched->state = ORTE_PROC_STATE_FAILED_TO_START;
			jdata->state = ORTE_JOB_STATE_FAILED_TO_START;
			goto launch_failed;
		}
	}

	/* the response used to be leaked on the success path */
	pbc_rmessage_delete(response_msg);

	/* to return back */
	*launched_proc_num = launched_num;
	return ORTE_SUCCESS;

launch_failed:
	    if (response_msg) {
	        pbc_rmessage_delete(response_msg);
	    }
	    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
	    return ORTE_ERROR;
}
Ejemplo n.º 18
0
/*
 * Tell the application master that this job is finished.
 *
 * Does nothing when appmaster_finished is already set. When `succeed` is
 * true, the job data (from index 1; index 0 holds the daemons) is first
 * re-checked: any proc whose state is below ORTE_PROC_STATE_TERMINATED turns
 * the report into a failure. A FinishRequestProto with the resulting flag
 * and a fixed diagnostics string is then sent, and appmaster_finished is set
 * once a FinishResponseProto has been received.
 *
 * Errors are logged via opal_output; the function returns nothing.
 */
static void finish_app_master(bool succeed)
{
    int rc;
    int i, j;
    const char *diag_msg = "finish_app_master";
    /* Both messages start out NULL so that the shared cleanup code never
     * looks at an indeterminate pointer: response_msg used to be declared
     * after the first `goto cleanup`. */
    struct pbc_wmessage* request_msg = NULL;
    struct pbc_rmessage* response_msg = NULL;

    if (appmaster_finished) {
    	return;
    }

    // we need double check if any proc failed
    if (succeed) {
    	/* start with 1 because we don't want to check daemon's proc */
        for (i = 1; i < orte_job_data->size; i++) {
            orte_job_t* job = opal_pointer_array_get_item(orte_job_data, i);
            if (!job) {
                continue;
            }
            for (j = 0; j < job->procs->size; j++) {
                orte_proc_t* proc = opal_pointer_array_get_item(job->procs, j);
                if (!proc) {
                    continue;
                }
                // if any process is non-terminated, we will consider it's error
                if (proc->state < ORTE_PROC_STATE_TERMINATED) {
                    succeed = false;
                    break; /* break the inner 'for' loop */
                }
            }
            if (!succeed) {
                break;  /* break the outer 'for' loop */
            }
        }
    }

    /* 1. create launch message */
    /*
    message FinishRequestProto {
        optional bool succeed = 1;
        optional string diagnostics = 2;
    }
    */
    request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "FinishRequestProto");

    if (!request_msg) {
        opal_output(0, "%s plm:yarn:finish_app_master: failed to create request_msg",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    rc = pbc_wmessage_integer(request_msg, "succeed", succeed, 0);
    if (0 != rc) {
        opal_output(0,
                "%s plm:yarn:finish_app_master: pack succeed in request_msg failed",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    rc = pbc_wmessage_string(request_msg, "diagnostics", diag_msg, strlen(diag_msg));
    if (0 != rc) {
        opal_output(0,
                "%s plm:yarn:finish_app_master: pack diagnostics in request_msg failed",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    /* 2. send launch procs request msg */
    rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_FINISH);
    /* The request has been handed over to the send helper; as before, it is
     * not released here afterwards, whether or not the send succeeded. */
    request_msg = NULL;
    if (rc != 0) {
        opal_output(0,
                "%s plm:yarn:finish_app_master: error happened when send request_msg to AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    /* 3. recv response and parse the msg*/
    /*
     message FinishResponseProto {
     }
     */
    response_msg = orte_hdclient_recv_message("FinishResponseProto");
    if (!response_msg) {
        opal_output(0,
                "%s plm:yarn:finish_app_master: error happened when recv response_msg from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    appmaster_finished = true;

cleanup:
    /* a request that was built but never sent used to be leaked */
    if (request_msg) {
        pbc_wmessage_delete(request_msg);
    }
    if (response_msg) {
        pbc_rmessage_delete(response_msg);
    }
}