/*
 * Build a sample "tutorial.Person" write-message for the tests.
 *
 * Fills the scalar fields, requests the "phone" sub-message twice (the
 * second one also gets a "type"), writes "test" three times and finally
 * writes the "tutorial.Ext.test" field.
 *
 * Returns the message created by pbc_wmessage_new(); the caller receives
 * it as-is (no return codes are checked here).
 */
static struct pbc_wmessage *
test_wmessage(struct pbc_env * env) {
	static const int test_values[] = { -123, 12345, 1234567 };
	struct pbc_wmessage * person = pbc_wmessage_new(env, "tutorial.Person");
	struct pbc_wmessage * entry;
	int k;

	/* top-level scalar fields */
	pbc_wmessage_string(person, "name", "Alice", -1);
	pbc_wmessage_integer(person, "id", 12345, 0);
	pbc_wmessage_string(person, "email", "alice@unkown", -1);
	pbc_wmessage_integer(person, "testAdd", 7777, 0);

	/* first phone entry: number only */
	entry = pbc_wmessage_message(person, "phone");
	pbc_wmessage_string(entry, "number", "87654321", -1);

	/* second phone entry: number and type */
	entry = pbc_wmessage_message(person, "phone");
	pbc_wmessage_string(entry, "number", "13901234567", -1);
	pbc_wmessage_string(entry, "type", "MOBILE", -1);

	/* "test" is written once per table entry, in order */
	for (k = 0; k < 3; k++) {
		pbc_wmessage_integer(person, "test", test_values[k], 0);
	}

	pbc_wmessage_integer(person, "tutorial.Ext.test", 54321, 0);

	return person;
}
/*
 * Copy one variable of the current process environment into a new
 * "environment" sub-message of the launch context.
 *
 * env_key: name of the environment variable to look up with getenv()
 * ctx:     launch-context message that receives the "environment" entry
 *
 * Returns 0 when the pair was packed, and also 0 (after logging) when the
 * variable is not set; returns -1 when any pbc call fails.
 */
static int pack_env_to_launch_ctx(const char* env_key, struct pbc_wmessage* ctx)
{
    struct pbc_wmessage* entry;
    const char* value = getenv(env_key);

    /* an unset variable is not an error: log it and skip */
    if (NULL == value) {
        opal_output(0, "no %s set in environment.\n", env_key);
        return 0;
    }

    entry = pbc_wmessage_message(ctx, "environment");
    if (NULL == entry) {
        opal_output(0, "get env message from context failed.\n");
        return -1;
    }

    /* pack key */
    if (0 != pbc_wmessage_string(entry, "key", env_key, 0)) {
        opal_output(0, "set key to environment failed.\n");
        return -1;
    }

    /* pack val */
    if (0 != pbc_wmessage_string(entry, "value", value, 0)) {
        opal_output(0, "set value to environment failed.\n");
        return -1;
    }

    return 0;
}
/*
 * Generate the query-container-state request.
 *
 * Builds a GetContainerStatusRequestProto whose "container_id" sub-message
 * carries the container id plus the application-attempt id and application
 * id taken from the proxy, then hands the serialized bytes to
 * generate_hadoop_request().
 *
 * buffer, size:  passed through to generate_hadoop_request(), which is
 *                expected to fill them (TODO confirm ownership with that
 *                helper)
 * proxy:         source of the app-attempt id and app id
 * container_id:  value written to the "id" field
 *
 * Returns 0 on success, -1 on any failure. The temporary request message is
 * released on every path.
 */
static int generate_query_container_state_request(
        char** buffer, int* size,
        hadoop_rpc_proxy_t* proxy,
        int container_id)
{
    int rc;
    int ret = -1;
    struct pbc_wmessage* id_proto;
    struct pbc_slice slice;
    struct pbc_wmessage* req = pbc_wmessage_new(env, "GetContainerStatusRequestProto");

    if (!req) {
        opal_output(0, "get GetContainerStatusRequestProto message failed.\n");
        return -1;
    }

    // set container_id
    id_proto = pbc_wmessage_message(req, "container_id");
    if (!id_proto) {
        opal_output(0, "get ContainerIdProto from GetContainerStatusRequestProto failed.\n");
        goto cleanup;
    }

    /* the last argument is an integer, not a pointer: pass 0 like the other
     * call sites in this file instead of NULL */
    rc = pbc_wmessage_integer(id_proto, "id", container_id, 0);
    if (0 != rc) {
        opal_output(0, "pack container-id failed.\n");
        goto cleanup;
    }

    rc = set_app_attempt_id(id_proto, "app_attempt_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_attempt_id failed.\n");
        goto cleanup;
    }

    rc = set_app_id(id_proto, "app_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_id failed.\n");
        goto cleanup;
    }

    /* slice points into req, so req must stay alive until the request has
     * been generated */
    pbc_wmessage_buffer(req, &slice);

    /* try to create HadoopRpcRequestProto */
    rc = generate_hadoop_request((const char*)(slice.buffer),
            slice.len, CONTAINER_MANAGER_PROTOCOL_NAME,
            GET_CONTAINER_STATUS_METHOD_NAME, buffer, size);
    if (0 != rc) {
        opal_output(0, "create HadoopRpcRequestProto failed.\n");
        goto cleanup;
    }

    ret = 0;

cleanup:
    pbc_wmessage_delete(req);
    return ret;
}
/*
 * Lua binding: wmessage_message(msg, key)
 *
 * Argument 1 is validated with checkuserdata() and argument 2 with
 * luaL_checkstring(). The result of pbc_wmessage_message() is pushed as a
 * light userdata without further checks, so a NULL result is pushed as-is.
 *
 * Returns 1 (one value left on the Lua stack).
 */
static int
_wmessage_message(lua_State *L) {
	struct pbc_wmessage * parent = (struct pbc_wmessage *)checkuserdata(L, 1);
	const char * field = luaL_checkstring(L, 2);

	lua_pushlightuserdata(L, pbc_wmessage_message(parent, field));
	return 1;
}
/*
 * Build a sample "fgame.C2ServerMsg" write-message for the tests.
 *
 * The message gets a "Mov_Req" sub-message holding "Uin", a zeroed
 * "position" (y, x), a "status" of 1 and a zeroed "velocity" (y, x).
 * Fields are written in exactly that order.
 *
 * Returns the top-level message; no return codes are checked.
 */
static struct pbc_wmessage *
test_ClientSevermessage(struct pbc_env * env) {
	struct pbc_wmessage * root = pbc_wmessage_new(env, "fgame.C2ServerMsg");
	struct pbc_wmessage * move = pbc_wmessage_message(root, "Mov_Req");
	struct pbc_wmessage * vec;

	pbc_wmessage_integer(move, "Uin", 1000005, 0);

	/* position: y first, then x */
	vec = pbc_wmessage_message(move, "position");
	pbc_wmessage_integer(vec, "y", 0, 0);
	pbc_wmessage_integer(vec, "x", 0, 0);

	pbc_wmessage_integer(move, "status", 1, 0);

	/* velocity: y first, then x */
	vec = pbc_wmessage_message(move, "velocity");
	pbc_wmessage_integer(vec, "y", 0, 0);
	pbc_wmessage_integer(vec, "x", 0, 0);

	return root;
}
/*
 * Build a sample "test" write-message containing five "el" sub-messages.
 *
 * Element number e (1..5) gets the string "abcedf" in "str", then sixteen
 * integer fields whose values are 1*e, 2*e, ... 16*e in the order listed in
 * the table below, then "double_min" = 17*e and "double_max" = 18*e.
 *
 * Returns the top-level message; no return codes are checked.
 */
static struct pbc_wmessage *
test_wmessage(struct pbc_env * env) {
	/* integer fields in write order; entry k is written as (k+1) * e */
	static const char * const int_fields[] = {
		"int8_min",  "int8_max",  "uint8_min",  "uint8_max",
		"int16_min", "int16_max", "uint16_min", "uint16_max",
		"int32_min", "int32_max", "uint32_min", "uint32_max",
		"int64_min", "int64_max", "uint64_min", "uint64_max",
	};
	const int field_count = (int)(sizeof(int_fields) / sizeof(int_fields[0]));
	struct pbc_wmessage * root = pbc_wmessage_new(env, "test");
	int e;

	for (e = 1; e <= 5; e++) {
		struct pbc_wmessage * item = pbc_wmessage_message(root, "el");
		int k;

		pbc_wmessage_string(item, "str", "abcedf", -1);

		for (k = 0; k < field_count; k++) {
			pbc_wmessage_integer(item, int_fields[k], (k + 1) * e, 0);
		}

		pbc_wmessage_real(item, "double_min", 17 * e);
		pbc_wmessage_real(item, "double_max", 18 * e);
	}

	return root;
}
/**
 * Generate the launch-container PB request.
 *
 *   message ContainerLaunchContextProto {
 *     optional ContainerIdProto container_id = 1;
 *     optional string user = 2;
 *     optional ResourceProto resource = 3;
 *     repeated StringLocalResourceMapProto localResources = 4;
 *     optional bytes container_tokens = 5;
 *     repeated StringBytesMapProto service_data = 6;
 *     repeated StringStringMapProto environment = 7;
 *     repeated string command = 8;
 *     repeated ApplicationACLMapProto application_ACLs = 9;
 *   }
 *
 *   message StartContainerRequestProto {
 *     optional ContainerLaunchContextProto container_launch_context = 1;
 *   }
 *
 * The function fills container id, user, memory resource, local resources,
 * the caller-supplied environment, a fixed set of variables inherited from
 * this process, and the command line, then wraps the serialized message
 * with generate_hadoop_request().
 *
 * @param buffer          passed through to generate_hadoop_request()
 * @param size            passed through to generate_hadoop_request()
 * @param proxy           source of the app-attempt id and app id
 * @param container_id    value written to container_id.id
 * @param launch_context  supplies resource.memory_per_slot, the optional
 *                        NULL-terminated env array and argv
 * @return 0 on success, -1 on failure. The temporary request message is
 *         released on every path.
 */
static int generate_launch_container_request(
        char** buffer, int* size,
        hadoop_rpc_proxy_t* proxy,
        int container_id,
        containers_launch_context_t* launch_context)
{
    /* variables of this process that are forwarded to the container */
    static const char* const inherited_env[] = {
        "PATH", "LD_LIBRARY_PATH", "DYLD_LIBRARY_PATH", "CLASSPATH"
    };

    int rc;
    int ret = -1;
    int offset = 0;
    size_t k;
    const char* user;
    char* command;
    struct pbc_wmessage* ctx;
    struct pbc_wmessage* id_proto;
    struct pbc_wmessage* res_msg;
    struct pbc_slice slice;
    struct pbc_wmessage* req = pbc_wmessage_new(env, "StartContainerRequestProto");

    if (!req) {
        opal_output(0, "get StartContainerRequestProto message failed.\n");
        return -1;
    }

    ctx = pbc_wmessage_message(req, "container_launch_context");
    if (!ctx) {
        opal_output(0, "get container_launch_context from StartContainerRequestProto failed.\n");
        goto cleanup;
    }

    // set container_id
    id_proto = pbc_wmessage_message(ctx, "container_id");
    if (!id_proto) {
        opal_output(0, "get ContainerIdProto from ContainerLaunchContextProto failed.\n");
        goto cleanup;
    }

    /* the last argument is an integer, not a pointer: pass 0, not NULL */
    rc = pbc_wmessage_integer(id_proto, "id", container_id, 0);
    if (0 != rc) {
        opal_output(0, "pack container-id failed.\n");
        goto cleanup;
    }

    rc = set_app_attempt_id(id_proto, "app_attempt_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_attempt_id failed.\n");
        goto cleanup;
    }

    rc = set_app_id(id_proto, "app_id", proxy);
    if (0 != rc) {
        opal_output(0, "pack app_id failed.\n");
        goto cleanup;
    }

    // pack user
    /* getlogin() may return NULL (e.g. no controlling terminal); do not
     * hand a NULL string to pbc */
    user = getlogin();
    if (!user) {
        opal_output(0, "get login user name failed.\n");
        goto cleanup;
    }
    rc = pbc_wmessage_string(ctx, "user", user, 0);
    if (rc != 0) {
        opal_output(0, "pack user name failed.\n");
        goto cleanup;
    }

    // pack resource
    res_msg = pbc_wmessage_message(ctx, "resource");
    if (!res_msg) {
        opal_output(0, "get resource_proto from context failed.\n");
        goto cleanup;
    }
    rc = pbc_wmessage_integer(res_msg, "memory", launch_context->resource.memory_per_slot, 0);
    if (rc != 0) {
        opal_output(0, "pack memory to resource failed.\n");
        goto cleanup;
    }

    // TODO, in 2.0.3, need pack cpu

    // pack localResources
    rc = set_local_resources(ctx, "localResources");
    if (rc != 0) {
        opal_output(0, "pack local resources failed.\n");
        goto cleanup;
    }

    // pack env
    if (launch_context->env) {
        while (launch_context->env[offset]) {
            char* key;
            char* val;
            struct pbc_wmessage* env_msg = pbc_wmessage_message(ctx, "environment");

            if (!env_msg) {
                opal_output(0, "get env message from context failed.\n");
                goto cleanup;
            }

            /* key and val are released with free() below */
            key = get_env_key(launch_context->env[offset]);
            val = get_env_val(launch_context->env[offset]);
            if ((!key) || (!val)) {
                free(key);
                free(val);
                opal_output(0, "get env key or value failed, env=%s.\n", launch_context->env[offset]);
                goto cleanup;
            }

            // pack key
            rc = pbc_wmessage_string(env_msg, "key", key, 0);
            free(key);
            if (rc != 0) {
                free(val);
                opal_output(0, "set key to environment failed.\n");
                goto cleanup;
            }

            // pack val
            rc = pbc_wmessage_string(env_msg, "value", val, 0);
            free(val);
            if (rc != 0) {
                opal_output(0, "set value to environment failed.\n");
                goto cleanup;
            }

            offset++;
        }
    }

    // pack $PATH, $LD_LIBRARY_PATH, $DYLD_LIBRARY_PATH, $CLASSPATH to env
    for (k = 0; k < sizeof(inherited_env) / sizeof(inherited_env[0]); k++) {
        if (0 != pack_env_to_launch_ctx(inherited_env[k], ctx)) {
            goto cleanup;
        }
    }

    // pack command
    command = concat_argv_to_cmd(launch_context->argv);
    if (!command) {
        opal_output(0, "concat argv to command to command failed. argv[0]:%s.\n", launch_context->argv[0]);
        goto cleanup;
    }
    rc = pbc_wmessage_string(ctx, "command", command, 0);
    free(command);
    if (rc != 0) {
        opal_output(0, "pack command to context failed.\n");
        goto cleanup;
    }

    /* slice points into req, so req must stay alive until the request has
     * been generated */
    pbc_wmessage_buffer(req, &slice);

    /* try to create HadoopRpcRequestProto */
    rc = generate_hadoop_request((const char*)(slice.buffer),
            slice.len, CONTAINER_MANAGER_PROTOCOL_NAME,
            START_CONTAINER_METHOD_NAME, buffer, size);
    if (0 != rc) {
        opal_output(0, "create HadoopRpcRequestProto failed.\n");
        goto cleanup;
    }

    ret = 0;

cleanup:
    pbc_wmessage_delete(req);
    return ret;
}
/**
 * Launch the processes of a job through the AM.
 *
 * Steps:
 *   1. build a LaunchRequestProto with one launch context per proc,
 *   2. send it as HAMSTER_MSG_LAUNCH,
 *   3. receive the LaunchResponseProto and update each reported proc.
 *
 * @param jdata              job whose procs are launched
 * @param launch_daemon      true: set up daemon procs and start at index 1
 *                           (index 0, the HNP, is not launched);
 *                           false: set up application procs from index 0
 * @param launched_proc_num  out: number of results reported as successful;
 *                           written only on success
 * @return ORTE_SUCCESS when every reported result succeeded, otherwise
 *         ORTE_ERROR (the exit status is updated on most error paths)
 */
static int common_launch_process(orte_job_t *jdata, bool launch_daemon, int *launched_proc_num)
{
    int i, rc;
    int n = 0;
    orte_proc_t* proc = NULL;
    char **argv;
    int argc;
    char **env;
    /* declared here (not mid-loop) so that every "goto cleanup" sees an
     * initialized pointer */
    char *join_argv;
    bool error_flag = false;
    int launched_num = 0;
    struct pbc_rmessage* response_msg = NULL;

    /* 1. create launch message */
    /*
    message LaunchRequestProto {
        repeated LaunchContextProto launch_contexts = 1;
    }

    message LaunchContextProto {
        repeated string envars = 1;
        optional string args = 2;
        optional string host_name = 3;
        optional ProcessNameProto name = 4;
    }

    message ProcessNameProto {
        optional int32 jobid = 1;
        optional int32 vpid = 2;
    }
    */
    struct pbc_wmessage* request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "LaunchRequestProto");
    if (!request_msg) {
        opal_output(0, "%s plm:yarn:common_process_launch: failed to create LaunchRequestProto",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    /* when launch_daemon, start from 1 because we don't need launch HNP process */
    i = launch_daemon ? 1 : 0;

    for (; i < jdata->num_procs; i++) {
        argv = NULL;
        argc = 0;
        env = NULL;
        join_argv = NULL;

        /* setup env/argv */
        proc = opal_pointer_array_get_item(jdata->procs, i);
        if (!proc) {
            opal_output(0, "%s plm:yarn:common_launch_process: proc[%d] is NULL",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
            ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
            /* proc is dereferenced below, so this cannot be ignored */
            error_flag = true;
            goto cleanup;
        }

        if (launch_daemon) {
            rc = setup_daemon_proc_env_and_argv(proc, &argv, &argc, &env);
        } else {
            orte_app_context_t* app = (orte_app_context_t*) opal_pointer_array_get_item(jdata->apps, proc->app_idx);
            rc = setup_proc_env_and_argv(jdata, app, proc, &argv, &env);
        }

        if (0 != rc) {
            opal_output(0, "%s plm:yarn:common_launch_process: failed to setup env/argv of proc[%d]",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
            ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
            error_flag = true;
            goto cleanup;
        }

        /* print launch commandline and env when this env is specified */
        if (getenv("HAMSTER_VERBOSE")) {
            char* verbose_argv = opal_argv_join(argv, ' ');
            OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                    "%s plm:yarn:common_launch_process: launch argv=%s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), verbose_argv));
            free(verbose_argv);
        }

        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                "%s plm:yarn:common_launch_process: after setup env and argv for proc=%d.",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i));

        /* now start packing request_msg */
        struct pbc_wmessage *launch_contexts_msg = pbc_wmessage_message(request_msg, "launch_contexts");
        if (!launch_contexts_msg) {
            opal_output(0, "%s plm:yarn:common_process_launch: create launch_contexts_msg failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            error_flag = true;
            goto cleanup;
        }

        /* env is a NULL-terminated array; tolerate a NULL array as "no envars" */
        if (env) {
            char **tmp_env;
            for (tmp_env = env; *tmp_env; tmp_env++) {
                rc = pbc_wmessage_string(launch_contexts_msg, "envars", *tmp_env, strlen(*tmp_env));
                if (0 != rc) {
                    opal_output(0, "%s plm:yarn:common_process_launch: pack envars in launch_contexts_msg failed",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    error_flag = true;
                    goto cleanup;
                }
            }
        }

        join_argv = opal_argv_join(argv, ' ');
        if (!join_argv) {
            opal_output(0, "%s plm:yarn:common_process_launch: join argv of proc[%d] failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
            error_flag = true;
            goto cleanup;
        }

        rc = pbc_wmessage_string(launch_contexts_msg, "args", join_argv, strlen(join_argv));
        if (0 != rc) {
            opal_output(0, "%s plm:yarn:common_process_launch: pack args in launch_contexts_msg failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            error_flag = true;
            goto cleanup;
        }

        rc = pbc_wmessage_string(launch_contexts_msg, "host_name", proc->node->name, strlen(proc->node->name));
        if (0 != rc) {
            opal_output(0, "%s plm:yarn:common_process_launch: pack host_name in launch_contexts_msg failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            error_flag = true;
            goto cleanup;
        }

        struct pbc_wmessage *proccess_name_msg = pbc_wmessage_message(launch_contexts_msg, "name");
        if (!proccess_name_msg) {
            opal_output(0, "%s plm:yarn:common_process_launch: create proccess_name_msg failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            error_flag = true;
            goto cleanup;
        }

        rc = pbc_wmessage_integer(proccess_name_msg, "jobid", ORTE_LOCAL_JOBID(proc->name.jobid), 0);
        if (0 != rc) {
            opal_output(0, "%s plm:yarn:common_process_launch: pack jobid in proccess_name_msg failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            error_flag = true;
            goto cleanup;
        }

        rc = pbc_wmessage_integer(proccess_name_msg, "vpid", proc->name.vpid, 0);
        if (0 != rc) {
            opal_output(0, "%s plm:yarn:common_process_launch: pack vpid in proccess_name_msg failed",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            error_flag = true;
            goto cleanup;
        }

cleanup:
        /* free argv and env for this proc */
        if (argv) {
            opal_argv_free(argv);
        }
        if (env) {
            opal_argv_free(env);
        }
        free(join_argv);

        if (error_flag) {
            pbc_wmessage_delete(request_msg);
            ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
            return ORTE_ERROR;
        }
    }

    /* 2. send launch deamon procs request msg */
    rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_LAUNCH);
    if (rc != 0) {
        opal_output(0,
                "%s plm:yarn:common_process_launch: error happened when send launch proc request to AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* NOTE(review): the helper's name suggests it already deletes the
         * message; confirm it does not do so on failure, otherwise this is
         * a double delete. Behavior kept as in the original. */
        if (request_msg) {
            pbc_wmessage_delete(request_msg);
        }
        return ORTE_ERROR;
    }

    /* 3. recv response and parse the msg*/
    /*
    message LaunchResponseProto {
        repeated LaunchResultProto results = 1;
    }

    message LaunchResultProto {
        optional ProcessNameProto name = 1;
        optional bool success = 2;
    }

    message ProcessNameProto {
        optional int32 jobid = 1;
        optional int32 vpid = 2;
    }
    */
    response_msg = orte_hdclient_recv_message("LaunchResponseProto");
    if (!response_msg) {
        opal_output(0,
                "%s plm:yarn:common_process_launch: error happened when recv launch response msg from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto launch_failed;
    }

    n = pbc_rmessage_size(response_msg, "results");
    if (n < 0) {
        opal_output(0, "%s plm:yarn:common_process_launch: got n(=%d) < 0, please check",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        goto launch_failed;
    }

    for (i = 0; i < n; i++) {
        struct pbc_rmessage* results_msg = pbc_rmessage_message(response_msg, "results", i);
        if (!results_msg) {
            opal_output(0,
                    "%s plm:yarn:launch_daemons: error when parse returned launch results from AM",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto launch_failed;
        }

        struct pbc_rmessage* proc_name_msg = pbc_rmessage_message(results_msg, "name", 0);
        if (!proc_name_msg) {
            opal_output(0,
                    "%s plm:yarn:common_process_launch: error when parse returned proc_name_msg from AM",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto launch_failed;
        }

        orte_jobid_t local_jobid = pbc_rmessage_integer(proc_name_msg, "jobid", 0, NULL);
        orte_vpid_t vpid = pbc_rmessage_integer(proc_name_msg, "vpid", 0, NULL);
        bool success = pbc_rmessage_integer(results_msg, "success", 0, NULL);

        /* the vpid comes from the AM; do not trust it to name a known proc */
        orte_proc_t* result_proc = (orte_proc_t*) opal_pointer_array_get_item(jdata->procs, vpid);
        if (!result_proc) {
            opal_output(0,
                    "%s plm:yarn:common_process_launch: no proc found for returned jobid = %u, vpid = %u",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_jobid, vpid);
            goto launch_failed;
        }

        if (success) {
            result_proc->state = ORTE_PROC_STATE_RUNNING;
            launched_num++;
        } else {
            opal_output(0,
                    "%s plm:yarn:common_process_launch: launch proc failed when jobid = %u, vpid = %u",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_jobid, vpid);
            result_proc->state = ORTE_PROC_STATE_FAILED_TO_START;
            jdata->state = ORTE_JOB_STATE_FAILED_TO_START;
            goto launch_failed;
        }
    }

    /* the response is no longer referenced; release it on success too */
    pbc_rmessage_delete(response_msg);

    /* to return back */
    *launched_proc_num = launched_num;
    return ORTE_SUCCESS;

launch_failed:
    if (response_msg) {
        pbc_rmessage_delete(response_msg);
    }
    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
    return ORTE_ERROR;
}