Exemplo n.º 1
0
/*
 * Decode `slice` as a "tutorial.Person" message and print its contents:
 * "name", "id", "email", the "number" and "type" of every "phone" entry,
 * every element of "test", and the "tutorial.Ext.test" value.
 *
 * If the message cannot be opened, the text returned by pbc_error() is
 * printed and the function returns without touching any field.
 */
static void
test_rmessage(struct pbc_env *env, struct pbc_slice *slice) {
	struct pbc_rmessage * m = pbc_rmessage_new(env, "tutorial.Person", slice);
	if (m == NULL) {
		/* Without this guard every accessor below would be handed NULL. */
		printf("Error : %s",pbc_error(env));
		return;
	}
	printf("name = %s\n", pbc_rmessage_string(m , "name" , 0 , NULL));
	printf("id = %d\n", pbc_rmessage_integer(m , "id" , 0 , NULL));
	printf("email = %s\n", pbc_rmessage_string(m , "email" , 0 , NULL));

	int phone_n = pbc_rmessage_size(m, "phone");
	int i;

	/* One sub-message per "phone" entry. */
	for (i=0;i<phone_n;i++) {
		struct pbc_rmessage * p = pbc_rmessage_message(m , "phone", i);
		printf("\tnumber[%d] = %s\n",i,pbc_rmessage_string(p , "number", i ,NULL));
		printf("\ttype[%d] = %s\n",i,pbc_rmessage_string(p, "type", i, NULL));
	}

	int n = pbc_rmessage_size(m , "test");

	for (i=0;i<n;i++) {
		printf("test[%d] = %d\n",i, pbc_rmessage_integer(m , "test" , i , NULL));
	}

	printf("tutorial.Ext.test = %d\n", pbc_rmessage_integer(m,"tutorial.Ext.test",0,NULL));
	pbc_rmessage_delete(m);
}
Exemplo n.º 2
0
/*
 * Register the contents of one file descriptor message with the environment.
 *
 * Reads the file's "package" string, then in order:
 *   1. hands every "enum_type" entry to _register_enum(),
 *   2. hands every "message_type" entry to _register_message(),
 *   3. calls _register_extension() on the file itself,
 *   4. passes the collected queue to _pbcB_register_fields().
 * The queue is opened with _pbcA_open() at the start and closed with
 * _pbcA_close() at the end.
 */
static void
_register(struct pbc_env *p, struct pbc_rmessage * file, struct _stringpool *pool) {
	int pkg_sz;
	const char *pkg = pbc_rmessage_string(file, "package", 0, &pkg_sz);
	pbc_array pending;
	int idx;

	_pbcA_open(pending);

	/* top-level enums */
	const int n_enums = pbc_rmessage_size(file, "enum_type");
	for (idx = 0; idx < n_enums; ++idx) {
		_register_enum(p, pool, pbc_rmessage_message(file, "enum_type", idx), pkg, pkg_sz);
	}

	/* top-level messages */
	const int n_messages = pbc_rmessage_size(file, "message_type");
	for (idx = 0; idx < n_messages; ++idx) {
		_register_message(p, pool, pbc_rmessage_message(file, "message_type", idx), pkg, pkg_sz, pending);
	}

	/* file-level extensions, then everything that was queued */
	_register_extension(p, pool, pkg, pkg_sz, file, pending);
	_pbcB_register_fields(p, pending);

	_pbcA_close(pending);
}
Exemplo n.º 3
0
/*
 * Decode `slice` as a "tutorial.Person" message and print its contents:
 * "name", "id", "email", the type name reported by pbc_type() for the
 * "phone" field, the "number" and "type" of every "phone" entry, every
 * element of "test", and the "tutorial.Ext.test" value.
 *
 * If the message cannot be opened, the text returned by pbc_error() is
 * printed and the function returns.
 */
static void
test_rmessage(struct pbc_env *env, struct pbc_slice *slice) {
	struct pbc_rmessage * m = pbc_rmessage_new(env, "tutorial.Person", slice);
	if (m==NULL) {
		printf("Error : %s",pbc_error(env));
		return;
	}
	printf("name = %s\n", pbc_rmessage_string(m , "name" , 0 , NULL));
	printf("id = %d\n", pbc_rmessage_integer(m , "id" , 0 , NULL));
	printf("email = %s\n", pbc_rmessage_string(m , "email" , 0 , NULL));

	int phone_n = pbc_rmessage_size(m, "phone");
	int i;
	/*
	 * Start from NULL: the result of pbc_type() is not inspected here, so
	 * if it leaves the output untouched we must not print an indeterminate
	 * pointer.
	 */
	const char * field_name = NULL;
	pbc_type(env, "tutorial.Person", "phone", &field_name);
	printf("phone type [%s]\n", field_name != NULL ? field_name : "(unknown)");

	/* One sub-message per "phone" entry. */
	for (i=0;i<phone_n;i++) {
		struct pbc_rmessage * p = pbc_rmessage_message(m , "phone", i);
		printf("\tnumber[%d] = %s\n",i,pbc_rmessage_string(p , "number", i ,NULL));
		printf("\ttype[%d] = %s\n",i,pbc_rmessage_string(p, "type", i, NULL));
	}

	int n = pbc_rmessage_size(m , "test");

	for (i=0;i<n;i++) {
		printf("test[%d] = %d\n",i, pbc_rmessage_integer(m , "test" , i , NULL));
	}

	printf("tutorial.Ext.test = %d\n", pbc_rmessage_integer(m,"tutorial.Ext.test",0,NULL));
	pbc_rmessage_delete(m);
}
Exemplo n.º 4
0
/*
 * Register every "extension" entry found in msg.
 *
 * For each entry a struct _field is built (its name is prefix joined with
 * the entry's "name" via _concat_name(), the rest is filled in by
 * _register_field()) and pushed onto the message named by the entry's
 * "extendee" string with its first character skipped (presumably the
 * leading '.').
 *
 * _pbcP_init_message() is called for an extendee whenever the next entry
 * names a different extendee, and once more after the last entry.
 * Does nothing when msg has no "extension" entries.
 */
static void
_register_extension(struct pbc_env *p, struct _stringpool *pool , const char * prefix, int prefix_sz, struct pbc_rmessage * msg, pbc_array queue) {
	const int total = pbc_rmessage_size(msg , "extension");
	if (total <= 0) {
		return;
	}

	/* extendee of the run of entries currently being collected */
	const char * previous = NULL;
	int idx = 0;

	while (idx < total) {
		struct pbc_rmessage * ext = pbc_rmessage_message(msg, "extension", idx);
		struct _field f;
		int short_name_sz = 0;
		const char * short_name = pbc_rmessage_string(ext , "name" , 0, &short_name_sz);

		f.name = _concat_name(pool, prefix, prefix_sz, short_name, short_name_sz, NULL);
		_register_field(ext, &f , pool);

		const char * target = pbc_rmessage_string(ext , "extendee" , 0, NULL);
		_pbcP_push_message(p, target + 1 , &f , queue);

		const int same_run = (previous != NULL) && (strcmp(target, previous) == 0);
		if (!same_run) {
			/* A new extendee starts: finish the previous one, if any. */
			if (previous != NULL) {
				_pbcP_init_message(p, previous + 1);
			}
			previous = target;
		}

		idx++;
	}

	/* Finish the last run. */
	_pbcP_init_message(p, previous + 1);
}
Exemplo n.º 5
0
/*
 * Lua C function: takes the userdata at stack index 1 (as a
 * struct pbc_rmessage *) and the string at index 2 as a field key, and
 * pushes pbc_rmessage_size() for that key.
 * Returns 1 (one result pushed).
 */
static int
_rmessage_size(lua_State *L) {
	struct pbc_rmessage * msg = (struct pbc_rmessage *)checkuserdata(L,1);
	const char * field = luaL_checkstring(L,2);

	lua_pushinteger(L, pbc_rmessage_size(msg, field));
	return 1;
}
Exemplo n.º 6
0
/*
 * Register one message descriptor, and recursively everything nested in it.
 *
 * The message's registered name is prefix joined with its "name" via
 * _concat_name(). In order, the function:
 *   1. builds a struct _field for every "field" entry and pushes it with
 *      _pbcP_push_message(),
 *   2. calls _pbcP_init_message() for the message,
 *   3. registers the message's own extensions,
 *   4. registers every nested "enum_type",
 *   5. recurses into every "nested_type".
 * The nested calls use this message's full name as their prefix.
 */
static void
_register_message(struct pbc_env *p, struct _stringpool *pool, struct pbc_rmessage * message_type, const char *prefix, int prefix_sz, pbc_array queue) {
	int short_sz;
	const char * short_name = pbc_rmessage_string(message_type, "name", 0 , &short_sz);
	int full_sz = 0;
	const char * full_name = _concat_name(pool, prefix , prefix_sz , short_name , short_sz, &full_sz);
	int idx;

	/* fields */
	const int n_fields = pbc_rmessage_size(message_type, "field");
	for (idx = 0; idx < n_fields; ++idx) {
		struct pbc_rmessage * desc = pbc_rmessage_message(message_type, "field" , idx);
		int fname_sz;
		const char * fname = pbc_rmessage_string(desc, "name", 0 , &fname_sz);
		struct _field f;

		f.name = _pbcS_build(pool, fname, fname_sz);
		_register_field(desc, &f , pool);
		_pbcP_push_message(p, full_name , &f , queue);
	}

	_pbcP_init_message(p, full_name);
	_register_extension(p, pool, full_name, full_sz, message_type, queue);

	/* nested enums */
	const int n_enums = pbc_rmessage_size(message_type, "enum_type");
	for (idx = 0; idx < n_enums; ++idx) {
		_register_enum(p, pool, pbc_rmessage_message(message_type, "enum_type", idx), full_name, full_sz);
	}

	/* nested messages */
	const int n_nested = pbc_rmessage_size(message_type, "nested_type");
	for (idx = 0; idx < n_nested; ++idx) {
		_register_message(p, pool, pbc_rmessage_message(message_type, "nested_type", idx), full_name, full_sz, queue);
	}
}
Exemplo n.º 7
0
/*
 * Fill *f from a field descriptor message.
 *
 * Copies "number" into f->id and "type" into f->type, stores "label" minus
 * one in f->label, and overrides the label with LABEL_PACKED when the
 * field has an "options" entry whose "packed" value is non-zero.
 * f->type_name.n is set to the "type_name" string with its first character
 * skipped, and the "default_value" string is handed to _set_default().
 * f->name is not touched here.
 */
static void
_register_field(struct pbc_rmessage * field, struct _field * f, struct _stringpool *pool) {
	f->id = pbc_rmessage_integer(field, "number", 0, NULL);
	f->type = pbc_rmessage_integer(field, "type", 0, NULL);	/* enum value */
	/* shift by one so that LABEL_OPTIONAL = 0 */
	f->label = pbc_rmessage_integer(field, "label", 0, NULL) - 1;

	if (pbc_rmessage_size(field, "options") > 0) {
		struct pbc_rmessage * opts = pbc_rmessage_message(field, "options", 0);
		if (pbc_rmessage_integer(opts, "packed", 0, NULL)) {
			f->label = LABEL_PACKED;
		}
	}

	/* drop the leading '.' of the type name */
	const char * qualified_type = pbc_rmessage_string(field, "type_name", 0, NULL);
	f->type_name.n = qualified_type + 1;

	int default_sz;
	const char * default_text = pbc_rmessage_string(field, "default_value", 0, &default_sz);
	_set_default(pool, f, f->type, default_text, default_sz);
}
Exemplo n.º 8
0
/*
 * Register one enum descriptor.
 *
 * Builds a temporary table with one struct map_kv per "value" entry (the
 * entry's "name" interned through _pbcS_build() and its "number"), then
 * passes it to _pbcP_push_enum() under the name formed by joining prefix
 * with the enum's "name". The temporary table is freed before returning.
 *
 * If the table cannot be allocated the enum is not registered; this
 * function has no way to report the failure to its caller.
 */
static void
_register_enum(struct pbc_env *p, struct _stringpool *pool, struct pbc_rmessage * enum_type, const char *prefix, int prefix_sz) {
	int field_count = pbc_rmessage_size(enum_type, "value");
	struct map_kv *table = malloc(field_count * sizeof(struct map_kv));
	if (table == NULL && field_count > 0) {
		/* Allocation failed: writing through table below would be a NULL dereference. */
		return;
	}
	int i;
	for (i=0;i<field_count;i++) {
		struct pbc_rmessage * value = pbc_rmessage_message(enum_type, "value", i);
		int enum_name_sz;
		const char *enum_name = pbc_rmessage_string(value , "name" , 0 , &enum_name_sz);
		table[i].pointer = (void *)_pbcS_build(pool, enum_name , enum_name_sz);
		table[i].id = pbc_rmessage_integer(value , "number", 0 , 0);
	}
	int name_sz;
	const char * name = pbc_rmessage_string(enum_type, "name", 0 , &name_sz);
	const char *temp = _concat_name(pool, prefix , prefix_sz , name , name_sz, NULL);

	_pbcP_push_enum(p,temp,table,field_count);
	free(table);
}
Exemplo n.º 9
0
/*
 * Register every file contained in a serialized
 * google.protobuf.FileDescriptorSet.
 *
 * The "file" entries are collected into a temporary array and
 * _register_no_dependency() is called on it repeatedly until it returns 0.
 * If two consecutive calls return the same value, no progress is possible
 * and registration fails with a dependency error.
 *
 * Returns 0 on success and 1 on failure; on failure p->lasterror is set to
 * a short description. The decoded message and the temporary array are
 * released on every path.
 */
int
pbc_register(struct pbc_env * p, struct pbc_slice *slice) {
	struct pbc_rmessage * message = pbc_rmessage_new(p, "google.protobuf.FileDescriptorSet", slice);
	if (message == NULL) {
		p->lasterror = "register open google.protobuf.FileDescriptorSet fail";
		return 1;
	}

	struct pbc_rmessage ** files = NULL;
	int status = 1;
	int i;
	int r;
	int n = pbc_rmessage_size(message, "file");

	/* Reject an empty set before allocating, so no zero-sized request is made. */
	if (n <= 0) {
		p->lasterror = "register empty";
		goto out_delete;
	}

	files = (struct pbc_rmessage **)_pbcM_malloc(n*(sizeof(struct pbc_rmessage *)));
	if (files == NULL) {
		p->lasterror = "register out of memory";
		goto out_delete;
	}

	for (i=0;i<n;i++) {
		files[i] = pbc_rmessage_message(message, "file", i);
		if (files[i] == NULL) {
			p->lasterror = "register open fail";
			goto out_free;
		}
	}

	r = n;
	do {
		int rr = _register_no_dependency(p,files , n);
		if (rr == r) {
			/* same result as the previous pass: nothing more can be registered */
			p->lasterror = "register dependency error";
			goto out_free;
		}
		r = rr;
	} while (r>0);

	status = 0;

out_free:
	free(files);
out_delete:
	pbc_rmessage_delete(message);
	return status;
}
Exemplo n.º 10
0
/*
 * Walk every key of m with pbc_rmessage_next() and pass each value to
 * dump_value() together with the given level.
 *
 * Keys whose type has the PBC_REPEATED bit set are dumped once per element
 * (indices 0 .. pbc_rmessage_size()-1); all other keys are dumped once with
 * index 0. Iteration ends when pbc_rmessage_next() leaves the key NULL.
 */
static void
dump_message(struct pbc_rmessage *m, int level) {
	const char *field = NULL;
	int type = pbc_rmessage_next(m, &field);

	while (field != NULL) {
		const int count = (type & PBC_REPEATED) ? pbc_rmessage_size(m, field) : 1;
		int idx;

		for (idx = 0; idx < count; idx++) {
			dump_value(m, field , type , idx , level);
		}

		type = pbc_rmessage_next(m, &field);
	}
}
Exemplo n.º 11
0
/*
 * Decide whether a file descriptor can be registered now.
 *
 * Returns CHECK_FILE_EXIST when the file's "name" is already present in
 * p->files, CHECK_FILE_DEPENDENCY when any of its "dependency" entries is
 * not present in p->files, and CHECK_FILE_OK otherwise.
 * *fname receives the file's name only in the CHECK_FILE_OK case.
 */
static int
_check_file_name(struct pbc_env * p , struct pbc_rmessage * file, const char ** fname) {
	const char * own_name = pbc_rmessage_string(file, "name", 0, NULL);

	if (_pbcM_sp_query(p->files, own_name) != NULL) {
		return CHECK_FILE_EXIST;
	}

	const int n_deps = pbc_rmessage_size(file, "dependency");
	int idx = 0;
	while (idx < n_deps) {
		const char * dep = pbc_rmessage_string(file, "dependency", idx, NULL);
		if (_pbcM_sp_query(p->files, dep) == NULL) {
			return CHECK_FILE_DEPENDENCY;
		}
		++idx;
	}

	*fname = own_name;
	return CHECK_FILE_OK;
}
Exemplo n.º 12
0
/*
 * Decode `slice` as a "test" message and, for every "el" entry, print its
 * "int16_min", "double_max" and "str" values.
 *
 * If the message cannot be opened, the text returned by pbc_error() is
 * printed and the function returns.
 */
static void
test_rmessage(struct pbc_env *env, struct pbc_slice *slice) {
	struct pbc_rmessage * root = pbc_rmessage_new(env, "test", slice);
	if (root == NULL) {
		printf("Error : %s",pbc_error(env));
		return;
	}

	const int count = pbc_rmessage_size(root, "el");
	int idx = 0;

	while (idx < count) {
		struct pbc_rmessage * el = pbc_rmessage_message(root , "el", idx);

		printf("\tint16_min[%d] = %d\n", idx, pbc_rmessage_integer(el , "int16_min", idx ,NULL));
		printf("\tdouble_max[%d] = %f\n", idx, pbc_rmessage_real(el, "double_max", idx));
		printf("\tstring[%d] = %s\n", idx, pbc_rmessage_string(el, "str", idx, NULL));
		++idx;
	}

	pbc_rmessage_delete(root);
}
Exemplo n.º 13
0
/*
 * Register every file contained in a serialized
 * google.protobuf.FileDescriptorSet.
 *
 * The "file" entries are collected into a local array and
 * _register_no_dependency() is called on it repeatedly until it returns 0.
 * If two consecutive calls return the same value, no progress is possible
 * and registration fails with a dependency error.
 *
 * Returns 0 on success and 1 on failure; on failure p->lasterror is set to
 * a short description. The decoded message is deleted on every path.
 */
int
pbc_register(struct pbc_env * p, struct pbc_slice *slice) {
	struct pbc_rmessage * message = pbc_rmessage_new(p, "google.protobuf.FileDescriptorSet", slice);
	if (message == NULL) {
		p->lasterror = "register open google.protobuf.FileDescriptorSet fail";
		return 1;
	}
	int n = pbc_rmessage_size(message, "file");
	if (n <= 0) {
		/*
		 * Must be rejected before `files` is declared: a variable length
		 * array whose size is not greater than zero is undefined behaviour.
		 * Handled inline because a goto may not jump into the scope of a
		 * variable length array.
		 */
		p->lasterror = "register empty";
		pbc_rmessage_delete(message);
		return 1;
	}

	struct pbc_rmessage * files[n];
	int status = 1;
	int r = n;
	int i;

	for (i=0;i<n;i++) {
		files[i] = pbc_rmessage_message(message, "file", i);
		if (files[i] == NULL) {
			p->lasterror = "register open fail";
			goto done;
		}
	}

	do {
		int rr = _register_no_dependency(p,files , n);
		if (rr == r) {
			/* same result as the previous pass: nothing more can be registered */
			p->lasterror = "register dependency error";
			goto done;
		}
		r = rr;
	} while (r>0);

	status = 0;

done:
	pbc_rmessage_delete(message);
	return status;
}
Exemplo n.º 14
0
/*
 * Ask the AM for `np` resources and append one orte_node_t per returned
 * "node_resources" entry to `nodes`.
 *
 * Sends an AllocateRequestProto with "resource_count" set to np, receives
 * an AllocateResponseProto, and for every entry creates a node whose name
 * is a copy of "host_name" and whose slot count is "slot".
 *
 * Returns ORTE_SUCCESS when every entry was converted, ORTE_ERROR
 * otherwise. Nodes appended before a failure are left in `nodes`.
 * The received response message is deleted on every path.
 */
static int orte_ras_yarn_allocate_internal(int np, opal_list_t* nodes) {
    int rc, i;
    int ret = ORTE_ERROR;

    // create and send allocate message
    struct pbc_wmessage* msg = pbc_wmessage_new(orte_hdclient_pb_env, "AllocateRequestProto");
    if (!msg) {
        opal_output(0, "%s ras:yarn failed to create AllocateRequestProto", 
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }
    pbc_wmessage_integer(msg, "resource_count", np, 0);
    rc = orte_hdclient_send_message_and_delete(msg, HAMSTER_MSG_ALLOCATE);
    if (rc != 0) {
        opal_output(0, "%s ras:yarn error happened when send allocate msg to AM",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    // read rmessage out
    struct pbc_rmessage* response = orte_hdclient_recv_message("AllocateResponseProto");
    if (!response) {
        opal_output(0, "%s ras:yarn error happened when recv allocate response from AM",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    // from here on every exit goes through `cleanup` so the response is released
    int n = pbc_rmessage_size(response, "node_resources");
    if (n <= 0) {
        opal_output(0, "%s ras:yarn got n(=%d) <= 0, please check",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        goto cleanup;
    }

    // read node resources
    for (i = 0; i < n; i++) {
        struct pbc_rmessage* node_res = pbc_rmessage_message(response, "node_resources", i);
        if (!node_res) {
            opal_output(0, "%s ras:yarn error when parse returned resource from AM", 
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }
        
        // parse host, slot
        const char* host = pbc_rmessage_string(node_res, "host_name", 0, NULL);
        if (!host) {
            opal_output(0, "%s ras:yarn error when parse host from returned resource from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }
        int slot = pbc_rmessage_integer(node_res, "slot", 0, NULL);

        // make node_t and add it to nodes
        orte_node_t* node = OBJ_NEW(orte_node_t);
        if (!node) {
            opal_output(0, "%s ras:yarn failed to create orte_node_t obj", 
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        // the name is copied, so the node does not depend on the response's lifetime
        node->name = strdup(host);
        node->state = ORTE_NODE_STATE_UP;
        node->slots_inuse = 0;
        node->slots_max = 0;
        node->slots = slot;
        opal_list_append(nodes, &node->super);

        OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                     "%s ras:yarn: adding node %s with %d slot",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                     host, slot));
    }

    // All done
    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                         "%s ras:yarn:allocate: success",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    ret = ORTE_SUCCESS;

cleanup:
    pbc_rmessage_delete(response);
    return ret;
}
Exemplo n.º 15
0
/*
 * Ask the AM to launch the processes of `jdata` and record the outcome.
 *
 * 1. Builds a LaunchRequestProto with one "launch_contexts" entry per
 *    process (environment, joined argv, host name, jobid/vpid). When
 *    launch_daemon is true, process 0 is skipped and the daemon setup
 *    helper is used instead of the application one.
 * 2. Sends the request.
 * 3. Receives a LaunchResponseProto and, for each "results" entry, marks
 *    the matching process ORTE_PROC_STATE_RUNNING or, on the first failed
 *    entry, ORTE_PROC_STATE_FAILED_TO_START (also failing the job).
 *
 * Returns ORTE_SUCCESS and stores the number of started processes in
 * *launched_proc_num, or returns ORTE_ERROR (leaving *launched_proc_num
 * untouched). The response message is deleted on every path that
 * received one.
 */
static int common_launch_process(orte_job_t *jdata, bool launch_daemon, int *launched_proc_num)
{
	int i, rc;
	orte_proc_t* proc = NULL;
	char **argv;
	int argc;
	char **env;
	bool error_flag = false;
	int launched_num = 0;

	/* 1. create launch message */
	/*
	 message LaunchRequestProto {
	 repeated LaunchContextProto launch_contexts = 1;
	 }

	 message LaunchContextProto {
	 repeated string envars = 1;
	 optional string args = 2;
	 optional string host_name = 3;
	 optional ProcessNameProto name = 4;
	 }

	 message ProcessNameProto {
	 optional int32 jobid = 1;
	 optional int32 vpid = 2;
	 }
	 */
	struct pbc_wmessage* request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "LaunchRequestProto");
	if (!request_msg) {
		opal_output(0, "%s plm:yarn:common_process_launch: failed to create LaunchRequestProto",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
		return ORTE_ERROR;
	}

	/* when launch_daemon, start from 1 because we don't need launch HNP process */
	i = launch_daemon ? 1 : 0;

	for (; i < jdata->num_procs; i++) {
		/*
		 * Everything the cleanup label looks at is declared and
		 * initialised here, before the first goto, so cleanup never
		 * reads an indeterminate value.
		 */
		char *join_argv = NULL;
		char **tmp_env;
		struct pbc_wmessage *launch_contexts_msg;
		struct pbc_wmessage *proccess_name_msg;

		argv = NULL;
		argc = 0;
		env = NULL;

		/* setup env/argv  */
		proc = opal_pointer_array_get_item(jdata->procs, i);
		if (!proc) {
			opal_output(0, "%s plm:yarn:common_launch_process: proc[%d] is NULL",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
			ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
			/* proc is dereferenced below; stop here instead of crashing */
			error_flag = true;
			goto cleanup;
		}

		if (launch_daemon) {
			rc = setup_daemon_proc_env_and_argv(proc, &argv, &argc, &env);
		} else {
			orte_app_context_t* app = (orte_app_context_t*) opal_pointer_array_get_item(jdata->apps, proc->app_idx);
			rc = setup_proc_env_and_argv(jdata, app, proc, &argv, &env);
		}
		if (0 != rc) {
			opal_output(0,
					"%s plm:yarn:common_launch_process: failed to setup env/argv of proc[%d]",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
			ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
			error_flag = true;
			goto cleanup;
		}

		/* print launch commandline and env when this env is specified */
		if (getenv("HAMSTER_VERBOSE")) {
			char* verbose_argv = opal_argv_join(argv, ' ');

			OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:yarn:common_launch_process: launch argv=%s",
							ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), verbose_argv));
			free(verbose_argv);
		}

		OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
									"%s plm:yarn:common_launch_process: after setup env and argv for proc=%d.",
									ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i));

		/* now start packing request_msg */
		launch_contexts_msg = pbc_wmessage_message(request_msg, "launch_contexts");
		if (!launch_contexts_msg) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: create launch_contexts_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

		for (tmp_env = env; tmp_env && *tmp_env; tmp_env++) {
			pbc_wmessage_string(launch_contexts_msg, "envars", *tmp_env, strlen(*tmp_env));
		}

		join_argv = opal_argv_join(argv, ' ');
		if (!join_argv) {
			/* strlen() below must not be handed NULL */
			opal_output(0,
					"%s plm:yarn:common_process_launch: failed to join argv of proc[%d]",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i);
			error_flag = true;
			goto cleanup;
		}
		pbc_wmessage_string(launch_contexts_msg, "args", join_argv, strlen(join_argv));

		pbc_wmessage_string(launch_contexts_msg, "host_name", proc->node->name, strlen(proc->node->name));

		proccess_name_msg = pbc_wmessage_message(launch_contexts_msg, "name");
		if (!proccess_name_msg) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: create proccess_name_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

		rc = pbc_wmessage_integer(proccess_name_msg, "jobid", ORTE_LOCAL_JOBID(proc->name.jobid), 0);
		if (0 != rc) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: pack jobid in proccess_name_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

		rc = pbc_wmessage_integer(proccess_name_msg, "vpid", proc->name.vpid,
				0);
		if (0 != rc) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: pack vpid in proccess_name_msg failed",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			error_flag = true;
			goto cleanup;
		}

cleanup:
		/* free argv and env for this proc */
		if (argv) {
			opal_argv_free(argv);
		}
		if (env) {
			opal_argv_free(env);
		}
		free(join_argv);
		if (error_flag) {
			pbc_wmessage_delete(request_msg);
			ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
			return ORTE_ERROR;
		}
	}

	/* 2. send launch deamon procs request msg */
	rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_LAUNCH);
	if (rc != 0) {
		opal_output(0,
				"%s plm:yarn:common_process_launch: error happened when send launch proc request to AM",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
		/*
		 * NOTE(review): the helper's name suggests it already deletes the
		 * message; if it does so on failure too, this is a double delete.
		 * Confirm against orte_hdclient_send_message_and_delete().
		 */
		if (request_msg) {
			pbc_wmessage_delete(request_msg);
		}
		return ORTE_ERROR;
	}

	/* 3. recv response and parse the msg*/
	/*
	 message LaunchResponseProto {
	 repeated LaunchResultProto results = 1;
	 }

	 message LaunchResultProto {
	 optional ProcessNameProto name = 1;
	 optional bool success = 2;
	 }

	 message ProcessNameProto {
	 optional int32 jobid = 1;
	 optional int32 vpid = 2;
	 }
	 */
	struct pbc_rmessage* response_msg = NULL;
	int n;

	response_msg = orte_hdclient_recv_message("LaunchResponseProto");
	if (!response_msg) {
		opal_output(0,
				"%s plm:yarn:common_process_launch: error happened when recv launch response msg from AM",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
		goto launch_failed;
	}

	n = pbc_rmessage_size(response_msg, "results");
	if (n < 0) {
		opal_output(0,
				"%s plm:yarn:common_process_launch: got n(=%d) < 0, please check",
				ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
		goto launch_failed;
	}

	for (i = 0; i < n; i++) {
		struct pbc_rmessage* results_msg = pbc_rmessage_message(response_msg, "results", i);
		if (!results_msg) {
			opal_output(0,
					"%s plm:yarn:launch_daemons: error when parse returned launch results from AM",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			goto launch_failed;
		}

		struct pbc_rmessage* proc_name_msg = pbc_rmessage_message(results_msg, "name", 0);
		if (!proc_name_msg) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: error when parse returned proc_name_msg from AM",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
			goto launch_failed;
		}

		orte_jobid_t local_jobid = pbc_rmessage_integer(proc_name_msg, "jobid", 0, NULL);
		orte_vpid_t vpid = pbc_rmessage_integer(proc_name_msg, "vpid", 0, NULL);

		bool success = pbc_rmessage_integer(results_msg, "success", 0, NULL);

		/* the vpid comes from the peer, so the lookup may find nothing */
		orte_proc_t* launched_proc = (orte_proc_t*) opal_pointer_array_get_item(jdata->procs, vpid);
		if (!launched_proc) {
			opal_output(0,
					"%s plm:yarn:common_process_launch: no proc found for jobid = %u, vpid = %u",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_jobid, vpid);
			goto launch_failed;
		}

		if (success) {
			launched_proc->state = ORTE_PROC_STATE_RUNNING;
			launched_num++;
		} else {
			opal_output(0,
					"%s plm:yarn:common_process_launch: launch proc failed when jobid = %u, vpid = %u",
					ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_jobid, vpid);
			launched_proc->state = ORTE_PROC_STATE_FAILED_TO_START;
			jdata->state = ORTE_JOB_STATE_FAILED_TO_START;
			goto launch_failed;
		}
	}

	/* to return back */
	*launched_proc_num = launched_num;
	/* the response is no longer needed; release it on success as well */
	pbc_rmessage_delete(response_msg);
	return ORTE_SUCCESS;

launch_failed:
	if (response_msg) {
		pbc_rmessage_delete(response_msg);
	}
	ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
	return ORTE_ERROR;
}
Exemplo n.º 16
0
static void heartbeat_with_AM_cb(int fd, short event, void *data)
{
    int i, rc;
    orte_job_t *jdata = (orte_job_t*)data;
    orte_job_t* daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

    /* 1. create heartbeat request msg */
    /*
    message HeartbeatRequestProto {
    }
    */
    struct pbc_wmessage* request_msg = pbc_wmessage_new(orte_hdclient_pb_env, "HeartbeatRequestProto");
    if (!request_msg) {
        opal_output(0, "%s plm:yarn:heartbeat_with_AM_cb: failed to create request_msg",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* 2. send heartbeat request msg */
    rc = orte_hdclient_send_message_and_delete(request_msg, HAMSTER_MSG_HEARTBEAT);
    if (rc != 0) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: error happened when send request_msg to AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        ORTE_ERROR_LOG(ORTE_ERROR_DEFAULT_EXIT_CODE);
        return;
    }

    /* 3. recv response and parse the msg*/
    /*
     message HeartbeatResponseProto {
         repeated ProcessStatusProto completed_processes = 1;
     }

     message ProcessStatusProto {
         optional ProcessNameProto name = 1;
         optional ProcessStateProto state = 2;
         optional int32 exit_value = 3;
     }

     enum ProcessStateProto {
         RUNNING = 1;
         COMPLETED = 2;
     }

     message ProcessNameProto {
         optional int32 jobid = 1;
         optional int32 vpid = 2;
     }
     */

    struct pbc_rmessage* response_msg = orte_hdclient_recv_message("HeartbeatResponseProto");
    if (!response_msg) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: error happened when recv HeartbeatResponseProto msg from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        goto cleanup;
    }

    int n = pbc_rmessage_size(response_msg, "completed_processes");
    if (n < 0) {
        opal_output(0,
                "%s plm:yarn:heartbeat_with_AM_cb: got n(=%d) < 0, please check",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        struct pbc_rmessage* completed_procs_msg = pbc_rmessage_message(response_msg, "completed_processes", i);
        if (!completed_procs_msg) {
            opal_output(0,
                    "%s plm:yarn:heartbeat_with_AM_cb: error when parse returned completed_procs_msg from AM",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        struct pbc_rmessage* proc_name_msg = pbc_rmessage_message(completed_procs_msg, "name", 0);
        if (!proc_name_msg) {
            opal_output(0,
                    "%s plm:yarn:heartbeat_with_AM_cb: error when parse proc_name_msg",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            goto cleanup;
        }

        uint32_t local_jobid = pbc_rmessage_integer(proc_name_msg, "jobid", 0, NULL);
        uint32_t vpid = pbc_rmessage_integer(proc_name_msg, "vpid", 0, NULL);

        uint32_t exit_value = pbc_rmessage_integer(completed_procs_msg, "exit_value", 0, NULL);

        /* next, we will modify proc's state */
        orte_job_t* tmp_jdata = (orte_job_t*) opal_pointer_array_get_item(orte_job_data, local_jobid);
        orte_proc_t* proc = (orte_proc_t*) opal_pointer_array_get_item(tmp_jdata->procs, vpid);


        if (tmp_jdata->jobid == jdata->jobid) {
			num_completed_jdata_procs++;
		}

        if (exit_value == 0) {
        	proc->state = ORTE_PROC_STATE_TERMINATED;
        }

        /* if this process is already terminated, just skip over */
        if (proc->state >= ORTE_PROC_STATE_TERMINATED) {
            continue;
        }

        if (exit_value == -1000 || exit_value == -100 || exit_value == -101) {
            opal_output(0, "%s plm:yarn:heartbeat_with_AM_cb proc failed to start", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_ERROR_LOG(ORTE_ERROR);
            proc->state = ORTE_PROC_STATE_FAILED_TO_START;
            ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
        } else {
            /* here, means currently the proc's state < ORTE_PROC_STATE_TERMINATED,
             * however, from AM's heartbeat response, we got the proc's container is terminated,
             * to solve this dilemma , we set a timer event to reconfirm this proc's state,
             */
            opal_event_t *ev = NULL;
            ev = (opal_event_t*) malloc(sizeof(opal_event_t));

            struct timeval delay;
            delay.tv_sec = 15;
            delay.tv_usec = 0;

            opal_event_evtimer_set(orte_event_base, ev, process_state_monitor_cb, proc);
            opal_event_evtimer_add(ev, &delay);
        }
    }

cleanup:
    if (response_msg) {
        pbc_rmessage_delete(response_msg);
    }

    if (num_completed_jdata_procs == jdata->num_procs) {
        /*
         * all procs are completed, send finish request to AM,
         * modify job state to ORTE_JOB_STATE_TERMINATED
         */
        jdata->state = ORTE_JOB_STATE_TERMINATED;
        finish_app_master(0 == orte_exit_status);
        return;
    } else {
        /* next heartbeat */
        opal_event_t *ev = NULL;
        ev = (opal_event_t*) malloc(sizeof(opal_event_t));

        struct timeval delay;
        delay.tv_sec = 1;
        delay.tv_usec = 0;

        opal_event_evtimer_set(orte_event_base, ev, heartbeat_with_AM_cb, jdata);
		opal_event_evtimer_add(ev, &delay);
    }
}