예제 #1
0
/**
 * Returns 0 if the memory constraints are not reached. Otherwise, 1 is returned.
 * In case of an error, -1 is returned.
 */
static int probe_cobj_memcheck(size_t item_cnt)
{
	if (item_cnt > PROBE_RESULT_MEMCHECK_CTRESHOLD) {
		struct proc_memusage mu_proc;
		struct sys_memusage  mu_sys;
		double c_ratio;

		if (oscap_proc_memusage (&mu_proc) != 0)
			return (-1);

		if (oscap_sys_memusage (&mu_sys) != 0)
			return (-1);

		c_ratio = (double)mu_proc.mu_rss/(double)(mu_sys.mu_total);

		if (c_ratio > PROBE_RESULT_MEMCHECK_MAXRATIO) {
			dW("Memory usage ratio limit reached! limit=%f, current=%f",
			   PROBE_RESULT_MEMCHECK_MAXRATIO, c_ratio);
			errno = ENOMEM;
			return (1);
		}

		if ((mu_sys.mu_realfree / 1024) < PROBE_RESULT_MEMCHECK_MINFREEMEM) {
			dW("Minimum free memory limit reached! limit=%zu, current=%zu",
			   PROBE_RESULT_MEMCHECK_MINFREEMEM, mu_sys.mu_realfree / 1024);
			errno = ENOMEM;
			return (1);
		}
	}

	return (0);
}
예제 #2
0
int oval_result_test_parse_tag(xmlTextReaderPtr reader, struct oval_parser_context *context, void *usr) {

	struct oval_result_system *sys = (struct oval_result_system *) usr;
	int return_code = 0;
	struct oval_definition_model *dmod;
	struct oval_test *dtst;
	struct oval_result_test *test;
	xmlChar *test_id = xmlTextReaderGetAttribute(reader, BAD_CAST "test_id");

	dmod = context->definition_model;
	dtst = oval_definition_model_get_new_test(dmod, (char *) test_id);
	oval_result_t result = oval_result_parse(reader, "result", 0);
	int variable_instance = oval_parser_int_attribute(reader, "variable_instance", 1);

	test = oval_result_system_get_new_test(sys, dtst, variable_instance);
	if (test == NULL)
		return -1;
	oval_result_test_set_result(test, result);
	oval_result_test_set_instance(test, variable_instance);

	struct oval_test *ovaltst = oval_result_test_get_test(test);

	oval_existence_t check_existence = oval_existence_parse(reader, "check_existence", OVAL_AT_LEAST_ONE_EXISTS);
	oval_existence_t tst_check_existence = oval_test_get_existence(ovaltst);
	if (tst_check_existence == OVAL_EXISTENCE_UNKNOWN) {
		oval_test_set_existence(ovaltst, check_existence);
	} else if (tst_check_existence != check_existence) {
		dW("@check_existence does not match, test_id: %s.", test_id);
	}

	oval_check_t check = oval_check_parse(reader, "check", OVAL_CHECK_UNKNOWN);
	oval_check_t tst_check = oval_test_get_check(ovaltst);
	if (tst_check == OVAL_CHECK_UNKNOWN) {
		oval_test_set_check(ovaltst, check);
	} else if (tst_check != check) {
		dW("@check does not match, test_id: %s.", test_id);
	}

	int version = oval_parser_int_attribute(reader, "version", 0);
	int tst_version = oval_test_get_version(ovaltst);
	if (tst_version == 0) {
		oval_test_set_version(ovaltst, version);
	} else if (tst_version != version) {
		dW("@version does not match, test_id: %s.", test_id);
	}

	struct oval_string_map *itemmap = oval_string_map_new();
	void *args[] = { sys, test, itemmap };
	return_code = oval_parser_parse_tag(reader, context, (oval_xml_tag_parser) _oval_result_test_parse, args);
	oval_string_map_free(itemmap, NULL);
	test->bindings_initialized = true;

	oscap_free(test_id);
	return return_code;
}
예제 #3
0
/* this function will gather all the necessary ingredients and call 'evaluate_items' when it finds them */
static oval_result_t _oval_result_test_result(struct oval_result_test *rtest, void **args)
{
	__attribute__nonnull__(rtest);

	/* is the test already evaluated? */
	if (rtest->result != OVAL_RESULT_NOT_EVALUATED) {
		dI("Found result from previous evaluation: %d, returning without further processing.\n", rtest->result);
		return (rtest->result);
	}

	/* get syschar of rtest */
	struct oval_test *test = oval_result_test_get_test(rtest);
	struct oval_object * object = oval_test_get_object(test);
	char * object_id = oval_object_get_id(object);

	struct oval_result_system *sys = oval_result_test_get_system(rtest);
	struct oval_syschar_model *syschar_model = oval_result_system_get_syschar_model(sys);

	struct oval_syschar * syschar = oval_syschar_model_get_syschar(syschar_model, object_id);
	if (syschar == NULL) {
		dW("No syschar for object: %s\n", object_id);
		return OVAL_RESULT_UNKNOWN;
	}

	/* evaluate items */
	oval_result_t result = _oval_result_test_evaluate_items(test, syschar, args);

	return result;
}
예제 #4
0
static inline int ipv4addr_parse(const char *oval_ipv4_string, uint32_t *netmask_out, struct in_addr *ip_out)
{
	char *s, *pfx;
	int result = -1;

	s = strdup(oval_ipv4_string);
	pfx = strchr(s, '/');
	if (pfx) {
		int cnt;
		unsigned char nm[4];

		*pfx++ = '\0';
		cnt = sscanf(pfx, "%hhu.%hhu.%hhu.%hhu", &nm[0], &nm[1], &nm[2], &nm[3]);
		if (cnt > 1) { /* netmask */
			*netmask_out = (nm[0] << 24) + (nm[1] << 16) + (nm[2] << 8) + nm[3];
		} else { /* prefix */
			*netmask_out = (~0) << (32 - nm[0]);
		}
	} else {
		*netmask_out = ~0;
	}

	if (inet_pton(AF_INET, s, ip_out) <= 0)
		dW("inet_pton() failed.\n");
	else
		result = 0;

	oscap_free(s);
	return result;
}
예제 #5
0
파일: entcmp.c 프로젝트: swizzley/openscap
oval_result_t probe_ent_cmp_debian_evr(SEXP_t * val1, SEXP_t * val2, oval_operation_t op)
{
	//TODO: implement Debian's epoch-version-release comparing algorithm
	// it is different algorithm than RPM algorithm
	dW("Using RPM algorithm to compare epoch, version and release.\n");
	return probe_ent_cmp_evr(val1, val2, op);
}
예제 #6
0
int oval_result_definition_parse_tag(xmlTextReaderPtr reader, struct oval_parser_context *context, void *usr) {

	struct oval_result_system *sys = (struct oval_result_system *) usr;
	int return_code = 0;
	struct oval_definition_model *dmod;
	struct oval_definition *ddef;
	struct oval_result_definition *definition;
	xmlChar *definition_id = xmlTextReaderGetAttribute(reader, BAD_CAST "definition_id");
	xmlChar *definition_version = xmlTextReaderGetAttribute(reader, BAD_CAST "version");
	int resvsn = atoi((char *)definition_version);

	oval_result_t result = oval_result_parse(reader, "result", OVAL_ENUMERATION_INVALID);

	int instance = oval_parser_int_attribute(reader, "variable_instance", 1);

	dmod = context->definition_model;
	ddef = oval_definition_model_get_new_definition(dmod, (char *) definition_id);
	definition = oval_result_system_get_new_definition(sys, ddef, instance);
	if (definition == NULL)
		return -1;

	int defvsn = oval_definition_get_version(definition->definition);
	if (defvsn && resvsn != defvsn) {
		dW("Definition versions don't match: definition id: %s, ovaldef vsn: %d, resdef vsn: %d.", definition_id, defvsn, resvsn);
	}
	oval_definition_set_version(definition->definition, resvsn);
	// The following _set_instance() might be overabundant, since it should be already set
	// by oval_result_system_get_new_definition() Let's see if the assert agrees over time:
	assert(oval_result_definition_get_instance(definition) == instance);
	oval_result_definition_set_instance(definition, instance);
	

	if ((int)result != OVAL_ENUMERATION_INVALID) {
		oval_result_definition_set_result(definition, result);
	} else {
		dW("Can't resolve result attribute, definition id: %s.", definition_id);
		oval_result_definition_set_result(definition, OVAL_RESULT_UNKNOWN);
	}

	return_code = oval_parser_parse_tag(reader, context, oval_result_definition_parse, definition);

	oscap_free(definition_id);
	oscap_free(definition_version);

	return return_code;
}
예제 #7
0
int oval_test_parse_tag(xmlTextReaderPtr reader, struct oval_parser_context *context, void *usr)
{
	int ret = 0;
	char *comm = NULL;
	char *version = NULL;
	struct oval_definition_model *model = context->definition_model;

	char *id = (char *)xmlTextReaderGetAttribute(reader, BAD_CAST "id");
	struct oval_test *test = oval_definition_model_get_new_test(model, id);

	oval_subtype_t subtype = oval_subtype_parse(reader);
        if ( subtype == OVAL_SUBTYPE_UNKNOWN) {
		oscap_seterr(OSCAP_EFAMILY_OVAL, "Unknown test type %s.", id);
		ret = -1;
		goto cleanup;
        }
	oval_test_set_subtype(test, subtype);

	oval_operator_t ste_operator = oval_operator_parse(reader, "state_operator", OVAL_OPERATOR_AND);
	oval_test_set_state_operator(test, ste_operator);

	oval_check_t check = oval_check_parse(reader, "check", OVAL_CHECK_UNKNOWN);
	if (check == OVAL_CHECK_NONE_EXIST) {
		dW("The 'none exist' CheckEnumeration value has been deprecated. "
		   "Converted to check='none satisfy' and check_existence='none exist'.\n");
		oval_test_set_check(test, OVAL_CHECK_NONE_SATISFY);
		oval_test_set_existence(test, OVAL_NONE_EXIST);
	} else {
		oval_existence_t existence;

		oval_test_set_check(test, check);
		existence = oval_existence_parse(reader, "check_existence", OVAL_AT_LEAST_ONE_EXISTS);
		oval_test_set_existence(test, existence);
	}

	comm = (char *)xmlTextReaderGetAttribute(reader, BAD_CAST "comment");
	if (comm != NULL) {
		oval_test_set_comment(test, comm);
	}

	int deprecated = oval_parser_boolean_attribute(reader, "deprecated", 0);
	oval_test_set_deprecated(test, deprecated);

	version = (char *)xmlTextReaderGetAttribute(reader, BAD_CAST "version");
	oval_test_set_version(test, atoi(version));


	ret = oval_parser_parse_tag(reader, context, &_oval_test_parse_tag, test);

cleanup:
	oscap_free(version);
	oscap_free(comm);
	oscap_free(id);
	return ret;
}
예제 #8
0
void oval_string_map_put(struct oval_string_map *map, const char *key, void *val)
{
        char *key_copy;

	assume_d(map != NULL, /* void */);
	assume_d(key != NULL, /* void */);

	if (rbt_str_add((rbt_t *)map, key_copy = strdup(key), val) != 0) {
		dW("rbt_str_add: non-zero return code");
                oscap_free(key_copy);
        }
}
예제 #9
0
VectorXd feedForwardNetwork::rpropTrain(MatrixXd x, MatrixXd y, int numepochs, int batchsize, double incScale, double decScale, double incScaleMax, double decScaleMin, bool verbose) {
	// initialize training parameters
	std::vector<MatrixXd> dW(layer_size.size()-1);
	std::vector<VectorXd> dB(layer_size.size()-1);
	std::vector<ArrayXXi> signDeltaW(layer_size.size()-1);
	std::vector<ArrayXi> signDeltaB(layer_size.size()-1);

	for(int i = 0; i < layer_size.size()-1; i++) {
		dW[i].setConstant(layer_size[i+1], layer_size[i], 0.1);
		dB[i].setConstant(layer_size[i+1], 0.1);
		signDeltaW[i].setZero(layer_size[i+1], layer_size[i]);
		signDeltaB[i].setZero(layer_size[i+1]);
		}

	long n_sample = x.cols();
	if (batchsize > n_sample) batchsize = n_sample;
	int n_batch = n_sample / batchsize; // truncated if not divided
	int remainder = n_sample - n_batch*batchsize;

	int n_batch2 = n_batch; // n_batch2 is the actual batch number
	if (remainder > 0) n_batch2++;

	int s = 0;  // update iteration, total iteration = numepoch x numbatch
	VectorXd loss(numepochs*n_batch2);  // mean sum of square error/loss
	MatrixXd error;  //raw error: per sample per output dimension
	error.setConstant(numepochs, n_batch2, -1);
	PermutationMatrix<Dynamic, Dynamic> perm(n_sample);

	MatrixXd x_perm(x);
	MatrixXd y_perm(y);

	for (int i = 0; i < numepochs; i++) {
		if (verbose) cout <<  "Epoch " << i + 1 << endl;
		perm.setIdentity();
		random_shuffle(perm.indices().data(), perm.indices().data() + perm.indices().size());
		x_perm = x_perm * perm;  // col = sample, shuffle samples
		y_perm = y_perm * perm;
		int this_batchsize = batchsize;

		for(int j = 0; j < n_sample; j +=batchsize) {
			if (j >= n_sample - remainder) this_batchsize = remainder;
			error = ff(x_perm.middleCols(j, this_batchsize), y_perm.middleCols(j,  this_batchsize));
			rprop(error, dW, dB, signDeltaW, signDeltaB, incScale, decScale, incScaleMax, decScaleMin);
			if (output == "softmax") {
				loss[s] = -(y_perm.middleCols(j, this_batchsize).array() * post[layer_size.size()-1].array().log()).colwise().sum().mean();
			} else {
				loss[s] = error.array().square().mean();
				}
			s++;
			}
		}
	return loss;
	}
예제 #10
0
struct oscap_reference *oscap_reference_new_parse(xmlTextReaderPtr reader)
{
    assert(reader != NULL);

    struct oscap_reference *ref = oscap_calloc(1, sizeof(struct oscap_reference));

    int depth = oscap_element_depth(reader);

    xmlNode* ref_node = xmlTextReaderExpand(reader);

    ref->href = (char*) xmlGetProp(ref_node, BAD_CAST "href");

    for (xmlNode* cur = ref_node->children; cur != NULL; cur = cur->next)
		if (cur->type == XML_ELEMENT_NODE) { ref->is_dublincore = true; break; }

    if (ref->is_dublincore) {
        for (xmlNode* cur = ref_node->children; cur != NULL; cur = cur->next) {
            if (cur->type != XML_ELEMENT_NODE
				|| cur->ns == NULL
				|| !oscap_streq((const char* ) cur->ns->href, (const char *) NS_DUBLINCORE))
					continue;

            DC_DOM_SCAN(title);
            DC_DOM_SCAN(creator);
            DC_DOM_SCAN(subject);
            DC_DOM_SCAN(description);
            DC_DOM_SCAN(publisher);
            DC_DOM_SCAN(contributor);
            DC_DOM_SCAN(date);
            DC_DOM_SCAN(type);
            DC_DOM_SCAN(format);
            DC_DOM_SCAN(identifier);
            DC_DOM_SCAN(source);
            DC_DOM_SCAN(language);
            DC_DOM_SCAN(relation);
            DC_DOM_SCAN(coverage);
            DC_DOM_SCAN(rights);
        }
    }
    else {
        ref->title = (char*) xmlNodeGetContent(ref_node);
    }

    if (!oscap_to_start_element(reader, depth))
	    dW("oscap_to_start_element returned `false'");

    return ref;
}
예제 #11
0
파일: layer.cpp 프로젝트: hgaolbb/MiniNet
/*!
* \brief backward
*             in:       [N, C, Hx, Wx]
*             weight:   [F, C, Hw, Ww]
*             bias:     [F, 1, 1, 1]
*             out:      [N, F, (Hx+pad*2-Hw)/stride+1, (Wx+pad*2-Ww)/stride+1]
* \param[in]  const Blob* dout              dout
* \param[in]  const vector<Blob*>& cache    cache[0]:X, cache[1]:weights, cache[2]:bias
* \param[out] vector<Blob*>& grads          grads[0]:dX, grads[1]:dW, grads[2]:db
*/
void ConvLayer::backward(shared_ptr<Blob>& dout,
                         const vector<shared_ptr<Blob>>& cache,
                         vector<shared_ptr<Blob>>& grads,
                         Param& param) {
    int N = cache[0]->get_N();
    int F = cache[1]->get_N();
    int C = cache[0]->get_C();
    int Hx = cache[0]->get_H();
    int Wx = cache[0]->get_W();
    int Hw = cache[1]->get_H();
    int Ww = cache[1]->get_W();
    int Hy = dout->get_H();
    int Wy = dout->get_W();
    assert(C == cache[1]->get_C());
    assert(F == cache[2]->get_N());

    shared_ptr<Blob> dX(new Blob(cache[0]->size(), TZEROS));
    shared_ptr<Blob> dW(new Blob(cache[1]->size(), TZEROS));
    shared_ptr<Blob> db(new Blob(cache[2]->size(), TZEROS));

    Blob pad_dX(N, C, Hx + param.conv_pad*2, Wx + param.conv_pad*2, TZEROS);
    Blob pad_X = (*cache[0]).pad(1);

    for (int n = 0; n < N; ++n) {
        for (int f = 0; f < F; ++f) {
            for (int hh = 0; hh < Hy; ++hh) {
                for (int ww = 0; ww < Wy; ++ww) {
                    cube window = pad_X[n](span(hh * param.conv_stride,  hh * param.conv_stride + Hw - 1),
                                            span(ww * param.conv_stride, ww * param.conv_stride + Ww - 1),
                                            span::all);
                    (*db)[f](0, 0, 0) += (*dout)[n](hh, ww, f);
                    (*dW)[f] += window * (*dout)[n](hh, ww, f);
                    pad_dX[n](span(hh * param.conv_stride, hh * param.conv_stride + Hw - 1),
                        span(ww * param.conv_stride, ww * param.conv_stride + Ww - 1),
                        span::all) += (*cache[1])[f] * (*dout)[n](hh, ww, f);
                }
            }
        }
    }
    *dX = pad_dX.dePad(param.conv_pad);
    grads[0] = dX;
    grads[1] = dW;
    grads[2] = db;

    return;
}
예제 #12
0
void oval_probe_meta_list(FILE *output, int flags)
{
	register size_t i;
	const char *probe_dir;
	oval_probe_meta_t *meta = OSCAP_GSYM(__probe_meta);
	size_t probe_dirlen;
	char probe_path[PATH_MAX+1];

	if (output == NULL)
		output = stdout;

	probe_dir = oval_probe_ext_getdir();
	assume_d(probe_dir != NULL, /* void */);
	probe_dirlen = strlen(probe_dir);
	assume_r(probe_dirlen + 1 <= PATH_MAX, /* void */);

	for (i = 0; i < OSCAP_GSYM(__probe_meta_count); ++i) {
		if (meta[i].flags & OVAL_PROBEMETA_EXTERNAL) {
			strncpy(probe_path, probe_dir, PATH_MAX);
			probe_path[probe_dirlen] = '/';
			probe_path[probe_dirlen+1] = '\0';
			strncat(probe_path, meta[i].pname, PATH_MAX - strlen(probe_dir) - 1);

			if (flags & OVAL_PROBEMETA_LIST_DYNAMIC) {
				dI("Checking access to \"%s\"\n", probe_path);
				if (access(probe_path, X_OK) != 0) {
					dW("access: errno=%d, %s\n", errno, strerror(errno));
					continue;
				}
			}
		}

		fprintf(output, "%-28s %-28s", meta[i].stype, meta[i].pname);

		if (flags & OVAL_PROBEMETA_LIST_VERBOSE) {
			if (meta[i].flags & OVAL_PROBEMETA_EXTERNAL) {
				fprintf(output, " %-5u %s\n", meta[i].otype, probe_path);
			} else {
				fprintf(output, " %-5u\n", meta[i].otype);
			}
		} else
			fprintf(output, "\n");
	}

	return;
}
예제 #13
0
파일: shrot.cpp 프로젝트: molsimmsu/3mview
float SHRot::Ry(const int k, const int l, const int m, const int n) {
    if(((l==0 && m==0 && n==0) || (l==1 && m==-1 && n==-1)) && k == 0)
        return 1.f;
    else if((l==0 && m==0 && n==0) || (l==1 && m==-1 && n==-1))
        return 0.f;
    else if(l==1 && ((m==-1 && n==0) || (m==-1 && n==1) || (m==0 && n==-1) || (m==1 && n==-1)))
        return 0.f;
    else if(l==1 && ((m==0 && n==0) || (m==1 && n==1)))
        return 1.f - (float)k; // ONLY FOR k <= 2!!!!!
    else if(l==1 && ((m==0 && n==1)))
        return -(float)(k%2); // ONLY FOR k <= 2!!!!!
    else if(l==1 && ((m==1 && n==0)))
        return (float)(k%2); // ONLY FOR k <= 2!!!!!
    else
        return u(l,m,n) * dU(k,l,m,n) +
               v(l,m,n) * dV(k,l,m,n) +
               w(l,m,n) * dW(k,l,m,n);
}
예제 #14
0
static int _oval_result_test_parse(xmlTextReaderPtr reader, struct oval_parser_context *context, void **args) {
	int return_code = 0;
	xmlChar *localName = xmlTextReaderLocalName(reader);

	if (strcmp((const char *)localName, "message") == 0) {
		return_code = oval_message_parse_tag(reader, context, (oscap_consumer_func) _oval_test_message_consumer, TEST);
	} else if (strcmp((const char *)localName, "tested_item") == 0) {
		return_code = oval_result_item_parse_tag(reader, context, SYSTEM, (oscap_consumer_func) _oval_test_item_consumer, args);
	} else if (strcmp((const char *)localName, "tested_variable") == 0) {
		return_code = _oval_result_test_binding_parse(reader, context, args);
	} else {
		dW( "Unhandled tag: <%s>.\n", localName);
		oval_parser_skip_tag(reader, context);
	}

	oscap_free(localName);

	return return_code;
}
예제 #15
0
static char *get_selinux_label(int pid) {
#ifdef HAVE_SELINUX_SELINUX_H
	char *selinux_label;
	security_context_t pid_context;
	context_t context;

	if (getpidcon(pid, &pid_context) == -1) {
		/* error getting pid selinux context */
		dW("Can't get selinux context for process %d\n", pid);
		return NULL;
	}
	context = context_new(pid_context);
	selinux_label = strdup(context_type_get(context));
	context_free(context);
	freecon(pid_context);
	return selinux_label;

#else
	return NULL;
#endif /* HAVE_SELINUX_SELINUX_H */
}
예제 #16
0
static inline int ipv6addr_parse(const char *oval_ipv6_string, uint32_t *len_out, struct in6_addr *ip_out)
{
	char *s, *pfx;
	int result = -1;

	s = strdup(oval_ipv6_string);
	pfx = strchr(s, '/');
	if (pfx) {
		*pfx++ = '\0';
		*len_out = strtol(pfx, NULL, 10);
	} else {
		*len_out = 128;
	}

	if (inet_pton(AF_INET6, s, ip_out) <= 0)
		dW("inet_pton() failed.\n");
	else
		result = 0;

	oscap_free(s);
	return result;
}
예제 #17
0
static int get_uids(int pid, struct result_info *r)
{
	char buf[100];
	FILE *sf;

	r->ruid = -1;
	r->user_id = -1;
	r->loginuid = -1;

	snprintf(buf, sizeof(buf), "/proc/%d/status", pid);
	sf = fopen(buf, "rt");
	if (sf) {
		int line = 0;
		__fsetlocking(sf, FSETLOCKING_BYCALLER);
		while (fgets(buf, sizeof(buf), sf)) {
			if (line == 0) {
				line++;
				continue;
			}
			if (memcmp(buf, "Uid:", 4) == 0) {
				sscanf(buf, "Uid: %d %d", &r->ruid, &r->user_id);
				break;
			}
		}
		fclose(sf);
	}

	snprintf(buf, sizeof(buf), "/proc/%d/loginuid", pid);
	sf = fopen(buf, "rt");
	if (sf) {
		if (fscanf(sf, "%u", &r->loginuid) < 1) {
			dW("fscanf failed from %s\n", buf);
		}
		fclose(sf);
	}

	return 0;
}
예제 #18
0
파일: rule.c 프로젝트: AxelNennker/openscap
struct xccdf_item *xccdf_group_parse(xmlTextReaderPtr reader, struct xccdf_item *parent)
{
	XCCDF_ASSERT_ELEMENT(reader, XCCDFE_GROUP);

	struct xccdf_item *group = xccdf_group_new_internal(parent);

	if (!xccdf_item_process_attributes(group, reader)) {
		xccdf_group_free(group);
		return NULL;
	}

	int depth = oscap_element_depth(reader) + 1;

	while (oscap_to_start_element(reader, depth)) {
		switch (xccdf_element_get(reader)) {
		case XCCDFE_REQUIRES:
		case XCCDFE_CONFLICTS:
			xccdf_item_parse_deps(reader, group);
			break;
		case XCCDFE_GROUP:
		case XCCDFE_RULE:
			xccdf_content_parse(reader, group);
			break;
		case XCCDFE_VALUE:
			oscap_list_add(group->sub.group.values, xccdf_value_parse(reader, group));
			break;
		default:
			if (!xccdf_item_process_element(group, reader))
				dW("Encountered an unknown element '%s' while parsing XCCDF group.",
				   xmlTextReaderConstLocalName(reader));
		}
		xmlTextReaderRead(reader);
	}

	return group;
}
예제 #19
0
/**
    Purpose
    -------
    CLATRD2 reduces NB rows and columns of a complex Hermitian matrix A to
    Hermitian tridiagonal form by an orthogonal similarity
    transformation Q' * A * Q, and returns the matrices V and W which are
    needed to apply the transformation to the unreduced part of A.

    If UPLO = MagmaUpper, CLATRD reduces the last NB rows and columns of a
    matrix, of which the upper triangle is supplied;
    if UPLO = MagmaLower, CLATRD reduces the first NB rows and columns of a
    matrix, of which the lower triangle is supplied.

    This is an auxiliary routine called by CHETRD2_GPU. It uses an
    accelerated HEMV that needs extra memory.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
            Specifies whether the upper or lower triangular part of the
            Hermitian matrix A is stored:
      -     = MagmaUpper: Upper triangular
      -     = MagmaLower: Lower triangular

    @param[in]
    n       INTEGER
            The order of the matrix A.

    @param[in]
    nb      INTEGER
            The number of rows and columns to be reduced.

    @param[in,out]
    A       COMPLEX array, dimension (LDA,N)
            On entry, the Hermitian matrix A.  If UPLO = MagmaUpper, the leading
            n-by-n upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = MagmaLower, the
            leading n-by-n lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.
            On exit:
      -     if UPLO = MagmaUpper, the last NB columns have been reduced to
              tridiagonal form, with the diagonal elements overwriting
              the diagonal elements of A; the elements above the diagonal
              with the array TAU, represent the orthogonal matrix Q as a
              product of elementary reflectors;
      -     if UPLO = MagmaLower, the first NB columns have been reduced to
              tridiagonal form, with the diagonal elements overwriting
              the diagonal elements of A; the elements below the diagonal
              with the array TAU, represent the  orthogonal matrix Q as a
              product of elementary reflectors.
            See Further Details.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= (1,N).

    @param[out]
    e       COMPLEX array, dimension (N-1)
            If UPLO = MagmaUpper, E(n-nb:n-1) contains the superdiagonal
            elements of the last NB columns of the reduced matrix;
            if UPLO = MagmaLower, E(1:nb) contains the subdiagonal elements of
            the first NB columns of the reduced matrix.

    @param[out]
    tau     COMPLEX array, dimension (N-1)
            The scalar factors of the elementary reflectors, stored in
            TAU(n-nb:n-1) if UPLO = MagmaUpper, and in TAU(1:nb) if UPLO = MagmaLower.
            See Further Details.

    @param[out]
    W       COMPLEX array, dimension (LDW,NB)
            The n-by-nb matrix W required to update the unreduced part
            of A.

    @param[in]
    ldw     INTEGER
            The leading dimension of the array W. LDW >= max(1,N).

    Further Details
    ---------------
    If UPLO = MagmaUpper, the matrix Q is represented as a product of elementary
    reflectors

        Q = H(n) H(n-1) . . . H(n-nb+1).

    Each H(i) has the form

        H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
    and tau in TAU(i-1).

    If UPLO = MagmaLower, the matrix Q is represented as a product of elementary
    reflectors

        Q = H(1) H(2) . . . H(nb).

    Each H(i) has the form

        H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
    and tau in TAU(i).

    The elements of the vectors v together form the n-by-nb matrix V
    which is needed, with W, to apply the transformation to the unreduced
    part of the matrix, using a Hermitian rank-2k update of the form:
    A := A - V*W' - W*V'.

    The contents of A on exit are illustrated by the following examples
    with n = 5 and nb = 2:

    if UPLO = MagmaUpper:                       if UPLO = MagmaLower:

        (  a   a   a   v4  v5 )              (  d                  )
        (      a   a   v4  v5 )              (  1   d              )
        (          a   1   v5 )              (  v1  1   a          )
        (              d   1  )              (  v1  v2  a   a      )
        (                  d  )              (  v1  v2  a   a   a  )

    where d denotes a diagonal element of the reduced matrix, a denotes
    an element of the original matrix that is unchanged, and vi denotes
    an element of the vector defining H(i).

    @ingroup magma_cheev_aux
    ********************************************************************/
extern "C" magma_int_t
magma_clatrd2(magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
              magmaFloatComplex *A,  magma_int_t lda,
              float *e, magmaFloatComplex *tau,
              magmaFloatComplex *W,  magma_int_t ldw,
              magmaFloatComplex *dA, magma_int_t ldda,
              magmaFloatComplex *dW, magma_int_t lddw,
              magmaFloatComplex *dwork, magma_int_t ldwork)
{
#define A(i, j) (A + (j)*lda + (i))
#define W(i, j) (W + (j)*ldw + (i))

#define dA(i, j) (dA + (j)*ldda + (i))
#define dW(i, j) (dW + (j)*lddw + (i))

    magma_int_t i;

    magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
    magmaFloatComplex c_one     = MAGMA_C_ONE;
    magmaFloatComplex c_zero    = MAGMA_C_ZERO;

    magmaFloatComplex value = MAGMA_C_ZERO;

    magma_int_t ione = 1;

    magma_int_t i_n, i_1, iw;

    magmaFloatComplex alpha;
    magmaFloatComplex *f;

    if (n <= 0) {
        return 0;
    }

    magma_queue_t stream;
    magma_queue_create( &stream );
    magma_cmalloc_cpu( &f, n );
    assert( f != NULL );  // TODO return error, or allocate outside clatrd

    if (uplo == MagmaUpper) {
        /* Reduce last NB columns of upper triangle */
        for (i = n-1; i >= n - nb; --i) {
            i_1 = i + 1;
            i_n = n - i - 1;

            iw = i - n + nb;
            if (i < n-1) {
                /* Update A(1:i,i) */
#if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_clacgv(&i_n, W(i, iw+1), &ldw);
#endif
                blasf77_cgemv("No transpose", &i_1, &i_n, &c_neg_one, A(0, i+1), &lda,
                              W(i, iw+1), &ldw, &c_one, A(0, i), &ione);
#if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_clacgv(&i_n, W(i, iw+1), &ldw);
                lapackf77_clacgv(&i_n, A(i, i+1), &ldw);
#endif
                blasf77_cgemv("No transpose", &i_1, &i_n, &c_neg_one, W(0, iw+1), &ldw,
                              A(i, i+1), &lda, &c_one, A(0, i), &ione);
#if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_clacgv(&i_n, A(i, i+1), &ldw);
#endif
            }
            if (i > 0) {
                /* Generate elementary reflector H(i) to annihilate A(1:i-2,i) */

                alpha = *A(i-1, i);

                lapackf77_clarfg(&i, &alpha, A(0, i), &ione, &tau[i - 1]);

                e[i-1] = MAGMA_C_REAL( alpha );
                *A(i-1,i) = MAGMA_C_MAKE( 1, 0 );

                /* Compute W(1:i-1,i) */
                // 1. Send the block reflector  A(0:n-i-1,i) to the GPU
                magma_csetvector( i, A(0, i), 1, dA(0, i), 1 );

                //#if (GPUSHMEM < 200)
                //magma_chemv(MagmaUpper, i, c_one, dA(0, 0), ldda,
                //            dA(0, i), ione, c_zero, dW(0, iw), ione);
                //#else
                magmablas_chemv_work(MagmaUpper, i, c_one, dA(0, 0), ldda,
                                     dA(0, i), ione, c_zero, dW(0, iw), ione,
                                     dwork, ldwork);
                //#endif

                // 2. Start putting the result back (asynchronously)
                magma_cgetmatrix_async( i, 1,
                                        dW(0, iw),         lddw,
                                        W(0, iw) /*test*/, ldw, stream );

                if (i < n-1) {
                    blasf77_cgemv(MagmaConjTransStr, &i, &i_n, &c_one, W(0, iw+1), &ldw,
                                  A(0, i), &ione, &c_zero, W(i+1, iw), &ione);
                }

                // 3. Here is where we need it // TODO find the right place
                magma_queue_sync( stream );

                if (i < n-1) {
                    blasf77_cgemv("No transpose", &i, &i_n, &c_neg_one, A(0, i+1), &lda,
                                  W(i+1, iw), &ione, &c_one, W(0, iw), &ione);

                    blasf77_cgemv(MagmaConjTransStr, &i, &i_n, &c_one, A(0, i+1), &lda,
                                  A(0, i), &ione, &c_zero, W(i+1, iw), &ione);

                    blasf77_cgemv("No transpose", &i, &i_n, &c_neg_one, W(0, iw+1), &ldw,
                                  W(i+1, iw), &ione, &c_one, W(0, iw), &ione);
                }

                blasf77_cscal(&i, &tau[i - 1], W(0, iw), &ione);

#if defined(PRECISION_z) || defined(PRECISION_c)
                cblas_cdotc_sub( i, W(0,iw), ione, A(0,i), ione, &value );
#else
                value = cblas_cdotc( i, W(0,iw), ione, A(0,i), ione );
#endif
                alpha = tau[i - 1] * -0.5f * value;
                blasf77_caxpy(&i, &alpha, A(0, i), &ione,
                              W(0, iw), &ione);
            }
        }
    }
    else {
        /*  Reduce first NB columns of lower triangle */
        for (i = 0; i < nb; ++i) {

            /* Update A(i:n,i) */
            i_n = n - i;
#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_clacgv(&i, W(i, 0), &ldw);
#endif
            blasf77_cgemv("No transpose", &i_n, &i, &c_neg_one, A(i, 0), &lda,
                          W(i, 0), &ldw, &c_one, A(i, i), &ione);
#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_clacgv(&i, W(i, 0), &ldw);
            lapackf77_clacgv(&i, A(i, 0), &lda);
#endif
            blasf77_cgemv("No transpose", &i_n, &i, &c_neg_one, W(i, 0), &ldw,
                          A(i, 0), &lda, &c_one, A(i, i), &ione);
#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_clacgv(&i, A(i, 0), &lda);
#endif

            if (i < n-1) {
                /* Generate elementary reflector H(i) to annihilate A(i+2:n,i) */
                i_n = n - i - 1;
                alpha = *A(i+1, i);
                lapackf77_clarfg(&i_n, &alpha, A(min(i+2,n-1), i), &ione, &tau[i]);
                e[i] = MAGMA_C_REAL( alpha );
                *A(i+1,i) = MAGMA_C_MAKE( 1, 0 );

                /* Compute W(i+1:n,i) */
                // 1. Send the block reflector  A(i+1:n,i) to the GPU
                magma_csetvector( i_n, A(i+1, i), 1, dA(i+1, i), 1 );

                //#if (GPUSHMEM < 200)
                //magma_chemv(MagmaLower, i_n, c_one, dA(i+1, i+1), ldda, dA(i+1, i), ione, c_zero,
                //            dW(i+1, i), ione);
                //#else
                magmablas_chemv_work(MagmaLower, i_n, c_one, dA(i+1, i+1), ldda, dA(i+1, i), ione, c_zero,
                                     dW(i+1, i), ione,
                                     dwork, ldwork);
                //#endif

                // 2. Start putting the result back (asynchronously)
                magma_cgetmatrix_async( i_n, 1,
                                        dW(i+1, i), lddw,
                                        W(i+1, i),  ldw, stream );

                blasf77_cgemv(MagmaConjTransStr, &i_n, &i, &c_one, W(i+1, 0), &ldw,
                              A(i+1, i), &ione, &c_zero, W(0, i), &ione);

                blasf77_cgemv("No transpose", &i_n, &i, &c_neg_one, A(i+1, 0), &lda,
                              W(0, i), &ione, &c_zero, f, &ione);

                blasf77_cgemv(MagmaConjTransStr, &i_n, &i, &c_one, A(i+1, 0), &lda,
                              A(i+1, i), &ione, &c_zero, W(0, i), &ione);

                // 3. Here is where we need it
                magma_queue_sync( stream );

                if (i != 0)
                    blasf77_caxpy(&i_n, &c_one, f, &ione, W(i+1, i), &ione);

                blasf77_cgemv("No transpose", &i_n, &i, &c_neg_one, W(i+1, 0), &ldw,
                              W(0, i), &ione, &c_one, W(i+1, i), &ione);
                blasf77_cscal(&i_n, &tau[i], W(i+1,i), &ione);
#if defined(PRECISION_z) || defined(PRECISION_c)
                cblas_cdotc_sub( i_n, W(i+1,i), ione, A(i+1,i), ione, &value );
#else
                value = cblas_cdotc( i_n, W(i+1,i), ione, A(i+1,i), ione );
#endif
                alpha = tau[i] * -0.5f * value;
                blasf77_caxpy(&i_n, &alpha, A(i+1, i), &ione, W(i+1,i), &ione);
            }
        }
    }

    magma_free_cpu(f);
    magma_queue_destroy( stream );

    return 0;
} /* magma_clatrd */
예제 #20
0
void *probe_signal_handler(void *arg)
{
        probe_t  *probe = (probe_t *)arg;
	siginfo_t siinf;
	sigset_t  siset;

#if defined(HAVE_PTHREAD_SETNAME_NP)
# if defined(__APPLE__)
	pthread_setname_np("signal_handler");
# else
	pthread_setname_np(pthread_self(), "signal_handler");
# endif
#endif

	sigemptyset(&siset);
	sigaddset(&siset, SIGHUP);
	sigaddset(&siset, SIGUSR1);
	sigaddset(&siset, SIGUSR2);
	sigaddset(&siset, SIGINT);
	sigaddset(&siset, SIGTERM);
	sigaddset(&siset, SIGQUIT);
        sigaddset(&siset, SIGPIPE);

#if defined(__linux__)
        if (prctl(PR_SET_PDEATHSIG, SIGTERM) != 0)
                dW("prctl(PR_SET_PDEATHSIG, SIGTERM) failed");
#endif
       
	dD("Signal handler ready");
	switch (errno = pthread_barrier_wait(&OSCAP_GSYM(th_barrier)))
	{
	case 0:
	case PTHREAD_BARRIER_SERIAL_THREAD:
		break;
	default:
		dE("pthread_barrier_wait: %d, %s.", errno, strerror(errno));
		return (NULL);
	}

	while (sigwaitinfo(&siset, &siinf) != -1) {

		dD("Received signal %d from %u (%s)",
		   siinf.si_signo, (unsigned int)siinf.si_pid,
		   getppid() == siinf.si_pid ? "parent" : "not my parent");

#if defined(PROBE_SIGNAL_PARENTONLY)
		/* Listen only to signals sent from the parent process */
		if (getppid() != siinf.si_pid)
			continue;
#endif

		switch(siinf.si_signo) {
		case SIGUSR1:/* probe abort */
                        probe->probe_exitcode = ECONNABORTED;
			/* FALLTHROUGH */
                case SIGINT:
                case SIGTERM:
                case SIGQUIT:
                case SIGPIPE:
		{
			__thr_collection coll;

			coll.thr = NULL;
			coll.cnt = 0;

                        pthread_cancel(probe->th_input);

			/* collect IDs and cancel threads */
			rbt_walk_inorder2(probe->workers, __abort_cb, &coll, 0);

			/*
			 * Wait till all threads are canceled (they may temporarily disable
			 * cancelability), but at most 60 seconds per thread.
			 */
			for (; coll.cnt > 0; --coll.cnt) {
				probe_worker_t *thr = coll.thr[coll.cnt - 1];
#if defined(HAVE_PTHREAD_TIMEDJOIN_NP) && defined(HAVE_CLOCK_GETTIME)
				struct timespec j_tm;

				if (clock_gettime(CLOCK_REALTIME, &j_tm) == -1) {
					dE("clock_gettime(CLOCK_REALTIME): %d, %s.", errno, strerror(errno));
					continue;
				}

				j_tm.tv_sec += 60;

				if ((errno = pthread_timedjoin_np(thr->tid, NULL, &j_tm)) != 0) {
					dE("[%llu] pthread_timedjoin_np: %d, %s.", (uint64_t)thr->sid, errno, strerror(errno));
					/*
					 * Memory will be leaked here by continuing to the next thread. However, we are in the
					 * process of shutting down the whole probe. We're just nice and gave the probe_main()
					 * thread a chance to finish it's critical section which shouldn't take that long...
					 */
					continue;
				}
#else
				if ((errno = pthread_join(thr->tid, NULL)) != 0) {
					dE("pthread_join: %d, %s.", errno, strerror(errno));
					continue;
				}
#endif
				SEAP_msg_free(coll.thr[coll.cnt - 1]->msg);
                                oscap_free(coll.thr[coll.cnt - 1]);
			}

			oscap_free(coll.thr);
			goto exitloop;
		}
                case SIGUSR2:
                case SIGHUP:
                        /* ignore */
                        break;
                }
	}
exitloop:
	return (NULL);
}
int main(int argc, char** argv) {

	NcError error(NcError::silent_nonfatal);

try {
	// Input filename
	std::string strInputFile;

	// Output mesh filename
	std::string strOutputMesh;

	// Output connectivity filename
	std::string strOutputConnectivity;

	// Number of elements in mesh
	int nP;

	// Use uniformly spaced sub-volumes
	bool fUniformSpacing = false;

	// Do not merge faces
	bool fNoMergeFaces = false;

	// Nodes appear at GLL nodes
	bool fCGLL = true;

	// Parse the command line
	BeginCommandLine()
		CommandLineString(strInputFile, "in", "");
		CommandLineString(strOutputMesh, "out", "");
		CommandLineString(strOutputConnectivity, "out_connect", "");
		CommandLineInt(nP, "np", 2);
		CommandLineBool(fUniformSpacing, "uniform");
		//CommandLineBool(fNoMergeFaces, "no-merge-face");
		//CommandLineBool(fCGLL, "cgll");

		ParseCommandLine(argc, argv);
	EndCommandLine(argv)

	AnnounceBanner();

	// Check file names
	if (strInputFile == "") {
		_EXCEPTIONT("No input file specified");
	}
	if (nP < 2) {
		_EXCEPTIONT("--np must be >= 2");
	}

	if ((fNoMergeFaces) && (strOutputConnectivity != "")) {
		_EXCEPTIONT("--out_connect and --no-merge-face not implemented");
	}

	// Load input mesh
	std::cout << std::endl;
	std::cout << "..Loading input mesh" << std::endl;

	Mesh meshIn(strInputFile);
	meshIn.RemoveZeroEdges();

	// Number of elements
	int nElements = meshIn.faces.size();

	// Gauss-Lobatto quadrature nodes and weights
	std::cout << "..Computing sub-volume boundaries" << std::endl;

	DataArray1D<double> dG(nP);
	DataArray1D<double> dW(nP);

	// Uniformly spaced nodes
	if (fUniformSpacing) {
		for (int i = 0; i < nP; i++) {
			dG[i] = (2.0 * static_cast<double>(i) + 1.0) / (2.0 * nP);
			dW[i] = 1.0 / nP;
		}
		dG[0] = 0.0;
		dG[1] = 1.0;

	// Get Gauss-Lobatto Weights
	} else {
		GaussLobattoQuadrature::GetPoints(nP, 0.0, 1.0, dG, dW);
	}

	// Accumulated weight vector
	DataArray1D<double> dAccumW(nP+1);
	dAccumW[0] = 0.0;
	for (int i = 1; i < nP+1; i++) {
		dAccumW[i] = dAccumW[i-1] + dW[i-1];
	}
	if (fabs(dAccumW[dAccumW.GetRows()-1] - 1.0) > 1.0e-14) {
		_EXCEPTIONT("Logic error in accumulated weight");
	}

	// Data structures used for generating connectivity
	DataArray3D<int> dataGLLnodes(nP, nP, nElements);
	std::vector<Node> vecNodes;
	std::map<Node, int> mapFaces;

	// Data structure for 

	// Data structure used for avoiding coincident nodes on the fly
	std::map<Node, int> mapNewNodes;

	// Generate new mesh
	std::cout << "..Generating sub-volumes" << std::endl;
	Mesh meshOut;

	for (size_t f = 0; f < nElements; f++) {

		const Face & face = meshIn.faces[f];

		if (face.edges.size() != 4) {
			_EXCEPTIONT("Input mesh must only contain quadrilaterals");
		}

		const Node & node0 = meshIn.nodes[face[0]];
		const Node & node1 = meshIn.nodes[face[1]];
		const Node & node2 = meshIn.nodes[face[2]];
		const Node & node3 = meshIn.nodes[face[3]];

		for (int q = 0; q < nP; q++) {
		for (int p = 0; p < nP; p++) {

			bool fNewFace = true;

			// Build unique node array if CGLL
			if (fCGLL) {

				// Get local nodal location
				Node nodeGLL;
				Node dDx1G;
				Node dDx2G;

				ApplyLocalMap(
					face,
					meshIn.nodes,
					dG[p],
					dG[q],
					nodeGLL,
					dDx1G,
					dDx2G);

				// Determine if this is a unique Node
				std::map<Node, int>::const_iterator iter =
					mapFaces.find(nodeGLL);

				if (iter == mapFaces.end()) {

					// Insert new unique node into map
					int ixNode = static_cast<int>(mapFaces.size());
					mapFaces.insert(std::pair<Node, int>(nodeGLL, ixNode));
					dataGLLnodes[q][p][f] = ixNode + 1;
					vecNodes.push_back(nodeGLL);

				} else {
					dataGLLnodes[q][p][f] = iter->second + 1;

					fNewFace = false;
				}

			// Non-unique node array if DGLL
			} else {
				dataGLLnodes[q][p][f] = nP * nP * f + q * nP + p;
			}

			// Get volumetric region
			Face faceNew(4);

			for (int i = 0; i < 4; i++) {
				int px = p+((i+1)/2)%2; // p,p+1,p+1,p
				int qx = q+(i/2);       // q,q,q+1,q+1

				Node nodeOut =
					InterpolateQuadrilateralNode(
						node0, node1, node2, node3,
						dAccumW[px], dAccumW[qx]);
	
				std::map<Node, int>::const_iterator iterNode =
					mapNewNodes.find(nodeOut);
				if (iterNode == mapNewNodes.end()) {
					mapNewNodes.insert(
						std::pair<Node, int>(nodeOut, meshOut.nodes.size()));
					faceNew.SetNode(i, meshOut.nodes.size());
					meshOut.nodes.push_back(nodeOut);
				} else {
					faceNew.SetNode(i, iterNode->second);
				}
			}

			// Insert new Face or merge with existing Face
			meshOut.faces.push_back(faceNew);
/*
			if ((fNoMergeFaces) || (fNewFace)) {
			} else {
				std::cout << dataGLLnodes[q][p][f]-1 << " " << mapFaces.size()-1 << std::endl;
				meshOut.faces[dataGLLnodes[q][p][f]-1].Merge(faceNew);
			}
*/
		}
		}
	}

	meshOut.RemoveCoincidentNodes();

	// Build connectivity and write to file
	if (strOutputConnectivity != "") {

		std::cout << "..Constructing connectivity file" << std::endl;

		std::vector< std::set<int> > vecConnectivity;
		vecConnectivity.resize(mapFaces.size());

		for (size_t f = 0; f < nElements; f++) {

			for (int q = 0; q < nP; q++) {
			for (int p = 0; p < nP; p++) {

				std::set<int> & setLocalConnectivity =
					vecConnectivity[dataGLLnodes[q][p][f]-1];

				// Connect in all directions
				if (p != 0) {
					setLocalConnectivity.insert(
						dataGLLnodes[q][p-1][f]);
				}
				if (p != (nP-1)) {
					setLocalConnectivity.insert(
						dataGLLnodes[q][p+1][f]);
				}
				if (q != 0) {
					setLocalConnectivity.insert(
						dataGLLnodes[q-1][p][f]);
				}
				if (q != (nP-1)) {
					setLocalConnectivity.insert(
						dataGLLnodes[q+1][p][f]);
				}
			}
			}
		}

		// Open output file
		FILE * fp = fopen(strOutputConnectivity.c_str(), "w");
		fprintf(fp, "%lu\n", vecConnectivity.size());
		for (size_t f = 0; f < vecConnectivity.size(); f++) {
			const Node & node = vecNodes[f];

			double dLon = atan2(node.y, node.x);
			double dLat = asin(node.z);

			if (dLon < 0.0) {
				dLon += 2.0 * M_PI;
			}

			fprintf(fp, "%1.14f,", dLon / M_PI * 180.0);
			fprintf(fp, "%1.14f,", dLat / M_PI * 180.0);
			fprintf(fp, "%lu", vecConnectivity[f].size());

			std::set<int>::const_iterator iter = vecConnectivity[f].begin();
			for (; iter != vecConnectivity[f].end(); iter++) {
				fprintf(fp, ",%i", *iter);
			}
			if (f != vecConnectivity.size()-1) {
				fprintf(fp,"\n");
			}
		}
		fclose(fp);
	}

	// Remove coincident nodes
	//std::cout << "..Removing coincident nodes" << std::endl;
	//meshOut.RemoveCoincidentNodes();

	// Write the mesh
	if (strOutputMesh != "") {
		std::cout << "..Writing mesh" << std::endl;
		meshOut.Write(strOutputMesh);
	}

	// Announce
	std::cout << "..Mesh generator exited successfully" << std::endl;
	std::cout << "=========================================================";
	std::cout << std::endl;

	return (0);

} catch(Exception & e) {
	Announce(e.ToString().c_str());
	return (-1);

} catch(...) {
	return (-2);
}
}
예제 #22
0
oval_result_t ores_get_result_bychk(struct oresults *ores, oval_check_t check)
{
	oval_result_t result = OVAL_RESULT_ERROR;

	if (ores->true_cnt == 0 &&
	    ores->false_cnt == 0 &&
	    ores->error_cnt == 0 &&
	    ores->unknown_cnt == 0 &&
	    ores->notappl_cnt == 0 &&
	    ores->noteval_cnt == 0)
		return OVAL_RESULT_UNKNOWN;

	if (ores->notappl_cnt > 0 &&
	    ores->noteval_cnt == 0 &&
	    ores->false_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->true_cnt == 0)
		return OVAL_RESULT_NOT_APPLICABLE;

	switch (check) {
	case OVAL_CHECK_ALL:
		if (ores->true_cnt > 0 &&
		    ores->false_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->noteval_cnt == 0) {
			result = OVAL_RESULT_TRUE;
		} else if (ores->false_cnt > 0) {
			result = OVAL_RESULT_FALSE;
		} else if (ores->false_cnt == 0 && ores->error_cnt > 0) {
			result = OVAL_RESULT_ERROR;
		} else if (ores->false_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt > 0) {
			result = OVAL_RESULT_UNKNOWN;
		} else if (ores->false_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->noteval_cnt > 0) {
			result = OVAL_RESULT_NOT_EVALUATED;
		}
		break;
	case OVAL_CHECK_AT_LEAST_ONE:
		if (ores->true_cnt > 0) {
			result = OVAL_RESULT_TRUE;
		} else if (ores->false_cnt > 0 &&
			   ores->true_cnt == 0 &&
			   ores->unknown_cnt == 0 && ores->error_cnt == 0 && ores->noteval_cnt == 0) {
			result = OVAL_RESULT_FALSE;
		} else if (ores->true_cnt == 0 && ores->error_cnt > 0) {
			result = OVAL_RESULT_ERROR;
		} else if (ores->false_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt > 0) {
			result = OVAL_RESULT_UNKNOWN;
		} else if (ores->false_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->noteval_cnt > 0) {
			result = OVAL_RESULT_NOT_EVALUATED;
		}
		break;
	case OVAL_CHECK_NONE_EXIST:
		dW("The 'none exist' CheckEnumeration value has been deprecated. "
		   "Converted to check='none satisfy'.\n");
		/* FALLTHROUGH */
	case OVAL_CHECK_NONE_SATISFY:
		if (ores->true_cnt > 0) {
			result = OVAL_RESULT_FALSE;
		} else if (ores->true_cnt == 0 && ores->error_cnt > 0) {
			result = OVAL_RESULT_ERROR;
		} else if (ores->true_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt > 0) {
			result = OVAL_RESULT_UNKNOWN;
		} else if (ores->true_cnt == 0 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->noteval_cnt > 0) {
			result = OVAL_RESULT_NOT_EVALUATED;
		} else if (ores->false_cnt > 0 &&
			   ores->error_cnt == 0 &&
			   ores->unknown_cnt == 0 && ores->noteval_cnt == 0 && ores->true_cnt == 0) {
			result = OVAL_RESULT_TRUE;
		}
		break;
	case OVAL_CHECK_ONLY_ONE:
		if (ores->true_cnt == 1 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->noteval_cnt == 0) {
			result = OVAL_RESULT_TRUE;
		} else if (ores->true_cnt > 1) {
			result = OVAL_RESULT_FALSE;
		} else if (ores->true_cnt < 2 && ores->error_cnt > 0) {
			result = OVAL_RESULT_ERROR;
		} else if (ores->true_cnt < 2 && ores->error_cnt == 0 && ores->unknown_cnt > 0) {
			result = OVAL_RESULT_UNKNOWN;
		} else if (ores->true_cnt < 2 && ores->error_cnt == 0 && ores->unknown_cnt == 0 && ores->noteval_cnt > 0) {
			result = OVAL_RESULT_NOT_EVALUATED;
		} else if (ores->true_cnt != 1 && ores->false_cnt > 0) {
			result = OVAL_RESULT_FALSE;
		}
		break;
	default:
                oscap_seterr(OSCAP_EFAMILY_OSCAP, "Invalid check value: %d.", check);
		result = OVAL_RESULT_ERROR;
	}

	return result;
}
예제 #23
0
static oval_result_t eval_item(struct oval_syschar_model *syschar_model, struct oval_sysitem *cur_sysitem, struct oval_state *state)
{
	struct oval_state_content_iterator *state_contents_itr;
	struct oresults ste_ores;
	oval_operator_t operator;
	oval_result_t result = OVAL_RESULT_ERROR;

	ores_clear(&ste_ores);

	state_contents_itr = oval_state_get_contents(state);
	while (oval_state_content_iterator_has_more(state_contents_itr)) {
		struct oval_state_content *content;
		struct oval_entity *state_entity;
		char *state_entity_name;
		oval_operation_t state_entity_operation;
		oval_check_t entity_check;
		oval_existence_t check_existence;
		oval_result_t ste_ent_res;
		struct oval_sysent_iterator *item_entities_itr;
		struct oresults ent_ores;
		struct oval_status_counter counter;
		bool found_matching_item;

		if ((content = oval_state_content_iterator_next(state_contents_itr)) == NULL) {
			oscap_seterr(OSCAP_EFAMILY_OVAL, "OVAL internal error: found NULL state content");
			goto fail;
		}
		if ((state_entity = oval_state_content_get_entity(content)) == NULL) {
			oscap_seterr(OSCAP_EFAMILY_OVAL, "OVAL internal error: found NULL entity");
			goto fail;
		}
		if ((state_entity_name = oval_entity_get_name(state_entity)) == NULL) {
			oscap_seterr(OSCAP_EFAMILY_OVAL, "OVAL internal error: found NULL entity name");
			goto fail;
		}

		if (oscap_streq(state_entity_name, "line") &&
			oval_state_get_subtype(state) == (oval_subtype_t) OVAL_INDEPENDENT_TEXT_FILE_CONTENT) {
			/* Hack: textfilecontent_state/line shall be compared against textfilecontent_item/text.
			 *
			 * textfilecontent_test and textfilecontent54_test share the same syschar
			 * (textfilecontent_item). In OVAL 5.3 and below this syschar did not hold any usable
			 * information ('text' ent). In OVAL 5.4 textfilecontent_test was deprecated. But the
			 * 'text' ent has been added to textfilecontent_item, making it potentially usable. */
			oval_schema_version_t over = oval_state_get_platform_schema_version(state);
			if (oval_schema_version_cmp(over, OVAL_SCHEMA_VERSION(5.4)) >= 0) {
				/* The OVAL-5.3 does not have textfilecontent_item/text */
				state_entity_name = "text";
			}
		}

		entity_check = oval_state_content_get_ent_check(content);
		check_existence = oval_state_content_get_check_existence(content);
		state_entity_operation = oval_entity_get_operation(state_entity);

		ores_clear(&ent_ores);
		found_matching_item = false;
		oval_status_counter_clear(&counter);

		item_entities_itr = oval_sysitem_get_sysents(cur_sysitem);
		while (oval_sysent_iterator_has_more(item_entities_itr)) {
			struct oval_sysent *item_entity;
			oval_result_t ent_val_res;
			char *item_entity_name;
			oval_syschar_status_t item_status;

			item_entity = oval_sysent_iterator_next(item_entities_itr);
			if (item_entity == NULL) {
				oscap_seterr(OSCAP_EFAMILY_OVAL, "OVAL internal error: found NULL sysent");
				oval_sysent_iterator_free(item_entities_itr);
				goto fail;
			}
			item_status = oval_sysent_get_status(item_entity);
			oval_status_counter_add_status(&counter, item_status);

			item_entity_name = oval_sysent_get_name(item_entity);
			if (strcmp(item_entity_name, state_entity_name))
				continue;

			found_matching_item = true;

			/* copy mask attribute from state to item */
			if (oval_entity_get_mask(state_entity))
				oval_sysent_set_mask(item_entity,1);

			ent_val_res = _evaluate_sysent(syschar_model, item_entity, state_entity,
					state_entity_operation, content);
			if (((signed) ent_val_res) == -1) {
				oval_sysent_iterator_free(item_entities_itr);
				goto fail;
			}

			ores_add_res(&ent_ores, ent_val_res);
		}
		oval_sysent_iterator_free(item_entities_itr);

		if (!found_matching_item)
			dW("Entity name '%s' from state (id: '%s') not found in item (id: '%s').\n",
			   state_entity_name, oval_state_get_id(state), oval_sysitem_get_id(cur_sysitem));

		ste_ent_res = ores_get_result_bychk(&ent_ores, entity_check);
		ores_add_res(&ste_ores, ste_ent_res);
		oval_result_t cres = oval_status_counter_get_result(&counter, check_existence);
		ores_add_res(&ste_ores, cres);
	}
	oval_state_content_iterator_free(state_contents_itr);

	operator = oval_state_get_operator(state);
	result = ores_get_result_byopr(&ste_ores, operator);

	return result;

 fail:
	oval_state_content_iterator_free(state_contents_itr);

	return OVAL_RESULT_ERROR;
}
예제 #24
0
struct xccdf_tailoring *xccdf_tailoring_parse(xmlTextReaderPtr reader, struct xccdf_item *benchmark)
{
	XCCDF_ASSERT_ELEMENT(reader, XCCDFE_TAILORING);

	struct xccdf_tailoring *tailoring = xccdf_tailoring_new();

	const char *id = xccdf_attribute_get(reader, XCCDFA_ID);
	xccdf_tailoring_set_id(tailoring, id);

	int depth = oscap_element_depth(reader) + 1;

	// Read to the inside of Tailoring.
	xmlTextReaderRead(reader);

	while (oscap_to_start_element(reader, depth)) {
		switch (xccdf_element_get(reader)) {
		case XCCDFE_BENCHMARK_REF: {
			oscap_free(tailoring->benchmark_ref);
			tailoring->benchmark_ref = 0;

			oscap_free(tailoring->benchmark_ref_version);
			tailoring->benchmark_ref_version = 0;

			const char *ref = xccdf_attribute_get(reader, XCCDFA_HREF);
			if (ref)
				tailoring->benchmark_ref = oscap_strdup(ref);

			const char *ref_version = xccdf_attribute_get(reader, XCCDFA_VERSION);
			if (ref_version)
				tailoring->benchmark_ref_version = oscap_strdup(ref_version);

			break;
		}
		case XCCDFE_STATUS: {
			const char *date = xccdf_attribute_get(reader, XCCDFA_DATE);
			char *str = oscap_element_string_copy(reader);
			struct xccdf_status *status = xccdf_status_new_fill(str, date);
			oscap_free(str);
			oscap_list_add(tailoring->statuses, status);
			break;
		}
		case XCCDFE_DC_STATUS:
			oscap_list_add(tailoring->dc_statuses, oscap_reference_new_parse(reader));
			break;
		case XCCDFE_VERSION: {
			xmlNode *ver = xmlTextReaderExpand(reader);
			/* optional attributes */
			tailoring->version_time = (char*) xmlGetProp(ver, BAD_CAST "time");
			tailoring->version_update = (char*) xmlGetProp(ver, BAD_CAST "update");
			/* content */
			tailoring->version = (char *) xmlNodeGetContent(ver);
			if (oscap_streq(tailoring->version, "")) {
				oscap_free(tailoring->version);
				tailoring->version = NULL;
			}
			break;
		}
		case XCCDFE_METADATA: {
			char* xml = oscap_get_xml(reader);
			oscap_list_add(tailoring->metadata, oscap_strdup(xml));
			oscap_free(xml);
			break;
		}
		case XCCDFE_PROFILE: {
			struct xccdf_item *item = xccdf_profile_parse(reader, benchmark);
			if (!xccdf_tailoring_add_profile(tailoring, XPROFILE(item))) {
				dW("Failed to add profile to tailoring while parsing!");
			}
			break;
		}
		default:
			dW("Encountered an unknown element '%s' while parsing XCCDF Tailoring element.",
				xmlTextReaderConstLocalName(reader));
		}
		xmlTextReaderRead(reader);
	}

	return tailoring;
}
예제 #25
0
extern "C" magma_int_t 
magma_dsytrf_nopiv(magma_uplo_t uplo, magma_int_t n, 
                   double *A, magma_int_t lda, 
                   magma_int_t *info)
{
/*  -- MAGMA (version 1.6.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       November 2011

    Purpose   
    =======   

    DSYTRF_nopiv computes the LDLt factorization of a real symmetric   
    matrix A. This version does not require work space on the GPU passed 
    as input. GPU memory is allocated in the routine.

    The factorization has the form   
       A = U\*\*H * D * U,  if UPLO = 'U', or   
       A = L  * D * L\*\*H, if UPLO = 'L',   
    where U is an upper triangular matrix, L is lower triangular, and
    D is a diagonal matrix.

    This is the block version of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) DOUBLE_PRECISION array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the factor U or L from the Cholesky   
            factorization A = U\*\*H*U or A = L*L\*\*H.   

            Higher performance is achieved if A is in pinned memory, e.g.
            allocated using cudaMallocHost.

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value 
                  if INFO = -6, the GPU memory allocation failed 
            > 0:  if INFO = i, the leading minor of order i is not   
                  positive definite, and the factorization could not be   
                  completed.   

    =====================================================================    */


    /* Local variables */
    double zone  = MAGMA_D_ONE;
    double mzone = MAGMA_D_NEG_ONE;
    int                upper = (uplo == MagmaUpper);
    magma_int_t j, k, jb, ldda, nb, ib, iinfo;
    magmaDouble_ptr dA;
    magmaDouble_ptr dW;

    *info = 0;
    if (! upper && uplo != MagmaLower) {
      *info = -1;
    } else if (n < 0) {
      *info = -2;
    } else if (lda < max(1,n)) {
      *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return MAGMA_ERR_ILLEGAL_VALUE;
    }

    /* Quick return */
    if ( n == 0 )
      return MAGMA_SUCCESS;

    ldda = ((n+31)/32)*32;
    nb = magma_get_dsytrf_nopiv_nb(n);
    ib = min(32, nb); // inner-block for diagonal factorization

    if ((MAGMA_SUCCESS != magma_dmalloc(&dA, n *ldda)) ||
        (MAGMA_SUCCESS != magma_dmalloc(&dW, nb*ldda))) {
        /* alloc failed so call the non-GPU-resident version */
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_queue_t stream[2];
    magma_event_t event;
    magma_queue_create(&stream[0]);
    magma_queue_create(&stream[1]);
    magma_event_create( &event );
    trace_init( 1, 1, 2, (CUstream_st**)stream );

    //if (nb <= 1 || nb >= n) 
    //{
    //    lapackf77_dpotrf(uplo_, &n, a, &lda, info);
    //} else 
    {
        /* Use hybrid blocked code. */
        if (upper) {
            //=========================================================
            // Compute the LDLt factorization A = U'*D*U without pivoting.
            // copy matrix to GPU
            for (j=0; j<n; j+=nb) {
                jb = min(nb, (n-j));
                trace_gpu_start( 0, 0, "set", "set" );
                magma_dsetmatrix_async(j+jb, jb, A(0, j), lda, dA(0, j), ldda, stream[0]);
                trace_gpu_end( 0, 0 );
            }

            // main loop
            for (j=0; j<n; j += nb) {
                jb = min(nb, (n-j));

                // copy A(j,j) back to CPU
                trace_gpu_start( 0, 0, "get", "get" );
                magma_dgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), lda, stream[0]);
                trace_gpu_end( 0, 0 );

                // copy j-th column of U back to CPU
                magma_queue_wait_event( stream[1], event );
                trace_gpu_start( 0, 1, "get", "get" );
                magma_dgetmatrix_async(j, jb, dA(0, j), ldda, A(0, j), lda, stream[1]);
                trace_gpu_end( 0, 1 );

                // factorize the diagonal block
                magma_queue_sync(stream[0]);
                trace_cpu_start( 0, "potrf", "potrf" );
                dsytrf_nopiv_cpu(MagmaUpper, jb, ib, A(j, j), lda, info);
                trace_cpu_end( 0 );
                if (*info != 0){
                    *info = *info + j;
                    break;
                }

                // copy A(j,j) back to GPU
                trace_gpu_start( 0, 0, "set", "set" );
                magma_dsetmatrix_async(jb, jb, A(j, j), lda, dA(j, j), ldda, stream[0]);
                trace_gpu_end( 0, 0 );
                
                if ( (j+jb) < n) {
                    // compute the off-diagonal blocks of current block column
                    magmablasSetKernelStream( stream[0] );
                    trace_gpu_start( 0, 0, "trsm", "trsm" );
                    magma_dtrsm(MagmaLeft, MagmaUpper, MagmaConjTrans, MagmaUnit, 
                                jb, (n-j-jb), 
                                zone, dA(j, j),    ldda, 
                                      dA(j, j+jb), ldda);
                    magma_dcopymatrix( jb, n-j-jb, dA( j, j+jb ), ldda, dWt( 0, j+jb ), nb );

                    // update the trailing submatrix with D
                    magmablas_dlascl_diag(MagmaUpper, jb, n-j-jb,
                                          dA(j,    j), ldda,
                                          dA(j, j+jb), ldda,
                                          &iinfo);
                    magma_event_record( event, stream[0] );
                    trace_gpu_end( 0, 0 );

                    // update the trailing submatrix with U and W
                    trace_gpu_start( 0, 0, "gemm", "gemm" );
                    for (k=j+jb; k<n; k+=nb)
                    {
                        magma_int_t kb = min(nb,n-k);
                        magma_dgemm(MagmaConjTrans, MagmaNoTrans, kb, n-k, jb,
                                    mzone, dWt(0, k), nb, 
                                           dA(j, k), ldda,
                                    zone,  dA(k, k), ldda);
                    }
                    trace_gpu_end( 0, 0 );
                }
            }
        } else {
            //=========================================================
            // Compute the LDLt factorization A = L*D*L' without pivoting.
            // copy the matrix to GPU
            for (j=0; j<n; j+=nb) {
                jb = min(nb, (n-j));
                trace_gpu_start( 0, 0, "set", "set" );
                magma_dsetmatrix_async((n-j), jb, A(j, j), lda, dA(j, j), ldda, stream[0]);
                trace_gpu_end( 0, 0 );
            }

            // main loop
            for (j=0; j<n; j+=nb) {
                jb = min(nb, (n-j));

                // copy A(j,j) back to CPU
                trace_gpu_start( 0, 0, "get", "get" );
                magma_dgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), lda, stream[0]);
                trace_gpu_end( 0, 0 );

                // copy j-th row of L back to CPU
                magma_queue_wait_event( stream[1], event );
                trace_gpu_start( 0, 1, "get", "get" );
                magma_dgetmatrix_async(jb, j, dA(j, 0), ldda, A(j, 0), lda, stream[1]);
                trace_gpu_end( 0, 1 );

                // factorize the diagonal block
                magma_queue_sync(stream[0]);
                trace_cpu_start( 0, "potrf", "potrf" );
                dsytrf_nopiv_cpu(MagmaLower, jb, ib, A(j, j), lda, info);
                trace_cpu_end( 0 );
                if (*info != 0){
                    *info = *info + j;
                    break;
                }
                // copy A(j,j) back to GPU
                trace_gpu_start( 0, 0, "set", "set" );
                magma_dsetmatrix_async(jb, jb, A(j, j), lda, dA(j, j), ldda, stream[0]);
                trace_gpu_end( 0, 0 );
                
                if ( (j+jb) < n) {
                    // compute the off-diagonal blocks of current block column
                    magmablasSetKernelStream( stream[0] );
                    trace_gpu_start( 0, 0, "trsm", "trsm" );
                    magma_dtrsm(MagmaRight, MagmaLower, MagmaConjTrans, MagmaUnit, 
                                (n-j-jb), jb, 
                                zone, dA(j,    j), ldda, 
                                      dA(j+jb, j), ldda);
                    magma_dcopymatrix( n-j-jb,jb, dA( j+jb, j ), ldda, dW( j+jb, 0 ), ldda );

                    // update the trailing submatrix with D
                    magmablas_dlascl_diag(MagmaLower, n-j-jb, jb,
                                          dA(j,    j), ldda,
                                          dA(j+jb, j), ldda,
                                          &iinfo);
                    magma_event_record( event, stream[0] );
                    trace_gpu_end( 0, 0 );

                    // update the trailing submatrix with L and W
                    trace_gpu_start( 0, 0, "gemm", "gemm" );
                    for (k=j+jb; k<n; k+=nb)
                    {
                        magma_int_t kb = min(nb,n-k);
                        magma_dgemm(MagmaNoTrans, MagmaConjTrans, n-k, kb, jb,
                                    mzone, dA(k, j), ldda, 
                                           dW(k, 0), ldda,
                                    zone,  dA(k, k), ldda);
                    }
                    trace_gpu_end( 0, 0 );
                }
            }
        }
    }
    
    trace_finalize( "dsytrf.svg","trace.css" );
    magma_queue_destroy(stream[0]);
    magma_queue_destroy(stream[1]);
    magma_event_destroy( event );
    magma_free(dW);
    magma_free(dA);
    
    return MAGMA_SUCCESS;
} /* magma_dsytrf_nopiv */
예제 #26
0
/**
    Purpose   
    =======   

    SSYTRF_nopiv_gpu computes the LDLt factorization of a real symmetric   
    matrix A.

    The factorization has the form   
       A = U^H * D * U , if UPLO = 'U', or   
       A = L  * D * L^H, if UPLO = 'L',   
    where U is an upper triangular matrix, L is lower triangular, and
    D is a diagonal matrix.

    This is the block version of the algorithm, calling Level 3 BLAS.   

    Arguments
    ---------
    @param[in]
    UPLO    CHARACTER*1   
      -     = 'U':  Upper triangle of A is stored;   
      -     = 'L':  Lower triangle of A is stored.   

    @param[in]
    N       INTEGER   
            The order of the matrix A.  N >= 0.   

    @param[in,out]
    dA      REAL array on the GPU, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
    \n
            On exit, if INFO = 0, the factor U or L from the Cholesky   
            factorization A = U^H D U or A = L D L^H.   
    \n 
            Higher performance is achieved if A is in pinned memory, e.g.
            allocated using cudaMallocHost.

    @param[in]
    LDA     INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    @param[out]
    INFO    INTEGER   
      -     = 0:  successful exit   
      -     < 0:  if INFO = -i, the i-th argument had an illegal value 
                  if INFO = -6, the GPU memory allocation failed 
      -     > 0:  if INFO = i, the leading minor of order i is not   
                  positive definite, and the factorization could not be   
                  completed.   
    
    @ingroup magma_ssytrf_comp
    ******************************************************************* */
extern "C" magma_int_t
magma_ssytrf_nopiv_gpu(
    magma_uplo_t uplo, magma_int_t n,
    magmaFloat_ptr dA, magma_int_t ldda,
    magma_int_t *info)
{
    #define  A(i, j)  (A)
    #define dA(i, j)  (dA +(j)*ldda + (i))
    #define dW(i, j)  (dW +(j)*ldda + (i))
    #define dWt(i, j) (dW +(j)*nb   + (i))

    /* Local variables */
    float zone  = MAGMA_S_ONE;
    float mzone = MAGMA_S_NEG_ONE;
    int                upper = (uplo == MagmaUpper);
    magma_int_t j, k, jb, nb, ib, iinfo;

    *info = 0;
    if (! upper && uplo != MagmaLower) {
      *info = -1;
    } else if (n < 0) {
      *info = -2;
    } else if (ldda < max(1,n)) {
      *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return MAGMA_ERR_ILLEGAL_VALUE;
    }

    /* Quick return */
    if ( n == 0 )
      return MAGMA_SUCCESS;

    nb = magma_get_ssytrf_nopiv_nb(n);
    ib = min(32, nb); // inner-block for diagonal factorization

    magma_queue_t orig_stream;
    magmablasGetKernelStream( &orig_stream );


    magma_queue_t stream[2];
    magma_event_t event;
    magma_queue_create(&stream[0]);
    magma_queue_create(&stream[1]);
    magma_event_create( &event );
    trace_init( 1, 1, 2, stream );

    // CPU workspace
    float *A;
    if (MAGMA_SUCCESS != magma_smalloc_pinned( &A, nb*nb )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }

    // GPU workspace
    magmaFloat_ptr dW;
    if (MAGMA_SUCCESS != magma_smalloc( &dW, (1+nb)*ldda )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    /* Use hybrid blocked code. */
    if (upper) {
        //=========================================================
        // Compute the LDLt factorization A = U'*D*U without pivoting.
        // main loop
        for (j=0; j<n; j += nb) {
            jb = min(nb, (n-j));
            
            // copy A(j,j) back to CPU
            trace_gpu_start( 0, 0, "get", "get" );
            //magma_queue_wait_event( stream[1], event );                                                                
            magma_event_sync(event);
            magma_sgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), nb, stream[1]);
            trace_gpu_end( 0, 0 );

            // factorize the diagonal block
            magma_queue_sync(stream[1]);
            trace_cpu_start( 0, "potrf", "potrf" );
            ssytrf_nopiv_cpu(MagmaUpper, jb, ib, A(j, j), nb, info);
            trace_cpu_end( 0 );
            if (*info != 0){
                *info = *info + j;
                break;
            }
            
            // copy A(j,j) back to GPU
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(jb, jb, A(j, j), nb, dA(j, j), ldda, stream[0]);
            trace_gpu_end( 0, 0 );
                
            if ( (j+jb) < n) {
                // compute the off-diagonal blocks of current block column
                magmablasSetKernelStream( stream[0] );
                trace_gpu_start( 0, 0, "trsm", "trsm" );
                magma_strsm(MagmaLeft, MagmaUpper, MagmaConjTrans, MagmaUnit, 
                            jb, (n-j-jb), 
                            zone, dA(j, j),    ldda, 
                            dA(j, j+jb), ldda);
                magma_scopymatrix( jb, n-j-jb, dA( j, j+jb ), ldda, dWt( 0, j+jb ), nb );
                
                // update the trailing submatrix with D
                magmablas_slascl_diag(MagmaUpper, jb, n-j-jb,
                                      dA(j,    j), ldda,
                                      dA(j, j+jb), ldda,
                                      &iinfo);
                trace_gpu_end( 0, 0 );
                
                // update the trailing submatrix with U and W
                trace_gpu_start( 0, 0, "gemm", "gemm" );
                for (k=j+jb; k<n; k+=nb) {
                    magma_int_t kb = min(nb,n-k);
                    magma_sgemm(MagmaConjTrans, MagmaNoTrans, kb, n-k, jb,
                                mzone, dWt(0, k), nb, 
                                       dA(j, k), ldda,
                                zone,  dA(k, k), ldda);
                    if (k==j+jb)
                        magma_event_record( event, stream[0] );
                }
                trace_gpu_end( 0, 0 );
            }
        }
    } else {
        //=========================================================
        // Compute the LDLt factorization A = L*D*L' without pivoting.
        // main loop
        for (j=0; j<n; j+=nb) {
            jb = min(nb, (n-j));
            
            // copy A(j,j) back to CPU
            trace_gpu_start( 0, 0, "get", "get" );
            //magma_queue_wait_event( stream[0], event );                                                                
            magma_event_sync(event);
            magma_sgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), nb, stream[1]);
            trace_gpu_end( 0, 0 );
            
            // factorize the diagonal block
            magma_queue_sync(stream[1]);
            trace_cpu_start( 0, "potrf", "potrf" );
            ssytrf_nopiv_cpu(MagmaLower, jb, ib, A(j, j), nb, info);
            trace_cpu_end( 0 );
            if (*info != 0){
                *info = *info + j;
                break;
            }

            // copy A(j,j) back to GPU
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(jb, jb, A(j, j), nb, dA(j, j), ldda, stream[0]);
            trace_gpu_end( 0, 0 );
            
            if ( (j+jb) < n) {
                // compute the off-diagonal blocks of current block column
                magmablasSetKernelStream( stream[0] );
                trace_gpu_start( 0, 0, "trsm", "trsm" );
                magma_strsm(MagmaRight, MagmaLower, MagmaConjTrans, MagmaUnit, 
                            (n-j-jb), jb, 
                            zone, dA(j,    j), ldda, 
                            dA(j+jb, j), ldda);
                magma_scopymatrix( n-j-jb,jb, dA( j+jb, j ), ldda, dW( j+jb, 0 ), ldda );
                
                // update the trailing submatrix with D
                magmablas_slascl_diag(MagmaLower, n-j-jb, jb,
                                      dA(j,    j), ldda,
                                      dA(j+jb, j), ldda,
                                      &iinfo);
                trace_gpu_end( 0, 0 );
                
                // update the trailing submatrix with L and W
                trace_gpu_start( 0, 0, "gemm", "gemm" );
                for (k=j+jb; k<n; k+=nb) {
                    magma_int_t kb = min(nb,n-k);
                    magma_sgemm(MagmaNoTrans, MagmaConjTrans, n-k, kb, jb,
                                mzone, dA(k, j), ldda, 
                                       dW(k, 0), ldda,
                                zone,  dA(k, k), ldda);
                    if (k==j+jb)
                        magma_event_record( event, stream[0] );
                }
                trace_gpu_end( 0, 0 );
            }
        }
    }
    
    trace_finalize( "ssytrf.svg","trace.css" );
    magma_queue_destroy(stream[0]);
    magma_queue_destroy(stream[1]);
    magma_event_destroy( event );
    magma_free( dW );
    magma_free_pinned( A );
    
    magmablasSetKernelStream( orig_stream );
    return MAGMA_SUCCESS;
} /* magma_ssytrf_nopiv */
예제 #27
0
int oval_probe_query_object(oval_probe_session_t *psess, struct oval_object *object, int flags, struct oval_syschar **out_syschar)
{
	char *oid;
	struct oval_syschar *sysc;
        oval_subtype_t type;
        oval_ph_t *ph;
	struct oval_string_map *vm;
	struct oval_syschar_model *model;
	int ret;

	oid = oval_object_get_id(object);
	model = psess->sys_model;

	dI("Querying object id: \"%s\", flags: %u.\n", oid, flags);

	sysc = oval_syschar_model_get_syschar(model, oid);
	if (sysc != NULL) {
		int variable_instance_hint = oval_syschar_get_variable_instance_hint(sysc);
		if (oval_syschar_get_variable_instance_hint(sysc) != oval_syschar_get_variable_instance(sysc)) {
			dI("Creating another syschar for variable_instance=%d)\n", variable_instance_hint);
			sysc = oval_syschar_new(model, object);
			oval_syschar_set_variable_instance(sysc, variable_instance_hint);
			oval_syschar_set_variable_instance_hint(sysc, variable_instance_hint);
		}
		else {
			oval_syschar_collection_flag_t sc_flg;

			sc_flg = oval_syschar_get_flag(sysc);

			dI("Syschar already exists, flag: %u, '%s'.\n", sc_flg, oval_syschar_collection_flag_get_text(sc_flg));

			if (sc_flg != SYSCHAR_FLAG_UNKNOWN || (flags & OVAL_PDFLAG_NOREPLY)) {
				if (out_syschar)
					*out_syschar = sysc;
				return 0;
			}
		}
	} else
		sysc = oval_syschar_new(model, object);

	if (out_syschar)
		*out_syschar = sysc;

	type = oval_object_get_subtype(object);
	ph = oval_probe_handler_get(psess->ph, type);

        if (ph == NULL) {
                char *msg = "OVAL object not supported.";

		dW("%s\n", msg);
		oval_syschar_add_new_message(sysc, msg, OVAL_MESSAGE_LEVEL_WARNING);
		oval_syschar_set_flag(sysc, SYSCHAR_FLAG_NOT_COLLECTED);

		return 1;
        }

	if ((ret = ph->func(type, ph->uptr, PROBE_HANDLER_ACT_EVAL, sysc, flags)) != 0) {
		return ret;
	}

	if (!(flags & OVAL_PDFLAG_NOREPLY)) {
		vm = oval_string_map_new();
		oval_obj_collect_var_refs(object, vm);
		_syschar_add_bindings(sysc, vm);
		oval_string_map_free(vm, NULL);
	}

	return 0;
}
예제 #28
0
/**
    Purpose
    =======

    SSYTRF_nopiv computes the LDLt factorization of a real symmetric
    matrix A. This version does not require work space on the GPU passed
    as input. GPU memory is allocated in the routine.

    The factorization has the form
       A = U^H * D * U,   if UPLO = MagmaUpper, or
       A = L   * D * L^H, if UPLO = MagmaLower,
    where U is an upper triangular matrix, L is lower triangular, and
    D is a diagonal matrix.

    This is the block version of the algorithm, calling Level 3 BLAS.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangle of A is stored;
      -     = MagmaLower:  Lower triangle of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in,out]
    A       REAL array, dimension (LDA,N)
            On entry, the symmetric matrix A.  If UPLO = MagmaUpper, the leading
            N-by-N upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = MagmaLower, the
            leading N-by-N lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.
    \n
            On exit, if INFO = 0, the factor U or L from the Cholesky
            factorization A = U^H D U or A = L D L^H.
    \n
            Higher performance is achieved if A is in pinned memory.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value
                  if INFO = -6, the GPU memory allocation failed
      -     > 0:  if INFO = i, the leading minor of order i is not
                  positive definite, and the factorization could not be
                  completed.

    @ingroup magma_ssysv_comp
    ******************************************************************* */
extern "C" magma_int_t
magma_ssytrf_nopiv(
    magma_uplo_t uplo, magma_int_t n,
    float *A, magma_int_t lda,
    magma_int_t *info)
{
    #define  A(i, j)  ( A +(j)*lda  + (i))
    #define dA(i, j)  (dA +(j)*ldda + (i))
    #define dW(i, j)  (dW +(j)*ldda + (i))
    #define dWt(i, j) (dW +(j)*nb   + (i))

    /* Constants */
    const float c_one     = MAGMA_S_ONE;
    const float c_neg_one = MAGMA_S_NEG_ONE;
    
    /* Local variables */
    bool upper = (uplo == MagmaUpper);
    magma_int_t j, k, jb, ldda, nb, ib, iinfo;
    magmaFloat_ptr dA;
    magmaFloat_ptr dW;

    *info = 0;
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return MAGMA_ERR_ILLEGAL_VALUE;
    }

    /* Quick return */
    if ( n == 0 )
      return MAGMA_SUCCESS;

    ldda = magma_roundup( n, 32 );
    nb = magma_get_ssytrf_nopiv_nb(n);
    ib = min(32, nb); // inner-block for diagonal factorization

    if ((MAGMA_SUCCESS != magma_smalloc(&dA, n *ldda)) ||
        (MAGMA_SUCCESS != magma_smalloc(&dW, nb*ldda))) {
        /* alloc failed so call the non-GPU-resident version */
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_device_t cdev;
    magma_queue_t queues[2];
    magma_event_t event;

    magma_getdevice( &cdev );
    magma_queue_create( cdev, &queues[0] );
    magma_queue_create( cdev, &queues[1] );
    magma_event_create( &event );
    trace_init( 1, 1, 2, queues );

    /* Use hybrid blocked code. */
    if (upper) {
        //=========================================================
        // Compute the LDLt factorization A = U'*D*U without pivoting.
        // copy matrix to GPU
        for (j=0; j < n; j += nb) {
            jb = min(nb, (n-j));
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(j+jb, jb, A(0, j), lda, dA(0, j), ldda, queues[0]);
            trace_gpu_end( 0, 0 );
        }
        
        // main loop
        for (j=0; j < n; j += nb) {
            jb = min(nb, (n-j));
            
            // copy A(j,j) back to CPU
            trace_gpu_start( 0, 0, "get", "get" );
            if ( j != 0) {
                //magma_event_sync(event);
                magma_sgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), lda, queues[1]);
            }
            trace_gpu_end( 0, 0 );
            
            // factorize the diagonal block
            magma_queue_sync(queues[1]);
            trace_cpu_start( 0, "potrf", "potrf" );
            magma_ssytrf_nopiv_cpu( MagmaUpper, jb, ib, A(j, j), lda, info );
            trace_cpu_end( 0 );
            if (*info != 0) {
                *info = *info + j;
                break;
            }
            
            // copy A(j,j) back to GPU
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(jb, jb, A(j, j), lda, dA(j, j), ldda, queues[0]);
            trace_gpu_end( 0, 0 );
            
            // copy j-th column of U back to CPU
            trace_gpu_start( 0, 1, "get", "get" );
            magma_sgetmatrix_async(j, jb, dA(0, j), ldda, A(0, j), lda, queues[1]);
            trace_gpu_end( 0, 1 );

            if ( (j+jb) < n) {
                // compute the off-diagonal blocks of current block column
                trace_gpu_start( 0, 0, "trsm", "trsm" );
                magma_strsm( MagmaLeft, MagmaUpper, MagmaConjTrans, MagmaUnit,
                             jb, (n-j-jb),
                             c_one, dA(j, j), ldda,
                             dA(j, j+jb), ldda,
                             queues[0] );
                magma_scopymatrix( jb, n-j-jb, dA( j, j+jb ), ldda, dWt( 0, j+jb ), nb, queues[0] );
                
                // update the trailing submatrix with D
                magmablas_slascl_diag( MagmaUpper, jb, n-j-jb,
                                       dA(j,    j), ldda,
                                       dA(j, j+jb), ldda,
                                       queues[0], &iinfo);
                trace_gpu_end( 0, 0 );
                
                // update the trailing submatrix with U and W
                trace_gpu_start( 0, 0, "gemm", "gemm" );
                for (k=j+jb; k < n; k += nb) {
                    magma_int_t kb = min(nb,n-k);
                    magma_sgemm( MagmaConjTrans, MagmaNoTrans, kb, n-k, jb,
                                 c_neg_one, dWt(0, k), nb,
                                            dA(j, k),  ldda,
                                 c_one,     dA(k, k),  ldda,
                                 queues[0]);
                    if (k == j+jb) {
                        // magma_event_record( event, queues[0] );
                        magma_queue_sync( queues[0] );
                    }
                }
                trace_gpu_end( 0, 0 );
            }
        }
    } else {
        //=========================================================
        // Compute the LDLt factorization A = L*D*L' without pivoting.
        // copy the matrix to GPU
        for (j=0; j < n; j += nb) {
            jb = min(nb, (n-j));
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async((n-j), jb, A(j, j), lda, dA(j, j), ldda, queues[0]);
            trace_gpu_end( 0, 0 );
        }
        
        // main loop
        for (j=0; j < n; j += nb) {
            jb = min(nb, (n-j));
            
            // copy A(j,j) back to CPU
            trace_gpu_start( 0, 0, "get", "get" );
            if (j != 0) {
                //magma_event_sync(event);
                magma_sgetmatrix_async(jb, jb, dA(j, j), ldda, A(j,j), lda, queues[1]);
            }
            trace_gpu_end( 0, 0 );
            
            // factorize the diagonal block
            magma_queue_sync(queues[1]);
            trace_cpu_start( 0, "potrf", "potrf" );
            magma_ssytrf_nopiv_cpu( MagmaLower, jb, ib, A(j, j), lda, info );
            trace_cpu_end( 0 );
            if (*info != 0) {
                *info = *info + j;
                break;
            }

            // copy A(j,j) back to GPU
            trace_gpu_start( 0, 0, "set", "set" );
            magma_ssetmatrix_async(jb, jb, A(j, j), lda, dA(j, j), ldda, queues[0]);
            trace_gpu_end( 0, 0 );
            
            // copy j-th row of L back to CPU
            trace_gpu_start( 0, 1, "get", "get" );
            magma_sgetmatrix_async(jb, j, dA(j, 0), ldda, A(j, 0), lda, queues[1]);
            trace_gpu_end( 0, 1 );
            
            if ( (j+jb) < n) {
                // compute the off-diagonal blocks of current block column
                trace_gpu_start( 0, 0, "trsm", "trsm" );
                magma_strsm( MagmaRight, MagmaLower, MagmaConjTrans, MagmaUnit,
                             (n-j-jb), jb,
                             c_one, dA(j, j), ldda,
                             dA(j+jb, j), ldda,
                             queues[0] );
                magma_scopymatrix( n-j-jb,jb, dA( j+jb, j ), ldda, dW( j+jb, 0 ), ldda, queues[0] );
                
                // update the trailing submatrix with D
                magmablas_slascl_diag( MagmaLower, n-j-jb, jb,
                                       dA(j,    j), ldda,
                                       dA(j+jb, j), ldda,
                                       queues[0], &iinfo );
                trace_gpu_end( 0, 0 );
                
                // update the trailing submatrix with L and W
                trace_gpu_start( 0, 0, "gemm", "gemm" );
                for (k=j+jb; k < n; k += nb) {
                    magma_int_t kb = min(nb,n-k);
                    magma_sgemm( MagmaNoTrans, MagmaConjTrans, n-k, kb, jb,
                                 c_neg_one, dA(k, j), ldda,
                                            dW(k, 0), ldda,
                                 c_one,     dA(k, k), ldda,
                                 queues[0] );
                    if (k == j+jb) {
                        //magma_event_record( event, queues[0] );
                        magma_queue_sync(queues[0]);
                    }
                }
                trace_gpu_end( 0, 0 );
            }
        }
    }
    
    trace_finalize( "ssytrf.svg","trace.css" );
    magma_queue_destroy(queues[0]);
    magma_queue_destroy(queues[1]);
    magma_event_destroy( event );
    magma_free(dW);
    magma_free(dA);
    
    return MAGMA_SUCCESS;
} /* magma_ssytrf_nopiv */
예제 #29
0
/**
    Purpose
    -------
    CLAHRU is an auxiliary MAGMA routine that is used in CGEHRD to update
    the trailing sub-matrices after the reductions of the corresponding
    panels.
    See further details below.

    Arguments
    ---------
    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in]
    ihi     INTEGER
            Last row to update. Same as IHI in cgehrd.

    @param[in]
    k       INTEGER
            Number of rows of the matrix Am (see details below)

    @param[in]
    nb      INTEGER
            Block size

    @param[out]
    A       COMPLEX array, dimension (LDA,N-K)
            On entry, the N-by-(N-K) general matrix to be updated. The
            computation is done on the GPU. After Am is updated on the GPU
            only Am(1:NB) is transferred to the CPU - to update the
            corresponding Am matrix. See Further Details below.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[in,out]
    data    Structure with pointers to dA, dT, dV, dW, dY
            which are distributed across multiple GPUs.

    Further Details
    ---------------
    This implementation follows the algorithm and notations described in:

    S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
    form through hybrid GPU-based computing," University of Tennessee Computer
    Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
    May 24, 2009.

    The difference is that here Am is computed on the GPU.
    M is renamed Am, G is renamed Ag.

    @ingroup magma_cgeev_aux
    ********************************************************************/
extern "C" magma_int_t
magma_clahru_m(
    magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb,
    magmaFloatComplex *A, magma_int_t lda,
    struct cgehrd_data* data )
{
    #define dA(  d, i, j ) (data->A [d] + (i) + (j)*ldda)
    #define dTi( d       ) (data->Ti[d])
    #define dV(  d, i, j ) (data->V [d] + (i) + (j)*ldv )
    #define dVd( d, i, j ) (data->Vd[d] + (i) + (j)*ldvd)
    #define dW(  d, i, j ) (data->W [d] + (i) + (j)*ldda)
    #define dY(  d, i, j ) (data->Y [d] + (i) + (j)*ldda)
    
    magmaFloatComplex c_zero    = MAGMA_C_ZERO;
    magmaFloatComplex c_one     = MAGMA_C_ONE;
    magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;

    magma_int_t ngpu = data->ngpu;
    magma_int_t ldda = data->ldda;
    magma_int_t ldv  = data->ldv;
    magma_int_t ldvd = data->ldvd;
    
    magma_int_t d;
    magma_int_t dk, dkhi, dknb, dn;
    
    magma_int_t info = 0;
    if (n < 0) {
        info = -1;
    } else if (ihi < 0 || ihi > n) {
        info = -2;
    } else if (k < 0 || k > n) {
        info = -3;
    } else if (nb < 1 || nb > n) {
        info = -4;
    } else if (lda < max(1,n)) {
        info = -6;
    }
    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return info;
    }
    
    magma_device_t orig_dev;
    magma_getdevice( &orig_dev );
    magma_queue_t orig_stream;
    magmablasGetKernelStream( &orig_stream );
    
    for( d = 0; d < ngpu; ++d ) {
        magma_setdevice( d );
        magmablasSetKernelStream( data->streams[d] );
        
        // convert global indices (k) to local indices (dk)
        magma_indices_1D_bcyclic( nb, ngpu, d, k,    ihi, &dk,   &dkhi );
        magma_indices_1D_bcyclic( nb, ngpu, d, k+nb, n,   &dknb, &dn   );
        
        // -----
        // on right, A := A Q = A - A V T V'
        // Update Am = Am - Am V T Vd' = Am - Ym Wd', with Wd = Vd T'
        // Wd = Vd T' = V(k:ihi-1, 0:nb-1) * T(0:nb-1, 0:nb-1)'
        // Vd and Wd are the portions corresponding to the block cyclic dkstribution
        magma_cgemm( MagmaNoTrans, MagmaConjTrans, dkhi-dk, nb, nb,
                     c_one,  dVd(d, dk, 0), ldvd,
                             dTi(d),        nb,
                     c_zero, dW (d, dk, 0), ldda );
        
        // Am = Am - Ym Wd' = A(0:k-1, k:ihi-1) - Ym(0:k-1, 0:nb-1) * W(k:ihi-1, 0:nb-1)'
        magma_cgemm( MagmaNoTrans, MagmaConjTrans, k, dkhi-dk, nb,
                     c_neg_one, dY(d, 0,  0),  ldda,
                                dW(d, dk, 0),  ldda,
                     c_one,     dA(d, 0,  dk), ldda );

        // -----
        // on right, A := A Q = A - A V T V'
        // Update Ag = Ag - Ag V T V' = Ag - Yg Wd'
        // Ag = Ag - Yg Wd' = A(k:ihi-1, nb:ihi-k-1) - Y(k:ihi-1, 0:nb-1) * W(k+nb:ihi-1, 0:nb-1)'
        magma_cgemm( MagmaNoTrans, MagmaConjTrans, ihi-k, dkhi-dknb, nb,
                     c_neg_one, dY(d, k,    0),    ldda,
                                dW(d, dknb, 0),    ldda,
                     c_one,     dA(d, k,    dknb), ldda );
        
        // -----
        // on left, A := Q' A = A - V T' V' A
        // Ag2 = Ag2 - V T' V' Ag2 = W Yg, with W = V T' and Yg = V' Ag2
        // Note that Ag is A(k:ihi, nb+1:ihi-k)
        // while    Ag2 is A(k:ihi, nb+1: n -k)
        
        // here V and W are the whole matrices, not just block cyclic portion
        // W = V T' = V(k:ihi-1, 0:nb-1) * T(0:nb-1, 0:nb-1)'
        // TODO would it be cheaper to compute the whole matrix and
        // copy the block cyclic portions to another workspace?
        magma_cgemm( MagmaNoTrans, MagmaConjTrans, ihi-k, nb, nb,
                     c_one,  dV (d, k, 0), ldv,
                             dTi(d),       nb,
                     c_zero, dW (d, k, 0), ldda );
        
        // Z = V(k:ihi-1, 0:nb-1)' * A(k:ihi-1, nb:n-k-1);  Z is stored over Y
        magma_cgemm( MagmaConjTrans, MagmaNoTrans, nb, dn-dknb, ihi-k,
                     c_one,  dV(d, k, 0),    ldv,
                             dA(d, k, dknb), ldda,
                     c_zero, dY(d, 0, 0),    nb );
        
        // Ag2 = Ag2 - W Z = A(k:ihi-1, k+nb:n-1) - W(k+nb:n-1, 0:nb-1) * Z(0:nb-1, k+nb:n-1)
        magma_cgemm( MagmaNoTrans, MagmaNoTrans, ihi-k, dn-dknb, nb,
                     c_neg_one, dW(d, k, 0),    ldda,
                                dY(d, 0, 0),    nb,
                     c_one,     dA(d, k, dknb), ldda );
    }
    
    magma_setdevice( orig_dev );
    magmablasSetKernelStream( orig_stream );
    
    return info;
}
예제 #30
0
파일: rule.c 프로젝트: AxelNennker/openscap
struct xccdf_item *xccdf_rule_parse(xmlTextReaderPtr reader, struct xccdf_item *parent)
{
	XCCDF_ASSERT_ELEMENT(reader, XCCDFE_RULE);

	struct xccdf_item *rule = xccdf_rule_new_internal(parent);

	if (!xccdf_item_process_attributes(rule, reader)) {
		xccdf_rule_free(rule);
		return NULL;
	}
	if (xccdf_attribute_has(reader, XCCDFA_ROLE)) {
		rule->sub.rule.role = oscap_string_to_enum(XCCDF_ROLE_MAP, xccdf_attribute_get(reader, XCCDFA_ROLE));
		rule->item.defined_flags.role = true;
	}
	if (xccdf_attribute_has(reader, XCCDFA_SEVERITY)) {
		rule->sub.rule.severity =
		    oscap_string_to_enum(XCCDF_LEVEL_MAP, xccdf_attribute_get(reader, XCCDFA_SEVERITY));
		rule->item.defined_flags.severity = true;
	}

	int depth = oscap_element_depth(reader) + 1;

	while (oscap_to_start_element(reader, depth)) {
		switch (xccdf_element_get(reader)) {
		case XCCDFE_REQUIRES:
		case XCCDFE_CONFLICTS:
			xccdf_item_parse_deps(reader, rule);
			break;
		case XCCDFE_PROFILE_NOTE:{
				const char *tag = xccdf_attribute_get(reader, XCCDFA_TAG);
				if (tag == NULL)
					break;
				struct xccdf_profile_note *note = xccdf_profile_note_new();
				note->reftag = strdup(tag);
				note->text = oscap_text_new_parse(XCCDF_TEXT_PROFNOTE, reader);
				oscap_list_add(rule->sub.rule.profile_notes, note);
				break;
			}
                case XCCDFE_COMPLEX_CHECK:
		case XCCDFE_CHECK:{
				struct xccdf_check *check = xccdf_check_parse(reader);
				if (check == NULL)
					break;
				oscap_list_add(rule->sub.rule.checks, check);
				break;
			}
		case XCCDFE_FIX:
			oscap_list_add(rule->sub.rule.fixes, xccdf_fix_parse(reader));
			break;
		case XCCDFE_FIXTEXT:
			oscap_list_add(rule->sub.rule.fixtexts, xccdf_fixtext_parse(reader));
			break;
		case XCCDFE_IDENT:
			oscap_list_add(rule->sub.rule.idents, xccdf_ident_parse(reader));
			break;
		default:
			if (!xccdf_item_process_element(rule, reader))
				dW("Encountered an unknown element '%s' while parsing XCCDF group.",
				   xmlTextReaderConstLocalName(reader));
		}
		xmlTextReaderRead(reader);
	}

	return rule;
}