 * This is an actual instance of the scan, running on a scan thread.
 * It reads from the node fd until it finds the last message, calling
 * task->callback on the returned data along the way. The returned data is a
 * bin named SUCCESS or FAILURE, and the value of the bin is the return value
 * from the UDF.
// Run one scan request against a single cluster node and hand the results to
// the task's callback.
//
// node - cluster node to talk to; a socket fd is obtained from it with
//        cl_cluster_node_fd_get() and its name is used in log messages.
// task - scan task; supplies the already-built request (scan_buf / scan_sz),
//        the cluster handle (asc) and the callback / udata pair.
//
// Returns rc: CITRUSLEAF_OK initially, CITRUSLEAF_FAIL_CLIENT on the visible
// network / protocol / allocation failures, or the server's result code when
// a message carries something other than CL_RESULT_OK.
//
// NOTE(review): this excerpt appears to have lost lines (closing braces,
// some returns, frees, #endif). The comments below describe only what is
// visible; confirm against the complete file.
static int cl_scan_worker_do(cl_cluster_node * node, cl_scan_task * task) {

    // Response bodies are read into rd_stack_buf when they fit, otherwise
    // into a malloc'd buffer (see the size check further down).
    uint8_t     rd_stack_buf[STACK_BUF_SZ] = {0};    
    uint8_t *   rd_buf = rd_stack_buf;
    size_t      rd_buf_sz = 0;

    int fd = cl_cluster_node_fd_get(node, false, task->asc->nbconnect);
    if ( fd == -1 ) { 
        LOG("[ERROR] cl_scan_worker_do: cannot get fd for node %s ",node->name);
        // NOTE(review): no return is visible after this log; confirm the
        // function bails out here, otherwise fd == -1 is used below.

    // send it to the cluster - non blocking socket, but we're blocking
    if (0 != cf_socket_write_forever(fd, (uint8_t *) task->scan_buf, (size_t) task->scan_sz)) {
        // NOTE(review): the body of this failure branch is not visible here.

    cl_proto  proto;
    int       rc   = CITRUSLEAF_OK;
    bool      done = false;

    // Outer loop: one iteration per cl_proto frame, until a message sets done.
    do {
        // multiple CL proto per response
        // Now turn around and read a fine cl_proto - that's the first 8 bytes 
        // that has types and lengths
        if ( (rc = cf_socket_read_forever(fd, (uint8_t *) &proto, sizeof(cl_proto) ) ) ) {
            LOG("[ERROR] cl_scan_worker_do: network error: errno %d fd %d node name %s\n", rc, fd, node->name);
            // NOTE(review): this and the following early returns do not
            // visibly release fd (cl_cluster_node_fd_put is only called after
            // the loop) - confirm whether that is intended.
            return CITRUSLEAF_FAIL_CLIENT;

        if ( proto.version != CL_PROTO_VERSION) {
            LOG("[ERROR] cl_scan_worker_do: network error: received protocol message of wrong version %d from node %s\n", proto.version, node->name);
            return CITRUSLEAF_FAIL_CLIENT;

        // Only plain and compressed CL messages are accepted.
        if ( proto.type != CL_PROTO_TYPE_CL_MSG && proto.type != CL_PROTO_TYPE_CL_MSG_COMPRESSED ) {
            LOG("[ERROR] cl_scan_worker_do: network error: received incorrect message version %d from node %s \n",proto.type, node->name);
            return CITRUSLEAF_FAIL_CLIENT;

        // second read for the remainder of the message - expect this to cover 
        // lots of data, many lines if there's no error
        rd_buf_sz =  proto.sz;
        if (rd_buf_sz > 0) {

            // Heap buffer only when the body does not fit in the stack buffer.
            if (rd_buf_sz > sizeof(rd_stack_buf)){
                rd_buf = malloc(rd_buf_sz);
            else {
                rd_buf = rd_stack_buf;

            // malloc failure
            if (rd_buf == NULL) {
            	return CITRUSLEAF_FAIL_CLIENT;

            if ( (rc = cf_socket_read_forever(fd, rd_buf, rd_buf_sz)) ) {
                LOG("[ERROR] cl_scan_worker_do: network error: errno %d fd %d node name %s\n", rc, fd, node->name);
                if ( rd_buf != rd_stack_buf ) free(rd_buf);
                return CITRUSLEAF_FAIL_CLIENT;

        // process all the cl_msg in this proto
        uint8_t *   buf = rd_buf;
        uint        pos = 0;
        cl_bin      stack_bins[STACK_BINS];
        cl_bin *    bins;

        // Inner loop: walk every cl_msg packed into this frame's body.
        // pos counts the bytes consumed so far.
        while (pos < rd_buf_sz) {

            uint8_t *   buf_start = buf;
            cl_msg *    msg = (cl_msg *) buf;

            buf += sizeof(cl_msg);

            if ( msg->header_sz != sizeof(cl_msg) ) {
                LOG("[ERROR] cl_scan_worker_do: received cl msg of unexpected size: expecting %zd found %d, internal error\n",
                // NOTE(review): the argument line of this LOG call is not
                // visible in this excerpt.
                return CITRUSLEAF_FAIL_CLIENT;

            // parse through the fields
            cf_digest       keyd;
            char            ns_ret[33]  = {0};
            char *          set_ret     = NULL;
            cl_msg_field *  mf          = (cl_msg_field *)buf;

            for (int i=0; i < msg->n_fields; i++) {
                if (mf->type == CL_MSG_FIELD_TYPE_KEY) {
                    LOG("[ERROR] cl_scan_worker_do: read: found a key - unexpected\n");
                else if (mf->type == CL_MSG_FIELD_TYPE_DIGEST_RIPE) {
                    // The digest is copied out of the read buffer.
                    memcpy(&keyd, mf->data, sizeof(cf_digest));
                else if (mf->type == CL_MSG_FIELD_TYPE_NAMESPACE) {
                    // NOTE(review): the copy length comes from the wire and no
                    // bounds check against sizeof(ns_ret) is visible; a value
                    // longer than 32 bytes would overrun ns_ret.
                    memcpy(ns_ret, mf->data, cl_msg_field_get_value_sz(mf));
                    ns_ret[ cl_msg_field_get_value_sz(mf) ] = 0;
                else if (mf->type == CL_MSG_FIELD_TYPE_SET) {
                    // Set name is copied into a fresh NUL-terminated heap string.
                    // NOTE(review): the malloc result is used without a visible
                    // NULL check.
                    uint32_t set_name_len = cl_msg_field_get_value_sz(mf);
                    set_ret = (char *)malloc(set_name_len + 1);
                    memcpy(set_ret, mf->data, set_name_len);
                    set_ret[ set_name_len ] = '\0';
                mf = cl_msg_field_get_next(mf);

            // The ops start right after the last field.
            buf = (uint8_t *) mf;
            // Use the stack array of bins unless the message has more ops than
            // STACK_BINS.
            if (msg->n_ops > STACK_BINS) {
                bins = malloc(sizeof(cl_bin) * msg->n_ops);
            else {
                bins = stack_bins;

            // malloc failure
            if (bins == NULL) {
                if (set_ret) {
               return CITRUSLEAF_FAIL_CLIENT;

            // parse through the bins/ops
            cl_msg_op * op = (cl_msg_op *) buf;
            for (int i=0;i<msg->n_ops;i++) {

                LOG("[DEBUG] cl_scan_worker_do: op receive: %p size %d op %d ptype %d pversion %d namesz %d \n",
                        op,op->op_sz, op->op, op->particle_type, op->version, op->name_sz);

                dump_buf("individual op (host order)", (uint8_t *) op, op->op_sz + sizeof(uint32_t));

                // Convert the wire op into bins[i].
                cl_set_value_particular(op, &bins[i]);
                op = cl_msg_op_get_next(op);
            buf = (uint8_t *) op;

            // Three outcomes per message: server error, final marker, or a
            // record to deliver.
            if (msg->result_code != CL_RESULT_OK) {

                // A non-OK result code ends the scan and becomes the return value.
                rc = (int) msg->result_code;
                done = true;
                if (rc == CITRUSLEAF_FAIL_SCAN_ABORT) {
                    LOG("[INFO] cl_scan_worker_do: Scan successfully aborted at node [%s]\n", node->name);
            else if (msg->info3 & CL_MSG_INFO3_LAST)    {
                if ( cf_debug_enabled() ) {
                    LOG("[INFO] cl_scan_worker_do: Received final message from node [%s], scan complete\n", node->name);
                done = true;
            else if ((msg->n_ops || (msg->info1 & CL_MSG_INFO1_NOBINDATA))) {

                // Wrap the parsed fields and bins in a stack-allocated response
                // record and expose it through an as_rec.
                cl_scan_response_rec rec;
                cl_scan_response_rec *recp = &rec;

                // NOTE(review): strdup() allocates; no matching free of
                // recp->ns is visible in this excerpt.
                recp->ns         = strdup(ns_ret);
                recp->keyd       = keyd;
                recp->set        = set_ret;
                recp->generation = msg->generation;
                recp->record_ttl = msg->record_ttl;
                recp->bins       = bins;
                recp->n_bins     = msg->n_ops;
                recp->ismalloc   = false;

                as_rec r;
                as_rec *rp = &r;
                rp = as_rec_init(rp, recp, &scan_response_hooks);

                // Only the "SUCCESS" bin is looked up here.
                as_val * v = as_rec_get(rp, "SUCCESS");
                if ( v  != NULL && task->callback) {
                    // Got a non-null value for the response bin,
                    // call callback on it and destroy the record
                    task->callback(v, task->udata);



                rc = CITRUSLEAF_OK;

            // if done free it 
            if (done) {
                citrusleaf_bins_free(bins, msg->n_ops);
                if (bins != stack_bins) {
                    // NOTE(review): the free() that presumably precedes this
                    // reset is not visible here.
                    bins = 0;

                if (set_ret) {
                    set_ret = NULL;

            // don't have to free object internals. They point into the read buffer, where
            // a pointer is required
            // Advance pos by the number of bytes this message occupied.
            pos += buf - buf_start;


        // Drop the per-frame heap buffer, if one was used.
        if (rd_buf && (rd_buf != rd_stack_buf))    {
            rd_buf = 0;

    } while ( done == false );
    // Normal exit: hand the fd back to the node.
    cl_cluster_node_fd_put(node, fd, false);

#ifdef DEBUG_VERBOSE    
    LOG("[DEBUG] cl_scan_worker_do: exited loop: rc %d\n", rc );

    return rc;
Beispiel #2
// Request the info of a particular sockaddr_in.
// Reject info request if response length is greater than max_response_length.
// Return 0 on success and -1 on error.
//
// sa_in               - address to connect to (non-blocking connect).
// names               - info names to request; may be NULL. Unless send_asis
//                       is set, separators in this string are overwritten
//                       in place with '\n'.
// values              - out parameter; reset to 0 on entry, and on success set
//                       to a malloc'd, NUL-terminated copy of the response
//                       body (presumably freed by the caller - confirm).
// timeout_ms          - non-zero selects the *_timeout socket calls.
// send_asis           - when true, names is not rewritten.
// max_response_length - when > 0, responses larger than this are rejected.
//
// NOTE(review): this excerpt appears to have lost lines (the return-type line,
// closing braces, `else`, `#endif`, the Done label). The comments below
// describe only what is visible; confirm against the complete file.
citrusleaf_info_host_limit(struct sockaddr_in *sa_in, char *names, char **values, int timeout_ms, bool send_asis, uint64_t max_response_length)
	int rv = -1;
    int io_rv;
	*values = 0;
	// Deal with the incoming 'names' parameter
	// Translate interior ';'  in the passed-in names to \n
	uint32_t	slen = 0;
	if (names) {
		if (send_asis) {
			slen = strlen(names);
		} else {
			// ';', ':' and ',' are all treated as separators.
			// NOTE(review): the loop's advance of _t / slen is not visible here.
			char *_t = names;
			while (*_t) { 
				if ((*_t == ';') || (*_t == ':') || (*_t == ',')) *_t = '\n'; 
	// Sometimes people forget or can't add the trailing '\n'. Be nice and add it for them.
	// Uses a stack-allocated variable so we don't have to clean up; syntactically
	// awkward, but a nice little thing.
	if (names) {
		// NOTE(review): if slen is 0 here (e.g. an empty names string),
		// slen-1 wraps around and this reads outside names - confirm that
		// callers never pass an empty string.
		if (names[slen-1] == '\n') {
			// Already terminated: slen == 0 means "no copy needed" below.
			slen = 0;
		} else { 
			// Room for the extra '\n'; names longer than 1024 are refused.
			slen++; if (slen > 1024) { return(-1); } 
	// Variable-length array sized by slen (at most 1025 bytes given the check above).
	char names_with_term[slen+1];
	if (slen) { 
		strcpy(names_with_term, names);
		names_with_term[slen-1] = '\n';
		names_with_term[slen] = 0;
		names = names_with_term;
	// Actually doing a non-blocking connect
	int fd = cf_socket_create_and_connect_nb(sa_in);
	if (fd == -1) {
		return -1;

	cl_proto 	*req;
	uint8_t		buf[1024];
	uint		buf_sz;

	// Un-initialized buf can lead to junk lastshiptimes values. 
	// Initialize buf to 0.
	bzero(buf, 1024);
	if (names) {
		// Request = cl_proto header followed by the names string.
		uint sz = strlen(names);
		buf_sz = sz + sizeof(cl_proto);
		// Small requests reuse the stack buffer; larger ones are malloc'd.
		// NOTE(review): the `else` between the next two assignments is not
		// visible in this excerpt.
		if (buf_sz < 1024)
			req = (cl_proto *) buf;
			req = (cl_proto *) malloc(buf_sz);
		if (req == NULL)	goto Done;

		req->sz = sz;
		// NOTE(review): no copy of names into the request payload is visible
		// here - confirm it exists in the complete file.
	else {
		// No names: send a header-only request.
		req = (cl_proto *) buf;
		req->sz = 0;
		buf_sz = sizeof(cl_proto);
		names = "";
	req->version = CL_PROTO_VERSION;
	req->type = CL_PROTO_TYPE_INFO;
	// NOTE(review): presumably an `else` separated the timeout and forever
	// variants here and in the reads below; it is not visible in this excerpt.
    if (timeout_ms)
        io_rv = cf_socket_write_timeout(fd, (uint8_t *) req, buf_sz, 0, timeout_ms);
        io_rv = cf_socket_write_forever(fd, (uint8_t *) req, buf_sz);
	// Release the request if it was heap-allocated.
	if ((uint8_t *)req != buf)	free(req);
	if (io_rv != 0) {
#ifdef DEBUG        
		cf_debug("info returned error, rv %d errno %d bufsz %d", io_rv, errno, buf_sz);
		goto Done;
	// The response header is read into the same stack buffer (8 bytes).
	cl_proto	*rsp = (cl_proto *)buf;
    if (timeout_ms) 
        io_rv = cf_socket_read_timeout(fd, buf, 8, 0, timeout_ms);
        io_rv = cf_socket_read_forever(fd, buf, 8);
    if (0 != io_rv) {
#ifdef DEBUG        
		cf_debug("info socket read failed: rv %d errno %d", io_rv, errno);
		goto Done;
	if (rsp->sz) {
		size_t read_length = rsp->sz;
		bool limit_reached = false;

		if (max_response_length > 0 && rsp->sz > max_response_length) {
			// Response buffer is too big.  Read a few bytes just to see what the buffer contains.
			read_length = 100;
			limit_reached = true;

		// One extra byte for the NUL terminator written below.
		uint8_t *v_buf = malloc(read_length + 1);
		if (!v_buf) {
			// NOTE(review): read_length is a size_t but is formatted with %d
			// here and in the read-failure warning below.
			cf_warn("Info request '%s' failed. Failed to malloc %d bytes", names, read_length);
			goto Done;

		if (timeout_ms)
			io_rv = cf_socket_read_timeout(fd, v_buf, read_length, 0, timeout_ms);
			io_rv = cf_socket_read_forever(fd, v_buf, read_length);

		if (io_rv != 0) {

			// Timeouts are not logged; any other read failure is.
			if (io_rv != ETIMEDOUT) {
				cf_warn("Info request '%s' failed. Failed to read %d bytes. Return code %d", names, read_length, io_rv);
			// NOTE(review): no free(v_buf) is visible before this goto or the
			// one in the limit_reached branch - confirm it is released.
			goto Done;
		v_buf[read_length] = 0;

		if (limit_reached) {
			// Response buffer is too big.  Log warning and reject.
			cf_warn("Info request '%s' failed. Response buffer length %lu is excessive. Buffer: %s", names, rsp->sz, v_buf);
			goto Done;
		// Success: hand the buffer to the caller.
		*values = (char *) v_buf;
	else {
		// Empty response body.
		*values = 0;
	rv = 0;

	// NOTE(review): only shutdown() is visible; the close of fd and the final
	// return are not part of this excerpt.
	shutdown(fd, SHUT_RDWR);
Beispiel #3
// Send a scan request to one cluster node and invoke cb for every record
// returned, until the node sends an error or its final message.
//
// asc             - cluster handle used to pick the node and for nbconnect.
// node_name       - when non-NULL, the node is looked up by name and reserved;
//                   otherwise a random node is taken from the cluster.
// operation_info,
// operation_info2 - flag words passed through to cl_compile(); operation_info
//                   is also tested for CL_MSG_INFO1_NOBINDATA when deciding
//                   whether to deliver a record.
// ns, set         - namespace / set passed to cl_compile().
// bins, n_bins    - not referenced in the lines visible here.
// scan_pct        - scan percentage, clamped to 100 when scan_opt is given.
// cb, udata       - per-record callback and its opaque argument.
// scan_opt        - optional scan parameters; when NULL no scan-parameter
//                   field is passed to cl_compile().
//
// rv starts at -1, is set to 0 once a record has been delivered, and is set
// to the server result code when a message reports a non-OK result.
//
// NOTE(review): this excerpt appears to have lost lines (opening / closing
// braces, `else`, `#endif`, several returns and frees). The comments below
// describe only what is visible; confirm against the complete file.
static int
do_scan_monte(cl_cluster *asc, char *node_name, uint operation_info, uint operation_info2, const char *ns, const char *set, 
	cl_bin *bins, int n_bins, uint8_t scan_pct, 
	citrusleaf_get_many_cb cb, void *udata, cl_scan_parameters *scan_opt)
	int rv = -1;

	// Read and write buffers start on the stack; cl_compile() receives
	// &wr_buf / &wr_buf_sz and may therefore replace them.
	uint8_t		rd_stack_buf[STACK_BUF_SZ];	
	uint8_t		*rd_buf = 0;
	size_t		rd_buf_sz = 0;
	uint8_t		wr_stack_buf[STACK_BUF_SZ];
	uint8_t		*wr_buf = wr_stack_buf;
	size_t		wr_buf_sz = sizeof(wr_stack_buf);

	cl_scan_param_field	scan_param_field;

	// scan_param_field is only filled in when scan options were supplied.
	if (scan_opt) {
		scan_param_field.scan_pct = scan_pct>100? 100:scan_pct;
		// priority goes in bits 4 and up, fail_on_cluster_change in bit 3.
		scan_param_field.byte1 = (scan_opt->priority<<4) | (scan_opt->fail_on_cluster_change<<3);

	// we have a single namespace and/or set to get
	if (cl_compile(operation_info, operation_info2, 0, ns, set, 0, 0, 0, 0, 0, 0, &wr_buf, &wr_buf_sz, 0, NULL, 0,
			scan_opt ? &scan_param_field : NULL)) {
	// NOTE(review): the body of the cl_compile() failure branch is not visible.
	dump_buf("sending request to cluster:", wr_buf, wr_buf_sz);

	int fd;
	cl_cluster_node *node = 0;

	// Get an FD from a cluster
	if (node_name) {
		node = cl_cluster_node_get_byname(asc,node_name);
		// grab a reservation
		if (node)
			cl_cluster_node_reserve(node, "T+");
	} else {
		node = cl_cluster_node_get_random(asc);
	if (!node) {
#ifdef DEBUG
		cf_debug("warning: no healthy nodes in cluster, failing");
	fd = cl_cluster_node_fd_get(node, false, asc->nbconnect);
	if (fd == -1) {
#ifdef DEBUG			
		cf_debug("warning: node %s has no file descriptors, retrying transaction", node->name);
	// send it to the cluster - non blocking socket, but we're blocking
	if (0 != cf_socket_write_forever(fd, wr_buf, wr_buf_sz)) {
#ifdef DEBUG			
		cf_debug("Citrusleaf: write timeout or error when writing header to server - %d fd %d errno %d", rv, fd, errno);

	cl_proto 		proto;
	bool done = false;
	do { // multiple CL proto per response
		// Now turn around and read a fine cl_proto - that's the first 8 bytes that has types and lengths
		if ((rv = cf_socket_read_forever(fd, (uint8_t *) &proto, sizeof(cl_proto) ) ) ) {
			cf_error("network error: errno %d fd %d",rv, fd);
		dump_buf("read proto header from cluster", (uint8_t *) &proto, sizeof(cl_proto));

		if (proto.version != CL_PROTO_VERSION) {
			cf_error("network error: received protocol message of wrong version %d", proto.version);
		// Only plain CL messages are accepted here.
		if (proto.type != CL_PROTO_TYPE_CL_MSG) {
			cf_error("network error: received incorrect message version %d", proto.type);
		// second read for the remainder of the message - expect this to cover lots of data, many lines
		// if there's no error
		rd_buf_sz =  proto.sz;
		if (rd_buf_sz > 0) {
//            cf_debug("message read: size %u",(uint)proto.sz);

			// Heap buffer only when the body does not fit in the stack buffer.
			// NOTE(review): the `else` between the two assignments is not visible.
			if (rd_buf_sz > sizeof(rd_stack_buf))
				rd_buf = malloc(rd_buf_sz);
				rd_buf = rd_stack_buf;
			if (rd_buf == NULL) {
				// NOTE(review): this return does not visibly release fd, the
				// node reservation or wr_buf - confirm.
				return (-1);

			if ((rv = cf_socket_read_forever(fd, rd_buf, rd_buf_sz))) {
				cf_error("network error: errno %d fd %d", rv, fd);
				if (rd_buf != rd_stack_buf)	{ free(rd_buf); }
// this one's a little much: printing the entire body before printing the other bits			
			dump_buf("read msg body header (multiple msgs)", rd_buf, rd_buf_sz);
		// process all the cl_msg in this proto
		uint8_t *buf = rd_buf;
		uint pos = 0;
		cl_bin stack_bins[STACK_BINS];
		cl_bin *bins_local;
		// Walk every cl_msg packed into this frame; pos counts consumed bytes.
		while (pos < rd_buf_sz) {

			dump_buf("individual message header", buf, sizeof(cl_msg));
			uint8_t *buf_start = buf;
			cl_msg *msg = (cl_msg *) buf;
			buf += sizeof(cl_msg);
			if (msg->header_sz != sizeof(cl_msg)) {
				cf_error("received cl msg of unexpected size: expecting %zd found %d, internal error",
				// NOTE(review): the argument line of this call is not visible.

			// parse through the fields
			cf_digest *keyd = 0;
			char ns_ret[33] = {0};
			char *set_ret = NULL;
			cl_msg_field *mf = (cl_msg_field *)buf;
			for (int i=0;i<msg->n_fields;i++) {
				if (mf->type == CL_MSG_FIELD_TYPE_KEY) {
					cf_error("read: found a key - unexpected");
				else if (mf->type == CL_MSG_FIELD_TYPE_DIGEST_RIPE) {
					// keyd points into the read buffer; it is not copied.
					keyd = (cf_digest *) mf->data;
				else if (mf->type == CL_MSG_FIELD_TYPE_NAMESPACE) {
					// NOTE(review): the copy length comes from the wire and no
					// bounds check against sizeof(ns_ret) is visible.
					memcpy(ns_ret, mf->data, cl_msg_field_get_value_sz(mf));
					ns_ret[ cl_msg_field_get_value_sz(mf) ] = 0;
				else if (mf->type == CL_MSG_FIELD_TYPE_SET) {
					// Set name is copied into a fresh NUL-terminated heap string.
					// NOTE(review): the malloc result is used without a
					// visible NULL check.
					uint32_t set_name_len = cl_msg_field_get_value_sz(mf);
					set_ret = (char *)malloc(set_name_len + 1);
					memcpy(set_ret, mf->data, set_name_len);
					set_ret[ set_name_len ] = '\0';

				mf = cl_msg_field_get_next(mf);
			// The ops start right after the last field.
			buf = (uint8_t *) mf;

			cf_debug("message header fields: nfields %u nops %u", msg->n_fields, msg->n_ops);

			// Use the stack array of bins unless the message has more ops
			// than STACK_BINS.
			if (msg->n_ops > STACK_BINS) {
				bins_local = malloc(sizeof(cl_bin) * msg->n_ops);
			else {
				bins_local = stack_bins;
			if (bins_local == NULL) {
				if (set_ret) {
				return (-1);
			// parse through the bins/ops
			cl_msg_op *op = (cl_msg_op *)buf;
			for (int i=0;i<msg->n_ops;i++) {


				cf_debug("op receive: %p size %d op %d ptype %d pversion %d namesz %d",
					op,op->op_sz, op->op, op->particle_type, op->version, op->name_sz);				

				dump_buf("individual op (host order)", (uint8_t *) op, op->op_sz + sizeof(uint32_t));

				// Convert the wire op into bins_local[i].
				cl_set_value_particular(op, &bins_local[i]);
				op = cl_msg_op_get_next(op);
			buf = (uint8_t *) op;
			if (msg->result_code != CL_RESULT_OK) {
				// Special case - if we scan a set name that doesn't exist on a
				// node, it will return "not found" - we unify this with the
				// case where OK is returned and no callbacks were made. [AKG]
				if (msg->result_code == CL_RESULT_NOTFOUND) {
					msg->result_code = CL_RESULT_OK;
				// Any non-OK result ends the scan and becomes the return value.
				rv = (int)msg->result_code;
				done = true;
			else if (msg->info3 & CL_MSG_INFO3_LAST)	{
#ifdef DEBUG
				cf_debug("received final message");
				done = true;
			// The NOBINDATA test uses the request flags (operation_info), not
			// the flags of the received message.
			else if ((msg->n_ops) || (operation_info & CL_MSG_INFO1_NOBINDATA)) {
				// got one good value? call it a success!
				(*cb) ( ns_ret, keyd, set_ret, msg->generation, msg->record_ttl, bins_local, msg->n_ops, false /*islast*/, udata);
				rv = 0;
//			else
//				cf_debug("received message with no bins, signal of an error");

			// Per-message cleanup.
			// NOTE(review): the free() calls that presumably precede these
			// resets are not visible in this excerpt.
			if (bins_local != stack_bins) {
				bins_local = 0;

			if (set_ret) {
				set_ret = NULL;

			// don't have to free object internals. They point into the read buffer, where
			// a pointer is required
			// Advance pos by the number of bytes this message occupied.
			pos += buf - buf_start;
		// Drop the per-frame heap buffer, if one was used.
		if (rd_buf && (rd_buf != rd_stack_buf))	{
			rd_buf = 0;

	} while ( done == false );

	// Drop the request buffer if it is no longer the stack buffer.
	if (wr_buf != wr_stack_buf) {
		wr_buf = 0;

	// Reset the node's intervals_unreachable counter to 0, then return the fd.
	cf_atomic32_set(&node->intervals_unreachable, 0);
	cl_cluster_node_fd_put(node, fd, false);
	node = 0;
	cf_debug("exited loop: rv %d", rv );
Beispiel #4
// Request info over an already-connected socket.
// Reject info request if response length is greater than max_response_length.
// Return 0 on success and -1 on error.
//
// fd                  - connected socket used for the request and response.
// names               - info names to request; may be NULL. Unless send_asis
//                       is set, separators in this string are overwritten
//                       in place with '\n'.
// values              - out parameter; reset to 0 on entry, and on success set
//                       to a malloc'd, NUL-terminated copy of the response
//                       body (presumably freed by the caller - confirm).
// timeout_ms          - non-zero selects the *_timeout socket calls.
// send_asis           - when true, names is not rewritten.
// max_response_length - when > 0, responses larger than this are rejected.
// check_bounds        - when true, the length of names is checked against
//                       bb_size.
//
// NOTE(review): this excerpt appears to have lost lines (the return-type line,
// closing braces, `else`, `#endif`, the Done label). The comments below
// describe only what is visible; confirm against the complete file.
citrusleaf_info_host_limit(int fd, char *names, char **values, int timeout_ms, bool send_asis, uint64_t max_response_length, bool check_bounds)
	// Size of the stack request/response buffer and the bound for check_bounds.
	uint bb_size = 2048;
	int rv = -1;
    int io_rv;
	*values = 0;
	// Deal with the incoming 'names' parameter
	// Translate interior ';'  in the passed-in names to \n
	uint32_t	slen = 0;
	if (names) {
		if (send_asis) {
			slen = (uint32_t)strlen(names);
		} else {
			// ';', ':' and ',' are all treated as separators.
			// NOTE(review): the loop's advance of _t / slen is not visible here.
			char *_t = names;
			while (*_t) { 
				if ((*_t == ';') || (*_t == ':') || (*_t == ',')) *_t = '\n'; 
	// Sometimes people forget or can't add the trailing '\n'. Be nice and add it for them.
	// Uses a stack-allocated variable so we don't have to clean up; syntactically
	// awkward, but a nice little thing.
	if (names) {
		// NOTE(review): if slen is 0 here (e.g. an empty names string),
		// slen-1 wraps around and this reads outside names - confirm that
		// callers never pass an empty string.
		if (names[slen-1] == '\n') {
			// Already terminated: slen == 0 means "no copy needed" below.
			slen = 0;
		} else { 
			// If check bounds is true, do not allow beyond a certain limit
			// NOTE(review): the body of this check is not visible; without
			// check_bounds nothing visible limits the VLA size below.
			if	(check_bounds && (slen > bb_size)) {
	// Variable-length array sized by slen.
	char names_with_term[slen+1];
	if (slen) { 
		strcpy(names_with_term, names);
		names_with_term[slen-1] = '\n';
		names_with_term[slen] = 0;
		names = names_with_term;

	cl_proto 	*req;
	uint8_t		buf[bb_size];
	uint		buf_sz;
	// Tracks whether req was heap-allocated and must be freed after the write.
	bool        rmalloced = false;
	if (names) {
		// Request = cl_proto header followed by the names string.
		uint sz = (uint)strlen(names);
		buf_sz = sz + sizeof(cl_proto);
		// Small requests reuse the stack buffer; larger ones are malloc'd.
		if (buf_sz < bb_size)
			req = (cl_proto *) buf;
		else {
			req = (cl_proto *) malloc(buf_sz);
			rmalloced = true;
		if (req == NULL)	goto Done;

		req->sz = sz;
		// Payload goes directly after the header. Arithmetic on void * is a
		// compiler extension, not standard C.
		memcpy((void*)req + sizeof(cl_proto), names, sz);
	else {
		// No names: send a header-only request.
		req = (cl_proto *) buf;
		req->sz = 0;
		buf_sz = sizeof(cl_proto);
		names = "";
	req->version = CL_PROTO_VERSION;
	req->type = CL_PROTO_TYPE_INFO;
	// NOTE(review): presumably an `else` separated the timeout and forever
	// variants here and in the reads below; it is not visible in this excerpt.
    if (timeout_ms)
        io_rv = cf_socket_write_timeout(fd, (uint8_t *) req, buf_sz, 0, timeout_ms);
        io_rv = cf_socket_write_forever(fd, (uint8_t *) req, buf_sz);
	if (rmalloced) {
		free (req); 
	if (io_rv != 0) {
#ifdef DEBUG        
		cf_debug("info returned error, rv %d errno %d bufsz %d", io_rv, errno, buf_sz);
		goto Done;
	// The response header is read into the stack buffer (8 bytes).
	cl_proto	*rsp = (cl_proto *)buf;
    if (timeout_ms) 
        io_rv = cf_socket_read_timeout(fd, buf, 8, 0, timeout_ms);
        io_rv = cf_socket_read_forever(fd, buf, 8);
    if (0 != io_rv) {
#ifdef DEBUG        
		cf_debug("info socket read failed: rv %d errno %d", io_rv, errno);
		goto Done;
	if (rsp->sz) {
		size_t read_length = rsp->sz;
		bool limit_reached = false;

		if (max_response_length > 0 && rsp->sz > max_response_length) {
			// Response buffer is too big.  Read a few bytes just to see what the buffer contains.
			read_length = 100;
			limit_reached = true;

		// One extra byte for the NUL terminator written below.
		uint8_t *v_buf = malloc(read_length + 1);
		if (!v_buf) {
			// NOTE(review): read_length is a size_t but is formatted with %d
			// here and in the read-failure warning below.
			cf_warn("Info request '%s' failed. Failed to malloc %d bytes", names, read_length);
			goto Done;

        if (timeout_ms)
            io_rv = cf_socket_read_timeout(fd, v_buf, read_length, 0, timeout_ms);
            io_rv = cf_socket_read_forever(fd, v_buf, read_length);
        if (io_rv != 0) {

            // Timeouts are not logged; any other read failure is.
            if (io_rv != ETIMEDOUT) {
            	cf_warn("Info request '%s' failed. Failed to read %d bytes. Return code %d", names, read_length, io_rv);
            // NOTE(review): no free(v_buf) is visible before this goto or the
            // one in the limit_reached branch - confirm it is released.
            goto Done;
		v_buf[read_length] = 0;

		if (limit_reached) {
			// Response buffer is too big.  Log warning and reject.
			cf_warn("Info request '%s' failed. Response buffer length %lu is excessive. Buffer: %s", names, rsp->sz, v_buf);
			goto Done;
		// Success: hand the buffer to the caller.
		*values = (char *) v_buf;
	else {
		// Empty response body: warn, leave *values at 0; rv is still set to 0 below.
		cf_warn("rsp size is 0");
		*values = 0;
	rv = 0;
