コード例 #1
0
ファイル: replicator.c プロジェクト: chengyishi/moosefs
/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */
uint8_t replicate(uint64_t chunkid,uint32_t version,uint8_t srccnt,const uint8_t *srcs) {
	replication r;
	uint8_t status,i,vbuffs,first;
	uint16_t b,blocks;
	uint32_t xcrc,crc;
	uint8_t *wptr;
	const uint8_t *rptr;
	int s;

	if (srccnt==0) {
		return ERROR_EINVAL;
	}

//	syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt);

	pthread_mutex_lock(&statslock);
	stats_repl++;
	pthread_mutex_unlock(&statslock);

// init replication structure
	r.chunkid = chunkid;
	r.version = version;
	r.srccnt = 0;
	r.created = 0;
	r.opened = 0;
	r.fds = malloc(sizeof(struct pollfd)*srccnt);
	passert(r.fds);
	r.repsources = malloc(sizeof(repsrc)*srccnt);
	passert(r.repsources);
	if (srccnt>1) {
		r.xorbuff = malloc(65536+4);
		passert(r.xorbuff);
	} else {
		r.xorbuff = NULL;
	}
// create chunk
	status = hdd_create(chunkid,0);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_create status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.created = 1;
// init sources
	r.srccnt = srccnt;
	for (i=0 ; i<srccnt ; i++) {
		r.repsources[i].chunkid = get64bit(&srcs);
		r.repsources[i].version = get32bit(&srcs);
		r.repsources[i].ip = get32bit(&srcs);
		r.repsources[i].port = get16bit(&srcs);
		r.repsources[i].sock = -1;
		r.repsources[i].packet = NULL;
	}
// connect
	for (i=0 ; i<srccnt ; i++) {
		s = tcpsocket();
		if (s<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: socket error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		r.repsources[i].sock = s;
		r.fds[i].fd = s;
		if (tcpnonblock(s)<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port);
		if (s<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: connect error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		if (s==0) {
			r.repsources[i].mode = IDLE;
		} else {
			r.repsources[i].mode = CONNECTING;
		}
	}
	if (rep_wait_for_connection(&r,CONNMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_CANTCONNECT;
	}
// open chunk
	status = hdd_open(chunkid);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_open status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.opened = 1;
// get block numbers
	for (i=0 ; i<srccnt ; i++) {
		wptr = rep_create_packet(r.repsources+i,CSTOCS_GET_CHUNK_BLOCKS,8+4);
		if (wptr==NULL) {
			syslog(LOG_NOTICE,"replicator: out of memory");
			rep_cleanup(&r);
			return ERROR_OUTOFMEMORY;
		}
		put64bit(&wptr,r.repsources[i].chunkid);
		put32bit(&wptr,r.repsources[i].version);
	}
// send packet
	if (rep_send_all_packets(&r,SENDMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// receive answers
	for (i=0 ; i<srccnt ; i++) {
		r.repsources[i].mode = HEADER;
		r.repsources[i].startptr = r.repsources[i].hdrbuff;
		r.repsources[i].bytesleft = 8;
	}
	if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// get block no
	blocks = 0;
	for (i=0 ; i<srccnt ; i++) {
		uint32_t type,size;
		uint64_t pchid;
		uint32_t pver;
		uint16_t pblocks;
		uint8_t pstatus;
		rptr = r.repsources[i].hdrbuff;
		type = get32bit(&rptr);
		size = get32bit(&rptr);
		rptr = r.repsources[i].packet;
		if (rptr==NULL || type!=CSTOCS_GET_CHUNK_BLOCKS_STATUS || size!=15) {
			syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		}
		pchid = get64bit(&rptr);
		pver = get32bit(&rptr);
		pblocks = get16bit(&rptr);
		pstatus = get8bit(&rptr);
		if (pchid!=r.repsources[i].chunkid) {
			syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_WRONGCHUNKID;
		}
		if (pver!=r.repsources[i].version) {
			syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:version:%"PRIX32"/%"PRIX32") from (%08"PRIX32":%04"PRIX16")",pver,r.repsources[i].version,r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_WRONGVERSION;
		}
		if (pstatus!=STATUS_OK) {
			syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return pstatus;
		}
		r.repsources[i].blocks = pblocks;
		if (pblocks>blocks) {
			blocks=pblocks;
		}
	}
// create read request
	for (i=0 ; i<srccnt ; i++) {
		if (r.repsources[i].blocks>0) {
			uint32_t leng;
			wptr = rep_create_packet(r.repsources+i,CUTOCS_READ,8+4+4+4);
			if (wptr==NULL) {
				syslog(LOG_NOTICE,"replicator: out of memory");
				rep_cleanup(&r);
				return ERROR_OUTOFMEMORY;
			}
			leng = r.repsources[i].blocks*0x10000;
			put64bit(&wptr,r.repsources[i].chunkid);
			put32bit(&wptr,r.repsources[i].version);
			put32bit(&wptr,0);
			put32bit(&wptr,leng);
		} else {
			rep_no_packet(r.repsources+i);
		}
	}
// send read request
	if (rep_send_all_packets(&r,SENDMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// receive data and write to hdd
	for (b=0 ; b<blocks ; b++) {
// prepare receive
		for (i=0 ; i<srccnt ; i++) {
			if (b<r.repsources[i].blocks) {
				r.repsources[i].mode = HEADER;
				r.repsources[i].startptr = r.repsources[i].hdrbuff;
				r.repsources[i].bytesleft = 8;
			} else {
				r.repsources[i].mode = IDLE;
				r.repsources[i].bytesleft = 0;
			}
		}
// receive data
		if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		}
// check packets
		vbuffs = 0;
		for (i=0 ; i<srccnt ; i++) {
			if (r.repsources[i].mode!=IDLE) {
				uint32_t type,size;
				uint64_t pchid;
				uint16_t pblocknum;
				uint16_t poffset;
				uint32_t psize;
				uint8_t pstatus;
				rptr = r.repsources[i].hdrbuff;
				type = get32bit(&rptr);
				size = get32bit(&rptr);
				rptr = r.repsources[i].packet;
				if (rptr==NULL) {
					rep_cleanup(&r);
					return ERROR_DISCONNECTED;
				}
				if (type==CSTOCU_READ_STATUS && size==9) {
					pchid = get64bit(&rptr);
					pstatus = get8bit(&rptr);
					rep_cleanup(&r);
					if (pchid!=r.repsources[i].chunkid) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
						return ERROR_WRONGCHUNKID;
					}
					if (pstatus==STATUS_OK) {	// got status too early or got incorrect packet
						syslog(LOG_WARNING,"replicator: got unexpected ok status from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
						return ERROR_DISCONNECTED;
					}
					syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port);
					return pstatus;
				} else if (type==CSTOCU_READ_DATA && size==20+65536) {
					pchid = get64bit(&rptr);
					pblocknum = get16bit(&rptr);
					poffset = get16bit(&rptr);
					psize = get32bit(&rptr);
					if (pchid!=r.repsources[i].chunkid) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGCHUNKID;
					}
					if (pblocknum!=b) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:blocknum:%"PRIu16"/%"PRIu16") from (%08"PRIX32":%04"PRIX16")",pblocknum,b,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_DISCONNECTED;
					}
					if (poffset!=0) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:offset:%"PRIu16") from (%08"PRIX32":%04"PRIX16")",poffset,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGOFFSET;
					}
					if (psize!=65536) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:size:%"PRIu32") from (%08"PRIX32":%04"PRIX16")",psize,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGSIZE;
					}
				} else {
					syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
					rep_cleanup(&r);
					return ERROR_DISCONNECTED;
				}
				vbuffs++;
			}
		}
// write data
		if (vbuffs==0) {	// no buffers ? - it should never happen
			syslog(LOG_WARNING,"replicator: no data received for block: %"PRIu16,b);
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		} else if (vbuffs==1) { // xor not needed, so just find block and write it
			for (i=0 ; i<srccnt ; i++) {
				if (r.repsources[i].mode!=IDLE) {
					rptr = r.repsources[i].packet;
					status = hdd_write(chunkid,0,b,rptr+20,0,65536,rptr+16);
					if (status!=STATUS_OK) {
						syslog(LOG_WARNING,"replicator: write status: %u",status);
						rep_cleanup(&r);
						return status;
					}
				}
			}
		} else {
			first=1;
			if (vbuffs&1) {
				xcrc = 0;
			} else {
				xcrc = 0xD7978EEBU; // = mycrc32_zeroblock(0,0x10000);
			}
			for (i=0 ; i<srccnt ; i++) {
				if (r.repsources[i].mode!=IDLE) {
					rptr = r.repsources[i].packet;
					rptr+=16;	// skip chunkid,blockno,offset and size
					if (first) {
						memcpy(r.xorbuff+4,rptr+4,65536);
						first=0;
					} else {
						xordata(r.xorbuff+4,rptr+4,65536);
					}
					crc = get32bit(&rptr);
					if (crc!=mycrc32(0,rptr,65536)) {
						syslog(LOG_WARNING,"replicator: received data with wrong checksum from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_CRC;
					}
					xcrc^=crc;
				}
			}
			wptr = r.xorbuff;
			put32bit(&wptr,xcrc);
			status = hdd_write(chunkid,0,b,r.xorbuff+4,0,65536,r.xorbuff);
			if (status!=STATUS_OK) {
				syslog(LOG_WARNING,"replicator: xor write status: %u",status);
				rep_cleanup(&r);
				return status;
			}
		}
	}
// receive status
	for (i=0 ; i<srccnt ; i++) {
		if (r.repsources[i].blocks>0) {
//			if (r.repsources[i].packet) {
//				free(r.repsources[i].packet);
//				r.repsources[i].packet=NULL;
//			}
			r.repsources[i].mode = HEADER;
			r.repsources[i].startptr = r.repsources[i].hdrbuff;
			r.repsources[i].bytesleft = 8;
		} else {
			r.repsources[i].mode = IDLE;
			r.repsources[i].bytesleft = 0;
		}
	}
	if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
	for (i=0 ; i<srccnt ; i++) {
		if (r.repsources[i].blocks>0) {
			uint32_t type,size;
			uint64_t pchid;
			uint8_t pstatus;
			rptr = r.repsources[i].hdrbuff;
			type = get32bit(&rptr);
			size = get32bit(&rptr);
			rptr = r.repsources[i].packet;
			if (rptr==NULL || type!=CSTOCU_READ_STATUS || size!=9) {
				syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
				rep_cleanup(&r);
				return ERROR_DISCONNECTED;
			}
			pchid = get64bit(&rptr);
			pstatus = get8bit(&rptr);
			if (pchid!=r.repsources[i].chunkid) {
				syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
				rep_cleanup(&r);
				return ERROR_WRONGCHUNKID;
			}
			if (pstatus!=STATUS_OK) {
				syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port);
				rep_cleanup(&r);
				return pstatus;
			}
		}
	}
// close chunk and change version
	status = hdd_close(chunkid);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_close status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.opened = 0;
	status = hdd_version(chunkid,0,version);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_version status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.created = 0;
	rep_cleanup(&r);
	return STATUS_OK;
}
コード例 #2
0
ファイル: replicator.c プロジェクト: davies/moosefs
/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */
uint8_t replicate(uint64_t chunkid,uint32_t version,const uint32_t xormasks[4],uint8_t srccnt,const uint8_t *srcs) {
	replication r;
	uint8_t status,i,j,vbuffs,first;
	uint16_t b,blocks;
	uint32_t xcrc[4],crc;
	uint32_t codeindex,codeword;
	uint8_t *wptr;
	const uint8_t *rptr;
	int s;

	if (srccnt==0) {
		return ERROR_EINVAL;
	}

//	syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt);

	pthread_mutex_lock(&statslock);
	stats_repl++;
	pthread_mutex_unlock(&statslock);

// init replication structure
	r.chunkid = chunkid;
	r.version = version;
	r.srccnt = 0;
	r.created = 0;
	r.opened = 0;
	r.fds = malloc(sizeof(struct pollfd)*srccnt);
	passert(r.fds);
	r.repsources = malloc(sizeof(repsrc)*srccnt);
	passert(r.repsources);
	if (srccnt>1) {
		r.xorbuff = malloc(MFSBLOCKSIZE+4);
		passert(r.xorbuff);
	} else {
		r.xorbuff = NULL;
	}
// create chunk
	status = hdd_create(chunkid,0);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_create status: %s",mfsstrerr(status));
		rep_cleanup(&r);
		return status;
	}
	r.created = 1;
// init sources
	r.srccnt = srccnt;
	for (i=0 ; i<srccnt ; i++) {
		r.repsources[i].chunkid = get64bit(&srcs);
		r.repsources[i].version = get32bit(&srcs);
		r.repsources[i].ip = get32bit(&srcs);
		r.repsources[i].port = get16bit(&srcs);
		r.repsources[i].sock = -1;
		r.repsources[i].packet = NULL;
	}
// connect
	for (i=0 ; i<srccnt ; i++) {
		s = tcpsocket();
		if (s<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: socket error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		r.repsources[i].sock = s;
		r.fds[i].fd = s;
		if (tcpnonblock(s)<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port);
		if (s<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: connect error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		if (s==0) {
			r.repsources[i].mode = IDLE;
		} else {
			r.repsources[i].mode = CONNECTING;
		}
	}
	if (rep_wait_for_connection(&r,CONNMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_CANTCONNECT;
	}
// disable Nagle
	for (i=0 ; i<srccnt ; i++) {
		tcpnodelay(r.repsources[i].sock);
	}
// open chunk
	status = hdd_open(chunkid,0);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_open status: %s",mfsstrerr(status));
		rep_cleanup(&r);
		return status;
	}
	r.opened = 1;
// get block numbers
	for (i=0 ; i<srccnt ; i++) {
		wptr = rep_create_packet(r.repsources+i,ANTOCS_GET_CHUNK_BLOCKS,8+4);
		if (wptr==NULL) {
			syslog(LOG_NOTICE,"replicator: out of memory");
			rep_cleanup(&r);
			return ERROR_OUTOFMEMORY;
		}
		put64bit(&wptr,r.repsources[i].chunkid);
		put32bit(&wptr,r.repsources[i].version);
	}
// send packet
	if (rep_send_all_packets(&r,SENDMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// receive answers
	for (i=0 ; i<srccnt ; i++) {
		r.repsources[i].mode = HEADER;
		r.repsources[i].startptr = r.repsources[i].hdrbuff;
		r.repsources[i].bytesleft = 8;
	}
	if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// get # of blocks
	blocks = 0;
	for (i=0 ; i<srccnt ; i++) {
		uint32_t type,size;
		uint64_t pchid;
		uint32_t pver;
		uint16_t pblocks;
		uint8_t pstatus;
		uint32_t ip;
		rptr = r.repsources[i].hdrbuff;
		type = get32bit(&rptr);
		size = get32bit(&rptr);
		rptr = r.repsources[i].packet;
		ip = r.repsources[i].ip;
		if (rptr==NULL || type!=CSTOAN_CHUNK_BLOCKS || size!=15) {
			syslog(LOG_WARNING,"replicator,get # of blocks: got wrong answer (type:0x%08"PRIX32"/size:0x%08"PRIX32") from (%u.%u.%u.%u:%04"PRIX16")",type,size,(ip>>24)&0xFF,(ip>>16)&0xFF,(ip>>8)&0xFF,ip&0xFF,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		}
		pchid = get64bit(&rptr);
		pver = get32bit(&rptr);
		pblocks = get16bit(&rptr);
		pstatus = get8bit(&rptr);
		if (pchid!=r.repsources[i].chunkid) {
			syslog(LOG_WARNING,"replicator,get # of blocks: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%u.%u.%u.%u:%04"PRIX16")",pchid,r.repsources[i].chunkid,(ip>>24)&0xFF,(ip>>16)&0xFF,(ip>>8)&0xFF,ip&0xFF,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_WRONGCHUNKID;
		}