Beispiel #1
0
/*
 * Fill in the checksum placeholders of a generated image stream.
 *
 * buff/leng describe the whole stream. The first 8 bytes are skipped
 * (presumably the PNG signature - the log message below calls the data a
 * "png stream"). After that the buffer is walked as a sequence of records:
 *   length:32  |  4 bytes + 'length' data bytes (covered by the crc)  |  crc:32
 * Every crc field that still holds the literal marker "CRC#" is overwritten
 * with mycrc32() of the preceding 4+length bytes.
 *
 * Parsing stops at the first record that does not fit in the buffer, so
 * nothing is ever read or written outside of buff[0..leng).
 */
void charts_fill_crc(uint8_t *buff,uint32_t leng) {
	uint8_t *ptr,*eptr;
	uint32_t crc,chleng;
	if (leng<8) {	// nothing behind the 8 skipped bytes - do not even form buff+8
		return;
	}
	ptr = buff+8;
	eptr = buff+leng;
	while ((size_t)(eptr-ptr)>=4) {
		chleng = get32bit((const uint8_t **)&ptr);
		// still needed: 4 bytes + chleng data bytes + 4 bytes of crc
		// (compared as sizes in 64 bits, so a huge chleng can not wrap around)
		if ((uint64_t)(eptr-ptr)<(uint64_t)chleng+8) {
			break;	// truncated or malformed record - the rest can not be parsed
		}
		crc = mycrc32(0,ptr,chleng+4);
		ptr += chleng+4;
		if (memcmp(ptr,"CRC#",4)==0) {
			put32bit(&ptr,crc);	// writes the crc and moves ptr behind the field
		} else {
			syslog(LOG_WARNING,"charts: unexpected data in generated png stream");
			ptr += 4;	// skip the crc field anyway, so the next record stays aligned
		}
	}
}
Beispiel #2
0
/*
 * Handle a "download data" request received on connection 'eptr'.
 *
 * Request payload (exactly 12 bytes): offset:64 leng:32.
 * Answer (MATOSLA_DOWNLOAD_DATA): offset:64 leng:32 crc:32 data:leng bytes,
 * where data is read from eptr->metafd at 'offset' and crc is mycrc32() of
 * that data.
 *
 * Any malformed request or read problem marks the connection with mode=KILL.
 */
void matoslaserv_download_data(serventry *eptr,const uint8_t *data,uint32_t length) {
	uint8_t *ptr;
	uint64_t offset;
	uint32_t leng;
	uint32_t crc;
	ssize_t ret;

	if (length!=12) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - wrong size (%"PRIu32"/12)",length);
		eptr->mode=KILL;
		return;
	}
	if (eptr->metafd<0) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - file not opened");
		eptr->mode=KILL;
		return;
	}
	offset = get64bit(&data);
	leng = get32bit(&data);
	// 'leng' comes from the peer; if 16+leng wrapped around, the packet would be
	// much smaller than the amount of data read into it below
	if (leng+16<leng) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - wrong data length (%"PRIu32")",leng);
		eptr->mode=KILL;
		return;
	}
	ptr = matoslaserv_createpacket(eptr,MATOSLA_DOWNLOAD_DATA,16+leng);
	if (ptr==NULL) {
		eptr->mode=KILL;
		return;
	}
	put64bit(&ptr,offset);
	put32bit(&ptr,leng);
	// ptr now points at the crc field; file data goes right behind it (ptr+4)
#ifdef HAVE_PREAD
	ret = pread(eptr->metafd,ptr+4,leng,offset);
#else /* HAVE_PREAD */
	if (lseek(eptr->metafd,offset,SEEK_SET)<0) {
		ret = -1;	// do not read from an unknown position
	} else {
		ret = read(eptr->metafd,ptr+4,leng);
	}
#endif /* HAVE_PREAD */
	if (ret!=(ssize_t)leng) {
		MFSLOG(LOG_NOTICE,"error reading metafile: %m");
		eptr->mode=KILL;
		return;
	}
	crc = mycrc32(0,ptr+4,leng);
	put32bit(&ptr,crc);
}
Beispiel #3
0
/*
 * Process a key-sync control message for socket end 'se'.
 *
 * Steps:
 *   1. refuse the request if a shared key has already been installed,
 *   2. check the length of the encrypted key,
 *   3. decrypt it with the server private key,
 *   4. compare the crc32 of the decrypted key with the one sent in 'cks',
 *   5. store the key in se->shared_key and set se->shared_key_flag.
 *
 * Exactly one reply is sent: socket_end_send_key_reject() on any failure,
 * socket_end_send_key_ok() on success.
 *
 * Returns 0 on success, -1 when the request was rejected.
 */
static int socket_end_key_sync(struct socket_end_st *se, struct internal_ctl_socket_key_sync_st *cks) {
	uint32_t crc32;
	uint8_t plainkey[SHAREDKEY_BYTESIZE];

	mylog(L_DEBUG, "Socket end key sync, se[%d]", se->id);
	if (se->shared_key_flag) {
		mylog(L_ERR, "Shared key is existed, client %s:%u, se[%d]", se->client_str, se->client_port, se->id);
		socket_end_send_key_reject(se);
		/* The request has been rejected - stop here, otherwise the existing
		 * key could still be replaced and a second reply would be sent. */
		return -1;
	}

	if (cks->encrypted_shared_key_len != RSA_KEYSIZE) {
		mylog(L_ERR, "Key len error");
		socket_end_send_key_reject(se);
		return -1;
	}

	if (decrypt_synckey(cks->encrypted_shared_key,
				plainkey,
				l7_param.server_privkey) < 0) {
		mylog(L_ERR, "Decrypt from encrypted shared key failed, client %s, se[%d]", se->client_str, se->id);
		socket_end_send_key_reject(se);
		return -1;
	}

	/* integrity check of the decrypted key */
	crc32 = mycrc32(plainkey, SHAREDKEY_BYTESIZE);
	if (cks->crc32 != crc32) {
		mylog(L_ERR, "Check key crc32 failed, client %s, se{%d}", se->client_str, se->id);
		socket_end_send_key_reject(se);
		return -1;
	}

	memcpy(se->shared_key, plainkey, SHAREDKEY_BYTESIZE);
	se->shared_key_flag = 1;

	socket_end_send_key_ok(se);

	return 0;
}
Beispiel #4
0
/*
 * Handle one MATOAN_DOWNLOAD_DATA packet: offset:64 leng:32 crc:32 data.
 *
 * Protocol violations (file not opened, bad packet size, unexpected offset,
 * data reaching beyond the announced file size) set eptr->mode to KILL.
 * I/O or crc problems are retried: up to 5 times masterconn_download_next()
 * is called again without moving eptr->dloffset, after that
 * masterconn_download_end() is called.
 * On success eptr->dloffset is advanced, the retry counter is cleared and
 * masterconn_download_next() is called.
 */
void masterconn_download_data(masterconn *eptr,const uint8_t *data,uint32_t length) {
    uint64_t fileoff;
    uint32_t datalen;
    uint32_t expcrc;
    ssize_t written;

    if (eptr->metafd<0) {
        syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - file not opened");
        goto kill;
    }
    if (length<16) {
        syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - wrong size (%"PRIu32"/16+data)",length);
        goto kill;
    }
    passert(data);

    /* fixed 16-byte header; afterwards 'data' points at the payload */
    fileoff = get64bit(&data);
    datalen = get32bit(&data);
    expcrc = get32bit(&data);

    if (datalen+16!=length) {
        syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - wrong size (%"PRIu32"/16+%"PRIu32")",length,datalen);
        goto kill;
    }
    if (fileoff!=eptr->dloffset) {
        syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - unexpected file offset (%"PRIu64"/%"PRIu64")",fileoff,eptr->dloffset);
        goto kill;
    }
    if (fileoff+datalen>eptr->filesize) {
        syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - unexpected file size (%"PRIu64"/%"PRIu64")",fileoff+datalen,eptr->filesize);
        goto kill;
    }

#ifdef HAVE_PWRITE
    written = pwrite(eptr->metafd,data,datalen,fileoff);
#else /* HAVE_PWRITE */
    lseek(eptr->metafd,fileoff,SEEK_SET);
    written = write(eptr->metafd,data,datalen);
#endif /* HAVE_PWRITE */
    if (written!=(ssize_t)datalen) {
        mfs_errlog_silent(LOG_NOTICE,"error writing metafile");
        goto retry;
    }
    if (mycrc32(0,data,datalen)!=expcrc) {
        syslog(LOG_NOTICE,"metafile data crc error");
        goto retry;
    }
    if (fsync(eptr->metafd)<0) {
        mfs_errlog_silent(LOG_NOTICE,"error syncing metafile");
        goto retry;
    }

    /* block stored - move on to the next one */
    eptr->dloffset += datalen;
    eptr->downloadretrycnt = 0;
    masterconn_download_next(eptr);
    return;

retry:
    /* ask for the same block again unless the retry limit has been reached */
    if (eptr->downloadretrycnt<5) {
        eptr->downloadretrycnt++;
        masterconn_download_next(eptr);
    } else {
        masterconn_download_end(eptr);
    }
    return;

kill:
    eptr->mode = KILL;
}
Beispiel #5
0
/*
 * Replicate chunk 'chunkid' from 'srccnt' sources and store it locally
 * with version 'version'.
 *
 * srcs: srccnt * (chunkid:64 version:32 ip:32 port:16)
 *
 * Outline:
 *   - create the local chunk (hdd_create) and connect to all sources,
 *   - ask every source for its number of blocks,
 *   - request all blocks from every source that has any,
 *   - for each block: receive it from every source that still has data;
 *     with one buffer it is written as received, with more than one the
 *     buffers are XORed together and the crc is derived from the received crcs,
 *   - receive the final read status from every source,
 *   - close the chunk and set its version (hdd_version).
 *
 * Every failure path calls rep_cleanup(&r) exactly once before returning.
 *
 * Returns STATUS_OK on success, ERROR_EINVAL for srccnt==0, otherwise a
 * status reported by hdd_* / by a source, or one of the ERROR_* codes below.
 */
uint8_t replicate(uint64_t chunkid,uint32_t version,uint8_t srccnt,const uint8_t *srcs) {
	replication r;
	uint8_t status,i,vbuffs,first;
	uint16_t b,blocks;
	uint32_t xcrc,crc;
	uint8_t *wptr;
	const uint8_t *rptr;
	int s;

	if (srccnt==0) {
		return ERROR_EINVAL;
	}

	pthread_mutex_lock(&statslock);
	stats_repl++;
	pthread_mutex_unlock(&statslock);

// init replication structure
	r.chunkid = chunkid;
	r.version = version;
	r.srccnt = 0;	// no source is initialized yet
	r.created = 0;
	r.opened = 0;
	r.fds = malloc(sizeof(struct pollfd)*srccnt);
	passert(r.fds);
	r.repsources = malloc(sizeof(repsrc)*srccnt);
	passert(r.repsources);
	if (srccnt>1) {
		r.xorbuff = malloc(65536+4);	// crc:32 followed by one data block
		passert(r.xorbuff);
	} else {
		r.xorbuff = NULL;
	}
// create chunk
	status = hdd_create(chunkid,0);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_create status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.created = 1;
// init sources
	r.srccnt = srccnt;
	for (i=0 ; i<srccnt ; i++) {
		r.repsources[i].chunkid = get64bit(&srcs);
		r.repsources[i].version = get32bit(&srcs);
		r.repsources[i].ip = get32bit(&srcs);
		r.repsources[i].port = get16bit(&srcs);
		r.repsources[i].sock = -1;
		r.repsources[i].packet = NULL;
	}
// connect
	for (i=0 ; i<srccnt ; i++) {
		s = tcpsocket();
		if (s<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: socket error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		r.repsources[i].sock = s;
		r.fds[i].fd = s;
		if (tcpnonblock(s)<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port);
		if (s<0) {
			mfs_errlog_silent(LOG_NOTICE,"replicator: connect error");
			rep_cleanup(&r);
			return ERROR_CANTCONNECT;
		}
		if (s==0) {
			r.repsources[i].mode = IDLE;
		} else {
			r.repsources[i].mode = CONNECTING;
		}
	}
	if (rep_wait_for_connection(&r,CONNMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_CANTCONNECT;
	}
// open chunk
	status = hdd_open(chunkid);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_open status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.opened = 1;
// get block numbers
	for (i=0 ; i<srccnt ; i++) {
		wptr = rep_create_packet(r.repsources+i,CSTOCS_GET_CHUNK_BLOCKS,8+4);
		if (wptr==NULL) {
			syslog(LOG_NOTICE,"replicator: out of memory");
			rep_cleanup(&r);
			return ERROR_OUTOFMEMORY;
		}
		put64bit(&wptr,r.repsources[i].chunkid);
		put32bit(&wptr,r.repsources[i].version);
	}
// send packet
	if (rep_send_all_packets(&r,SENDMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// receive answers
	for (i=0 ; i<srccnt ; i++) {
		r.repsources[i].mode = HEADER;
		r.repsources[i].startptr = r.repsources[i].hdrbuff;
		r.repsources[i].bytesleft = 8;
	}
	if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// get block no
	blocks = 0;	// maximum number of blocks over all sources
	for (i=0 ; i<srccnt ; i++) {
		uint32_t type,size;
		uint64_t pchid;
		uint32_t pver;
		uint16_t pblocks;
		uint8_t pstatus;
		rptr = r.repsources[i].hdrbuff;
		type = get32bit(&rptr);
		size = get32bit(&rptr);
		rptr = r.repsources[i].packet;
		if (rptr==NULL || type!=CSTOCS_GET_CHUNK_BLOCKS_STATUS || size!=15) {
			syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		}
		pchid = get64bit(&rptr);
		pver = get32bit(&rptr);
		pblocks = get16bit(&rptr);
		pstatus = get8bit(&rptr);
		if (pchid!=r.repsources[i].chunkid) {
			syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_WRONGCHUNKID;
		}
		if (pver!=r.repsources[i].version) {
			syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:version:%"PRIX32"/%"PRIX32") from (%08"PRIX32":%04"PRIX16")",pver,r.repsources[i].version,r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return ERROR_WRONGVERSION;
		}
		if (pstatus!=STATUS_OK) {
			syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port);
			rep_cleanup(&r);
			return pstatus;
		}
		r.repsources[i].blocks = pblocks;
		if (pblocks>blocks) {
			blocks=pblocks;
		}
	}
// create read request
	for (i=0 ; i<srccnt ; i++) {
		if (r.repsources[i].blocks>0) {
			uint32_t leng;
			wptr = rep_create_packet(r.repsources+i,CUTOCS_READ,8+4+4+4);
			if (wptr==NULL) {
				syslog(LOG_NOTICE,"replicator: out of memory");
				rep_cleanup(&r);
				return ERROR_OUTOFMEMORY;
			}
			// multiply as unsigned 32-bit: the block count comes from the peer and
			// a promoted signed int product could overflow for large values
			leng = ((uint32_t)(r.repsources[i].blocks))*0x10000U;
			put64bit(&wptr,r.repsources[i].chunkid);
			put32bit(&wptr,r.repsources[i].version);
			put32bit(&wptr,0);
			put32bit(&wptr,leng);
		} else {
			rep_no_packet(r.repsources+i);
		}
	}
// send read request
	if (rep_send_all_packets(&r,SENDMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
// receive data and write to hdd
	for (b=0 ; b<blocks ; b++) {
// prepare receive
		for (i=0 ; i<srccnt ; i++) {
			if (b<r.repsources[i].blocks) {
				r.repsources[i].mode = HEADER;
				r.repsources[i].startptr = r.repsources[i].hdrbuff;
				r.repsources[i].bytesleft = 8;
			} else {	// this source has no more blocks
				r.repsources[i].mode = IDLE;
				r.repsources[i].bytesleft = 0;
			}
		}
// receive data
		if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		}
// check packets
		vbuffs = 0;	// number of sources that delivered block 'b'
		for (i=0 ; i<srccnt ; i++) {
			if (r.repsources[i].mode!=IDLE) {
				uint32_t type,size;
				uint64_t pchid;
				uint16_t pblocknum;
				uint16_t poffset;
				uint32_t psize;
				uint8_t pstatus;
				rptr = r.repsources[i].hdrbuff;
				type = get32bit(&rptr);
				size = get32bit(&rptr);
				rptr = r.repsources[i].packet;
				if (rptr==NULL) {
					rep_cleanup(&r);
					return ERROR_DISCONNECTED;
				}
				if (type==CSTOCU_READ_STATUS && size==9) {
					// a status packet instead of data always ends the replication
					pchid = get64bit(&rptr);
					pstatus = get8bit(&rptr);
					// log first and clean up afterwards - the messages still read r.repsources[i]
					if (pchid!=r.repsources[i].chunkid) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGCHUNKID;
					}
					if (pstatus==STATUS_OK) {	// got status too early or got incorrect packet
						syslog(LOG_WARNING,"replicator: got unexpected ok status from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_DISCONNECTED;
					}
					syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port);
					rep_cleanup(&r);
					return pstatus;
				} else if (type==CSTOCU_READ_DATA && size==20+65536) {
					pchid = get64bit(&rptr);
					pblocknum = get16bit(&rptr);
					poffset = get16bit(&rptr);
					psize = get32bit(&rptr);
					if (pchid!=r.repsources[i].chunkid) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGCHUNKID;
					}
					if (pblocknum!=b) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:blocknum:%"PRIu16"/%"PRIu16") from (%08"PRIX32":%04"PRIX16")",pblocknum,b,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_DISCONNECTED;
					}
					if (poffset!=0) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:offset:%"PRIu16") from (%08"PRIX32":%04"PRIX16")",poffset,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGOFFSET;
					}
					if (psize!=65536) {
						syslog(LOG_WARNING,"replicator: got wrong answer (read_data:size:%"PRIu32") from (%08"PRIX32":%04"PRIX16")",psize,r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_WRONGSIZE;
					}
				} else {
					syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
					rep_cleanup(&r);
					return ERROR_DISCONNECTED;
				}
				vbuffs++;
			}
		}
// write data
		if (vbuffs==0) {	// no buffers ? - it should never happen
			syslog(LOG_WARNING,"replicator: no data received for block: %"PRIu16,b);
			rep_cleanup(&r);
			return ERROR_DISCONNECTED;
		} else if (vbuffs==1) { // xor not needed, so just find block and write it
			for (i=0 ; i<srccnt ; i++) {
				if (r.repsources[i].mode!=IDLE) {
					rptr = r.repsources[i].packet;
					// packet layout: 16 bytes of header fields, crc:32, then the data
					status = hdd_write(chunkid,0,b,rptr+20,0,65536,rptr+16);
					if (status!=STATUS_OK) {
						syslog(LOG_WARNING,"replicator: write status: %u",status);
						rep_cleanup(&r);
						return status;
					}
				}
			}
		} else {
			first=1;
			if (vbuffs&1) {
				xcrc = 0;
			} else {
				xcrc = 0xD7978EEBU; // = mycrc32_zeroblock(0,0x10000);
			}
			for (i=0 ; i<srccnt ; i++) {
				if (r.repsources[i].mode!=IDLE) {
					rptr = r.repsources[i].packet;
					rptr+=16;	// skip chunkid,blockno,offset and size
					if (first) {
						memcpy(r.xorbuff+4,rptr+4,65536);
						first=0;
					} else {
						xordata(r.xorbuff+4,rptr+4,65536);
					}
					crc = get32bit(&rptr);	// rptr points at the data afterwards
					if (crc!=mycrc32(0,rptr,65536)) {
						syslog(LOG_WARNING,"replicator: received data with wrong checksum from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
						rep_cleanup(&r);
						return ERROR_CRC;
					}
					xcrc^=crc;
				}
			}
			wptr = r.xorbuff;
			put32bit(&wptr,xcrc);
			status = hdd_write(chunkid,0,b,r.xorbuff+4,0,65536,r.xorbuff);
			if (status!=STATUS_OK) {
				syslog(LOG_WARNING,"replicator: xor write status: %u",status);
				rep_cleanup(&r);
				return status;
			}
		}
	}
// receive status
	for (i=0 ; i<srccnt ; i++) {
		if (r.repsources[i].blocks>0) {
			r.repsources[i].mode = HEADER;
			r.repsources[i].startptr = r.repsources[i].hdrbuff;
			r.repsources[i].bytesleft = 8;
		} else {
			r.repsources[i].mode = IDLE;
			r.repsources[i].bytesleft = 0;
		}
	}
	if (rep_receive_all_packets(&r,RECVMSECTO)<0) {
		rep_cleanup(&r);
		return ERROR_DISCONNECTED;
	}
	for (i=0 ; i<srccnt ; i++) {
		if (r.repsources[i].blocks>0) {
			uint32_t type,size;
			uint64_t pchid;
			uint8_t pstatus;
			rptr = r.repsources[i].hdrbuff;
			type = get32bit(&rptr);
			size = get32bit(&rptr);
			rptr = r.repsources[i].packet;
			if (rptr==NULL || type!=CSTOCU_READ_STATUS || size!=9) {
				syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port);
				rep_cleanup(&r);
				return ERROR_DISCONNECTED;
			}
			pchid = get64bit(&rptr);
			pstatus = get8bit(&rptr);
			if (pchid!=r.repsources[i].chunkid) {
				syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port);
				rep_cleanup(&r);
				return ERROR_WRONGCHUNKID;
			}
			if (pstatus!=STATUS_OK) {
				syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port);
				rep_cleanup(&r);
				return pstatus;
			}
		}
	}
// close chunk and change version
	status = hdd_close(chunkid);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_close status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.opened = 0;
	status = hdd_version(chunkid,0,version);
	if (status!=STATUS_OK) {
		syslog(LOG_NOTICE,"replicator: hdd_version status: %u",status);
		rep_cleanup(&r);
		return status;
	}
	r.created = 0;
	rep_cleanup(&r);
	return STATUS_OK;
}
Beispiel #6
0
/* main working thread | glock:UNLOCKED */
/*
 * Endless loop of a write worker thread. One iteration handles one job
 * (an inodedata taken from 'jqueue'):
 *   - take the chunk index of the first queued block,
 *   - get chunk id/version and the chain of chunkservers (fs_writechunk),
 *   - connect to the first server of the chain (up to 5 attempts),
 *   - send CUTOCS_WRITE followed by CUTOCS_WRITE_DATA packets for queued
 *     blocks of that chunk while receiving CSTOCU_WRITE_STATUS answers;
 *     a block is unlinked and released when its status arrives,
 *   - report the end of writing (fs_writeend) and finish or re-queue the job.
 *
 * 'arg' is only used as a worker id in the WORKER_DEBUG log line.
 * The function never returns.
 */
void* write_worker(void *arg) {
	uint32_t z1,z2,z3;
	uint8_t *data;
	int fd;
	int i;
	struct pollfd pfd[2];
	uint32_t sent,rcvd;
	uint8_t recvbuff[21];
	uint8_t sendbuff[32];
#ifdef HAVE_WRITEV
	struct iovec siov[2];
#endif
	uint8_t pipebuff[1024];
	uint8_t *wptr;
	const uint8_t *rptr;

	uint32_t reccmd;
	uint32_t recleng;
	uint64_t recchunkid;
	uint32_t recwriteid;
	uint8_t recstatus;

#ifdef WORKER_DEBUG
	uint32_t partialblocks;
	uint32_t bytessent;
	char debugchain[200];
	uint32_t cl;
#endif

	const uint8_t *cp,*cpe;
	uint32_t chainip[10];
	uint16_t chainport[10];
	uint16_t chainelements;

	uint16_t chindx;
	uint32_t ip;
	uint16_t port;
	uint32_t srcip;
	uint64_t mfleng;
	uint64_t maxwroffset;
	uint64_t chunkid;
	uint32_t version;
	uint32_t nextwriteid;
	const uint8_t *chain;
	uint32_t chainsize;
	const uint8_t *csdata;
	uint32_t csdatasize;
	uint8_t westatus;
	uint8_t wrstatus;
	int status;
	uint8_t waitforstatus;
	uint8_t havedata;
	struct timeval start,now,lastrcvd,lrdiff;

	uint8_t cnt;

	inodedata *id;
	cblock *cb,*rcb;

	chainelements = 0;

	(void)arg;
	for (;;) {
		// release write counters taken for the chain of the previous job
		for (cnt=0 ; cnt<chainelements ; cnt++) {
			csdb_writedec(chainip[cnt],chainport[cnt]);
		}
		chainelements=0;

		// get next job
		queue_get(jqueue,&z1,&z2,&data,&z3);
		id = (inodedata*)data;

		pthread_mutex_lock(&glock);
		if (id->datachainhead) {
			chindx = id->datachainhead->chindx;
			status = id->status;
		} else {
			syslog(LOG_WARNING,"writeworker got inode with no data to write !!!");
			chindx = 0xFFFF;
			status = EINVAL;	// this should never happen, so status is not important - just anything
		}
		pthread_mutex_unlock(&glock);

		if (status) {
			write_job_end(id,status,0);
			continue;
		}

		// get chunk data from master
		wrstatus = fs_writechunk(id->inode,chindx,&mfleng,&chunkid,&version,&csdata,&csdatasize);
		if (wrstatus!=STATUS_OK) {
			syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16" - fs_writechunk returns status %d",id->inode,chindx,wrstatus);
			if (wrstatus!=ERROR_LOCKED) {
				if (wrstatus==ERROR_ENOENT) {
					write_job_end(id,EBADF,0);
				} else if (wrstatus==ERROR_QUOTA) {
					write_job_end(id,EDQUOT,0);
				} else if (wrstatus==ERROR_NOSPACE) {
					write_job_end(id,ENOSPC,0);
				} else {
					id->trycnt++;
					if (id->trycnt>=maxretries) {
						if (wrstatus==ERROR_NOCHUNKSERVERS) {
							write_job_end(id,ENOSPC,0);
						} else {
							write_job_end(id,EIO,0);
						}
					} else {
						// delay grows with the try counter; inner parentheses are required
						// because '+' binds stronger than '?:'
						write_delayed_enqueue(id,1+((id->trycnt<30)?(id->trycnt/3):10));
					}
				}
			} else {
				write_delayed_enqueue(id,1+((id->trycnt<30)?(id->trycnt/3):10));
			}
			continue;	// get next job
		}
		if (csdata==NULL || csdatasize==0) {
			syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - there are no valid copies",id->inode,chindx,chunkid,version);
			id->trycnt+=6;
			if (id->trycnt>=maxretries) {
				write_job_end(id,ENXIO,0);
			} else {
				write_delayed_enqueue(id,60);
			}
			continue;
		}
		// remember (at most 10) chain elements (ip:32 port:16) and mark them as written to
		cp = csdata;
		cpe = csdata+csdatasize;
		while (cp<cpe && chainelements<10) {
			chainip[chainelements] = get32bit(&cp);
			chainport[chainelements] = get16bit(&cp);
			csdb_writeinc(chainip[chainelements],chainport[chainelements]);
			chainelements++;
		}

		// first element is the server we connect to, the rest is passed to it
		chain = csdata;
		ip = get32bit(&chain);
		port = get16bit(&chain);
		chainsize = csdatasize-6;
		gettimeofday(&start,NULL);

		// make connection to cs
		srcip = fs_getsrcip();
		cnt=5;
		while (cnt>0) {
			fd = tcpsocket();
			if (fd<0) {
				syslog(LOG_WARNING,"can't create tcp socket: %m");
				break;	// no socket - nothing to bind, connect or close
			}
			if (srcip) {
				if (tcpnumbind(fd,srcip,0)<0) {
					syslog(LOG_WARNING,"can't bind socket to given ip: %m");
					tcpclose(fd);
					fd=-1;
					break;
				}
			}
			if (tcpnumtoconnect(fd,ip,port,200)<0) {
				cnt--;
				if (cnt==0) {
					syslog(LOG_WARNING,"can't connect to (%08"PRIX32":%"PRIu16"): %m",ip,port);
				}
				tcpclose(fd);
				fd=-1;
			} else {
				cnt=0;
			}
		}
		if (fd<0) {
			fs_writeend(chunkid,id->inode,0);
			id->trycnt++;
			if (id->trycnt>=maxretries) {
				write_job_end(id,EIO,0);
			} else {
				write_delayed_enqueue(id,1+((id->trycnt<30)?(id->trycnt/3):10));
			}
			continue;
		}
		if (tcpnodelay(fd)<0) {
			syslog(LOG_WARNING,"can't set TCP_NODELAY: %m");
		}

#ifdef WORKER_DEBUG
		partialblocks=0;
		bytessent=0;
#endif
		nextwriteid=1;

		pfd[0].fd = fd;
		pfd[1].fd = id->pipe[0];
		rcvd = 0;
		sent = 0;
		waitforstatus=1;	// status of the initial CUTOCS_WRITE packet
		havedata=1;
		wptr = sendbuff;
		put32bit(&wptr,CUTOCS_WRITE);
		put32bit(&wptr,12+chainsize);
		put64bit(&wptr,chunkid);
		put32bit(&wptr,version);
		cb = NULL;

		status = 0;
		wrstatus = STATUS_OK;

		lastrcvd.tv_sec = 0;

		do {
			gettimeofday(&now,NULL);

			// give up when nothing has been received for at least 2 seconds
			if (lastrcvd.tv_sec==0) {
				lastrcvd = now;
			} else {
				lrdiff = now;
				if (lrdiff.tv_usec<lastrcvd.tv_usec) {
					lrdiff.tv_sec--;
					lrdiff.tv_usec+=1000000;
				}
				lrdiff.tv_sec -= lastrcvd.tv_sec;
				lrdiff.tv_usec -= lastrcvd.tv_usec;
				if (lrdiff.tv_sec>=2) {
					syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was timed out (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1);
					break;
				}
			}

			// from here 'now' holds the time elapsed since 'start'
			if (now.tv_usec<start.tv_usec) {
				now.tv_sec--;
				now.tv_usec+=1000000;
			}
			now.tv_sec -= start.tv_sec;
			now.tv_usec -= start.tv_usec;

			// pick next block to send (only during the first 5 seconds and with less than 5 unconfirmed packets)
			if (havedata==0 && now.tv_sec<5 && waitforstatus<5) {
				pthread_mutex_lock(&glock);
				if (cb==NULL) {
					if (id->datachainhead) {
						// partial block is sent only when at most one status is pending
						if (id->datachainhead->to-id->datachainhead->from==65536 || waitforstatus<=1) {
							cb = id->datachainhead;
							havedata=1;
						}
					}
				} else {
					if (cb->next) {
						if (cb->next->chindx==chindx) {
							if (cb->next->to-cb->next->from==65536 || waitforstatus<=1) {
								cb = cb->next;
								havedata=1;
							}
						}
					} else {
						id->waitingworker=1;
					}
				}
				if (havedata==1) {
					cb->writeid = nextwriteid++;
					waitforstatus++;
					wptr = sendbuff;
					put32bit(&wptr,CUTOCS_WRITE_DATA);
					put32bit(&wptr,24+(cb->to-cb->from));
					put64bit(&wptr,chunkid);
					put32bit(&wptr,cb->writeid);
					put16bit(&wptr,cb->pos);
					put16bit(&wptr,cb->from);
					put32bit(&wptr,cb->to-cb->from);
					put32bit(&wptr,mycrc32(0,cb->data+cb->from,cb->to-cb->from));
#ifdef WORKER_DEBUG
					if (cb->to-cb->from<65536) {
						partialblocks++;
					}
					bytessent+=(cb->to-cb->from);
#endif
					sent=0;
				}
				pthread_mutex_unlock(&glock);
			}

			pfd[0].events = POLLIN | (havedata?POLLOUT:0);
			pfd[0].revents = 0;
			pfd[1].events = POLLIN;
			pfd[1].revents = 0;
			if (poll(pfd,2,100)<0) { /* correct timeout - in msec */
				syslog(LOG_WARNING,"writeworker: poll error: %m");
				status=EIO;
				break;
			}
			if (pfd[1].revents&POLLIN) {	// used just to break poll - so just read all data from pipe to empty it
				i = read(id->pipe[0],pipebuff,1024);
			}
			if (pfd[0].revents&POLLIN) {
				i = read(fd,recvbuff+rcvd,21-rcvd);
				if (i==0) { 	// connection reset by peer - status header could not be read
					syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1);
					status=EIO;
					break;
				}
				if (i<0) {	// read error - a negative value must never be added to 'rcvd'
					syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: read error on connection with (%08"PRIX32":%"PRIu16"): %m",id->inode,chindx,chunkid,version,ip,port);
					status=EIO;
					break;
				}
				gettimeofday(&lastrcvd,NULL);
				rcvd+=i;
				if (rcvd==21) {	// whole status packet: cmd:32 leng:32 chunkid:64 writeid:32 status:8
					rptr = recvbuff;
					reccmd = get32bit(&rptr);
					recleng = get32bit(&rptr);
					recchunkid = get64bit(&rptr);
					recwriteid = get32bit(&rptr);
					recstatus = get8bit(&rptr);
					if (reccmd!=CSTOCU_WRITE_STATUS ||  recleng!=13) {
						syslog(LOG_WARNING,"writeworker: got unrecognized packet from chunkserver (cmd:%"PRIu32",leng:%"PRIu32")",reccmd,recleng);
						status=EIO;
						break;
					}
					if (recchunkid!=chunkid) {
						syslog(LOG_WARNING,"writeworker: got unexpected packet (expected chunkdid:%"PRIu64",packet chunkid:%"PRIu64")",chunkid,recchunkid);
						status=EIO;
						break;
					}
					if (recstatus!=STATUS_OK) {
						syslog(LOG_WARNING,"writeworker: write error: %"PRIu8,recstatus);
						wrstatus=recstatus;
						break;
					}
					if (recwriteid>0) {	// writeid 0 is not looked up in the block list
						pthread_mutex_lock(&glock);
						for (rcb = id->datachainhead ; rcb && rcb->writeid!=recwriteid ; rcb=rcb->next) {}
						if (rcb==NULL) {
							syslog(LOG_WARNING,"writeworker: got unexpected status (writeid:%"PRIu32")",recwriteid);
							pthread_mutex_unlock(&glock);
							status=EIO;
							break;
						}
						if (rcb==cb) {	// current block - cb points at the block being sent
							if (havedata) {	// got status ok before all data had been sent - error
								syslog(LOG_WARNING,"writeworker: got status OK before all data have been sent");
								pthread_mutex_unlock(&glock);
								status=EIO;
								break;
							} else {
								cb = NULL;
							}
						}
						// unlink the confirmed block (rcb) from the list
						if (rcb->prev) {
							rcb->prev->next = rcb->next;
						} else {
							id->datachainhead = rcb->next;
						}
						if (rcb->next) {
							rcb->next->prev = rcb->prev;
						} else {
							id->datachaintail = rcb->prev;
						}
						// extend known file length up to the end of the confirmed data
						maxwroffset = (((uint64_t)(chindx))<<26)+(((uint32_t)(rcb->pos))<<16)+rcb->to;
						if (maxwroffset>mfleng) {
							mfleng=maxwroffset;
						}
						write_cb_release(rcb);
						id->cacheblocks--;
						if (id->cachewaiting>0) {
							pthread_cond_broadcast(&(id->cachecond));
						}
						pthread_mutex_unlock(&glock);
					}
					waitforstatus--;
					rcvd=0;
				}
			}
			if (havedata && (pfd[0].revents&POLLOUT)) {
				if (cb==NULL) {	// havedata==1 && cb==NULL means sending first packet (CUTOCS_WRITE)
					if (sent<20) {
						// with writev the header and the chain, which live in two separate buffers, are sent in one call
#ifdef HAVE_WRITEV
						if (chainsize>0) {
							siov[0].iov_base = sendbuff+sent;
							siov[0].iov_len = 20-sent;
							siov[1].iov_base = (char*)chain;	// discard const (safe - because it's used in writev)
							siov[1].iov_len = chainsize;
							i = writev(fd,siov,2);
						} else {
#endif
							i = write(fd,sendbuff+sent,20-sent);
#ifdef HAVE_WRITEV
						}
#endif
					} else {
						i = write(fd,chain+(sent-20),chainsize-(sent-20));
					}
					if (i<0) {
						syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1);
						status=EIO;
						break;
					}
					sent+=i;
					if (sent==20+chainsize) {
						havedata=0;
					}
				} else {	// sending CUTOCS_WRITE_DATA: 32 bytes of header from sendbuff, then block data
					if (sent<32) {
#ifdef HAVE_WRITEV
						siov[0].iov_base = sendbuff+sent;
						siov[0].iov_len = 32-sent;
						siov[1].iov_base = cb->data+cb->from;
						siov[1].iov_len = cb->to-cb->from;
						i = writev(fd,siov,2);
#else
						i = write(fd,sendbuff+sent,32-sent);
#endif
					} else {
						i = write(fd,cb->data+cb->from+(sent-32),cb->to-cb->from-(sent-32));
					}
					if (i<0) {
						syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1);
						status=EIO;
						break;
					}
					sent+=i;
					if (sent==32+cb->to-cb->from) {
						havedata=0;
					}
				}
			}
		} while (waitforstatus>0 && now.tv_sec<10);	// until everything is confirmed, but no longer than 10 seconds


		id->waitingworker=0;

		tcpclose(fd);

#ifdef WORKER_DEBUG
		gettimeofday(&now,NULL);
		if (now.tv_usec<start.tv_usec) {
			now.tv_sec--;
			now.tv_usec+=1000000;
		}
		now.tv_sec -= start.tv_sec;
		now.tv_usec -= start.tv_usec;

		cl=0;
		for (cnt=0 ; cnt<chainelements ; cnt++) {
			cl+=snprintf(debugchain+cl,200-cl,"%u.%u.%u.%u:%u->",(chainip[cnt]>>24)&255,(chainip[cnt]>>16)&255,(chainip[cnt]>>8)&255,chainip[cnt]&255,chainport[cnt]);
		}
		if (cl>=2) {
			debugchain[cl-2]='\0';
		}
		syslog(LOG_NOTICE,"worker %lu sent %"PRIu32" blocks (%"PRIu32" partial) of chunk %016"PRIX64"_%08"PRIX32", received status for %"PRIu32" blocks (%"PRIu32" lost), bw: %.6lfMB ( %"PRIu32" B / %.0lf us ), chain: %s",(unsigned long)arg,nextwriteid-1,partialblocks,chunkid,version,nextwriteid-1-waitforstatus,waitforstatus,(double)bytessent/((double)(now.tv_sec)*1000000+(double)(now.tv_usec)),bytessent,((double)(now.tv_sec)*1000000+(double)(now.tv_usec)),debugchain);
#endif

		// tell master that writing to this chunk is over (up to 10 attempts with growing pauses)
		for (cnt=0 ; cnt<10 ; cnt++) {
			westatus = fs_writeend(chunkid,id->inode,mfleng);
			if (westatus!=STATUS_OK) {
				usleep(100000+(10000<<cnt));
			} else {
				break;
			}
		}

		if (westatus!=STATUS_OK) {
			write_job_end(id,ENXIO,0);
		} else if (status!=0 || wrstatus!=STATUS_OK) {
			if (wrstatus!=STATUS_OK) {	// convert MFS status to OS errno
				if (wrstatus==ERROR_NOSPACE) {
					status=ENOSPC;
				} else {
					status=EIO;
				}
			}
			id->trycnt++;
			if (id->trycnt>=maxretries) {
				write_job_end(id,status,0);
			} else {
				write_job_end(id,0,1+((id->trycnt<30)?(id->trycnt/3):10));
			}
		} else {
			read_inode_ops(id->inode);
			write_job_end(id,0,0);
		}
	}
}
Beispiel #7
0
int chunk_repair(const char *fname,uint8_t fastmode,uint8_t showok,uint8_t repair) {
	uint64_t namechunkid;
	uint32_t nameversion;
	uint32_t i;
	int fd;
	uint8_t buff[MFSBLOCKSIZE];
	uint32_t crc[1024];
	uint32_t crcblock;
	off_t s;
	const uint8_t *rp;
	uint8_t *wp;
	int ret=0;

	// check fname
	// name should be in format: ..../chunk_XXXXXXXXXXXXXXXX_YYYYYYYY.mfs
	if (repair) {
		fd = open(fname,O_RDWR);
	} else {
		fd = open(fname,O_RDONLY);
	}
	if (fd<0) {
		fprintf(stderr,"%s: error opening file !!!\n",fname);
		return -1;
	}
	i = strlen(fname);
	if (i<35) {
		fprintf(stderr,"%s: wrong chunk name format !!! (skip header)\n",fname);
		ret |= 1;
	} else {
		if (hdd_check_filename(fname+(i-35),&namechunkid,&nameversion)<0) {
			fprintf(stderr,"%s: wrong chunk name format !!! (skip header)\n",fname);
			ret |= 1;
		} else {
			if (read(fd,buff,20)!=20) {
				fprintf(stderr,"%s: error reading header !!!\n",fname);
				close(fd);
				return -1;
			}
			if (memcmp(buff,MFSSIGNATURE "C 1.0",8)!=0) {
				fprintf(stderr,"%s: wrong chunk header !!!\n",fname);
				memcpy(buff,MFSSIGNATURE "C 1.0",8);
				ret |= 1;
			}
			rp = buff+8;
			wp = (uint8_t*)rp;
			if (get64bit(&rp)!=namechunkid) {
				fprintf(stderr,"%s: wrong chunk number in header !!!\n",fname);
				put64bit(&wp,namechunkid);
				ret |= 1;
			}
			wp = (uint8_t*)rp;
			if (get32bit(&rp)!=nameversion) {
				fprintf(stderr,"%s: wrong chunk version in header !!!\n",fname);
				put32bit(&wp,nameversion);
				ret |= 1;
			}
			if (repair && (ret&1)) {
				if (lseek(fd,0,SEEK_SET)!=0) {
					fprintf(stderr,"%s: error setting file pointer\n",fname);
					close(fd);
					return -1;
				}
				if (write(fd,buff,20)!=20) {
					fprintf(stderr,"%s: error writing header !!!\n",fname);
					close(fd);
					return -1;
				}
				ret |= 4;
			}
		}
	}

	// read crc
	if (lseek(fd,CHUNKHDRCRC,SEEK_SET)!=CHUNKHDRCRC) {
		fprintf(stderr,"%s: error setting file pointer\n",fname);
		close(fd);
		return -1;
	}
	if (read(fd,buff,4096)!=4096) {
		fprintf(stderr,"%s: error reading checksum block\n",fname);
		close(fd);
		return -1;
	}
	rp = buff;
	for (i=0 ; i<1024 ; i++) {
		crc[i] = get32bit(&rp);
	}

	// check data crc
	if (fastmode && repair==0) {
		s = lseek(fd,-MFSBLOCKSIZE,SEEK_END);
		if (s<MFSHDRSIZE) {
			fprintf(stderr,"%s: wrong file size\n",fname);
			close(fd);
			return -1;
		}
		s -= MFSHDRSIZE;
		if ((s%MFSBLOCKSIZE)!=0) {
			fprintf(stderr,"%s: wrong file size\n",fname);
			close(fd);
			return -1;
		}
		s >>= 16;
		if (read(fd,buff,MFSBLOCKSIZE)!=MFSBLOCKSIZE) {
			fprintf(stderr,"%s: error reading last data block\n",fname);
			close(fd);
			return -1;
		}
		crcblock = mycrc32(0,buff,MFSBLOCKSIZE);
		if (crc[s]!=crcblock) {
			fprintf(stderr,"%s: crc error (last block ; header crc: %08"PRIX32" ; block crc: %08"PRIX32")\n",fname,crc[s],crcblock);
			ret |= 2;
		}
	} else {