/*
 * Walk the chunks of a generated PNG stream and replace every "CRC#"
 * placeholder with the CRC-32 of the chunk it belongs to.
 *
 * buff - stream to patch in place; the first 8 bytes are skipped
 *        (presumably the PNG signature)
 * leng - number of valid bytes in buff
 *
 * Every chunk is read as: length(4) type(4) data(length) crc(4); the checksum
 * is computed over type+data. A chunk whose CRC field does not hold the
 * placeholder is reported and skipped. Parsing stops at the first chunk that
 * does not fit in the buffer.
 */
void charts_fill_crc(uint8_t *buff,uint32_t leng) {
	uint8_t *ptr,*eptr;
	uint32_t crc,chleng;

	if (leng<8) {	// nothing behind the skipped header
		return;
	}
	ptr = buff+8;
	eptr = buff+leng;
	while ((uint64_t)(eptr-ptr)>=4) {
		chleng = get32bit((const uint8_t **)&ptr);
		// compare sizes, not pointers - a bogus length must not be able to wrap the pointer
		if ((uint64_t)chleng+8>(uint64_t)(eptr-ptr)) {
			// truncated chunk - whatever follows is not chunk-aligned, so stop here
			break;
		}
		crc = mycrc32(0,ptr,chleng+4);
		ptr += chleng+4;
		if (memcmp(ptr,"CRC#",4)==0) {
			put32bit(&ptr,crc);	// also moves ptr behind the CRC field
		} else {
			syslog(LOG_WARNING,"charts: unexpected data in generated png stream");
			ptr += 4;	// step over the CRC field to stay aligned with the next chunk
		}
	}
}
/*
 * Handle a slaTOMA_DOWNLOAD_DATA request.
 *
 * Request payload (exactly 12 bytes): offset:64 leng:32
 * Reply (MATOSLA_DOWNLOAD_DATA):      offset:64 leng:32 crc:32 data:leng
 *
 * eptr   - connection the request arrived on; eptr->metafd must be an open
 *          descriptor of the file being downloaded
 * data   - request payload
 * length - payload size
 *
 * On any error the connection is marked for termination (mode=KILL).
 */
void matoslaserv_download_data(serventry *eptr,const uint8_t *data,uint32_t length) {
	uint8_t *ptr;
	uint64_t offset;
	uint32_t leng;
	uint32_t crc;
	ssize_t ret;

	if (length!=12) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - wrong size (%"PRIu32"/12)",length);
		eptr->mode=KILL;
		return;
	}
	if (eptr->metafd<0) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - file not opened");
		eptr->mode=KILL;
		return;
	}
	offset = get64bit(&data);
	leng = get32bit(&data);
	// leng comes from the peer: 16+leng must not wrap around, otherwise the
	// packet would be smaller than the amount of data read into it below
	if (leng>UINT32_MAX-16U) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - wrong data length (%"PRIu32")",leng);
		eptr->mode=KILL;
		return;
	}
	ptr = matoslaserv_createpacket(eptr,MATOSLA_DOWNLOAD_DATA,16+leng);
	if (ptr==NULL) {
		eptr->mode=KILL;
		return;
	}
	put64bit(&ptr,offset);
	put32bit(&ptr,leng);
	// ptr now points at the crc field; file data goes right behind it (ptr+4)
#ifdef HAVE_PREAD
	ret = pread(eptr->metafd,ptr+4,leng,offset);
#else /* HAVE_PREAD */
	if (lseek(eptr->metafd,offset,SEEK_SET)==(off_t)-1) {
		ret = -1;
	} else {
		ret = read(eptr->metafd,ptr+4,leng);
	}
#endif /* HAVE_PREAD */
	if (ret!=(ssize_t)leng) {
		MFSLOG(LOG_NOTICE,"error reading metafile: %m");
		eptr->mode=KILL;
		return;
	}
	crc = mycrc32(0,ptr+4,leng);
	put32bit(&ptr,crc);
}
static int socket_end_key_sync(struct socket_end_st *se, struct internal_ctl_socket_key_sync_st *cks) { uint32_t crc32; uint8_t plainkey[SHAREDKEY_BYTESIZE]; mylog(L_DEBUG, "Socket end key sync, se[%d]", se->id); if (se->shared_key_flag) { mylog(L_ERR, "Shared key is existed, client %s:%u, se[%d]", se->client_str, se->client_port, se->id); socket_end_send_key_reject(se); } if (cks->encrypted_shared_key_len != RSA_KEYSIZE) { mylog(L_ERR, "Key len error"); socket_end_send_key_reject(se); return -1; } if (decrypt_synckey(cks->encrypted_shared_key, plainkey, l7_param.server_privkey) < 0) { mylog(L_ERR, "Decrypt from encrypted shared key failed, client %s, se[%d]", se->client_str, se->id); socket_end_send_key_reject(se); return -1; } crc32 = mycrc32(plainkey, SHAREDKEY_BYTESIZE); if (cks->crc32 != crc32) { mylog(L_ERR, "Check key crc32 failed, client %s, se{%d}", se->client_str, se->id); socket_end_send_key_reject(se); return -1; } memcpy(se->shared_key, plainkey, SHAREDKEY_BYTESIZE); se->shared_key_flag = 1; socket_end_send_key_ok(se); return 0; }
/*
 * Handle a MATOAN_DOWNLOAD_DATA packet: one block of the file being downloaded.
 *
 * Payload: offset:64 leng:32 crc:32 data:leng
 *
 * eptr   - connection state; metafd is the destination file, dloffset the
 *          offset expected next, filesize the announced total size
 * data   - packet payload
 * length - payload size
 *
 * Protocol violations (file not opened, wrong size, unexpected offset, data
 * beyond the announced size) mark the connection with mode=KILL.
 * A checksum mismatch, a write error or an fsync error takes the retry path:
 * up to 5 consecutive retries through masterconn_download_next(), after which
 * masterconn_download_end() is called.
 * On success dloffset is advanced, the retry counter is cleared and
 * masterconn_download_next() is called.
 */
void masterconn_download_data(masterconn *eptr,const uint8_t *data,uint32_t length) {
	uint64_t offset;
	uint32_t leng;
	uint32_t crc;
	ssize_t ret;

	if (eptr->metafd<0) {
		syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - file not opened");
		eptr->mode = KILL;
		return;
	}
	if (length<16) {
		syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - wrong size (%"PRIu32"/16+data)",length);
		eptr->mode = KILL;
		return;
	}
	passert(data);
	offset = get64bit(&data);
	leng = get32bit(&data);
	crc = get32bit(&data);
	if (leng+16!=length) {
		syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - wrong size (%"PRIu32"/16+%"PRIu32")",length,leng);
		eptr->mode = KILL;
		return;
	}
	if (offset!=eptr->dloffset) {
		syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - unexpected file offset (%"PRIu64"/%"PRIu64")",offset,eptr->dloffset);
		eptr->mode = KILL;
		return;
	}
	if (offset+leng>eptr->filesize) {
		syslog(LOG_NOTICE,"MATOAN_DOWNLOAD_DATA - unexpected file size (%"PRIu64"/%"PRIu64")",offset+leng,eptr->filesize);
		eptr->mode = KILL;
		return;
	}
	// verify the payload before touching the file, so a corrupted block is never written
	if (crc!=mycrc32(0,data,leng)) {
		syslog(LOG_NOTICE,"metafile data crc error");
		goto retry;
	}
#ifdef HAVE_PWRITE
	ret = pwrite(eptr->metafd,data,leng,offset);
#else /* HAVE_PWRITE */
	if (lseek(eptr->metafd,offset,SEEK_SET)==(off_t)-1) {
		ret = -1;
	} else {
		ret = write(eptr->metafd,data,leng);
	}
#endif /* HAVE_PWRITE */
	if (ret!=(ssize_t)leng) {
		mfs_errlog_silent(LOG_NOTICE,"error writing metafile");
		goto retry;
	}
	if (fsync(eptr->metafd)<0) {
		mfs_errlog_silent(LOG_NOTICE,"error syncing metafile");
		goto retry;
	}
	eptr->dloffset+=leng;
	eptr->downloadretrycnt=0;
	masterconn_download_next(eptr);
	return;

retry:
	// dloffset was not advanced, so the next request starts from the same offset
	if (eptr->downloadretrycnt>=5) {
		masterconn_download_end(eptr);
	} else {
		eptr->downloadretrycnt++;
		masterconn_download_next(eptr);
	}
}
/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */ uint8_t replicate(uint64_t chunkid,uint32_t version,uint8_t srccnt,const uint8_t *srcs) { replication r; uint8_t status,i,vbuffs,first; uint16_t b,blocks; uint32_t xcrc,crc; uint8_t *wptr; const uint8_t *rptr; int s; if (srccnt==0) { return ERROR_EINVAL; } // syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt); pthread_mutex_lock(&statslock); stats_repl++; pthread_mutex_unlock(&statslock); // init replication structure r.chunkid = chunkid; r.version = version; r.srccnt = 0; r.created = 0; r.opened = 0; r.fds = malloc(sizeof(struct pollfd)*srccnt); passert(r.fds); r.repsources = malloc(sizeof(repsrc)*srccnt); passert(r.repsources); if (srccnt>1) { r.xorbuff = malloc(65536+4); passert(r.xorbuff); } else { r.xorbuff = NULL; } // create chunk status = hdd_create(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_create status: %u",status); rep_cleanup(&r); return status; } r.created = 1; // init sources r.srccnt = srccnt; for (i=0 ; i<srccnt ; i++) { r.repsources[i].chunkid = get64bit(&srcs); r.repsources[i].version = get32bit(&srcs); r.repsources[i].ip = get32bit(&srcs); r.repsources[i].port = get16bit(&srcs); r.repsources[i].sock = -1; r.repsources[i].packet = NULL; } // connect for (i=0 ; i<srccnt ; i++) { s = tcpsocket(); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: socket error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } r.repsources[i].sock = s; r.fds[i].fd = s; if (tcpnonblock(s)<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: connect error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } if (s==0) { r.repsources[i].mode = IDLE; } else { r.repsources[i].mode = CONNECTING; } } if (rep_wait_for_connection(&r,CONNMSECTO)<0) { 
rep_cleanup(&r); return ERROR_CANTCONNECT; } // open chunk status = hdd_open(chunkid); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_open status: %u",status); rep_cleanup(&r); return status; } r.opened = 1; // get block numbers for (i=0 ; i<srccnt ; i++) { wptr = rep_create_packet(r.repsources+i,CSTOCS_GET_CHUNK_BLOCKS,8+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); } // send packet if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive answers for (i=0 ; i<srccnt ; i++) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // get block no blocks = 0; for (i=0 ; i<srccnt ; i++) { uint32_t type,size; uint64_t pchid; uint32_t pver; uint16_t pblocks; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL || type!=CSTOCS_GET_CHUNK_BLOCKS_STATUS || size!=15) { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pver = get32bit(&rptr); pblocks = get16bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pver!=r.repsources[i].version) { syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:version:%"PRIX32"/%"PRIX32") from 
(%08"PRIX32":%04"PRIX16")",pver,r.repsources[i].version,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGVERSION; } if (pstatus!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return pstatus; } r.repsources[i].blocks = pblocks; if (pblocks>blocks) { blocks=pblocks; } } // create read request for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { uint32_t leng; wptr = rep_create_packet(r.repsources+i,CUTOCS_READ,8+4+4+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } leng = r.repsources[i].blocks*0x10000; put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); put32bit(&wptr,0); put32bit(&wptr,leng); } else { rep_no_packet(r.repsources+i); } } // send read request if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive data and write to hdd for (b=0 ; b<blocks ; b++) { // prepare receive for (i=0 ; i<srccnt ; i++) { if (b<r.repsources[i].blocks) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } else { r.repsources[i].mode = IDLE; r.repsources[i].bytesleft = 0; } } // receive data if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // check packets vbuffs = 0; for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { uint32_t type,size; uint64_t pchid; uint16_t pblocknum; uint16_t poffset; uint32_t psize; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL) { rep_cleanup(&r); return ERROR_DISCONNECTED; } if (type==CSTOCU_READ_STATUS && size==9) { pchid = get64bit(&rptr); pstatus = get8bit(&rptr); rep_cleanup(&r); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got 
wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); return ERROR_WRONGCHUNKID; } if (pstatus==STATUS_OK) { // got status too early or got incorrect packet syslog(LOG_WARNING,"replicator: got unexpected ok status from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); return ERROR_DISCONNECTED; } syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); return pstatus; } else if (type==CSTOCU_READ_DATA && size==20+65536) { pchid = get64bit(&rptr); pblocknum = get16bit(&rptr); poffset = get16bit(&rptr); psize = get32bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pblocknum!=b) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:blocknum:%"PRIu16"/%"PRIu16") from (%08"PRIX32":%04"PRIX16")",pblocknum,b,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } if (poffset!=0) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:offset:%"PRIu16") from (%08"PRIX32":%04"PRIX16")",poffset,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGOFFSET; } if (psize!=65536) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:size:%"PRIu32") from (%08"PRIX32":%04"PRIX16")",psize,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGSIZE; } } else { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } vbuffs++; } } // write data if (vbuffs==0) { // no buffers ? 
- it should never happen syslog(LOG_WARNING,"replicator: no data received for block: %"PRIu16,b); rep_cleanup(&r); return ERROR_DISCONNECTED; } else if (vbuffs==1) { // xor not needed, so just find block and write it for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { rptr = r.repsources[i].packet; status = hdd_write(chunkid,0,b,rptr+20,0,65536,rptr+16); if (status!=STATUS_OK) { syslog(LOG_WARNING,"replicator: write status: %u",status); rep_cleanup(&r); return status; } } } } else { first=1; if (vbuffs&1) { xcrc = 0; } else { xcrc = 0xD7978EEBU; // = mycrc32_zeroblock(0,0x10000); } for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { rptr = r.repsources[i].packet; rptr+=16; // skip chunkid,blockno,offset and size if (first) { memcpy(r.xorbuff+4,rptr+4,65536); first=0; } else { xordata(r.xorbuff+4,rptr+4,65536); } crc = get32bit(&rptr); if (crc!=mycrc32(0,rptr,65536)) { syslog(LOG_WARNING,"replicator: received data with wrong checksum from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_CRC; } xcrc^=crc; } } wptr = r.xorbuff; put32bit(&wptr,xcrc); status = hdd_write(chunkid,0,b,r.xorbuff+4,0,65536,r.xorbuff); if (status!=STATUS_OK) { syslog(LOG_WARNING,"replicator: xor write status: %u",status); rep_cleanup(&r); return status; } } } // receive status for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { // if (r.repsources[i].packet) { // free(r.repsources[i].packet); // r.repsources[i].packet=NULL; // } r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } else { r.repsources[i].mode = IDLE; r.repsources[i].bytesleft = 0; } } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { uint32_t type,size; uint64_t pchid; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = 
r.repsources[i].packet; if (rptr==NULL || type!=CSTOCU_READ_STATUS || size!=9) { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pstatus!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return pstatus; } } } // close chunk and change version status = hdd_close(chunkid); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_close status: %u",status); rep_cleanup(&r); return status; } r.opened = 0; status = hdd_version(chunkid,0,version); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_version status: %u",status); rep_cleanup(&r); return status; } r.created = 0; rep_cleanup(&r); return STATUS_OK; }
/* main working thread | glock:UNLOCKED */ void* write_worker(void *arg) { uint32_t z1,z2,z3; uint8_t *data; int fd; int i; struct pollfd pfd[2]; uint32_t sent,rcvd; uint8_t recvbuff[21]; uint8_t sendbuff[32]; #ifdef HAVE_WRITEV struct iovec siov[2]; #endif uint8_t pipebuff[1024]; uint8_t *wptr; const uint8_t *rptr; uint32_t reccmd; uint32_t recleng; uint64_t recchunkid; uint32_t recwriteid; uint8_t recstatus; #ifdef WORKER_DEBUG uint32_t partialblocks; uint32_t bytessent; char debugchain[200]; uint32_t cl; #endif const uint8_t *cp,*cpe; uint32_t chainip[10]; uint16_t chainport[10]; uint16_t chainelements; uint16_t chindx; uint32_t ip; uint16_t port; uint32_t srcip; uint64_t mfleng; uint64_t maxwroffset; uint64_t chunkid; uint32_t version; uint32_t nextwriteid; const uint8_t *chain; uint32_t chainsize; const uint8_t *csdata; uint32_t csdatasize; uint8_t westatus; uint8_t wrstatus; int status; uint8_t waitforstatus; uint8_t havedata; struct timeval start,now,lastrcvd,lrdiff; uint8_t cnt; inodedata *id; cblock *cb,*rcb; // inodedata *id; chainelements = 0; (void)arg; for (;;) { for (cnt=0 ; cnt<chainelements ; cnt++) { csdb_writedec(chainip[cnt],chainport[cnt]); } chainelements=0; // get next job queue_get(jqueue,&z1,&z2,&data,&z3); id = (inodedata*)data; pthread_mutex_lock(&glock); if (id->datachainhead) { chindx = id->datachainhead->chindx; } else { syslog(LOG_WARNING,"writeworker got inode with no data to write !!!"); chindx = 0xFFFF; status = EINVAL; // this should never happen, so status is not important - just anything } status = id->status; pthread_mutex_unlock(&glock); if (status) { write_job_end(id,status,0); continue; } // syslog(LOG_NOTICE,"file: %"PRIu32", index: %"PRIu16" - debug1",id->inode,chindx); // get chunk data from master wrstatus = fs_writechunk(id->inode,chindx,&mfleng,&chunkid,&version,&csdata,&csdatasize); if (wrstatus!=STATUS_OK) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16" - fs_writechunk returns status 
%d",id->inode,chindx,wrstatus); if (wrstatus!=ERROR_LOCKED) { if (wrstatus==ERROR_ENOENT) { write_job_end(id,EBADF,0); } else if (wrstatus==ERROR_QUOTA) { write_job_end(id,EDQUOT,0); } else if (wrstatus==ERROR_NOSPACE) { write_job_end(id,ENOSPC,0); } else { id->trycnt++; if (id->trycnt>=maxretries) { if (wrstatus==ERROR_NOCHUNKSERVERS) { write_job_end(id,ENOSPC,0); } else { write_job_end(id,EIO,0); } } else { write_delayed_enqueue(id,1+(id->trycnt<30)?(id->trycnt/3):10); } } } else { write_delayed_enqueue(id,1+(id->trycnt<30)?(id->trycnt/3):10); } continue; // get next job } if (csdata==NULL || csdatasize==0) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - there are no valid copies",id->inode,chindx,chunkid,version); id->trycnt+=6; if (id->trycnt>=maxretries) { write_job_end(id,ENXIO,0); } else { write_delayed_enqueue(id,60); } continue; } cp = csdata; cpe = csdata+csdatasize; while (cp<cpe && chainelements<10) { chainip[chainelements] = get32bit(&cp); chainport[chainelements] = get16bit(&cp); csdb_writeinc(chainip[chainelements],chainport[chainelements]); chainelements++; } chain = csdata; ip = get32bit(&chain); port = get16bit(&chain); chainsize = csdatasize-6; gettimeofday(&start,NULL); /* if (csdatasize>CSDATARESERVE) { csdatasize = CSDATARESERVE; } memcpy(wrec->csdata,csdata,csdatasize); wrec->csdatasize=csdatasize; while (csdatasize>=6) { tmpip = get32bit(&csdata); tmpport = get16bit(&csdata); csdatasize-=6; csdb_writeinc(tmpip,tmpport); } */ // make connection to cs srcip = fs_getsrcip(); cnt=5; while (cnt>0) { fd = tcpsocket(); if (fd<0) { syslog(LOG_WARNING,"can't create tcp socket: %m"); cnt=0; } if (srcip) { if (tcpnumbind(fd,srcip,0)<0) { syslog(LOG_WARNING,"can't bind socket to given ip: %m"); tcpclose(fd); fd=-1; break; } } if (tcpnumtoconnect(fd,ip,port,200)<0) { cnt--; if (cnt==0) { syslog(LOG_WARNING,"can't connect to (%08"PRIX32":%"PRIu16"): %m",ip,port); } tcpclose(fd); fd=-1; } else { cnt=0; } } if 
(fd<0) { fs_writeend(chunkid,id->inode,0); id->trycnt++; if (id->trycnt>=maxretries) { write_job_end(id,EIO,0); } else { write_delayed_enqueue(id,1+(id->trycnt<30)?(id->trycnt/3):10); } continue; } if (tcpnodelay(fd)<0) { syslog(LOG_WARNING,"can't set TCP_NODELAY: %m"); } #ifdef WORKER_DEBUG partialblocks=0; bytessent=0; #endif nextwriteid=1; pfd[0].fd = fd; pfd[1].fd = id->pipe[0]; rcvd = 0; sent = 0; waitforstatus=1; havedata=1; wptr = sendbuff; put32bit(&wptr,CUTOCS_WRITE); put32bit(&wptr,12+chainsize); put64bit(&wptr,chunkid); put32bit(&wptr,version); // debug: syslog(LOG_NOTICE,"writeworker: init packet prepared"); cb = NULL; status = 0; wrstatus = STATUS_OK; lastrcvd.tv_sec = 0; do { gettimeofday(&now,NULL); if (lastrcvd.tv_sec==0) { lastrcvd = now; } else { lrdiff = now; if (lrdiff.tv_usec<lastrcvd.tv_usec) { lrdiff.tv_sec--; lrdiff.tv_usec+=1000000; } lrdiff.tv_sec -= lastrcvd.tv_sec; lrdiff.tv_usec -= lastrcvd.tv_usec; if (lrdiff.tv_sec>=2) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was timed out (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); break; } } if (now.tv_usec<start.tv_usec) { now.tv_sec--; now.tv_usec+=1000000; } now.tv_sec -= start.tv_sec; now.tv_usec -= start.tv_usec; if (havedata==0 && now.tv_sec<5 && waitforstatus<5) { pthread_mutex_lock(&glock); if (cb==NULL) { if (id->datachainhead) { if (id->datachainhead->to-id->datachainhead->from==65536 || waitforstatus<=1) { cb = id->datachainhead; havedata=1; } } } else { if (cb->next) { if (cb->next->chindx==chindx) { if (cb->next->to-cb->next->from==65536 || waitforstatus<=1) { cb = cb->next; havedata=1; } } } else { id->waitingworker=1; } } if (havedata==1) { cb->writeid = nextwriteid++; // debug: syslog(LOG_NOTICE,"writeworker: data packet prepared (writeid:%"PRIu32",pos:%"PRIu16")",cb->writeid,cb->pos); 
waitforstatus++; wptr = sendbuff; put32bit(&wptr,CUTOCS_WRITE_DATA); put32bit(&wptr,24+(cb->to-cb->from)); put64bit(&wptr,chunkid); put32bit(&wptr,cb->writeid); put16bit(&wptr,cb->pos); put16bit(&wptr,cb->from); put32bit(&wptr,cb->to-cb->from); put32bit(&wptr,mycrc32(0,cb->data+cb->from,cb->to-cb->from)); #ifdef WORKER_DEBUG if (cb->to-cb->from<65536) { partialblocks++; } bytessent+=(cb->to-cb->from); #endif sent=0; } pthread_mutex_unlock(&glock); } pfd[0].events = POLLIN | (havedata?POLLOUT:0); pfd[0].revents = 0; pfd[1].events = POLLIN; pfd[1].revents = 0; if (poll(pfd,2,100)<0) { /* correct timeout - in msec */ syslog(LOG_WARNING,"writeworker: poll error: %m"); status=EIO; break; } if (pfd[1].revents&POLLIN) { // used just to break poll - so just read all data from pipe to empty it i = read(id->pipe[0],pipebuff,1024); } if (pfd[0].revents&POLLIN) { i = read(fd,recvbuff+rcvd,21-rcvd); if (i==0) { // connection reset by peer ,读取文件头错误 syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); status=EIO; break; } gettimeofday(&lastrcvd,NULL); rcvd+=i; if (rcvd==21) { rptr = recvbuff; reccmd = get32bit(&rptr); recleng = get32bit(&rptr); recchunkid = get64bit(&rptr); recwriteid = get32bit(&rptr); recstatus = get8bit(&rptr); if (reccmd!=CSTOCU_WRITE_STATUS || recleng!=13) { syslog(LOG_WARNING,"writeworker: got unrecognized packet from chunkserver (cmd:%"PRIu32",leng:%"PRIu32")",reccmd,recleng); status=EIO; break; } if (recchunkid!=chunkid) { syslog(LOG_WARNING,"writeworker: got unexpected packet (expected chunkdid:%"PRIu64",packet chunkid:%"PRIu64")",chunkid,recchunkid); status=EIO; break; } if (recstatus!=STATUS_OK) { syslog(LOG_WARNING,"writeworker: write error: %"PRIu8,recstatus); wrstatus=recstatus; break; } // debug: 
syslog(LOG_NOTICE,"writeworker: received status ok for writeid:%"PRIu32,recwriteid); if (recwriteid>0) { pthread_mutex_lock(&glock); for (rcb = id->datachainhead ; rcb && rcb->writeid!=recwriteid ; rcb=rcb->next) {} if (rcb==NULL) { syslog(LOG_WARNING,"writeworker: got unexpected status (writeid:%"PRIu32")",recwriteid); pthread_mutex_unlock(&glock); status=EIO; break; } if (rcb==cb) { // current block,cb为当前块儿指针 // debug: syslog(LOG_NOTICE,"writeworker: received status for current block"); if (havedata) { // got status ok before all data had been sent - error syslog(LOG_WARNING,"writeworker: got status OK before all data have been sent"); pthread_mutex_unlock(&glock); status=EIO; break; } else { cb = NULL; } } if (rcb->prev) {//将rcb所指块儿从链表中取出 rcb->prev->next = rcb->next; } else { id->datachainhead = rcb->next; } if (rcb->next) { rcb->next->prev = rcb->prev; } else { id->datachaintail = rcb->prev; } maxwroffset = (((uint64_t)(chindx))<<26)+(((uint32_t)(rcb->pos))<<16)+rcb->to; if (maxwroffset>mfleng) { mfleng=maxwroffset; } write_cb_release(rcb);// id->cacheblocks--; if (id->cachewaiting>0) { pthread_cond_broadcast(&(id->cachecond)); } pthread_mutex_unlock(&glock); } waitforstatus--; rcvd=0; } } if (havedata && (pfd[0].revents&POLLOUT)) { if (cb==NULL) { // havedata==1 && cb==NULL means sending first packet (CUTOCS_WRITE) if (sent<20) { #ifdef HAVE_WRITEV //将多个数据存储在一起,将驻留在两个或更多的不连接的缓冲区中的数据一次写出去 if (chainsize>0) { siov[0].iov_base = sendbuff+sent; siov[0].iov_len = 20-sent; siov[1].iov_base = (char*)chain; // discard const (safe - because it's used in writev) siov[1].iov_len = chainsize; i = writev(fd,siov,2); } else { #endif i = write(fd,sendbuff+sent,20-sent); #ifdef HAVE_WRITEV } #endif } else { i = write(fd,chain+(sent-20),chainsize-(sent-20)); } if (i<0) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: 
%"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); status=EIO; break; } sent+=i; if (sent==20+chainsize) { havedata=0; } } else { if (sent<32) { #ifdef HAVE_WRITEV siov[0].iov_base = sendbuff+sent; siov[0].iov_len = 32-sent; siov[1].iov_base = cb->data+cb->from; siov[1].iov_len = cb->to-cb->from; i = writev(fd,siov,2); #else i = write(fd,sendbuff+sent,32-sent); #endif } else { i = write(fd,cb->data+cb->from+(sent-32),cb->to-cb->from-(sent-32)); } if (i<0) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); status=EIO; break; } sent+=i; if (sent==32+cb->to-cb->from) { havedata=0; } } } } while (waitforstatus>0 && now.tv_sec<10);//////////////////// id->waitingworker=0; tcpclose(fd); #ifdef WORKER_DEBUG gettimeofday(&now,NULL); if (now.tv_usec<start.tv_usec) { now.tv_sec--; now.tv_usec+=1000000; } now.tv_sec -= start.tv_sec; now.tv_usec -= start.tv_usec; cl=0; for (cnt=0 ; cnt<chainelements ; cnt++) { cl+=snprintf(debugchain+cl,200-cl,"%u.%u.%u.%u:%u->",(chainip[cnt]>>24)&255,(chainip[cnt]>>16)&255,(chainip[cnt]>>8)&255,chainip[cnt]&255,chainport[cnt]); } if (cl>=2) { debugchain[cl-2]='\0'; } syslog(LOG_NOTICE,"worker %lu sent %"PRIu32" blocks (%"PRIu32" partial) of chunk %016"PRIX64"_%08"PRIX32", received status for %"PRIu32" blocks (%"PRIu32" lost), bw: %.6lfMB ( %"PRIu32" B / %.0lf us ), chain: %s",(unsigned long)arg,nextwriteid-1,partialblocks,chunkid,version,nextwriteid-1-waitforstatus,waitforstatus,(double)bytessent/((double)(now.tv_sec)*1000000+(double)(now.tv_usec)),bytessent,((double)(now.tv_sec)*1000000+(double)(now.tv_usec)),debugchain); #endif for (cnt=0 ; cnt<10 ; cnt++) { westatus = fs_writeend(chunkid,id->inode,mfleng); if (westatus!=STATUS_OK) { usleep(100000+(10000<<cnt)); } else { 
break; } } if (westatus!=STATUS_OK) { write_job_end(id,ENXIO,0); } else if (status!=0 || wrstatus!=STATUS_OK) { if (wrstatus!=STATUS_OK) { // convert MFS status to OS errno if (wrstatus==ERROR_NOSPACE) { status=ENOSPC; } else { status=EIO; } } id->trycnt++; if (id->trycnt>=maxretries) { write_job_end(id,status,0); } else { write_job_end(id,0,1+(id->trycnt<30)?(id->trycnt/3):10); } } else { read_inode_ops(id->inode); write_job_end(id,0,0); } } }
int chunk_repair(const char *fname,uint8_t fastmode,uint8_t showok,uint8_t repair) { uint64_t namechunkid; uint32_t nameversion; uint32_t i; int fd; uint8_t buff[MFSBLOCKSIZE]; uint32_t crc[1024]; uint32_t crcblock; off_t s; const uint8_t *rp; uint8_t *wp; int ret=0; // check fname // name should be in format: ..../chunk_XXXXXXXXXXXXXXXX_YYYYYYYY.mfs if (repair) { fd = open(fname,O_RDWR); } else { fd = open(fname,O_RDONLY); } if (fd<0) { fprintf(stderr,"%s: error opening file !!!\n",fname); return -1; } i = strlen(fname); if (i<35) { fprintf(stderr,"%s: wrong chunk name format !!! (skip header)\n",fname); ret |= 1; } else { if (hdd_check_filename(fname+(i-35),&namechunkid,&nameversion)<0) { fprintf(stderr,"%s: wrong chunk name format !!! (skip header)\n",fname); ret |= 1; } else { if (read(fd,buff,20)!=20) { fprintf(stderr,"%s: error reading header !!!\n",fname); close(fd); return -1; } if (memcmp(buff,MFSSIGNATURE "C 1.0",8)!=0) { fprintf(stderr,"%s: wrong chunk header !!!\n",fname); memcpy(buff,MFSSIGNATURE "C 1.0",8); ret |= 1; } rp = buff+8; wp = (uint8_t*)rp; if (get64bit(&rp)!=namechunkid) { fprintf(stderr,"%s: wrong chunk number in header !!!\n",fname); put64bit(&wp,namechunkid); ret |= 1; } wp = (uint8_t*)rp; if (get32bit(&rp)!=nameversion) { fprintf(stderr,"%s: wrong chunk version in header !!!\n",fname); put32bit(&wp,nameversion); ret |= 1; } if (repair && (ret&1)) { if (lseek(fd,0,SEEK_SET)!=0) { fprintf(stderr,"%s: error setting file pointer\n",fname); close(fd); return -1; } if (write(fd,buff,20)!=20) { fprintf(stderr,"%s: error writing header !!!\n",fname); close(fd); return -1; } ret |= 4; } } } // read crc if (lseek(fd,CHUNKHDRCRC,SEEK_SET)!=CHUNKHDRCRC) { fprintf(stderr,"%s: error setting file pointer\n",fname); close(fd); return -1; } if (read(fd,buff,4096)!=4096) { fprintf(stderr,"%s: error reading checksum block\n",fname); close(fd); return -1; } rp = buff; for (i=0 ; i<1024 ; i++) { crc[i] = get32bit(&rp); } // check data crc if (fastmode && 
repair==0) { s = lseek(fd,-MFSBLOCKSIZE,SEEK_END); if (s<MFSHDRSIZE) { fprintf(stderr,"%s: wrong file size\n",fname); close(fd); return -1; } s -= MFSHDRSIZE; if ((s%MFSBLOCKSIZE)!=0) { fprintf(stderr,"%s: wrong file size\n",fname); close(fd); return -1; } s >>= 16; if (read(fd,buff,MFSBLOCKSIZE)!=MFSBLOCKSIZE) { fprintf(stderr,"%s: error reading last data block\n",fname); close(fd); return -1; } crcblock = mycrc32(0,buff,MFSBLOCKSIZE); if (crc[s]!=crcblock) { fprintf(stderr,"%s: crc error (last block ; header crc: %08"PRIX32" ; block crc: %08"PRIX32")\n",fname,crc[s],crcblock); ret |= 2; } } else {