/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */ uint8_t replicate(uint64_t chunkid,uint32_t version,uint8_t srccnt,const uint8_t *srcs) { replication r; uint8_t status,i,vbuffs,first; uint16_t b,blocks; uint32_t xcrc,crc; uint8_t *wptr; const uint8_t *rptr; int s; if (srccnt==0) { return ERROR_EINVAL; } // syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt); pthread_mutex_lock(&statslock); stats_repl++; pthread_mutex_unlock(&statslock); // init replication structure r.chunkid = chunkid; r.version = version; r.srccnt = 0; r.created = 0; r.opened = 0; r.fds = malloc(sizeof(struct pollfd)*srccnt); passert(r.fds); r.repsources = malloc(sizeof(repsrc)*srccnt); passert(r.repsources); if (srccnt>1) { r.xorbuff = malloc(65536+4); passert(r.xorbuff); } else { r.xorbuff = NULL; } // create chunk status = hdd_create(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_create status: %u",status); rep_cleanup(&r); return status; } r.created = 1; // init sources r.srccnt = srccnt; for (i=0 ; i<srccnt ; i++) { r.repsources[i].chunkid = get64bit(&srcs); r.repsources[i].version = get32bit(&srcs); r.repsources[i].ip = get32bit(&srcs); r.repsources[i].port = get16bit(&srcs); r.repsources[i].sock = -1; r.repsources[i].packet = NULL; } // connect for (i=0 ; i<srccnt ; i++) { s = tcpsocket(); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: socket error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } r.repsources[i].sock = s; r.fds[i].fd = s; if (tcpnonblock(s)<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: connect error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } if (s==0) { r.repsources[i].mode = IDLE; } else { r.repsources[i].mode = CONNECTING; } } if (rep_wait_for_connection(&r,CONNMSECTO)<0) { rep_cleanup(&r); return ERROR_CANTCONNECT; } // open chunk status = hdd_open(chunkid); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_open status: %u",status); rep_cleanup(&r); return status; } r.opened = 1; // get block numbers for (i=0 ; i<srccnt ; i++) { wptr = rep_create_packet(r.repsources+i,CSTOCS_GET_CHUNK_BLOCKS,8+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); } // send packet if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive answers for (i=0 ; i<srccnt ; i++) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // get block no blocks = 0; for (i=0 ; i<srccnt ; i++) { uint32_t type,size; uint64_t pchid; uint32_t pver; uint16_t pblocks; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL || type!=CSTOCS_GET_CHUNK_BLOCKS_STATUS || size!=15) { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pver = get32bit(&rptr); pblocks = get16bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pver!=r.repsources[i].version) { syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:version:%"PRIX32"/%"PRIX32") from (%08"PRIX32":%04"PRIX16")",pver,r.repsources[i].version,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGVERSION; } if (pstatus!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return pstatus; } r.repsources[i].blocks = pblocks; if (pblocks>blocks) { blocks=pblocks; } } // create read request for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { uint32_t leng; wptr = rep_create_packet(r.repsources+i,CUTOCS_READ,8+4+4+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } leng = r.repsources[i].blocks*0x10000; put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); put32bit(&wptr,0); put32bit(&wptr,leng); } else { rep_no_packet(r.repsources+i); } } // send read request if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive data and write to hdd for (b=0 ; b<blocks ; b++) { // prepare receive for (i=0 ; i<srccnt ; i++) { if (b<r.repsources[i].blocks) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } else { r.repsources[i].mode = IDLE; r.repsources[i].bytesleft = 0; } } // receive data if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // check packets vbuffs = 0; for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { uint32_t type,size; uint64_t pchid; uint16_t pblocknum; uint16_t poffset; uint32_t psize; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL) { rep_cleanup(&r); return ERROR_DISCONNECTED; } if (type==CSTOCU_READ_STATUS && size==9) { pchid = get64bit(&rptr); pstatus = get8bit(&rptr); rep_cleanup(&r); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); return ERROR_WRONGCHUNKID; } if (pstatus==STATUS_OK) { // got status too early or got incorrect packet syslog(LOG_WARNING,"replicator: got unexpected ok status from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); return ERROR_DISCONNECTED; } syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); return pstatus; } else if (type==CSTOCU_READ_DATA && size==20+65536) { pchid = get64bit(&rptr); pblocknum = get16bit(&rptr); poffset = get16bit(&rptr); psize = get32bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pblocknum!=b) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:blocknum:%"PRIu16"/%"PRIu16") from (%08"PRIX32":%04"PRIX16")",pblocknum,b,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } if (poffset!=0) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:offset:%"PRIu16") from (%08"PRIX32":%04"PRIX16")",poffset,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGOFFSET; } if (psize!=65536) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:size:%"PRIu32") from (%08"PRIX32":%04"PRIX16")",psize,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGSIZE; } } else { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } vbuffs++; } } // write data if (vbuffs==0) { // no buffers ? - it should never happen syslog(LOG_WARNING,"replicator: no data received for block: %"PRIu16,b); rep_cleanup(&r); return ERROR_DISCONNECTED; } else if (vbuffs==1) { // xor not needed, so just find block and write it for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { rptr = r.repsources[i].packet; status = hdd_write(chunkid,0,b,rptr+20,0,65536,rptr+16); if (status!=STATUS_OK) { syslog(LOG_WARNING,"replicator: write status: %u",status); rep_cleanup(&r); return status; } } } } else { first=1; if (vbuffs&1) { xcrc = 0; } else { xcrc = 0xD7978EEBU; // = mycrc32_zeroblock(0,0x10000); } for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { rptr = r.repsources[i].packet; rptr+=16; // skip chunkid,blockno,offset and size if (first) { memcpy(r.xorbuff+4,rptr+4,65536); first=0; } else { xordata(r.xorbuff+4,rptr+4,65536); } crc = get32bit(&rptr); if (crc!=mycrc32(0,rptr,65536)) { syslog(LOG_WARNING,"replicator: received data with wrong checksum from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_CRC; } xcrc^=crc; } } wptr = r.xorbuff; put32bit(&wptr,xcrc); status = hdd_write(chunkid,0,b,r.xorbuff+4,0,65536,r.xorbuff); if (status!=STATUS_OK) { syslog(LOG_WARNING,"replicator: xor write status: %u",status); rep_cleanup(&r); return status; } } } // receive status for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { // if (r.repsources[i].packet) { // free(r.repsources[i].packet); // r.repsources[i].packet=NULL; // } r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } else { r.repsources[i].mode = IDLE; r.repsources[i].bytesleft = 0; } } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { uint32_t type,size; uint64_t pchid; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL || type!=CSTOCU_READ_STATUS || size!=9) { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pstatus!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return pstatus; } } } // close chunk and change version status = hdd_close(chunkid); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_close status: %u",status); rep_cleanup(&r); return status; } r.opened = 0; status = hdd_version(chunkid,0,version); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_version status: %u",status); rep_cleanup(&r); return status; } r.created = 0; rep_cleanup(&r); return STATUS_OK; }
/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */ uint8_t replicate(uint64_t chunkid,uint32_t version,const uint32_t xormasks[4],uint8_t srccnt,const uint8_t *srcs) { replication r; uint8_t status,i,j,vbuffs,first; uint16_t b,blocks; uint32_t xcrc[4],crc; uint32_t codeindex,codeword; uint8_t *wptr; const uint8_t *rptr; int s; if (srccnt==0) { return ERROR_EINVAL; } // syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt); pthread_mutex_lock(&statslock); stats_repl++; pthread_mutex_unlock(&statslock); // init replication structure r.chunkid = chunkid; r.version = version; r.srccnt = 0; r.created = 0; r.opened = 0; r.fds = malloc(sizeof(struct pollfd)*srccnt); passert(r.fds); r.repsources = malloc(sizeof(repsrc)*srccnt); passert(r.repsources); if (srccnt>1) { r.xorbuff = malloc(MFSBLOCKSIZE+4); passert(r.xorbuff); } else { r.xorbuff = NULL; } // create chunk status = hdd_create(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_create status: %s",mfsstrerr(status)); rep_cleanup(&r); return status; } r.created = 1; // init sources r.srccnt = srccnt; for (i=0 ; i<srccnt ; i++) { r.repsources[i].chunkid = get64bit(&srcs); r.repsources[i].version = get32bit(&srcs); r.repsources[i].ip = get32bit(&srcs); r.repsources[i].port = get16bit(&srcs); r.repsources[i].sock = -1; r.repsources[i].packet = NULL; } // connect for (i=0 ; i<srccnt ; i++) { s = tcpsocket(); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: socket error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } r.repsources[i].sock = s; r.fds[i].fd = s; if (tcpnonblock(s)<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: connect error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } if (s==0) { r.repsources[i].mode = IDLE; } else { r.repsources[i].mode = CONNECTING; } } if (rep_wait_for_connection(&r,CONNMSECTO)<0) { rep_cleanup(&r); return ERROR_CANTCONNECT; } // disable Nagle for (i=0 ; i<srccnt ; i++) { tcpnodelay(r.repsources[i].sock); } // open chunk status = hdd_open(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_open status: %s",mfsstrerr(status)); rep_cleanup(&r); return status; } r.opened = 1; // get block numbers for (i=0 ; i<srccnt ; i++) { wptr = rep_create_packet(r.repsources+i,ANTOCS_GET_CHUNK_BLOCKS,8+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); } // send packet if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive answers for (i=0 ; i<srccnt ; i++) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // get # of blocks blocks = 0; for (i=0 ; i<srccnt ; i++) { uint32_t type,size; uint64_t pchid; uint32_t pver; uint16_t pblocks; uint8_t pstatus; uint32_t ip; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; ip = r.repsources[i].ip; if (rptr==NULL || type!=CSTOAN_CHUNK_BLOCKS || size!=15) { syslog(LOG_WARNING,"replicator,get # of blocks: got wrong answer (type:0x%08"PRIX32"/size:0x%08"PRIX32") from (%u.%u.%u.%u:%04"PRIX16")",type,size,(ip>>24)&0xFF,(ip>>16)&0xFF,(ip>>8)&0xFF,ip&0xFF,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pver = get32bit(&rptr); pblocks = get16bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator,get # of blocks: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%u.%u.%u.%u:%04"PRIX16")",pchid,r.repsources[i].chunkid,(ip>>24)&0xFF,(ip>>16)&0xFF,(ip>>8)&0xFF,ip&0xFF,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; }