//continue changelog transfer int matoslaserv_changelog(serventry *eptr,const uint8_t *data,uint32_t length) { uint8_t flag; //char *buff; //uint64_t ver; //uint32_t size; if (length!=1) { MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_DATA - wrong size (%"PRIu32"/12)",length); eptr->mode=KILL; return -1; } flag = get8bit(&data); if (flag == 0) { if (matoslaserv_pack_log(eptr,0)<0) { MFSLOG(LOG_ERR,"pack log failed,no resend"); } //resend should be changed } else if (flag == 1) { if (matoslaserv_pack_log(eptr,1)<0) { MFSLOG(LOG_ERR,"pack log failed,resend"); } } else { MFSLOG(LOG_ERR,"unrecogize matoslaserv_changelog flag"); } return 0; }
static void mfs_attr_to_stat(uint32_t inode,uint8_t attr[35], struct stat *stbuf) { uint16_t attrmode; uint8_t attrtype; uint32_t attruid,attrgid,attratime,attrmtime,attrctime,attrnlink; uint64_t attrlength; const uint8_t *ptr; ptr = attr; attrtype = get8bit(&ptr); attrmode = get16bit(&ptr); attruid = get32bit(&ptr); attrgid = get32bit(&ptr); attratime = get32bit(&ptr); attrmtime = get32bit(&ptr); attrctime = get32bit(&ptr); attrnlink = get32bit(&ptr); attrlength = get64bit(&ptr); stbuf->st_ino = inode; if (attrtype==TYPE_FILE || attrtype==TYPE_TRASH || attrtype==TYPE_RESERVED) { stbuf->st_mode = S_IFREG | ( attrmode & 07777); } else { stbuf->st_mode = 0; } stbuf->st_size = attrlength; stbuf->st_blocks = (attrlength+511)/512; stbuf->st_uid = attruid; stbuf->st_gid = attrgid; stbuf->st_atime = attratime; stbuf->st_mtime = attrmtime; stbuf->st_ctime = attrctime; stbuf->st_nlink = attrnlink; }
/*
 * Handle MLTOMA_REGISTER from a metalogger.
 *
 * Payload: rversion(8) followed by
 *   rversion 1: version(32) timeout(16)                 - 7 bytes total
 *   rversion 2: version(32) timeout(16) minversion(64)  - 15 bytes total;
 *               matomlserv_send_old_changes(eptr,minversion) is called.
 * A second register on an already registered connection, a wrong size or an
 * unknown rversion marks the connection KILL.
 * A timeout below 10 seconds is logged; below 3 it is raised to 3.
 */
void matomlserv_register(matomlserventry *eptr,const uint8_t *data,uint32_t length) {
	uint8_t regver;
	uint64_t firstwanted;

	if (eptr->version>0) {
		syslog(LOG_WARNING,"got register message from registered metalogger !!!");
		eptr->mode=KILL;
		return;
	}
	if (length<1) {
		syslog(LOG_NOTICE,"MLTOMA_REGISTER - wrong size (%"PRIu32")",length);
		eptr->mode=KILL;
		return;
	}

	regver = get8bit(&data);
	switch (regver) {
	case 1:
		if (length!=7) {
			syslog(LOG_NOTICE,"MLTOMA_REGISTER (ver 1) - wrong size (%"PRIu32"/7)",length);
			eptr->mode=KILL;
			return;
		}
		eptr->version = get32bit(&data);
		eptr->timeout = get16bit(&data);
		break;
	case 2:
		if (length!=7+8) {
			syslog(LOG_NOTICE,"MLTOMA_REGISTER (ver 2) - wrong size (%"PRIu32"/15)",length);
			eptr->mode=KILL;
			return;
		}
		eptr->version = get32bit(&data);
		eptr->timeout = get16bit(&data);
		firstwanted = get64bit(&data);
		matomlserv_send_old_changes(eptr,firstwanted);
		break;
	default:
		syslog(LOG_NOTICE,"MLTOMA_REGISTER - wrong version (%"PRIu8"/1)",regver);
		eptr->mode=KILL;
		return;
	}

	if (eptr->timeout>=10) {
		return;
	}
	/* too small a timeout is tolerated (connection is not killed), only clamped */
	syslog(LOG_NOTICE,"MLTOMA_REGISTER communication timeout too small (%"PRIu16" seconds - should be at least 10 seconds)",eptr->timeout);
	if (eptr->timeout<3) {
		eptr->timeout=3;
	}
}
//rewrite metachanges_log func void masterconn_metachanges_log(serventry *eptr,const uint8_t *data,uint32_t length) { char log_data[1000]; char log_str[1000]; uint64_t version; uint32_t size; //FILE *new_fd; uint8_t log_count = 0; uint8_t log_limit; int ret = 0; NOT_USED(length); log_limit = get8bit(&data); if (eptr->logfd == NULL) { eptr->logfd = fopen("changelog.0.mfs","a"); } while(log_count < log_limit) { version = get64bit(&data); size = get32bit(&data); memcpy(log_str,data,size); data = data + size; if (log_str[size - 1] != '\0') { MFSLOG(LOG_NOTICE,"MATOSLA_CHANGELOG - invalide string the last is %c",log_str[size - 1]); } snprintf(log_data,sizeof(log_data),"%"PRIu64": %s\n",version,log_str); if (eptr->logfd) { ret = replay(log_data); if (ret != 0) { MFSLOG(LOG_ERR,"changelog replay failed"); break; //more complicated method to ensure consistency } fprintf(eptr->logfd,"%"PRIu64": %s",version,log_str); fflush(eptr->logfd); log_count++; } else { MFSLOG(LOG_NOTICE,"lost MFS change %"PRIu64": %s",version,log_str); ret = -1; break; } } if (ret == 0) { masterconn_ack_changelog(eptr,0); } else { masterconn_ack_changelog(eptr,1); } }
//confirm sync thread is ready to transfer metadata void masterconn_sync_thread(serventry *eptr,const uint8_t *data,uint32_t length) { uint8_t flag; //serventry *weptr; NOT_USED(length); flag = get8bit(&data); MFSLOG(LOG_NOTICE,"the masterconn_sync_thread flag is %d",flag); if (flag == 1) { masterconn_metadownloadinit(); } else if (flag == 2) { masterconn_changelog0downloadinit(); } else if (flag == 3) { masterconn_changelog_ready(eptr); } else { MFSLOG(LOG_NOTICE,"MATOSLA_SYNC_THREAD:unrecognize flag"); } }
/*
 * Handle a slave's "download start" request.
 *
 * Payload: filenum(8)
 *   1 - open "metadata.mfs.back"
 *   2 - open "changelog.0.mfs" (its size is also stored in
 *       eptr->changelog_offset)
 * Any other filenum, or a wrong payload size, marks the connection KILL.
 *
 * Reply (MATOSLA_DOWNLOAD_START): the file size as a 64-bit value on success,
 * or a single 0xff byte when the file cannot be opened or its size cannot be
 * determined. Failure to allocate the reply packet marks the connection KILL.
 */
void matoslaserv_download_start(serventry *eptr,const uint8_t *data,uint32_t length) {
	uint8_t filenum;
	uint64_t size;
	off_t endpos;
	uint8_t *ptr;

	if (length!=1) {
		MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_START - wrong size (%"PRIu32"/1)",length);
		eptr->mode=KILL;
		return;
	}
	/* drop any file left open by a previous download */
	if (eptr->metafd>0) {
		close(eptr->metafd);
		eptr->metafd=-1;
	}
	filenum = get8bit(&data);
	if (filenum==1) {
		eptr->metafd = open("metadata.mfs.back",O_RDONLY);
	} else if (filenum==2) {
		eptr->metafd = open("changelog.0.mfs",O_RDONLY);
	} else {
		eptr->mode=KILL;
		return;
	}

	endpos = -1;
	if (eptr->metafd>=0) {
		endpos = lseek(eptr->metafd,0,SEEK_END);
		if (endpos<0) {
			/* lseek failed: never send (uint64_t)-1 as a file size */
			close(eptr->metafd);
			eptr->metafd=-1;
		}
	}
	if (eptr->metafd<0) {
		ptr = matoslaserv_createpacket(eptr,MATOSLA_DOWNLOAD_START,1);
		if (ptr==NULL) {
			eptr->mode=KILL;
			return;
		}
		put8bit(&ptr,0xff);	/* error */
		return;
	}

	size = (uint64_t)endpos;
	if (filenum==2) {
		eptr->changelog_offset = size;
	}
	ptr = matoslaserv_createpacket(eptr,MATOSLA_DOWNLOAD_START,8);
	if (ptr==NULL) {
		eptr->mode=KILL;
		return;
	}
	put64bit(&ptr,size);	/* ok */
}
/*
 * Handle a register request from a slave.
 *
 * Ignored (with an error log) when this process is itself a slave.
 * Payload: rversion(8); only rversion 1 is accepted, with
 *          version(32) timeout(16) following (7 bytes total).
 * A repeated register, a wrong size or an unknown rversion marks the
 * connection KILL.
 *
 * On a valid version-1 register, worker_thread_init(eptr->servip) is called
 * and the connection is then marked KILL in every case.
 * NOTE(review): presumably the worker takes over communication with the
 * slave, which is why this connection is dropped - confirm.
 */
void matoslaserv_register(serventry *eptr,const uint8_t *data,uint32_t length) {
	uint8_t rversion;
	int ret;

	if (isslave()) {
		MFSLOG(LOG_ERR, "slave will not send changelog to other master\n");
		return;
	}

	if (eptr->version>0) {
		MFSLOG(LOG_WARNING,"got register message from registered metalogger !!!");
		eptr->mode=KILL;
		return;
	}
	if (length<1) {
		MFSLOG(LOG_NOTICE,"slaTOMA_REGISTER - wrong size (%"PRIu32")",length);
		eptr->mode=KILL;
		return;
	}

	rversion = get8bit(&data);
	if (rversion!=1) {
		MFSLOG(LOG_NOTICE,"slaTOMA_REGISTER - wrong version (%"PRIu8"/1)",rversion);
		eptr->mode=KILL;
		return;
	}
	if (length!=7) {
		MFSLOG(LOG_NOTICE,"slaTOMA_REGISTER (ver 1) - wrong size (%"PRIu32"/7)",length);
		eptr->mode=KILL;
		return;
	}
	eptr->version = get32bit(&data);
	eptr->timeout = get16bit(&data);

	/* test the value actually returned by worker_thread_init; the previous
	   code tested an unrelated variable that was always 0 */
	ret = worker_thread_init(eptr->servip);
	if (ret < 0) {
		MFSLOG(LOG_ERR,"worker of init failed");
	}
	eptr->mode = KILL;
}
//rewrited download end void matoslaserv_download_end(serventry *eptr,const uint8_t *data,uint32_t length) { uint8_t filenum; if (length!=1) { MFSLOG(LOG_NOTICE,"slaTOMA_DOWNLOAD_END - wrong size (%"PRIu32"/0)",length); eptr->mode=KILL; return; } if (eptr->metafd>0) { close(eptr->metafd); eptr->metafd=-1; } filenum = get8bit(&data); MFSLOG(LOG_NOTICE,"the matoslaserv_download_end filenum is %d",filenum); if (filenum == MFS_SYNC_META) { eptr->syncstep = MFS_SYNC_CHANGELOG_START; matosla_sync_thread(eptr, MFS_SYNC_CHANGELOG_START); } if (filenum == MFS_SYNC_CHANGELOG_START) { eptr->syncstep = MFS_SYNC_CHANGELOG_END; matosla_sync_thread(eptr, MFS_SYNC_CHANGELOG_END); } }
/*
 * FUSE readdir handler for the meta filesystem.
 *
 * The directory listing is cached in the dirbuf stored in fi->fh and is
 * (re)filled through dirbuf_meta_fill() on the first read, or when a handle
 * that was already read is read again from offset 0.
 * Cached records are laid out as: namelen(8) name[namelen] inode(32) type(8).
 * Records starting at byte offset `off` are converted with
 * fuse_add_direntry() until the reply buffer (at most READDIR_BUFFSIZE bytes)
 * cannot take the next one. All work is done under dirinfo->lock.
 */
void mfs_meta_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
	dirbuf *dirinfo = (dirbuf *)((unsigned long)(fi->fh));
	char buffer[READDIR_BUFFSIZE];
	struct stat stbuf;
	const uint8_t *cur,*limit;
	char *entryname;
	char saved;
	size_t used,added;
	uint8_t namelen;
	uint32_t inode;
	uint8_t type;

	if (off<0) {
		fuse_reply_err(req,EINVAL);
		return;
	}

	pthread_mutex_lock(&(dirinfo->lock));
	if (dirinfo->wasread==0 || (dirinfo->wasread==1 && off==0)) {
		if (dirinfo->p!=NULL) {
			free(dirinfo->p);
		}
		dirbuf_meta_fill(dirinfo,ino);
	}
	dirinfo->wasread=1;

	if (off>=(off_t)(dirinfo->size)) {
		/* past the end of the listing: empty reply */
		fuse_reply_buf(req, NULL, 0);
		pthread_mutex_unlock(&(dirinfo->lock));
		return;
	}

	if (size>READDIR_BUFFSIZE) {
		size=READDIR_BUFFSIZE;
	}
	cur = (const uint8_t*)(dirinfo->p)+off;
	limit = (const uint8_t*)(dirinfo->p)+dirinfo->size;
	used = 0;
	while (cur<limit) {
		namelen = *cur++;
		entryname = (char*)cur;
		cur += namelen;
		/* offset of the record following this one */
		off += namelen+6;
		if (cur+5>limit) {
			continue;
		}
		inode = get32bit(&cur);
		type = get8bit(&cur);
		mfs_meta_type_to_stat(inode,type,&stbuf);
		/* names are not NUL-terminated in the cache: terminate in place
		   for the call, then restore the overwritten byte */
		saved = entryname[namelen];
		entryname[namelen] = 0;
		added = fuse_add_direntry(req, buffer + used, size - used, entryname, &stbuf, off);
		entryname[namelen] = saved;
		if (used+added>size) {
			break;
		}
		used += added;
	}
	fuse_reply_buf(req,buffer,used);
	pthread_mutex_unlock(&(dirinfo->lock));
}
int ppfs_write (const char *path, const char *buf, size_t st, off_t off, struct fuse_file_info *fi){ fprintf(stderr,"\n\n\nppfs_write:%s,size:%d,offset:%d\n\n\n",path,st,off); int nwrite = 0; int ost,ooff; ost = st; ooff = off; ppacket* p = createpacket_s(4+strlen(path),CLTOMD_READ_CHUNK_INFO,-1); uint8_t* ptr = p->startptr + HEADER_LEN; int plen = strlen(path); uint32_t ip; uint64_t* chunklist = NULL; int clen,calloc; const char* wbuf = buf; put32bit(&ptr,plen); memcpy(ptr,path,plen); ptr += plen; fprintf(stderr,"just to be clear\n"); const uint8_t* tmpptr = p->startptr + HEADER_LEN; int i; for(i=0;i<p->size;i+=1){ int x = get8bit(&tmpptr); fprintf(stderr,"%X\t",x); } fprintf(stderr,"\n"); sendpacket(fd,p); free(p); p = receivepacket(fd); const uint8_t* ptr2 = p->startptr; int status = get32bit(&ptr2); fprintf(stderr,"status:%d\n",status); if(status == 0){ ip = get32bit(&ptr2); if(ip == -1){ fprintf(stderr,"local mds\n"); } else { fprintf(stderr,"remote mds:%X\n",ip); } int chunks = get32bit(&ptr2); fprintf(stderr,"chunks=%d\n",chunks); int i; chunklist = (uint64_t*)malloc(sizeof(uint64_t)*(chunks+20)); clen = 0; calloc = chunks+20; for(i=0;i<chunks;i++){ uint64_t chunkid = get64bit(&ptr2); fprintf(stderr,"(%d):id=%lld\n",i,chunkid); chunklist[clen++] = chunkid; } ppfs_conn_entry* e = NULL; if(ip != -1){ if(remote_mds.sockfd != -1 && remote_mds.peerip != ip){ tcpclose(remote_mds.sockfd); remote_mds.sockfd = -1; } if(remote_mds.sockfd == -1){ if(serv_connect(&remote_mds,ip,MDS_PORT) < 0){ return -1; } } e = &remote_mds; } else { e = &local_mds; } fprintf(stderr,"connected\n"); if(chunks * CHUNKSIZE <= off + st){ fprintf(stderr,"clearing cache\n"); chunk_cache* cc; attr_cache* ac; if(lookup_chunk_cache(path,&cc) == 0){ remove_chunk_cache(cc); free_chunk_cache(cc); } if(lookup_attr_cache(path,&ac) == 0){ remove_attr_cache(ac); free_attr_cache(ac); } fprintf(stderr,"appending chunk\n"); while(chunks * CHUNKSIZE <= off + st){ ppacket* p = 
createpacket_s(4+plen,CLTOMD_APPEND_CHUNK,-1); uint8_t* ptr = p->startptr + HEADER_LEN; put32bit(&ptr,plen); memcpy(ptr,path,plen); sendpacket(e->sockfd,p); free(p); ppacket* rp = receivepacket(e->sockfd); const uint8_t* ptr2 = rp->startptr; int status = get32bit(&ptr2); printf("status:%d\n",status); if(status == 0){ uint64_t chunkid = get64bit(&ptr2); printf("chunkid=%lld\n",chunkid); if(clen < calloc){ chunklist[clen++] = chunkid; } else { chunklist = (uint64_t*)realloc(chunklist,calloc<<1); chunklist[clen++] = chunkid; } } else { free(rp); return status; } free(rp); chunks++; } } fprintf(stderr,"chunklist now:\n"); for(i=0;i<clen;i++){ fprintf(stderr,"\t(%d):%lld\n",i,chunklist[i]); } ppacket* p = createpacket_s(4+plen+4+4,CLTOMD_WRITE,-1); ptr = p->startptr + HEADER_LEN; put32bit(&ptr,plen); memcpy(ptr,path,plen); ptr += plen; put32bit(&ptr,ooff); put32bit(&ptr,ost); sendpacket(e->sockfd,p); free(p); int starti = off/CHUNKSIZE; int buflen = min(st,CHUNKSIZE - off % CHUNKSIZE); ppfs_conn_entry cs; cs.sockfd = -1; fprintf(stderr,"off=%d,st=%lld\n",off,st); while(st > 0){ uint64_t chunkid = chunklist[starti]; ppacket* p = createpacket_s(8,CLTOMD_LOOKUP_CHUNK,-1); uint8_t* ptr = p->startptr + HEADER_LEN; put64bit(&ptr,chunkid); sendpacket(e->sockfd,p); free(p); p = receivepacket(e->sockfd); const uint8_t* ptr2 = p->startptr; int status = get32bit(&ptr2); printf("status:%d\n",status); if(status == 0){ int csip = get32bit(&ptr2); printf("cid:%lld,csip:%X\n",chunkid,csip); if(cs.sockfd != -1 && cs.peerip != csip){ tcpclose(cs.sockfd); } if(cs.sockfd == -1){ if(serv_connect(&cs,csip,CS_PORT) < 0){ return -1; } } } else { return -1; } p = createpacket_s(8+4+4+buflen,CLTOCS_WRITE_CHUNK,-1); ptr = p->startptr + HEADER_LEN; put64bit(&ptr,chunkid); put32bit(&ptr,off % CHUNKSIZE); put32bit(&ptr,buflen); memcpy(ptr,wbuf,buflen); fprintf(stderr,"starti=%d,chunkid=%lld,off=%d,buflen=%d\n",starti,chunkid,off % CHUNKSIZE,buflen); sendpacket(cs.sockfd,p); free(p); p = 
receivepacket(cs.sockfd); ptr2 = p->startptr; status = get32bit(&ptr2); printf("status=%d\n",status); if(status == 0){ int wlen = get32bit(&ptr2); nwrite += wlen; printf("wlen=%d,nwrite=%d\n",wlen,nwrite); wbuf += wlen; } st -= buflen; off += buflen; starti = off/CHUNKSIZE; buflen = min(st,CHUNKSIZE - off % CHUNKSIZE); } } else { return status; } fprintf(stderr,"off=%d,st=%d,nwrite=%d\n",ooff,ost,nwrite); return nwrite; }
/*
 * Handle MATOCS_MASTER_ACK from the master.
 *
 * Accepted payload sizes: 1, 5, 7, 9, 15, 17 or 33 bytes; anything else marks
 * the connection KILL. The first byte is the ack type:
 *   0                    - registration progress: optional masterversion(32),
 *                          timeout(16)+csid(16) and metaid(64), read according
 *                          to the payload size, then the register state
 *                          machine is advanced;
 *   1 (size 5)           - redirect: masterip(32) of the leader, or 0 when the
 *                          leader is unknown; the connection is set to CLOSE;
 *   2 (size 7 or 15)     - wait for acceptance: masterversion(32),
 *                          timeout(16) and optional metaid(64);
 *   3 (size 33)          - authorization needed: 32-byte random blob.
 * A metaid that differs from a non-zero MetaID aborts the connection.
 * The timeout sent by the master is used only when the global Timeout is 0.
 */
void masterconn_master_ack(masterconn *eptr,const uint8_t *data,uint32_t length) {
	uint8_t atype;
	uint64_t metaid;
	uint16_t csid;

	switch (length) {
	case 1:
	case 5:
	case 7:
	case 9:
	case 15:
	case 17:
	case 33:
		break;
	default:
		syslog(LOG_NOTICE,"MATOCS_MASTER_ACK - wrong size (%"PRIu32"/1|5|7|9|15|17|33)",length);
		eptr->mode = KILL;
		return;
	}

	atype = get8bit(&data);

	if (atype==0) {
		csid = 0;
		metaid = 0;
		if (length>=5) {
			eptr->masterversion = get32bit(&data);
		}
		if (length>=9) {
			if (Timeout==0) {
				eptr->timeout = get16bit(&data);
			} else {
				data+=2;	/* locally configured timeout wins - skip the field */
			}
			csid = get16bit(&data);
		}
		if (length>=17) {
			metaid = get64bit(&data);
			if (metaid>0 && MetaID>0 && metaid!=MetaID) {
				/* wrong MFS instance - abort */
				syslog(LOG_WARNING,"MATOCS_MASTER_ACK - wrong meta data id. Can't connect to master");
				eptr->registerstate = REGISTERED;	/* do not switch to register ver. 5 */
				eptr->mode = KILL;
				return;
			}
		}
		if (csid>0 || metaid>0) {
			masterconn_setcsid(csid,metaid);
		}
		if (eptr->masterversion>=VERSION2INT(2,0,0)) {
			if (eptr->registerstate == UNREGISTERED || eptr->registerstate == WAITING) {
				hdd_get_chunks_begin(1);
				eptr->registerstate = INPROGRESS;
				if (eptr->masterversion>=VERSION2INT(2,1,0)) {
					masterconn_sendlabels(eptr);
				}
			}
			if (eptr->registerstate == INPROGRESS) {
				masterconn_sendnextchunks(eptr);
			}
		} else if (eptr->registerstate != REGISTERED) {
			if (eptr->registerstate == INPROGRESS) {
				hdd_get_chunks_end();
			}
			eptr->registerstate = REGISTERED;
			masterconn_sendchunksinfo(eptr);
		}
		return;
	}

	if (atype==1 && length==5) {
		uint32_t mip;

		mip = get32bit(&data);
		if (mip) {
			/* redirect to leader */
			eptr->masterip = mip;
			eptr->new_register_mode = 3;
			if (eptr->registerstate == INPROGRESS) {
				hdd_get_chunks_end();
			}
			eptr->registerstate = WAITING;
#ifdef MFSDEBUG
			syslog(LOG_NOTICE,"masterconn: redirected to other master");
#endif
		} else {
			/* leader not known - just reconnect */
			eptr->masteraddrvalid = 0;
			syslog(LOG_NOTICE,"masterconn: follower doesn't know who is the leader, reconnect to another master");
		}
		eptr->mode = CLOSE;
		return;
	}

	if (atype==2 && (length==7 || length==15)) {
#ifdef MFSDEBUG
		syslog(LOG_NOTICE,"masterconn: wait for acceptance");
#endif
		if (eptr->registerstate == INPROGRESS) {
			hdd_get_chunks_end();
		}
		eptr->registerstate = WAITING;
		eptr->masterversion = get32bit(&data);
		if (Timeout==0) {
			eptr->timeout = get16bit(&data);
		} else {
			data+=2;
		}
		if (length>=15) {
			metaid = get64bit(&data);
			if (metaid>0 && MetaID>0 && metaid!=MetaID) {
				/* wrong MFS instance - abort */
				syslog(LOG_WARNING,"MATOCS_MASTER_ACK - wrong meta data id. Can't connect to master");
				eptr->registerstate = REGISTERED;	/* do not switch to register ver. 5 */
				eptr->mode = KILL;
				return;
			}
		}
		return;
	}

	if (atype==3 && length==33) {
#ifdef MFSDEBUG
		syslog(LOG_NOTICE,"masterconn: authorization needed");
#endif
		if (AuthCode==NULL) {
			syslog(LOG_WARNING,"MATOCS_MASTER_ACK - master needs authorization, but password was not defined");
			eptr->registerstate = REGISTERED;	/* do not switch to register ver. 5 */
			eptr->mode = KILL;
			return;
		}
		memcpy(eptr->rndblob,data,32);
		eptr->gotrndblob = 1;
		masterconn_sendregister(eptr);
		return;
	}

	syslog(LOG_NOTICE,"MATOCS_MASTER_ACK - bad type/length: %u/%u",atype,length);
	eptr->mode = KILL;
}
int fs_loadnode(FILE *fd) { uint8_t unodebuff[4+1+2+4+4+4+4+4+4+8+4+2+8*65536+4*65536+4]; const uint8_t *ptr,*chptr; uint8_t type,goal; uint32_t nodeid,uid,gid,atimestamp,mtimestamp,ctimestamp,trashtime; uint16_t mode; char c; type = fgetc(fd); if (type==0) { // last node return 1; } switch (type) { case TYPE_DIRECTORY: case TYPE_FIFO: case TYPE_SOCKET: if (fread(unodebuff,1,4+1+2+4+4+4+4+4+4,fd)!=4+1+2+4+4+4+4+4+4) { fprintf(stderr,"loading node: read error\n"); return -1; } break; case TYPE_BLOCKDEV: case TYPE_CHARDEV: case TYPE_SYMLINK: if (fread(unodebuff,1,4+1+2+4+4+4+4+4+4+4,fd)!=4+1+2+4+4+4+4+4+4+4) { fprintf(stderr,"loading node: read error\n"); return -1; } break; case TYPE_FILE: case TYPE_TRASH: case TYPE_RESERVED: if (fread(unodebuff,1,4+1+2+4+4+4+4+4+4+8+4+2,fd)!=4+1+2+4+4+4+4+4+4+8+4+2) { fprintf(stderr,"loading node: read error\n"); return -1; } break; default: fprintf(stderr,"loading node: unrecognized node type: %c\n",type); return -1; } c='?'; switch (type) { case TYPE_DIRECTORY: c='D'; break; case TYPE_SOCKET: c='S'; break; case TYPE_FIFO: c='F'; break; case TYPE_BLOCKDEV: c='B'; break; case TYPE_CHARDEV: c='C'; break; case TYPE_SYMLINK: c='L'; break; case TYPE_FILE: c='-'; break; case TYPE_TRASH: c='T'; break; case TYPE_RESERVED: c='R'; break; } ptr = unodebuff; nodeid = get32bit(&ptr); goal = get8bit(&ptr); mode = get16bit(&ptr); uid = get32bit(&ptr); gid = get32bit(&ptr); atimestamp = get32bit(&ptr); mtimestamp = get32bit(&ptr); ctimestamp = get32bit(&ptr); trashtime = get32bit(&ptr); printf("%c|i:%10"PRIu32"|#:%"PRIu8"|e:%1"PRIX16"|m:%04"PRIo16"|u:%10"PRIu32"|g:%10"PRIu32"|a:%10"PRIu32",m:%10"PRIu32",c:%10"PRIu32"|t:%10"PRIu32,c,nodeid,goal,(uint16_t)(mode>>12),(uint16_t)(mode&0xFFF),uid,gid,atimestamp,mtimestamp,ctimestamp,trashtime); if (type==TYPE_BLOCKDEV || type==TYPE_CHARDEV) { uint32_t rdev; rdev = get32bit(&ptr); printf("|d:%5"PRIu32",%5"PRIu32"\n",rdev>>16,rdev&0xFFFF); } else if (type==TYPE_SYMLINK) {
/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */ uint8_t replicate(uint64_t chunkid,uint32_t version,const uint32_t xormasks[4],uint8_t srccnt,const uint8_t *srcs) { replication r; uint8_t status,i,j,vbuffs,first; uint16_t b,blocks; uint32_t xcrc[4],crc; uint32_t codeindex,codeword; uint8_t *wptr; const uint8_t *rptr; int s; if (srccnt==0) { return ERROR_EINVAL; } // syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt); pthread_mutex_lock(&statslock); stats_repl++; pthread_mutex_unlock(&statslock); // init replication structure r.chunkid = chunkid; r.version = version; r.srccnt = 0; r.created = 0; r.opened = 0; r.fds = malloc(sizeof(struct pollfd)*srccnt); passert(r.fds); r.repsources = malloc(sizeof(repsrc)*srccnt); passert(r.repsources); if (srccnt>1) { r.xorbuff = malloc(MFSBLOCKSIZE+4); passert(r.xorbuff); } else { r.xorbuff = NULL; } // create chunk status = hdd_create(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_create status: %s",mfsstrerr(status)); rep_cleanup(&r); return status; } r.created = 1; // init sources r.srccnt = srccnt; for (i=0 ; i<srccnt ; i++) { r.repsources[i].chunkid = get64bit(&srcs); r.repsources[i].version = get32bit(&srcs); r.repsources[i].ip = get32bit(&srcs); r.repsources[i].port = get16bit(&srcs); r.repsources[i].sock = -1; r.repsources[i].packet = NULL; } // connect for (i=0 ; i<srccnt ; i++) { s = tcpsocket(); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: socket error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } r.repsources[i].sock = s; r.fds[i].fd = s; if (tcpnonblock(s)<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: connect error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } if (s==0) { r.repsources[i].mode = IDLE; } else { 
r.repsources[i].mode = CONNECTING; } } if (rep_wait_for_connection(&r,CONNMSECTO)<0) { rep_cleanup(&r); return ERROR_CANTCONNECT; } // disable Nagle for (i=0 ; i<srccnt ; i++) { tcpnodelay(r.repsources[i].sock); } // open chunk status = hdd_open(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_open status: %s",mfsstrerr(status)); rep_cleanup(&r); return status; } r.opened = 1; // get block numbers for (i=0 ; i<srccnt ; i++) { wptr = rep_create_packet(r.repsources+i,ANTOCS_GET_CHUNK_BLOCKS,8+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); } // send packet if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive answers for (i=0 ; i<srccnt ; i++) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // get # of blocks blocks = 0; for (i=0 ; i<srccnt ; i++) { uint32_t type,size; uint64_t pchid; uint32_t pver; uint16_t pblocks; uint8_t pstatus; uint32_t ip; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; ip = r.repsources[i].ip; if (rptr==NULL || type!=CSTOAN_CHUNK_BLOCKS || size!=15) { syslog(LOG_WARNING,"replicator,get # of blocks: got wrong answer (type:0x%08"PRIX32"/size:0x%08"PRIX32") from (%u.%u.%u.%u:%04"PRIX16")",type,size,(ip>>24)&0xFF,(ip>>16)&0xFF,(ip>>8)&0xFF,ip&0xFF,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pver = get32bit(&rptr); pblocks = get16bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator,get # of blocks: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from 
(%u.%u.%u.%u:%04"PRIX16")",pchid,r.repsources[i].chunkid,(ip>>24)&0xFF,(ip>>16)&0xFF,(ip>>8)&0xFF,ip&0xFF,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; }
/* main working thread | glock:UNLOCKED */ void* write_worker(void *arg) { uint32_t z1,z2,z3; uint8_t *data; int fd; int i; struct pollfd pfd[2]; uint32_t sent,rcvd; uint8_t recvbuff[21]; uint8_t sendbuff[32]; #ifdef HAVE_WRITEV struct iovec siov[2]; #endif uint8_t pipebuff[1024]; uint8_t *wptr; const uint8_t *rptr; uint32_t reccmd; uint32_t recleng; uint64_t recchunkid; uint32_t recwriteid; uint8_t recstatus; #ifdef WORKER_DEBUG uint32_t partialblocks; uint32_t bytessent; char debugchain[200]; uint32_t cl; #endif const uint8_t *cp,*cpe; uint32_t chainip[10]; uint16_t chainport[10]; uint16_t chainelements; uint16_t chindx; uint32_t ip; uint16_t port; uint32_t srcip; uint64_t mfleng; uint64_t maxwroffset; uint64_t chunkid; uint32_t version; uint32_t nextwriteid; const uint8_t *chain; uint32_t chainsize; const uint8_t *csdata; uint32_t csdatasize; uint8_t westatus; uint8_t wrstatus; int status; uint8_t waitforstatus; uint8_t havedata; struct timeval start,now,lastrcvd,lrdiff; uint8_t cnt; inodedata *id; cblock *cb,*rcb; // inodedata *id; chainelements = 0; (void)arg; for (;;) { for (cnt=0 ; cnt<chainelements ; cnt++) { csdb_writedec(chainip[cnt],chainport[cnt]); } chainelements=0; // get next job queue_get(jqueue,&z1,&z2,&data,&z3); id = (inodedata*)data; pthread_mutex_lock(&glock); if (id->datachainhead) { chindx = id->datachainhead->chindx; } else { syslog(LOG_WARNING,"writeworker got inode with no data to write !!!"); chindx = 0xFFFF; status = EINVAL; // this should never happen, so status is not important - just anything } status = id->status; pthread_mutex_unlock(&glock); if (status) { write_job_end(id,status,0); continue; } // syslog(LOG_NOTICE,"file: %"PRIu32", index: %"PRIu16" - debug1",id->inode,chindx); // get chunk data from master wrstatus = fs_writechunk(id->inode,chindx,&mfleng,&chunkid,&version,&csdata,&csdatasize); if (wrstatus!=STATUS_OK) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16" - fs_writechunk returns status 
%d",id->inode,chindx,wrstatus); if (wrstatus!=ERROR_LOCKED) { if (wrstatus==ERROR_ENOENT) { write_job_end(id,EBADF,0); } else if (wrstatus==ERROR_QUOTA) { write_job_end(id,EDQUOT,0); } else if (wrstatus==ERROR_NOSPACE) { write_job_end(id,ENOSPC,0); } else { id->trycnt++; if (id->trycnt>=maxretries) { if (wrstatus==ERROR_NOCHUNKSERVERS) { write_job_end(id,ENOSPC,0); } else { write_job_end(id,EIO,0); } } else { write_delayed_enqueue(id,1+(id->trycnt<30)?(id->trycnt/3):10); } } } else { write_delayed_enqueue(id,1+(id->trycnt<30)?(id->trycnt/3):10); } continue; // get next job } if (csdata==NULL || csdatasize==0) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - there are no valid copies",id->inode,chindx,chunkid,version); id->trycnt+=6; if (id->trycnt>=maxretries) { write_job_end(id,ENXIO,0); } else { write_delayed_enqueue(id,60); } continue; } cp = csdata; cpe = csdata+csdatasize; while (cp<cpe && chainelements<10) { chainip[chainelements] = get32bit(&cp); chainport[chainelements] = get16bit(&cp); csdb_writeinc(chainip[chainelements],chainport[chainelements]); chainelements++; } chain = csdata; ip = get32bit(&chain); port = get16bit(&chain); chainsize = csdatasize-6; gettimeofday(&start,NULL); /* if (csdatasize>CSDATARESERVE) { csdatasize = CSDATARESERVE; } memcpy(wrec->csdata,csdata,csdatasize); wrec->csdatasize=csdatasize; while (csdatasize>=6) { tmpip = get32bit(&csdata); tmpport = get16bit(&csdata); csdatasize-=6; csdb_writeinc(tmpip,tmpport); } */ // make connection to cs srcip = fs_getsrcip(); cnt=5; while (cnt>0) { fd = tcpsocket(); if (fd<0) { syslog(LOG_WARNING,"can't create tcp socket: %m"); cnt=0; } if (srcip) { if (tcpnumbind(fd,srcip,0)<0) { syslog(LOG_WARNING,"can't bind socket to given ip: %m"); tcpclose(fd); fd=-1; break; } } if (tcpnumtoconnect(fd,ip,port,200)<0) { cnt--; if (cnt==0) { syslog(LOG_WARNING,"can't connect to (%08"PRIX32":%"PRIu16"): %m",ip,port); } tcpclose(fd); fd=-1; } else { cnt=0; } } if 
(fd<0) { fs_writeend(chunkid,id->inode,0); id->trycnt++; if (id->trycnt>=maxretries) { write_job_end(id,EIO,0); } else { write_delayed_enqueue(id,1+(id->trycnt<30)?(id->trycnt/3):10); } continue; } if (tcpnodelay(fd)<0) { syslog(LOG_WARNING,"can't set TCP_NODELAY: %m"); } #ifdef WORKER_DEBUG partialblocks=0; bytessent=0; #endif nextwriteid=1; pfd[0].fd = fd; pfd[1].fd = id->pipe[0]; rcvd = 0; sent = 0; waitforstatus=1; havedata=1; wptr = sendbuff; put32bit(&wptr,CUTOCS_WRITE); put32bit(&wptr,12+chainsize); put64bit(&wptr,chunkid); put32bit(&wptr,version); // debug: syslog(LOG_NOTICE,"writeworker: init packet prepared"); cb = NULL; status = 0; wrstatus = STATUS_OK; lastrcvd.tv_sec = 0; do { gettimeofday(&now,NULL); if (lastrcvd.tv_sec==0) { lastrcvd = now; } else { lrdiff = now; if (lrdiff.tv_usec<lastrcvd.tv_usec) { lrdiff.tv_sec--; lrdiff.tv_usec+=1000000; } lrdiff.tv_sec -= lastrcvd.tv_sec; lrdiff.tv_usec -= lastrcvd.tv_usec; if (lrdiff.tv_sec>=2) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was timed out (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); break; } } if (now.tv_usec<start.tv_usec) { now.tv_sec--; now.tv_usec+=1000000; } now.tv_sec -= start.tv_sec; now.tv_usec -= start.tv_usec; if (havedata==0 && now.tv_sec<5 && waitforstatus<5) { pthread_mutex_lock(&glock); if (cb==NULL) { if (id->datachainhead) { if (id->datachainhead->to-id->datachainhead->from==65536 || waitforstatus<=1) { cb = id->datachainhead; havedata=1; } } } else { if (cb->next) { if (cb->next->chindx==chindx) { if (cb->next->to-cb->next->from==65536 || waitforstatus<=1) { cb = cb->next; havedata=1; } } } else { id->waitingworker=1; } } if (havedata==1) { cb->writeid = nextwriteid++; // debug: syslog(LOG_NOTICE,"writeworker: data packet prepared (writeid:%"PRIu32",pos:%"PRIu16")",cb->writeid,cb->pos); 
waitforstatus++; wptr = sendbuff; put32bit(&wptr,CUTOCS_WRITE_DATA); put32bit(&wptr,24+(cb->to-cb->from)); put64bit(&wptr,chunkid); put32bit(&wptr,cb->writeid); put16bit(&wptr,cb->pos); put16bit(&wptr,cb->from); put32bit(&wptr,cb->to-cb->from); put32bit(&wptr,mycrc32(0,cb->data+cb->from,cb->to-cb->from)); #ifdef WORKER_DEBUG if (cb->to-cb->from<65536) { partialblocks++; } bytessent+=(cb->to-cb->from); #endif sent=0; } pthread_mutex_unlock(&glock); } pfd[0].events = POLLIN | (havedata?POLLOUT:0); pfd[0].revents = 0; pfd[1].events = POLLIN; pfd[1].revents = 0; if (poll(pfd,2,100)<0) { /* correct timeout - in msec */ syslog(LOG_WARNING,"writeworker: poll error: %m"); status=EIO; break; } if (pfd[1].revents&POLLIN) { // used just to break poll - so just read all data from pipe to empty it i = read(id->pipe[0],pipebuff,1024); } if (pfd[0].revents&POLLIN) { i = read(fd,recvbuff+rcvd,21-rcvd); if (i==0) { // connection reset by peer ,读取文件头错误 syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); status=EIO; break; } gettimeofday(&lastrcvd,NULL); rcvd+=i; if (rcvd==21) { rptr = recvbuff; reccmd = get32bit(&rptr); recleng = get32bit(&rptr); recchunkid = get64bit(&rptr); recwriteid = get32bit(&rptr); recstatus = get8bit(&rptr); if (reccmd!=CSTOCU_WRITE_STATUS || recleng!=13) { syslog(LOG_WARNING,"writeworker: got unrecognized packet from chunkserver (cmd:%"PRIu32",leng:%"PRIu32")",reccmd,recleng); status=EIO; break; } if (recchunkid!=chunkid) { syslog(LOG_WARNING,"writeworker: got unexpected packet (expected chunkdid:%"PRIu64",packet chunkid:%"PRIu64")",chunkid,recchunkid); status=EIO; break; } if (recstatus!=STATUS_OK) { syslog(LOG_WARNING,"writeworker: write error: %"PRIu8,recstatus); wrstatus=recstatus; break; } // debug: 
syslog(LOG_NOTICE,"writeworker: received status ok for writeid:%"PRIu32,recwriteid); if (recwriteid>0) { pthread_mutex_lock(&glock); for (rcb = id->datachainhead ; rcb && rcb->writeid!=recwriteid ; rcb=rcb->next) {} if (rcb==NULL) { syslog(LOG_WARNING,"writeworker: got unexpected status (writeid:%"PRIu32")",recwriteid); pthread_mutex_unlock(&glock); status=EIO; break; } if (rcb==cb) { // current block,cb为当前块儿指针 // debug: syslog(LOG_NOTICE,"writeworker: received status for current block"); if (havedata) { // got status ok before all data had been sent - error syslog(LOG_WARNING,"writeworker: got status OK before all data have been sent"); pthread_mutex_unlock(&glock); status=EIO; break; } else { cb = NULL; } } if (rcb->prev) {//将rcb所指块儿从链表中取出 rcb->prev->next = rcb->next; } else { id->datachainhead = rcb->next; } if (rcb->next) { rcb->next->prev = rcb->prev; } else { id->datachaintail = rcb->prev; } maxwroffset = (((uint64_t)(chindx))<<26)+(((uint32_t)(rcb->pos))<<16)+rcb->to; if (maxwroffset>mfleng) { mfleng=maxwroffset; } write_cb_release(rcb);// id->cacheblocks--; if (id->cachewaiting>0) { pthread_cond_broadcast(&(id->cachecond)); } pthread_mutex_unlock(&glock); } waitforstatus--; rcvd=0; } } if (havedata && (pfd[0].revents&POLLOUT)) { if (cb==NULL) { // havedata==1 && cb==NULL means sending first packet (CUTOCS_WRITE) if (sent<20) { #ifdef HAVE_WRITEV //将多个数据存储在一起,将驻留在两个或更多的不连接的缓冲区中的数据一次写出去 if (chainsize>0) { siov[0].iov_base = sendbuff+sent; siov[0].iov_len = 20-sent; siov[1].iov_base = (char*)chain; // discard const (safe - because it's used in writev) siov[1].iov_len = chainsize; i = writev(fd,siov,2); } else { #endif i = write(fd,sendbuff+sent,20-sent); #ifdef HAVE_WRITEV } #endif } else { i = write(fd,chain+(sent-20),chainsize-(sent-20)); } if (i<0) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: 
%"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); status=EIO; break; } sent+=i; if (sent==20+chainsize) { havedata=0; } } else { if (sent<32) { #ifdef HAVE_WRITEV siov[0].iov_base = sendbuff+sent; siov[0].iov_len = 32-sent; siov[1].iov_base = cb->data+cb->from; siov[1].iov_len = cb->to-cb->from; i = writev(fd,siov,2); #else i = write(fd,sendbuff+sent,32-sent); #endif } else { i = write(fd,cb->data+cb->from+(sent-32),cb->to-cb->from-(sent-32)); } if (i<0) { syslog(LOG_WARNING,"file: %"PRIu32", index: %"PRIu16", chunk: %"PRIu64", version: %"PRIu32" - writeworker: connection with (%08"PRIX32":%"PRIu16") was reset by peer (unfinished writes: %"PRIu8"; try counter: %"PRIu32")",id->inode,chindx,chunkid,version,ip,port,waitforstatus,id->trycnt+1); status=EIO; break; } sent+=i; if (sent==32+cb->to-cb->from) { havedata=0; } } } } while (waitforstatus>0 && now.tv_sec<10);//////////////////// id->waitingworker=0; tcpclose(fd); #ifdef WORKER_DEBUG gettimeofday(&now,NULL); if (now.tv_usec<start.tv_usec) { now.tv_sec--; now.tv_usec+=1000000; } now.tv_sec -= start.tv_sec; now.tv_usec -= start.tv_usec; cl=0; for (cnt=0 ; cnt<chainelements ; cnt++) { cl+=snprintf(debugchain+cl,200-cl,"%u.%u.%u.%u:%u->",(chainip[cnt]>>24)&255,(chainip[cnt]>>16)&255,(chainip[cnt]>>8)&255,chainip[cnt]&255,chainport[cnt]); } if (cl>=2) { debugchain[cl-2]='\0'; } syslog(LOG_NOTICE,"worker %lu sent %"PRIu32" blocks (%"PRIu32" partial) of chunk %016"PRIX64"_%08"PRIX32", received status for %"PRIu32" blocks (%"PRIu32" lost), bw: %.6lfMB ( %"PRIu32" B / %.0lf us ), chain: %s",(unsigned long)arg,nextwriteid-1,partialblocks,chunkid,version,nextwriteid-1-waitforstatus,waitforstatus,(double)bytessent/((double)(now.tv_sec)*1000000+(double)(now.tv_usec)),bytessent,((double)(now.tv_sec)*1000000+(double)(now.tv_usec)),debugchain); #endif for (cnt=0 ; cnt<10 ; cnt++) { westatus = fs_writeend(chunkid,id->inode,mfleng); if (westatus!=STATUS_OK) { usleep(100000+(10000<<cnt)); } else { 
break; } } if (westatus!=STATUS_OK) { write_job_end(id,ENXIO,0); } else if (status!=0 || wrstatus!=STATUS_OK) { if (wrstatus!=STATUS_OK) { // convert MFS status to OS errno if (wrstatus==ERROR_NOSPACE) { status=ENOSPC; } else { status=EIO; } } id->trycnt++; if (id->trycnt>=maxretries) { write_job_end(id,status,0); } else { write_job_end(id,0,1+(id->trycnt<30)?(id->trycnt/3):10); } } else { read_inode_ops(id->inode); write_job_end(id,0,0); } } }
int main(void) { uint64_t buff[2]; uint8_t *wp; const uint8_t *rp; uint32_t i; mfstest_init(); wp = (uint8_t*)buff; for (i=0 ; i<16 ; i++) { wp[i] = ((15-i)*0x10)+i; } mfstest_start(getbit_uneven); rp = (uint8_t*)buff; mfstest_assert_uint8_eq(get8bit(&rp),0xF0); mfstest_assert_uint16_eq(get16bit(&rp),0xE1D2); mfstest_assert_uint32_eq(get32bit(&rp),0xC3B4A596); mfstest_assert_uint64_eq(get64bit(&rp),0x8778695A4B3C2D1E); mfstest_end(); mfstest_start(getbit_even); rp = (uint8_t*)buff; mfstest_assert_uint64_eq(get64bit(&rp),0xF0E1D2C3B4A59687); mfstest_assert_uint32_eq(get32bit(&rp),0x78695A4B); mfstest_assert_uint16_eq(get16bit(&rp),0x3C2D); mfstest_assert_uint8_eq(get8bit(&rp),0x1E); mfstest_end(); wp = (uint8_t*)buff; for (i=0; i<16 ; i++) { wp[i] = 0; } put8bit(&wp,0xF0); put16bit(&wp,0xE1D2); put32bit(&wp,0xC3B4A596); put64bit(&wp,0x8778695A4B3C2D1E); put8bit(&wp,0x0F); mfstest_start(putbit_uneven); rp = (uint8_t*)buff; for (i=0 ; i<16 ; i++) { mfstest_assert_uint8_eq(rp[i],((15-i)*0x10)+i); } mfstest_end(); wp = (uint8_t*)buff; for (i=0; i<16 ; i++) { wp[i] = 0; } put64bit(&wp,0xF0E1D2C3B4A59687); put32bit(&wp,0x78695A4B); put16bit(&wp,0x3C2D); put8bit(&wp,0x1E); put8bit(&wp,0x0F); mfstest_start(putbit_even); rp = (uint8_t*)buff; for (i=0 ; i<16 ; i++) { mfstest_assert_uint8_eq(rp[i],((15-i)*0x10)+i); } mfstest_end(); mfstest_return(); }
/* srcs: srccnt * (chunkid:64 version:32 ip:32 port:16) */ uint8_t replicate(uint64_t chunkid,uint32_t version,uint8_t srccnt,const uint8_t *srcs) { replication r; uint8_t status,i,vbuffs,first; uint16_t b,blocks; uint32_t xcrc,crc; uint8_t *wptr; const uint8_t *rptr; int s; if (srccnt==0) { return ERROR_EINVAL; } // syslog(LOG_NOTICE,"replication begin (chunkid:%08"PRIX64",version:%04"PRIX32",srccnt:%"PRIu8")",chunkid,version,srccnt); pthread_mutex_lock(&statslock); stats_repl++; pthread_mutex_unlock(&statslock); // init replication structure r.chunkid = chunkid; r.version = version; r.srccnt = 0; r.created = 0; r.opened = 0; r.fds = malloc(sizeof(struct pollfd)*srccnt); passert(r.fds); r.repsources = malloc(sizeof(repsrc)*srccnt); passert(r.repsources); if (srccnt>1) { r.xorbuff = malloc(65536+4); passert(r.xorbuff); } else { r.xorbuff = NULL; } // create chunk status = hdd_create(chunkid,0); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_create status: %u",status); rep_cleanup(&r); return status; } r.created = 1; // init sources r.srccnt = srccnt; for (i=0 ; i<srccnt ; i++) { r.repsources[i].chunkid = get64bit(&srcs); r.repsources[i].version = get32bit(&srcs); r.repsources[i].ip = get32bit(&srcs); r.repsources[i].port = get16bit(&srcs); r.repsources[i].sock = -1; r.repsources[i].packet = NULL; } // connect for (i=0 ; i<srccnt ; i++) { s = tcpsocket(); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: socket error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } r.repsources[i].sock = s; r.fds[i].fd = s; if (tcpnonblock(s)<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: nonblock error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } s = tcpnumconnect(s,r.repsources[i].ip,r.repsources[i].port); if (s<0) { mfs_errlog_silent(LOG_NOTICE,"replicator: connect error"); rep_cleanup(&r); return ERROR_CANTCONNECT; } if (s==0) { r.repsources[i].mode = IDLE; } else { r.repsources[i].mode = CONNECTING; } } if (rep_wait_for_connection(&r,CONNMSECTO)<0) { 
rep_cleanup(&r); return ERROR_CANTCONNECT; } // open chunk status = hdd_open(chunkid); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_open status: %u",status); rep_cleanup(&r); return status; } r.opened = 1; // get block numbers for (i=0 ; i<srccnt ; i++) { wptr = rep_create_packet(r.repsources+i,CSTOCS_GET_CHUNK_BLOCKS,8+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); } // send packet if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive answers for (i=0 ; i<srccnt ; i++) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // get block no blocks = 0; for (i=0 ; i<srccnt ; i++) { uint32_t type,size; uint64_t pchid; uint32_t pver; uint16_t pblocks; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL || type!=CSTOCS_GET_CHUNK_BLOCKS_STATUS || size!=15) { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pver = get32bit(&rptr); pblocks = get16bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pver!=r.repsources[i].version) { syslog(LOG_WARNING,"replicator: got wrong answer (chunk_status:version:%"PRIX32"/%"PRIX32") from 
(%08"PRIX32":%04"PRIX16")",pver,r.repsources[i].version,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGVERSION; } if (pstatus!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return pstatus; } r.repsources[i].blocks = pblocks; if (pblocks>blocks) { blocks=pblocks; } } // create read request for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { uint32_t leng; wptr = rep_create_packet(r.repsources+i,CUTOCS_READ,8+4+4+4); if (wptr==NULL) { syslog(LOG_NOTICE,"replicator: out of memory"); rep_cleanup(&r); return ERROR_OUTOFMEMORY; } leng = r.repsources[i].blocks*0x10000; put64bit(&wptr,r.repsources[i].chunkid); put32bit(&wptr,r.repsources[i].version); put32bit(&wptr,0); put32bit(&wptr,leng); } else { rep_no_packet(r.repsources+i); } } // send read request if (rep_send_all_packets(&r,SENDMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // receive data and write to hdd for (b=0 ; b<blocks ; b++) { // prepare receive for (i=0 ; i<srccnt ; i++) { if (b<r.repsources[i].blocks) { r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } else { r.repsources[i].mode = IDLE; r.repsources[i].bytesleft = 0; } } // receive data if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } // check packets vbuffs = 0; for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { uint32_t type,size; uint64_t pchid; uint16_t pblocknum; uint16_t poffset; uint32_t psize; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = r.repsources[i].packet; if (rptr==NULL) { rep_cleanup(&r); return ERROR_DISCONNECTED; } if (type==CSTOCU_READ_STATUS && size==9) { pchid = get64bit(&rptr); pstatus = get8bit(&rptr); rep_cleanup(&r); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got 
wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); return ERROR_WRONGCHUNKID; } if (pstatus==STATUS_OK) { // got status too early or got incorrect packet syslog(LOG_WARNING,"replicator: got unexpected ok status from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); return ERROR_DISCONNECTED; } syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); return pstatus; } else if (type==CSTOCU_READ_DATA && size==20+65536) { pchid = get64bit(&rptr); pblocknum = get16bit(&rptr); poffset = get16bit(&rptr); psize = get32bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pblocknum!=b) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:blocknum:%"PRIu16"/%"PRIu16") from (%08"PRIX32":%04"PRIX16")",pblocknum,b,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } if (poffset!=0) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:offset:%"PRIu16") from (%08"PRIX32":%04"PRIX16")",poffset,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGOFFSET; } if (psize!=65536) { syslog(LOG_WARNING,"replicator: got wrong answer (read_data:size:%"PRIu32") from (%08"PRIX32":%04"PRIX16")",psize,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGSIZE; } } else { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } vbuffs++; } } // write data if (vbuffs==0) { // no buffers ? 
- it should never happen syslog(LOG_WARNING,"replicator: no data received for block: %"PRIu16,b); rep_cleanup(&r); return ERROR_DISCONNECTED; } else if (vbuffs==1) { // xor not needed, so just find block and write it for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { rptr = r.repsources[i].packet; status = hdd_write(chunkid,0,b,rptr+20,0,65536,rptr+16); if (status!=STATUS_OK) { syslog(LOG_WARNING,"replicator: write status: %u",status); rep_cleanup(&r); return status; } } } } else { first=1; if (vbuffs&1) { xcrc = 0; } else { xcrc = 0xD7978EEBU; // = mycrc32_zeroblock(0,0x10000); } for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].mode!=IDLE) { rptr = r.repsources[i].packet; rptr+=16; // skip chunkid,blockno,offset and size if (first) { memcpy(r.xorbuff+4,rptr+4,65536); first=0; } else { xordata(r.xorbuff+4,rptr+4,65536); } crc = get32bit(&rptr); if (crc!=mycrc32(0,rptr,65536)) { syslog(LOG_WARNING,"replicator: received data with wrong checksum from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_CRC; } xcrc^=crc; } } wptr = r.xorbuff; put32bit(&wptr,xcrc); status = hdd_write(chunkid,0,b,r.xorbuff+4,0,65536,r.xorbuff); if (status!=STATUS_OK) { syslog(LOG_WARNING,"replicator: xor write status: %u",status); rep_cleanup(&r); return status; } } } // receive status for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { // if (r.repsources[i].packet) { // free(r.repsources[i].packet); // r.repsources[i].packet=NULL; // } r.repsources[i].mode = HEADER; r.repsources[i].startptr = r.repsources[i].hdrbuff; r.repsources[i].bytesleft = 8; } else { r.repsources[i].mode = IDLE; r.repsources[i].bytesleft = 0; } } if (rep_receive_all_packets(&r,RECVMSECTO)<0) { rep_cleanup(&r); return ERROR_DISCONNECTED; } for (i=0 ; i<srccnt ; i++) { if (r.repsources[i].blocks>0) { uint32_t type,size; uint64_t pchid; uint8_t pstatus; rptr = r.repsources[i].hdrbuff; type = get32bit(&rptr); size = get32bit(&rptr); rptr = 
r.repsources[i].packet; if (rptr==NULL || type!=CSTOCU_READ_STATUS || size!=9) { syslog(LOG_WARNING,"replicator: got wrong answer (type/size) from (%08"PRIX32":%04"PRIX16")",r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_DISCONNECTED; } pchid = get64bit(&rptr); pstatus = get8bit(&rptr); if (pchid!=r.repsources[i].chunkid) { syslog(LOG_WARNING,"replicator: got wrong answer (read_status:chunkid:%"PRIX64"/%"PRIX64") from (%08"PRIX32":%04"PRIX16")",pchid,r.repsources[i].chunkid,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return ERROR_WRONGCHUNKID; } if (pstatus!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: got status: %u from (%08"PRIX32":%04"PRIX16")",pstatus,r.repsources[i].ip,r.repsources[i].port); rep_cleanup(&r); return pstatus; } } } // close chunk and change version status = hdd_close(chunkid); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_close status: %u",status); rep_cleanup(&r); return status; } r.opened = 0; status = hdd_version(chunkid,0,version); if (status!=STATUS_OK) { syslog(LOG_NOTICE,"replicator: hdd_version status: %u",status); rep_cleanup(&r); return status; } r.created = 0; rep_cleanup(&r); return STATUS_OK; }
/*
 * Loads flock lock records from fd until the all-zero terminator record.
 *
 * Each record is FLOCK_REC_SIZE bytes: inode:32 owner:64 sessionid:32 ltype:8.
 * Only format version 0x10 is accepted.
 *
 * A record is rejected when of_checknode(sessionid,inode) returns 0, or when
 * the inode already has an active lock and either that lock or the new one is
 * a writer. With ignoreflag set a rejected record is logged and skipped,
 * otherwise loading fails.
 *
 * Returns 0 after the terminator record, -1 on a wrong version, a short read,
 * a rejected record (without ignoreflag) or an allocation failure.
 */
int flock_load(bio *fd,uint8_t mver,uint8_t ignoreflag) {
	uint8_t loadbuff[FLOCK_REC_SIZE];
	const uint8_t *ptr;
	int32_t r;
	uint32_t inode,sessionid;
	uint64_t owner;
	uint8_t ltype;
	inodelocks *il;
	lock *l;

	if (mver!=0x10) {
		return -1;
	}

	for (;;) {
		r = bio_read(fd,loadbuff,FLOCK_REC_SIZE);
		if (r!=FLOCK_REC_SIZE) {
			return -1;
		}
		ptr = loadbuff;
		inode = get32bit(&ptr);
		owner = get64bit(&ptr);
		sessionid = get32bit(&ptr);
		ltype = get8bit(&ptr);
		// end-of-section marker
		if (inode==0 && owner==0 && sessionid==0) {
			return 0;
		}
		if (of_checknode(sessionid,inode)==0) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading flock_locks: lock on closed file !!! (ignoring)");
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading flock_locks: lock on closed file !!!");
				return -1;
			}
		}
		// add lock
		il = flock_inode_find(inode);
		if (il==NULL) {
			il = flock_inode_new(inode);
		}
		// a writer can not coexist with any other active lock
		if (il->active!=NULL && (il->active->ltype==LTYPE_WRITER || ltype==LTYPE_WRITER)) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading flock_locks: wrong lock !!! (ignoring)");
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading flock_locks: wrong lock !!!");
				return -1;
			}
		}
		l = malloc(sizeof(lock));
		if (l==NULL) {
			// previously the NULL pointer was dereferenced right below
			mfs_syslog(LOG_ERR,"loading flock_locks: out of memory");
			return -1;
		}
		l->owner = owner;
		l->sessionid = sessionid;
		l->state = STATE_ACTIVE;
		l->ltype = ltype;
		l->lock_instances = NULL;
		l->parent = il;
		l->next = NULL;
		l->prev = NULL;
		flock_do_lock_inode_attach(l);
	}
	return 0; // unreachable
}
int ppfs_read(const char * path, char * buf, size_t st, off_t off, struct fuse_file_info *fi){ int nread = 0; int ooff = off; int ost = st; char* rbuf = buf; chunk_cache* cc; if(lookup_chunk_cache(path,&cc) != 0){ ppacket* p = createpacket_s(4+strlen(path),CLTOMD_READ_CHUNK_INFO,-1); uint8_t* ptr = p->startptr + HEADER_LEN; int plen = strlen(path); uint64_t* chunklist = NULL; int clen,calloc; put32bit(&ptr,plen); memcpy(ptr,path,plen); ptr += plen; fprintf(stderr,"just to be clear\n"); const uint8_t* tmpptr = p->startptr + HEADER_LEN; int i; for(i=0;i<p->size;i+=1){ int x = get8bit(&tmpptr); fprintf(stderr,"%X\t",x); } fprintf(stderr,"\n"); sendpacket(fd,p); free(p); p = receivepacket(fd); const uint8_t* ptr2 = p->startptr; int status = get32bit(&ptr2); fprintf(stderr,"status:%d\n",status); if(status != 0){ return -1; } uint32_t ip = get32bit(&ptr2); if(ip == -1){ fprintf(stderr,"local mds\n"); } else { fprintf(stderr,"remote mds:%X\n",ip); } int chunks = get32bit(&ptr2); fprintf(stderr,"chunks=%d\n",chunks); chunklist = (uint64_t*)malloc(sizeof(uint64_t)*(chunks+20)); clen = 0; calloc = chunks+20; for(i=0;i<chunks;i++){ uint64_t chunkid = get64bit(&ptr2); fprintf(stderr,"(%d):id=%lld\n",i,chunkid); chunklist[clen++] = chunkid; } cc = chunk_cache_add(path,chunklist,clen,ip); } else { fprintf(stderr,"\n\n\n\nfound chunk_cache\n\n\n\n"); } fprintf(stderr,"preparing mds connection:%X\n",ip); ppfs_conn_entry* e = NULL; uint32_t ip = cc->mdsid; if(ip != -1){ if(remote_mds.sockfd != -1 && remote_mds.peerip != ip){ tcpclose(remote_mds.sockfd); remote_mds.sockfd = -1; } if(remote_mds.sockfd == -1){ fprintf(stderr,"connecting\n"); if(serv_connect(&remote_mds,ip,MDS_PORT) < 0){ return -1; } } e = &remote_mds; } else { e = &local_mds; } fprintf(stderr,"done\n"); fprintf(stderr,"off=%d,st=%d\n",ooff,ost); if(cc->chunks * CHUNKSIZE < off + st){ return 0; } fprintf(stderr,"start reading now\n"); int starti = off/CHUNKSIZE; int buflen = min(st,CHUNKSIZE - off % CHUNKSIZE); 
ppfs_conn_entry cs; cs.sockfd = -1; while(st > 0){ uint64_t chunkid = cc->chunklist[starti]; ppacket* p = createpacket_s(8,CLTOMD_LOOKUP_CHUNK,-1); uint8_t* ptr = p->startptr + HEADER_LEN; put64bit(&ptr,chunkid); sendpacket(e->sockfd,p); free(p); p = receivepacket(e->sockfd); const uint8_t* ptr2 = p->startptr; int status = get32bit(&ptr2); printf("status:%d\n",status); if(status == 0){ int csip = get32bit(&ptr2); printf("cid:%lld,csip:%X\n",chunkid,csip); if(cs.sockfd != -1 && cs.peerip != csip){ tcpclose(cs.sockfd); } if(cs.sockfd == -1){ if(serv_connect(&cs,csip,CS_PORT) < 0){ return -1; } } } else { return -1; } fprintf(stderr,"chunkid=%lld,off=%d,buflen=%d\n",chunkid,off,buflen); p = createpacket_s(8+4+4,CLTOCS_READ_CHUNK,-1); ptr = p->startptr + HEADER_LEN; put64bit(&ptr,chunkid); put32bit(&ptr,off % CHUNKSIZE); put32bit(&ptr,buflen); sendpacket(cs.sockfd,p); free(p); p = receivepacket(cs.sockfd); ptr2 = p->startptr; status = get32bit(&ptr2); printf("status=%d\n",status); if(status == 0){ int rlen = get32bit(&ptr2); nread += rlen; printf("rlen=%d\n",rlen); memcpy(rbuf,ptr2,rlen); rbuf += rlen; } else { return -1; } st -= buflen; off += buflen; starti = off/CHUNKSIZE; buflen = min(st,CHUNKSIZE - off % CHUNKSIZE); } return nread; }
/*
 * Handles MLTOMA_DOWNLOAD_START: selects the file a metalogger wants to
 * download and answers with MATOML_DOWNLOAD_START.
 *
 * The 1-byte payload is the file number:
 *    1 - opens "metadata.mfs.back" plus both changelogs ("changelog.0.mfs",
 *        "changelog.1.mfs"); the metadata file becomes the current file,
 *    2 - opens "sessions.mfs" as the current file,
 *   11 - makes the already opened first changelog the current file,
 *   12 - makes the already opened second changelog the current file.
 * Any other number, or a payload of the wrong length, sets eptr->mode to KILL.
 *
 * Reply: 8 bytes with the end offset of the current file as returned by
 * lseek(SEEK_END). When there is no current file the reply is 8 zero bytes
 * for files 11/12 and a single 0xff byte for files 1/2.
 */
void matomlserv_download_start(matomlserventry *eptr,const uint8_t *data,uint32_t length) {
	uint8_t which;
	uint64_t fsize;
	uint8_t *wptr;

	if (length!=1) {
		syslog(LOG_NOTICE,"MLTOMA_DOWNLOAD_START - wrong size (%"PRIu32"/1)",length);
		eptr->mode=KILL;
		return;
	}
	which = get8bit(&data);

	switch (which) {
	case 1:
	case 2:
		// a new download starts - drop everything left from the previous one
		if (eptr->metafd>=0) {
			close(eptr->metafd);
			eptr->metafd=-1;
		}
		if (eptr->chain1fd>=0) {
			close(eptr->chain1fd);
			eptr->chain1fd=-1;
		}
		if (eptr->chain2fd>=0) {
			close(eptr->chain2fd);
			eptr->chain2fd=-1;
		}
		if (which==1) {
			eptr->metafd = open("metadata.mfs.back",O_RDONLY);
			eptr->chain1fd = open("changelog.0.mfs",O_RDONLY);
			eptr->chain2fd = open("changelog.1.mfs",O_RDONLY);
		} else {
			eptr->metafd = open("sessions.mfs",O_RDONLY);
		}
		break;
	case 11:
		// hand the first changelog descriptor over to metafd
		if (eptr->metafd>=0) {
			close(eptr->metafd);
		}
		eptr->metafd = eptr->chain1fd;
		eptr->chain1fd = -1;
		break;
	case 12:
		// hand the second changelog descriptor over to metafd
		if (eptr->metafd>=0) {
			close(eptr->metafd);
		}
		eptr->metafd = eptr->chain2fd;
		eptr->chain2fd = -1;
		break;
	default:
		eptr->mode=KILL;
		return;
	}

	if (eptr->metafd>=0) {
		fsize = lseek(eptr->metafd,0,SEEK_END);
		wptr = matomlserv_createpacket(eptr,MATOML_DOWNLOAD_START,8);
		put64bit(&wptr,fsize);	// ok
		return;
	}

	// no current file
	if (which==11 || which==12) {
		wptr = matomlserv_createpacket(eptr,MATOML_DOWNLOAD_START,8);
		put64bit(&wptr,0);
	} else {
		wptr = matomlserv_createpacket(eptr,MATOML_DOWNLOAD_START,1);
		put8bit(&wptr,0xff);	// error
	}
}
/*
 * Loads the storage class section of a metadata file into sclasstab.
 *
 * mver selects the on-disk layout:
 *   - mver <  0x16: the section starts with 26 length-prefixed label
 *                   descriptions, which are skipped,
 *   - mver != 0x10: one byte with the number of or-groups follows
 *                   (0x10 always uses one or-group),
 *   - record header (after sclassid:16):
 *       0x10,0x11,0x13: create_labelscnt:8
 *       0x12:           create_labelscnt:8 chunkcount:32 + 4 skipped bytes
 *       0x14:           create_mode:8 create_labelscnt:8 keep_labelscnt:8
 *       0x15:           create_mode:8 arch_delay:16 and three label counts
 *       >0x15:          nleng:8 admin_only:8 create_mode:8 arch_delay:16 and
 *                       three label counts, followed by nleng bytes of name
 *   - after the header come the label masks (4 bytes * or-groups per label)
 *     and, for 0x12 only, chunkcount*8 bytes that are skipped.
 * Records end with an all-zero header.
 *
 * With ignoreflag set, records with a bad or repeated class id are logged and
 * skipped instead of failing the load.
 *
 * Returns 1 on success and -1 on a read error or malformed data.
 */
int sclass_load(bio *fd,uint8_t mver,int ignoreflag) {
	uint8_t *databuff = NULL;
	const uint8_t *ptr;
	uint32_t labelmask;
	uint32_t chunkcount;
	uint16_t sclassid;
	uint16_t arch_delay;
	uint8_t create_mode;
	uint8_t create_labelscnt;
	uint8_t keep_labelscnt;
	uint8_t arch_labelscnt;
	uint8_t descrleng;
	uint8_t nleng;
	uint8_t admin_only;
	uint8_t name[MAXSCLASSNLENG];
	uint8_t i,j;
	uint8_t orgroup;
	uint8_t hdrleng;

	if (mver<0x16) { // skip label descriptions
		for (i=0 ; i<26 ; i++) {
			if (bio_read(fd,&descrleng,1)!=1) {
				int err = errno;
				fputc('\n',stderr);
				errno = err;
				mfs_errlog(LOG_ERR,"loading storage class data: read error");
				return -1;
			}
			if (descrleng>128) {
				mfs_syslog(LOG_ERR,"loading storage class data: description too long");
				return -1;
			}
			bio_skip(fd,descrleng);
		}
	}
	if (mver==0x10) {
		orgroup = 1;
	} else {
		if (bio_read(fd,&orgroup,1)!=1) {
			int err = errno;
			fputc('\n',stderr);
			errno = err;
			mfs_errlog(LOG_ERR,"loading storage class: read error");
			return -1;
		}
		if (orgroup>MASKORGROUP) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading storage class data: too many or-groups - ignore");
			} else {
				mfs_syslog(LOG_ERR,"loading storage class data: too many or-groups");
				return -1;
			}
		}
	}
	if (orgroup<1) {
		mfs_syslog(LOG_ERR,"loading storage class data: zero or-groups !!!");
		return -1;
	}
	// room for three label sets of up to 9 labels, 4 bytes per or-group each
	databuff = malloc(3U*9U*4U*(uint32_t)orgroup);
	passert(databuff);
	hdrleng = (mver==0x12)?11:(mver<=0x13)?3:(mver<=0x14)?5:(mver<=0x15)?8:10;
	while (1) {
		if (bio_read(fd,databuff,hdrleng)!=hdrleng) {
			int err = errno;
			fputc('\n',stderr);
			errno = err;
			mfs_errlog(LOG_ERR,"loading storage class data: read error");
			free(databuff);
			databuff=NULL;
			return -1;
		}
		ptr = databuff;
		sclassid = get16bit(&ptr);
		if (mver>0x15) {
			nleng = get8bit(&ptr);
			admin_only = get8bit(&ptr);
			create_mode = get8bit(&ptr);
			arch_delay = get16bit(&ptr);
			create_labelscnt = get8bit(&ptr);
			keep_labelscnt = get8bit(&ptr);
			arch_labelscnt = get8bit(&ptr);
			chunkcount = 0;
		} else if (mver>0x14) {
			nleng = 0;
			admin_only = 0;
			create_mode = get8bit(&ptr);
			arch_delay = get16bit(&ptr);
			create_labelscnt = get8bit(&ptr);
			keep_labelscnt = get8bit(&ptr);
			arch_labelscnt = get8bit(&ptr);
			chunkcount = 0;
		} else if (mver>0x13) {
			nleng = 0;
			admin_only = 0;
			create_mode = get8bit(&ptr);
			create_labelscnt = get8bit(&ptr);
			keep_labelscnt = get8bit(&ptr);
			arch_labelscnt = keep_labelscnt;
			arch_delay = 0;
			chunkcount = 0;
		} else {
			nleng = 0;
			admin_only = 0;
			create_labelscnt = get8bit(&ptr);
			keep_labelscnt = create_labelscnt;
			arch_labelscnt = create_labelscnt;
			create_mode = CREATE_MODE_STD;
			arch_delay = 0;
			if (mver==0x12) {
				chunkcount = get32bit(&ptr);
				ptr+=4;
			} else {
				chunkcount = 0;
			}
		}
		if (nleng==0) {
			// no stored name: generate one for non-predefined classes
			if (sclassid>=FIRSTSCLASSID) {
				nleng = snprintf((char*)name,MAXSCLASSNLENG,"sclass_%"PRIu32,(uint32_t)(sclassid+1-FIRSTSCLASSID));
			} else {
				nleng = 0;
			}
		} else {
			if (bio_read(fd,name,nleng)!=nleng) {
				int err = errno;
				fputc('\n',stderr);
				errno = err;
				mfs_errlog(LOG_ERR,"loading storage class data: read error");
				free(databuff);
				databuff=NULL;
				return -1;
			}
		}
		// all-zero record terminates the section
		if (sclassid==0 && create_labelscnt==0 && keep_labelscnt==0 && arch_labelscnt==0 && chunkcount==0 && arch_delay==0) {
			break;
		}
		if (create_labelscnt==0 || create_labelscnt>MAXLABELSCNT || keep_labelscnt==0 || keep_labelscnt>MAXLABELSCNT || arch_labelscnt==0 || arch_labelscnt>MAXLABELSCNT) {
			mfs_arg_syslog(LOG_ERR,"loading storage class data: data format error (sclassid: %"PRIu16" ; create_mode: %"PRIu8" ; create_labelscnt: %"PRIu8" ; keep_labelscnt: %"PRIu8" ; arch_labelscnt: %"PRIu8" ; arch_delay: %"PRIu16")",sclassid,create_mode,create_labelscnt,keep_labelscnt,arch_labelscnt,arch_delay);
			free(databuff);
			databuff = NULL;
			return -1;
		}
		if (sclassid==0 || sclassid>=MAXSCLASS || nleng==0) {
			if (ignoreflag) {
				mfs_arg_syslog(LOG_ERR,"loading storage class data: bad sclassid (%"PRIu16") - ignore",sclassid);
				// skip the label masks that belong to this record
				if (mver>0x14) {
					bio_skip(fd,(create_labelscnt+keep_labelscnt+arch_labelscnt)*4*orgroup);
				} else if (mver>0x13) {
					bio_skip(fd,(create_labelscnt+keep_labelscnt)*4*orgroup);
				} else {
					bio_skip(fd,(create_labelscnt)*4*orgroup);
				}
				// ... and its chunk list, exactly like the other paths do;
				// otherwise the next header would be read from the middle of it
				if (chunkcount>0) {
					bio_skip(fd,chunkcount*8);
				}
				continue;
			} else {
				mfs_arg_syslog(LOG_ERR,"loading storage class data: bad sclassid (%"PRIu16")",sclassid);
				free(databuff);
				databuff=NULL;
				return -1;
			}
		}
		if (mver>0x14) {
			if (bio_read(fd,databuff,(create_labelscnt+keep_labelscnt+arch_labelscnt)*4*orgroup)!=(create_labelscnt+keep_labelscnt+arch_labelscnt)*4*orgroup) {
				int err = errno;
				fputc('\n',stderr);
				errno = err;
				mfs_errlog(LOG_ERR,"loading storage class data: read error");
				free(databuff);
				databuff=NULL;
				return -1;
			}
		} else if (mver>0x13) {
			if (bio_read(fd,databuff,(create_labelscnt+keep_labelscnt)*4*orgroup)!=(create_labelscnt+keep_labelscnt)*4*orgroup) {
				int err = errno;
				fputc('\n',stderr);
				errno = err;
				mfs_errlog(LOG_ERR,"loading storage class data: read error");
				free(databuff);
				databuff=NULL;
				return -1;
			}
		} else {
			if (bio_read(fd,databuff,create_labelscnt*4*orgroup)!=create_labelscnt*4*orgroup) {
				int err = errno;
				fputc('\n',stderr);
				errno = err;
				mfs_errlog(LOG_ERR,"loading storage class data: read error");
				free(databuff);
				databuff=NULL;
				return -1;
			}
		}
		// a non-predefined slot that already has a name was loaded before
		if (sclassid>=FIRSTSCLASSID && sclasstab[sclassid].nleng>0) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading storage class data: repeated sclassid - ignore");
				if (chunkcount>0) {
					bio_skip(fd,chunkcount*8);
				}
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading storage class data: repeated sclassid");
				free(databuff);
				databuff=NULL;
				return -1;
			}
		}
		ptr = databuff;
		for (i=0 ; i<create_labelscnt ; i++) {
			for (j=0 ; j<MASKORGROUP ; j++) {
				if (j<orgroup) {
					labelmask = get32bit(&ptr);
				} else {
					labelmask = 0;
				}
				sclasstab[sclassid].create_labelmasks[i][j] = labelmask;
			}
		}
		for (i=0 ; i<keep_labelscnt ; i++) {
			for (j=0 ; j<MASKORGROUP ; j++) {
				if (mver>0x13) {
					if (j<orgroup) {
						labelmask = get32bit(&ptr);
					} else {
						labelmask = 0;
					}
				} else {
					// older formats have no separate keep set - reuse create masks
					labelmask = sclasstab[sclassid].create_labelmasks[i][j];
				}
				sclasstab[sclassid].keep_labelmasks[i][j] = labelmask;
			}
		}
		for (i=0 ; i<arch_labelscnt ; i++) {
			for (j=0 ; j<MASKORGROUP ; j++) {
				if (mver>0x14) {
					if (j<orgroup) {
						labelmask = get32bit(&ptr);
					} else {
						labelmask = 0;
					}
				} else {
					// older formats have no separate archive set - reuse keep masks
					labelmask = sclasstab[sclassid].keep_labelmasks[i][j];
				}
				sclasstab[sclassid].arch_labelmasks[i][j] = labelmask;
			}
		}
		sclasstab[sclassid].create_mode = create_mode;
		sclasstab[sclassid].arch_delay = arch_delay;
		sclasstab[sclassid].create_labelscnt = create_labelscnt;
		sclasstab[sclassid].keep_labelscnt = keep_labelscnt;
		sclasstab[sclassid].arch_labelscnt = arch_labelscnt;
		sclasstab[sclassid].admin_only = admin_only;
		sclasstab[sclassid].nleng = nleng;
		memcpy(sclasstab[sclassid].name,name,nleng);
		sclasstab[sclassid].files = 0;
		sclasstab[sclassid].directories = 0;
		sclass_fix_has_labels_fields(sclassid);
		if (sclassid>=firstneverused) {
			firstneverused = sclassid+1;
		}
		if (chunkcount>0) {
			bio_skip(fd,chunkcount*8);
		}
	}
	free(databuff);
	databuff=NULL;
	return 1;
}