Ejemplo n.º 1
0
/// =-=-=-=-=-=-=-
/// @brief local function to replicate a new copy for
///        proc_results_for_rebalance
    static
    error repl_for_rebalance(
        rsComm_t*          _comm,
        const std::string& _obj_path,
        const std::string& _current_resc,
        const std::string& _src_hier,
        const std::string& _dst_hier,
        const std::string& _src_resc,
        const std::string& _dst_resc,
        int                _mode ) {
        // =-=-=-=-=-=-=-
        // generate a resource hierachy that ends at this resource for pdmo
        hierarchy_parser parser;
        parser.set_string( _src_hier );
        std::string sub_hier;
        parser.str( sub_hier, _current_resc );

        // =-=-=-=-=-=-=-
        // create a data obj input struct to call rsDataObjRepl which given
        // the _stage_sync_kw will either stage or sync the data object
        dataObjInp_t data_obj_inp;
        bzero( &data_obj_inp, sizeof( data_obj_inp ) );
        rstrcpy( data_obj_inp.objPath, _obj_path.c_str(), MAX_NAME_LEN );
        data_obj_inp.createMode = _mode;
        addKeyVal( &data_obj_inp.condInput, RESC_HIER_STR_KW,      _src_hier.c_str() );
        addKeyVal( &data_obj_inp.condInput, DEST_RESC_HIER_STR_KW, _dst_hier.c_str() );
        addKeyVal( &data_obj_inp.condInput, RESC_NAME_KW,          _src_resc.c_str() );
        addKeyVal( &data_obj_inp.condInput, DEST_RESC_NAME_KW,     _dst_resc.c_str() );
        addKeyVal( &data_obj_inp.condInput, IN_PDMO_KW,             sub_hier.c_str() );

        // =-=-=-=-=-=-=-
        // process the actual call for replication
        transferStat_t* trans_stat = NULL;
        int repl_stat = rsDataObjRepl( _comm, &data_obj_inp, &trans_stat );
        if ( repl_stat < 0 ) {
            std::stringstream msg;
            msg << "Failed to replicate the data object ["
                << _obj_path
                << "]";
            return ERROR( repl_stat, msg.str() );
        }

        return SUCCESS();

    } // repl_for_rebalance
Ejemplo n.º 2
0
int
_rsDataObjPut( rsComm_t *rsComm, dataObjInp_t *dataObjInp,
               bytesBuf_t *dataObjInpBBuf, portalOprOut_t **portalOprOut ) {
    int status;
    int l1descInx;
    int retval;
    openedDataObjInp_t dataObjCloseInp;
    int allFlag;
    transferStat_t *transStat = NULL;
    dataObjInp_t replDataObjInp;

    if ( getValByKey( &dataObjInp->condInput, ALL_KW ) != NULL ) {
        allFlag = 1;
    }
    else {
        allFlag = 0;
    }

    if ( getValByKey( &dataObjInp->condInput, DATA_INCLUDED_KW ) != NULL ) {
        /* single buffer put */
        status = l3DataPutSingleBuf( rsComm, dataObjInp, dataObjInpBBuf );
        if ( status >= 0 && allFlag == 1 ) {
            /* update the rest of copies */
            addKeyVal( &dataObjInp->condInput, UPDATE_REPL_KW, "" );
            status = rsDataObjRepl( rsComm, dataObjInp, &transStat );
            if ( transStat != NULL ) {
                free( transStat );
            }
        }
        if ( status >= 0 ) {
            status = applyRuleForPostProcForWrite(
                    rsComm, dataObjInpBBuf, dataObjInp->objPath );
            if ( status >= 0 ) {
                status = 0;
            }
        }
        return status;
    }

    /* get down here. will do parallel I/O */
    /* so that mmap will work */
    dataObjInp->openFlags |= O_RDWR;
    l1descInx = rsDataObjCreate( rsComm, dataObjInp );

    if ( l1descInx < 0 ) {
        return l1descInx;
    }

    L1desc[l1descInx].oprType = PUT_OPR;
    L1desc[l1descInx].dataSize = dataObjInp->dataSize;

    if ( getStructFileType( L1desc[l1descInx].dataObjInfo->specColl ) >= 0 ) { // JMC - backport 4682
        *portalOprOut = ( portalOprOut_t * ) malloc( sizeof( portalOprOut_t ) );
        bzero( *portalOprOut,  sizeof( portalOprOut_t ) );
        ( *portalOprOut )->l1descInx = l1descInx;
        return l1descInx;
    }


    status = preProcParaPut( rsComm, l1descInx, portalOprOut );

    if ( status < 0 ) {
        memset( &dataObjCloseInp, 0, sizeof( dataObjCloseInp ) );
        dataObjCloseInp.l1descInx = l1descInx;
        L1desc[l1descInx].oprStatus = status;
        rsDataObjClose( rsComm, &dataObjCloseInp );
        return status;
    }

    if ( allFlag == 1 ) {
        /* need to save dataObjInp. get freed in sendAndRecvBranchMsg */
        memset( &replDataObjInp, 0, sizeof( replDataObjInp ) );
        rstrcpy( replDataObjInp.objPath, dataObjInp->objPath, MAX_NAME_LEN );
        addKeyVal( &replDataObjInp.condInput, UPDATE_REPL_KW, "" );
        addKeyVal( &replDataObjInp.condInput, ALL_KW, "" );
    }
    /* return portalOprOut to the client and wait for the rcOprComplete
     * call. That is when the parallel I/O is done */
    retval = sendAndRecvBranchMsg( rsComm, rsComm->apiInx, status,
                                   ( void * ) * portalOprOut, NULL );

    if ( retval < 0 ) {
        memset( &dataObjCloseInp, 0, sizeof( dataObjCloseInp ) );
        dataObjCloseInp.l1descInx = l1descInx;
        L1desc[l1descInx].oprStatus = retval;
        rsDataObjClose( rsComm, &dataObjCloseInp );
        if ( allFlag == 1 ) {
            clearKeyVal( &replDataObjInp.condInput );
        }
    }
    else if ( allFlag == 1 ) {
        status = rsDataObjRepl( rsComm, &replDataObjInp, &transStat );
        if ( transStat != NULL ) {
            free( transStat );
        }
        clearKeyVal( &replDataObjInp.condInput );
    }

    /* already send the client the status */
    return SYS_NO_HANDLER_REPLY_MSG;

}
Ejemplo n.º 3
0
static int process_single_obj( rsComm_t *conn, char *parColl, char *fileName,
                               int required_num_replicas, char *grpRescForReplication, char *emailToNotify ) {
    genQueryInp_t genQueryInp;
    int i;
    genQueryOut_t *genQueryOut = NULL;
    sqlResult_t *replNumStruct, *rescStruct, *dataPathStruct, *chkSumStruct;
    char *replNum, *rescName, *dataPathName, *chkSum;
    int t;

    int i1a[10];
    int i1b[10];
    int i2a[10];
    char *condVal[2];
    char v1[200], v2[200];
    char vault_path[2048];

    char *chksum_str = NULL;
    dataObjInp_t myDataObjInp;
    dataObjInfo_t *myDataObjInfo = NULL;
    unregDataObj_t myUnregDataObjInp;
    transferStat_t *transStat = NULL;

    ReplicaCheckStatusStruct *pReplicaStatus;
    int nReplicas;

    char tmpstr[1024];

    int at_least_one_copy_is_good = 0;
    int newN;
    int rn;

    int validKwFlags;
    char *outBadKeyWd;
    msParam_t msGrpRescStr;   /* it can be a single resc name or a paired values. */

    /* fprintf(stderr,"msiAutoReplicateService():process_single_obj()\n");  */

    memset( &genQueryInp, 0, sizeof( genQueryInp_t ) );
    i1a[0] = COL_DATA_REPL_NUM;
    i1b[0] = 0;
    i1a[1] = COL_D_RESC_NAME;
    i1b[1] = 0;
    i1a[2] = COL_D_RESC_GROUP_NAME;
    i1b[2] = 0;
    i1a[3] = COL_D_DATA_PATH;
    i1b[3] = 0;
    i1a[4] = COL_D_DATA_CHECKSUM;
    i1b[4] = 0;
    genQueryInp.selectInp.inx = i1a;
    genQueryInp.selectInp.value = i1b;
    genQueryInp.selectInp.len = 5;

    i2a[0] = COL_COLL_NAME;
    i2a[1] = COL_DATA_NAME;
    genQueryInp.sqlCondInp.inx = i2a;
    sprintf( v1, "='%s'", parColl );
    condVal[0] = v1;
    sprintf( v2, "='%s'", fileName );
    condVal[1] = v2;
    genQueryInp.sqlCondInp.value = condVal;
    genQueryInp.sqlCondInp.len = 2;

    genQueryInp.maxRows = 10;
    genQueryInp.continueInx = 0;
    t = rsGenQuery( conn, &genQueryInp, &genQueryOut );
    if ( t < 0 ) {
        rodsLog( LOG_NOTICE, "msiAutoReplicateService():process_single_obj(): rsGenQuery failed errocode=%d", t );
        if ( t == CAT_NO_ROWS_FOUND ) { /* no data is found */
            return 0;
        }

        return( t );
    }

    if ( genQueryOut->rowCnt <= 0 ) {
        rodsLog( LOG_ERROR, "msiAutoReplicateService():process_single_obj(): return 0 record from calling rsGenQuery() for objid=%s/%s", parColl, fileName );
        return 0;
    }

    nReplicas = genQueryOut->rowCnt;
    pReplicaStatus = ( ReplicaCheckStatusStruct * )calloc( nReplicas, sizeof( ReplicaCheckStatusStruct ) );

    for ( i = 0; i < nReplicas; i++ ) {
        pReplicaStatus[i].registered = 0;
        pReplicaStatus[i].checksum[0] = '\0';
    }

    for ( i = 0; i < genQueryOut->rowCnt; i++ ) {
        replNumStruct = getSqlResultByInx( genQueryOut, COL_DATA_REPL_NUM );
        replNum = &replNumStruct->value[replNumStruct->len * i];
        pReplicaStatus[i].repl_num = atoi( replNum );

        rescStruct = getSqlResultByInx( genQueryOut, COL_D_RESC_NAME );
        rescName = &rescStruct->value[rescStruct->len * i];

        getSqlResultByInx( genQueryOut, COL_D_RESC_GROUP_NAME );

        dataPathStruct = getSqlResultByInx( genQueryOut, COL_D_DATA_PATH );
        dataPathName = &dataPathStruct->value[dataPathStruct->len * i];

        chkSumStruct = getSqlResultByInx( genQueryOut, COL_D_DATA_CHECKSUM );
        chkSum = &chkSumStruct->value[chkSumStruct->len * i];

        vault_path[0] = '\0';
        t = get_resource_path( conn, rescName, vault_path );
        if ( t < 0 ) {
            rodsLog( LOG_NOTICE, "msiAutoReplicateService():process_single_obj():get_resource_path failed, status=%d", t );
            free( pReplicaStatus ); // JMc cppcheck - leak
            return t;
        }

        if ( strncmp( dataPathName, vault_path, strlen( vault_path ) ) != 0 ) {
            /* fprintf(stderr,"AB1-> %s/%s, v=%s, is a registered copy.\n", parColl,fileName, replNum); */
            pReplicaStatus[i].registered = 1;
        }
        else {
            pReplicaStatus[i].registered = 0;
        }

        if ( ( chkSum != NULL ) && ( strlen( chkSum ) > 0 ) ) {
            strcpy( pReplicaStatus[i].checksum, chkSum );
        }
    }
    freeGenQueryOut( &genQueryOut );

    /* check replica status */
    at_least_one_copy_is_good = 0;
    for ( i = 0; i < nReplicas; i++ ) {
        /* check the file existence first */
        memset( &myDataObjInp, 0, sizeof( dataObjInp_t ) );
        snprintf( myDataObjInp.objPath, MAX_NAME_LEN, "%s/%s", parColl, fileName );
        myDataObjInp.openFlags = O_RDONLY;
        sprintf( tmpstr, "%d", pReplicaStatus[i].repl_num );
        addKeyVal( &myDataObjInp.condInput, REPL_NUM_KW, tmpstr );
        rn = pReplicaStatus[i].repl_num;
        t = rsDataObjOpen( conn, &myDataObjInp );
        if ( t < 0 ) {
            /* fprintf(stderr,"%d. %s/%s, %d failed to open and has checksum status=%d\n", i, parColl, fileName, rn, t); */
            if ( t == SYS_OUT_OF_FILE_DESC ) {
                free( pReplicaStatus ); // JMc cppcheck - leak
                return t;
            }
            else {
                pReplicaStatus[i].chksum_status = t;
            }
        }
        else {
            openedDataObjInp_t myDataObjCloseInp;
            memset( &myDataObjCloseInp, 0, sizeof( openedDataObjInp_t ) );
            myDataObjCloseInp.l1descInx = t;
            t = rsDataObjClose( conn, &myDataObjCloseInp );

            chksum_str =  NULL;
            /* compute checksum rsDataObjChksum() */
            memset( &myDataObjInp, 0, sizeof( dataObjInp_t ) );
            sprintf( myDataObjInp.objPath, "%s/%s", parColl, fileName );
            addKeyVal( &myDataObjInp.condInput, FORCE_CHKSUM_KW, "" );
            sprintf( tmpstr, "%d", pReplicaStatus[i].repl_num );
            addKeyVal( &myDataObjInp.condInput, REPL_NUM_KW, tmpstr );
            t = rsDataObjChksum( conn, &myDataObjInp, &chksum_str );
            pReplicaStatus[i].chksum_status = t;     /* t == USER_CHKSUM_MISMATCH means a bad copy */
            /* fprintf(stderr,"%d. %s/%s, %d has checksum status=%d\n", i, parColl, fileName, rn, t); */
            if ( t >= 0 ) {
                if ( strlen( pReplicaStatus[i].checksum ) > 0 ) {
                    if ( strcmp( pReplicaStatus[i].checksum, chksum_str ) != 0 ) { /* mismatch */
                        pReplicaStatus[i].chksum_status = USER_CHKSUM_MISMATCH;
                    }
                    else {
                        at_least_one_copy_is_good = 1;
                    }
                }
                else {
                    at_least_one_copy_is_good = 1;
                }
            }
        }
    }

    /* if there is none good copies left. In some casee, the checksum failed due
       to the fact that the server is down. We leave this case to be taken care of next time. */
    if ( at_least_one_copy_is_good == 0 ) {
        rodsLog( LOG_ERROR, "msiAutoReplicateService():process_single_obj(): Obj='%s/%s': Warning: The system detects that all copies might be corrupted.", parColl, fileName );

#ifndef windows_platform
        if ( ( emailToNotify != NULL ) && ( strlen( emailToNotify ) > 0 ) ) {
            char msg_sub[1024], msg_body[1024];
            strcpy( msg_sub, "iRODS msiAutoReplicateService() error" );
            sprintf( msg_body, "msiAutoReplicateService():process_single_obj(): Obj='%s/%s': at least one storage server is down or all copies are corrupted.",
                     parColl, fileName );
            UnixSendEmail( emailToNotify, msg_sub, msg_body );
        }
#endif

        free( pReplicaStatus ); // JMc cppcheck - leak
        return 0;
    }

    /* since we have at least one copy is good. We delete those bad copies (USER_CHKSUM_MISMATCH). */
    newN = nReplicas;
    for ( i = 0; i < nReplicas; i++ ) {
        memset( &myDataObjInp, 0, sizeof( dataObjInp_t ) );
        sprintf( myDataObjInp.objPath, "%s/%s", parColl, fileName );
        sprintf( tmpstr, "%d", pReplicaStatus[i].repl_num );
        addKeyVal( &myDataObjInp.condInput, REPL_NUM_KW, tmpstr );
        rn = pReplicaStatus[i].repl_num;
        if ( pReplicaStatus[i].registered == 1 ) {
            /* here is the catch. iRODS. */
            int adchksum;
            /* fprintf(stderr,"CD->%s/%s, v=%d, is a registered copy.\n", parColl, fileName, rn); */
            adchksum = ( ( int )( pReplicaStatus[i].chksum_status / 1000 ) ) * 1000;
            if ( ( pReplicaStatus[i].chksum_status == USER_CHKSUM_MISMATCH ) || ( pReplicaStatus[i].chksum_status == UNIX_FILE_OPEN_ERR ) || ( adchksum == UNIX_FILE_OPEN_ERR ) )
                /* USER_CHKSUM_MISMATCH  -> indicates the registered file is changed.
                 * UNIX_FILE_OPEN_ERR --> indicates the registered file is removed by original owner.
                 * -510002 is transformed from UNIX open error.
                 */
            {
                rodsLog( LOG_NOTICE, "msiAutoReplicateService():process_single_obj(): registered copy will be removed: %s, repl=%d", myDataObjInp.objPath, rn );
                t = getDataObjInfo( conn, &myDataObjInp, &myDataObjInfo, NULL, 0 );
                if ( t >= 0 ) {
                    myUnregDataObjInp.dataObjInfo = myDataObjInfo;
                    myUnregDataObjInp.condInput = &myDataObjInp.condInput;
                    t = rsUnregDataObj( conn, &myUnregDataObjInp );
                    if ( t >= 0 ) {
                        newN = newN - 1;
                    }
                    else  {
                        rodsLog( LOG_ERROR, "msiAutoReplicateService():rsUnregDataObj(): failed for %s/%s:%d. erStat=%d", parColl, fileName, rn, t );
                        return t;
                    }
                }
                else {
                    rodsLog( LOG_ERROR, "msiAutoReplicateService():getDataObjInfo(): failed for %s/%s:%d. erStat=%d", parColl, fileName, rn, t );
                    return t;
                }
            }
            else {
                rodsLog( LOG_ERROR, "%s:%d, the registered copy has errored checksum status=%d.", myDataObjInp.objPath, rn, pReplicaStatus[i].chksum_status );
                return t;
            }
        }
        else { /* the data file is in vault */
            if ( pReplicaStatus[i].chksum_status == USER_CHKSUM_MISMATCH ) {
                t = rsDataObjUnlink( conn, &myDataObjInp );
                if ( t >= 0 ) {
                    newN = newN - 1;
                }
                else  {
                    rodsLog( LOG_ERROR, "msiAutoReplicateService():rsDataObjUnlink() for %s:%d failed. errStat=%d", myDataObjInp.objPath, rn, t );
                    free( pReplicaStatus ); // JMc cppcheck - leak
                    return t;
                }
            }
        }
    }

    fillStrInMsParam( &msGrpRescStr, grpRescForReplication );

    /* make necessary copies based on the required number of copies */
    if ( newN < required_num_replicas ) {
        rodsLog( LOG_NOTICE, "msiAutoReplicateService():process_single_obj(): making necessary %d copies as required.", ( required_num_replicas - newN ) );
        for ( i = 0; i < ( required_num_replicas - newN ); i++ ) {
            memset( &myDataObjInp, 0, sizeof( dataObjInp_t ) );
            snprintf( myDataObjInp.objPath, MAX_NAME_LEN, "%s/%s", parColl, fileName );
            /* addKeyVal(&myDataObjInp.condInput, DEST_RESC_NAME_KW, grpRescForReplication); */
            validKwFlags = OBJ_PATH_FLAG | DEST_RESC_NAME_FLAG | NUM_THREADS_FLAG |
                           BACKUP_RESC_NAME_FLAG | RESC_NAME_FLAG | UPDATE_REPL_FLAG |
                           REPL_NUM_FLAG | ALL_FLAG | ADMIN_FLAG | VERIFY_CHKSUM_FLAG |
                           RBUDP_TRANSFER_FLAG | RBUDP_SEND_RATE_FLAG | RBUDP_PACK_SIZE_FLAG;
            t = parseMsKeyValStrForDataObjInp( &msGrpRescStr, &myDataObjInp, DEST_RESC_NAME_KW, validKwFlags, &outBadKeyWd );
            if ( t < 0 ) {
                if ( outBadKeyWd != NULL ) {
                    rodsLog( LOG_ERROR, "msiAutoReplicateService():rsDataObjRepl(): input keyWd - %s error. status = %d", outBadKeyWd, t );
                    free( outBadKeyWd );
                }
                else {
                    rodsLog( LOG_ERROR, "msiAutoReplicateService():rsDataObjRepl(): input msKeyValStr error. status = %d", t );
                }

                free( pReplicaStatus ); // JMc cppcheck - leak
                return t;
            }

            t = rsDataObjRepl( conn, &myDataObjInp, &transStat );
            if ( t < 0 ) {
                rodsLog( LOG_ERROR, "msiAutoReplicateService():rsDataObjRepl() failed for %s/%s:%d into '%s'. err code=%d.", parColl, fileName, rn, grpRescForReplication, t );
                repl_storage_error = 1;
                free( pReplicaStatus ); // JMc cppcheck - leak
                return t;
            }
            if ( transStat != NULL ) {
                free( transStat );
            }
        }
    }

    free( pReplicaStatus ); // JMC cppcheck - leak
    return 0;
}