Example No. 1
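// Forward pass of the projection layer (in effect an embedding-table lookup):
// for each of the n_order input positions, the indices in that column of
// `feature` select rows of m_projection via GetRows, and the gathered rows are
// written into the corresponding slice of `output`.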
void ProjectionLayer::Compute( const MatrixBase& feature, MatrixBase* output ) {
    int n_order = feature.Columns();
    int n_example = feature.Rows();
    int n_project = m_projection.Columns();

    assert( feature.Rows() == output->Rows() );
    assert( output->Columns() == n_order * m_projection.Columns() );

    for ( int i = 0; i < n_order; i++ ) {
        SubMatrix gram( *output, 
                        MatrixRange(0, n_example, i * n_project, (i + 1) * n_project ) );
        SubMatrix index( feature,
                         MatrixRange(0, n_example, i, i + 1) );
        gram.GetRows( m_projection, index );
    }
}    // end of Compute
Example No. 2
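// Backward pass of the noise-contrastive-estimation (NCE) output layer.
// When NCE is enabled, only the columns for the sampled target-and-noise words
// are active: their NCE error is computed in place against the unigram noise
// distribution, propagated to inDiff through the partial weight matrix, and the
// partial weight/bias gradients are (optionally clipped or L2-normalised and)
// scattered into the full weight and bias via AddColumns. Otherwise the layer
// falls back to plain softmax back-propagation.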
void NCELayer::BackPropagate( MatrixBase& out, 
                              const MatrixBase& in,
                              const MatrixBase& target,
                              float learningRate,
                              MatrixBase* inDiff ) {
    if ( m_nce_enabled ) {
        SubMatrix active( out, 
                          MatrixRange( 0, out.Rows(), 0, m_target_and_noise.Rows() ) );
        active.DiffNCE( active, m_unigram );

        if ( NULL != inDiff ) {
            inDiff->Sgemm( 0.0f, 1.0f, active, CUBLAS_OP_N, m_partial_weight, CUBLAS_OP_T );
        }

        m_partial_w_gradient.Reshape( m_partial_weight.Rows(), m_partial_weight.Columns() );
        m_partial_w_gradient.Sgemm( 0.0f, 1.0f, in, CUBLAS_OP_T, active, CUBLAS_OP_N );

        m_partial_b_gradient.Reshape( m_partial_bias.Rows(), m_partial_bias.Columns() );
        m_partial_b_gradient.SumRowsOf( active, 0.0f, 1.0f );

        if ( m_norm_type == "Clip" ) {
            m_partial_w_gradient.Clip( -m_norm_param, m_norm_param );
            m_partial_b_gradient.Clip( -m_norm_param, m_norm_param );
        }

        if ( m_norm_type == "L2Norm" ) {
            float w_norm = m_partial_w_gradient.L2Norm();
            if ( w_norm > m_norm_param ) {
                m_partial_w_gradient.Scale( 1.0f / w_norm );
            }

            float b_norm = m_partial_b_gradient.L2Norm();
            if ( b_norm > m_norm_param ) {
                m_partial_b_gradient.Scale( 1.0f / b_norm );
            }
        }

        float avg_lr = -learningRate / (float)out.Rows();
        m_linear.m_weight.AddColumns( m_partial_w_gradient, m_target_and_noise, avg_lr );
        m_linear.m_bias.AddColumns( m_partial_b_gradient, m_target_and_noise, avg_lr );
    }
    else  {
        SoftmaxLayer::BackPropagate( out, in, target, learningRate, inDiff );
        // cout << "checkpoint of back-propagate" << endl;
    }
}
Example No. 3
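// Backward pass of a fully connected layer with momentum, optional weight decay,
// and optional gradient clipping / L2-norm scaling. The input gradient is
// outDiff * W^T; the weight and bias momentum buffers accumulate the averaged
// gradients and are then added to the parameters.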
void LinearLayer::BackPropagate( MatrixBase& outDiff, 
                                 const MatrixBase& in,
                                 const MatrixBase& out,
                                 float learningRate,
                                 MatrixBase* inDiff ) {
    if ( m_w_momentum.Rows() != m_weight.Rows() || m_w_momentum.Columns() != m_weight.Columns() ) {
        m_w_momentum.Reshape( m_weight.Rows(), m_weight.Columns(), kSetZero );
    }

    if ( m_b_momentum.Rows() != 1 || m_b_momentum.Columns() != m_bias.Columns() ) {
        m_b_momentum.Reshape( 1, m_bias.Columns(), kSetZero );
    }

    if ( NULL != inDiff ) {
        inDiff->Sgemm( 0.0f, 1.0f, outDiff, CUBLAS_OP_N, m_weight, CUBLAS_OP_T );    
    }

    float avg_lr = -learningRate / (float)outDiff.Rows();

    m_w_momentum.Sgemm( m_momentum, 
                        avg_lr,
                        in, 
                        CUBLAS_OP_T,
                        outDiff,
                        CUBLAS_OP_N );

    m_b_momentum.SumRowsOf( outDiff, m_momentum, avg_lr );

    if ( m_weight_decay != 0.0f ) {
        m_w_momentum.Add( 1.0f, -learningRate * m_weight_decay, m_weight );
        m_b_momentum.Add( 1.0f, -learningRate * m_weight_decay, m_bias );        
    }

    if ( m_norm_type == "Clip" ) {
        float range = m_norm_param * (-avg_lr);
        m_w_momentum.Clip( -range, range );
        m_b_momentum.Clip( -range, range );
    }

    if ( m_norm_type == "L2Norm" ) {
        float w_norm = m_w_momentum.L2Norm() * (-avg_lr);
        if ( w_norm > m_norm_param ) {
            m_w_momentum.Scale( 1.0f / w_norm );
        }

        float b_norm = m_b_momentum.L2Norm() * (-avg_lr);
        if ( b_norm > m_norm_param ) {
            m_b_momentum.Scale( 1.0f / b_norm );
        }
    }

    m_weight.Add( 1.0f, 1.0f, m_w_momentum );

    if ( m_has_bias ) {
        m_bias.Add( 1.0f, 1.0f, m_b_momentum );
    }
}    // end of BackPropagate
Example No. 4
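// Forward pass of the vectorised FSMN (feedforward sequential memory network)
// layer: a linear projection of the input, plus a memory block built by
// VfsmnMemory from the input, the learned filter coefficients, and the frame
// positions, projected through m_weight and passed through a ReLU non-linearity.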
void vFSMNLayer::Compute( const MatrixBase& feature, MatrixBase* output ) {
    m_linear.Compute( feature, output );

    m_memory.Reshape( feature.Rows(), feature.Columns() );
    m_memory.VfsmnMemory( feature, m_filter, m_position );

    output->Sgemm( 1.0f, 1.0f, m_memory, CUBLAS_OP_N, m_weight, CUBLAS_OP_N );
    output->ReLU( *output );
}
Example No. 5
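// Forward pass of the scalar FSMN layer: the memory block is formed by
// multiplying the input with a lower-triangular block-diagonal coefficient
// matrix (Strmm), then projected through m_weight and passed through ReLU,
// mirroring the vFSMN layer above.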
void sFSMNLayer::Compute( const MatrixBase& feature, MatrixBase* output ) {
    m_linear.Compute( feature, output );

    m_memory.Reshape( feature.Rows(), feature.Columns() );
    m_memory.Strmm( 1.0f, 
                    m_block_diagonal, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, 
                    feature, CUBLAS_OP_N );        // @xmb20160226
    // m_memory.Sgemm( 0.0f, 1.0f, m_block_diagonal, CUBLAS_OP_N, feature, CUBLAS_OP_N );

    output->Sgemm( 1.0f, 1.0f, m_memory, CUBLAS_OP_N, m_weight, CUBLAS_OP_N );
    output->ReLU( *output );
}
Example No. 6
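// Backward pass of the vectorised FSMN layer: the ReLU gradient is computed in
// place, propagated through the linear sub-layer, and the memory-to-output
// weight is updated with momentum (plus optional weight decay and gradient
// clipping / L2-norm scaling). The memory gradient is mapped back to the hidden
// input via ComputeVfsmnHiddenDiff, and the FSMN filter itself is updated with
// UpdateVfsmnFilter.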
void vFSMNLayer::BackPropagate( MatrixBase& outDiff, 
                                const MatrixBase& in,
                                const MatrixBase& out,
                                float learningRate,
                                MatrixBase* inDiff ) {
    outDiff.DiffReLU( out, outDiff );
    m_linear.BackPropagate( outDiff, in, Matrix(), learningRate, inDiff );

    if ( m_w_momentum.Rows() != m_weight.Rows() || 
         m_w_momentum.Columns() != m_weight.Columns() ) {
        m_w_momentum.Reshape( m_weight.Rows(), m_weight.Columns(), kSetZero );
    }    

    float avg_lr = -learningRate / (float)outDiff.Rows();

    m_w_momentum.Sgemm( m_momentum,
                        avg_lr,
                        m_memory,
                        CUBLAS_OP_T,
                        outDiff,
                        CUBLAS_OP_N );

    if ( m_weight_decay != 0.0f ) {
        m_w_momentum.Add( 1.0f, -learningRate * m_weight_decay, m_weight );
    }

    m_memory_diff.Reshape( m_memory.Rows(), m_memory.Columns() );
    m_memory_diff.Sgemm( 0.0f, 1.0f, outDiff, CUBLAS_OP_N, m_weight, CUBLAS_OP_T );

    if ( NULL != inDiff ) {
        inDiff->ComputeVfsmnHiddenDiff( m_memory_diff, m_filter, m_position );
    }

    if ( m_norm_type == "Clip" ) {
        float range = m_norm_param * (-avg_lr);
        m_w_momentum.Clip( -range, range );
    }

    if ( m_norm_type == "L2Norm" ) {
        float w_norm = m_w_momentum.L2Norm() * (-avg_lr);
        if ( w_norm > m_norm_param ) {
            m_w_momentum.Scale( 1.0f / w_norm );
        }
    }

    m_filter.UpdateVfsmnFilter( m_memory_diff, in, m_position, avg_lr );
    m_weight.Add( 1.0f, 1.0f, m_w_momentum );
}
Example No. 7
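// Backward pass of the projection (embedding) layer: the incoming gradient is
// optionally clipped or L2-normalised, then for each of the n_order input
// positions the corresponding gradient slice is scatter-added (AddRows) into
// the projection table at the indices given by `in`, either directly or through
// a momentum buffer when both momentum and weight decay are non-zero.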
void ProjectionLayer::BackPropagate( MatrixBase& outDiff, 
                                     const MatrixBase& in,
                                     const MatrixBase& out,
                                     float learningRate,
                                     MatrixBase* inDiff ) {
    int n_example = outDiff.Rows();
    int n_project = m_projection.Columns();
    int n_order = outDiff.Columns() / n_project;
    ASSERT( n_order == in.Columns() );

    bool useMomentum = m_momentum != 0.0f && m_weight_decay != 0.0f;

    if ( m_norm_type == "Clip" ) {
        outDiff.Clip( -m_norm_param, m_norm_param );
    }
    if ( m_norm_type == "L2Norm" ) {
        float norm = outDiff.L2Norm();
        if ( norm > m_norm_param ) {
            outDiff.Scale( 1.0f / norm );
        }
    }

    if (  useMomentum && 
         (m_gradient.Rows() != m_projection.Rows() || m_gradient.Columns() != m_projection.Columns() ) ) {
        m_gradient.Reshape( m_projection.Rows(), m_projection.Columns(), kSetZero );
    }

    for ( int i = 0; i < n_order; i++ ) {
        SubMatrix gradient( outDiff, 
                            MatrixRange(0, n_example, i * n_project, (i + 1) * n_project ) );
        SubMatrix index( in,
                         MatrixRange(0, n_example, i, i + 1) );

        if ( useMomentum ) {
            m_gradient.Add( m_momentum, -learningRate * m_weight_decay, m_projection );
            m_gradient.AddRows( gradient, index, -learningRate / n_example );
        }
        else {
            m_projection.AddRows( gradient, index, -learningRate / n_example );
        }
    }

    if ( useMomentum ) {
        m_projection.Add( 1.0f, 1.0f, m_gradient );
    }
}    // end of BackPropagate