コード例 #1
0
ファイル: linelib.cpp プロジェクト: alexdalton/workspace
// Prepare this trainer for one edge type of the network held by p_hin.
//
// Parameters:
//   edge_type - only entries of p_hin->hin whose eg_tp equals this character
//               are used; all other edges are skipped.
//   p_hin     - network to read; its node_u / node_v must have the same
//               vector_size, otherwise the process exits with status 1.
//   negative  - stored in neg_samples.
//
// Work done, in order:
//   1. per-source-node edge counts (u_nb_cnt) and weight sums (u_wei, v_wei);
//   2. per-source-node neighbour id / weight arrays (u_nb_id, u_nb_wei);
//   3. ransampl samplers: smp_u over u_wei, smp_u_nb[k] over u_nb_wei[k];
//   4. neg_table, in which index v occupies a share proportional to
//      v_wei[v]^0.75;
//   5. expTable, a lookup table of exp(x) / (exp(x) + 1);
//   6. registration of a line_trans for this edge type if none exists yet.
//
// NOTE(review): neighbour ids are used to index v_wei without a range check;
// this assumes every nb_id is < node_v->node_size - confirm against the loader.
void line_trainer_edge::init(char edge_type, line_hin *p_hin, int negative)
{
    edge_tp = edge_type;
    phin = p_hin;
    neg_samples = negative;
    line_node *node_u = phin->node_u, *node_v = phin->node_v;
    if (node_u->vector_size != node_v->vector_size)
    {
        printf("ERROR: vector dimsions are not same!\n");
        exit(1);
    }
    
    // compute the degree of vertices (calloc => counters start at zero)
    u_nb_cnt = (int *)calloc(node_u->node_size, sizeof(int));
    u_wei = (double *)calloc(node_u->node_size, sizeof(double));
    v_wei = (double *)calloc(node_v->node_size, sizeof(double));
    for (int u = 0; u != node_u->node_size; u++)
    {
        for (int k = 0; k != (int)(phin->hin[u].size()); k++)
        {
            int v = phin->hin[u][k].nb_id;
            char cur_edge_type = phin->hin[u][k].eg_tp;
            double wei = phin->hin[u][k].eg_wei;
            
            if (cur_edge_type != edge_tp) continue;
            
            u_nb_cnt[u]++;
            u_wei[u] += wei;
            v_wei[v] += wei;
        }
    }
    
    // allocate spaces for edges: one row per source node, sized by its count
    u_nb_id = (int **)malloc(node_u->node_size * sizeof(int *));
    u_nb_wei = (double **)malloc(node_u->node_size * sizeof(double *));
    for (int k = 0; k != node_u->node_size; k++)
    {
        u_nb_id[k] = (int *)malloc(u_nb_cnt[k] * sizeof(int));
        u_nb_wei[k] = (double *)malloc(u_nb_cnt[k] * sizeof(double));
    }
    
    // read neighbors; pst[u] is the next free slot in row u
    int *pst = (int *)calloc(node_u->node_size, sizeof(int));
    for (int u = 0; u != node_u->node_size; u++)
    {
        for (int k = 0; k != (int)(phin->hin[u].size()); k++)
        {
            int v = phin->hin[u][k].nb_id;
            char cur_edge_type = phin->hin[u][k].eg_tp;
            double wei = phin->hin[u][k].eg_wei;
            
            if (cur_edge_type != edge_tp) continue;
            
            u_nb_id[u][pst[u]] = v;
            u_nb_wei[u][pst[u]] = wei;
            pst[u]++;
        }
    }
    free(pst);
    
    // init sampler for edges
    smp_u = ransampl_alloc(node_u->node_size);
    ransampl_set(smp_u, u_wei);
    smp_u_nb = (ransampl_ws **)malloc(node_u->node_size * sizeof(ransampl_ws *));
    for (int k = 0; k != node_u->node_size; k++)
    {
        // malloc() leaves the slots indeterminate; nodes without a matching
        // edge get no sampler, so mark them explicitly instead of leaving
        // a garbage pointer behind.
        smp_u_nb[k] = NULL;
        if (u_nb_cnt[k] == 0) continue;
        smp_u_nb[k] = ransampl_alloc(u_nb_cnt[k]);
        ransampl_set(smp_u_nb[k], u_nb_wei[k]);
    }
    
    // Init negative sampling table
    neg_table = (int *)malloc(neg_table_size * sizeof(int));
    if (neg_table == NULL && neg_table_size > 0)
    {
        printf("Error: memory allocation failed!\n");
        exit(1);
    }
    
    int a, i;
    double total_pow = 0, d1;
    double power = 0.75;
    for (a = 0; a < node_v->node_size; a++) total_pow += pow(v_wei[a], power);
    a = 0; i = 0;
    // d1 is the cumulative share of vertices 0..i; slot a is given to vertex i
    // while (a + 1) / neg_table_size has not passed that share.
    d1 = pow(v_wei[i], power) / (double)total_pow;
    while (a < neg_table_size) {
        if ((a + 1) / (double)neg_table_size > d1) {
            i++;
            // Past the last vertex: pin i to the last one and push d1 above 1
            // so every remaining slot is assigned to it.
            if (i >= node_v->node_size) {i = node_v->node_size - 1; d1 = 2;}
            d1 += pow(v_wei[i], power) / (double)total_pow;
        }
        else
            neg_table[a++] = i;
    }
    
    // One spare entry is allocated; only the first EXP_TABLE_SIZE are filled.
    expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
    for (int i = 0; i < EXP_TABLE_SIZE; i++) {
        expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
        expTable[i] = expTable[i] / (expTable[i] + 1);                   // Precompute f(x) = x / (x + 1)
    }
    
    // add trans: map_trans stores (index in vec_trans) + 1, so the value 0
    // produced by operator[] for an unknown key means "not registered yet".
    std::string tp = std::string();
    tp += edge_tp;
    if (line_trainer_edge::map_trans[tp] == 0)
    {
        line_trans *ptrans = new line_trans;
        ptrans->init(tp, phin->node_u->vector_size);
        line_trainer_edge::vec_trans.push_back(ptrans);
        line_trainer_edge::map_trans[tp] = line_trainer_edge::cnt_trans + 1;
        line_trainer_edge::cnt_trans++;
    }
}
コード例 #2
0
ファイル: linelib.cpp プロジェクト: alexdalton/workspace
// Prepare this trainer for sampling along one meta-path of the network.
//
// Parameters:
//   meta_path - odd-length string; characters at even positions are node
//               types, characters at odd positions are link types. An
//               even-length string is rejected and the process exits with
//               status 1.
//   p_hin     - network to read; node_u / node_v must have equal vector_size,
//               otherwise the process exits with status 1.
//   negative  - stored in neg_samples.
//
// Tables built:
//   dp_cnt[u][s]     - weighted number of ways to complete the path from node
//                      u placed at position s (1 at the last position for
//                      nodes of the right type, otherwise the sum over
//                      matching neighbours of dp_cnt[v][s + 1] * edge weight).
//   neg_table[k]     - for k = 1 .. path_size - 1, slots shared between nodes
//                      in proportion to dp_cnt[.][k]; neg_table[0] is NULL.
//   smp_init         - ransampl sampler over nodes of the first path type,
//                      weighted by dp_cnt[u][0].
//   smp_dp[s][u]     - ransampl sampler over u's neighbours that match the
//                      next node type and link type; NULL when there is none.
//   vec_trans / map_trans - one line_trans per contiguous sub-path that spans
//                      at least one link, created only if not yet registered.
void line_trainer_path::init(std::string meta_path, line_hin *p_hin, int negative)
{
    path = meta_path;
    if (path.size() % 2 == 0)
    {
        printf("ERROR: meta-path %s error!\n", path.c_str());
        exit(1);
    }
    path_size = ((int)(path.size()) + 1) / 2;
    for (int k = 0; k != path_size; k++) path_node.append(1, path[k * 2]);
    for (int k = 0; k != path_size - 1; k++) path_link.append(1, path[k * 2 + 1]);
    phin = p_hin;
    neg_samples = negative;
    
    line_node *node_u = p_hin->node_u, *node_v = p_hin->node_v;
    if (node_u->vector_size != node_v->vector_size)
    {
        printf("ERROR: vector dimsions are not same!\n");
        exit(1);
    }
    
    // One spare entry is allocated; only the first EXP_TABLE_SIZE are filled.
    expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
    for (int i = 0; i < EXP_TABLE_SIZE; i++) {
        expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
        expTable[i] = expTable[i] / (expTable[i] + 1);                   // Precompute f(x) = x / (x + 1)
    }
    
    int node_size = node_u->node_size;
    
    dp_cnt = (double **)malloc(node_size * sizeof(double *));
    for (int k = 0; k != node_size; k++) dp_cnt[k] = (double *)malloc(path_size * sizeof(double));
    for (int i = 0; i != node_size; i++) for (int j = 0; j != path_size; j++) dp_cnt[i][j] = 0;
    
    // Fill dp_cnt from the last path position backwards so that position
    // step + 1 is complete before position step reads it.
    char node_type, link_type;
    for (int step = path_size - 1; step >= 0; step--)
    {
        node_type = path_node[step];
        if (step == path_size - 1)
        {
            for (int u = 0; u != node_size; u++) if ((node_u->node[u]).type == node_type)
                dp_cnt[u][step] = 1;
        }
        else
        {
            link_type = path_link[step];
            for (int u = 0; u != node_size; u++) if ((node_u->node[u]).type == node_type)
            {
                int neighbor_size = (int)(phin->hin[u].size());
                for (int i = 0; i != neighbor_size; i++)
                {
                    int v = phin->hin[u][i].nb_id;
                    char cur_link_type = phin->hin[u][i].eg_tp;
                    double wei = phin->hin[u][i].eg_wei;
                    if ((node_u->node[v]).type == path_node[step + 1] && link_type == cur_link_type)
                        dp_cnt[u][step] += dp_cnt[v][step + 1] * wei;
                }
            }
        }
    }
    
    // Init negative sampling table
    neg_table = (int **)malloc(path_size * sizeof(int *));
    // Position 0 has no table; give the slot a defined value instead of
    // leaving whatever malloc() returned in it.
    neg_table[0] = NULL;
    for (int k = 1; k != path_size; k++)
    {
        neg_table[k] = (int *)malloc(neg_table_size * sizeof(int));
        if (neg_table[k] == NULL && neg_table_size > 0)
        {
            printf("Error: memory allocation failed!\n");
            exit(1);
        }
    }
    for (int k = 1; k != path_size; k++)
    {
        int a, i;
        double total_pow = 0, d1;
        double power = 1;//0.75;
        for (a = 0; a < node_size; a++) total_pow += pow(dp_cnt[a][k], power);
        a = 0; i = 0;
        // d1 is the cumulative share of nodes 0..i; slot a goes to node i
        // while (a + 1) / neg_table_size has not passed that share.
        d1 = pow(dp_cnt[i][k], power) / (double)total_pow;
        while (a < neg_table_size) {
            if ((a + 1) / (double)neg_table_size > d1) {
                i++;
                // Past the last node: pin i and push d1 above 1 so the
                // remaining slots all go to the last node.
                if (i >= node_size) {i = node_size - 1; d1 = 2;}
                d1 += pow(dp_cnt[i][k], power) / (double)total_pow;
            }
            else
                neg_table[k][a++] = i;
        }
    }
    
    int node_cnt;
    
    // Init the sampling table of step 0
    node_cnt = 0;
    node_type = path_node[0];
    for (int u = 0; u != node_size; u++) if ((node_u->node[u]).type == node_type)
        node_cnt++;
    smp_init_index = (int *)malloc(node_cnt * sizeof(int));
    smp_init_weight = (double *)malloc(node_cnt * sizeof(double));
    smp_init = ransampl_alloc(node_cnt);
    node_cnt = 0;
    for (int u = 0; u != node_size; u++) if ((node_u->node[u]).type == node_type)
    {
        smp_init_index[node_cnt] = u;
        smp_init_weight[node_cnt] = dp_cnt[u][0];
        node_cnt++;
    }
    ransampl_set(smp_init, smp_init_weight);
    
    // Init sampling tables of the following steps
    smp_dp_index = (int ***)malloc(path_size * sizeof(int **));
    for (int k = 0; k != path_size; k++) smp_dp_index[k] = (int **)malloc(node_size * sizeof(int *));
    for (int i = 0; i != path_size; i++) for (int j = 0; j != node_size; j++) smp_dp_index[i][j] = NULL;
    smp_dp_weight = (double ***)malloc(path_size * sizeof(double **));
    for (int k = 0; k != path_size; k++) smp_dp_weight[k] = (double **)malloc(node_size * sizeof(double *));
    for (int i = 0; i != path_size; i++) for (int j = 0; j != node_size; j++) smp_dp_weight[i][j] = NULL;
    // These are tables of pointers, so size them by the pointer types rather
    // than by the ransampl_ws struct itself.
    smp_dp = (ransampl_ws ***)malloc(path_size * sizeof(ransampl_ws **));
    for (int k = 0; k != path_size; k++) smp_dp[k] = (ransampl_ws **)malloc(node_size * sizeof(ransampl_ws *));
    for (int i = 0; i != path_size; i++) for (int j = 0; j != node_size; j++) smp_dp[i][j] = NULL;
    
    for (int step = 0; step != path_size - 1; step++)
    {
        node_type = path_node[step];
        link_type = path_link[step];
        for (int u = 0; u != node_size; u++) if((node_u->node[u]).type == node_type)
        {
            // First pass: count the neighbours that continue the path.
            node_cnt = 0;
            int neighbor_size = (int)(phin->hin[u].size());
            for (int i = 0; i != neighbor_size; i++)
            {
                int v = phin->hin[u][i].nb_id;
                char cur_edge_tp = phin->hin[u][i].eg_tp;
                if ((node_u->node[v]).type == path_node[step + 1] && cur_edge_tp == link_type)
                    node_cnt++;
            }
            // No continuation from u at this step: entries stay NULL.
            if (node_cnt == 0) continue;
            
            smp_dp_index[step][u] = (int *)malloc(node_cnt * sizeof(int));
            smp_dp_weight[step][u] = (double *)malloc(node_cnt * sizeof(double));
            smp_dp[step][u] = ransampl_alloc(node_cnt);
            // Second pass: record each continuing neighbour and its weight.
            node_cnt = 0;
            for (int i = 0; i != neighbor_size; i++)
            {
                int v = phin->hin[u][i].nb_id;
                char cur_edge_tp = phin->hin[u][i].eg_tp;
                double wei = phin->hin[u][i].eg_wei;
                if ((node_u->node[v]).type == path_node[step + 1] && cur_edge_tp == link_type)
                {
                    smp_dp_index[step][u][node_cnt] = v;
                    smp_dp_weight[step][u][node_cnt] = dp_cnt[v][step + 1] * wei;
                    node_cnt++;
                }
            }
            ransampl_set(smp_dp[step][u], smp_dp_weight[step][u]);
        }
    }
    
    // add trans: one entry per sub-path from position i to position j > i.
    // map_trans stores (index in vec_trans) + 1, so the value 0 produced by
    // operator[] for an unknown key means "not registered yet".
    for (int i = 0; i < path_size; i++) for (int j = i + 1; j < path_size; j++)
    {
        std::string tp = path.substr(i * 2, (j - i) * 2 + 1);
        if (line_trainer_path::map_trans[tp] == 0)
        {
            line_trans *ptrans = new line_trans;
            ptrans->init(tp, phin->node_u->vector_size);
            line_trainer_path::vec_trans.push_back(ptrans);
            line_trainer_path::map_trans[tp] = line_trainer_path::cnt_trans + 1;
            line_trainer_path::cnt_trans++;
        }
    }
}
コード例 #3
0
ファイル: hplelib.cpp プロジェクト: kongbu/PLE
// Load a weighted edge list with the two endpoint columns swapped and build
// the sampling structures used for training.
//
// Each record of the file is "<first> <second> <weight>"; the first number is
// stored as v and the second as u (the reverse of the column order), so the
// adjacency lists in graph / nb_set are keyed by the second column.
//
// Parameters:
//   file_name - path of the link file; copied into link_file.
//   p_u, p_v  - node sets for the u and v sides; must report the same vector
//               dimension, otherwise the process exits with status 1.
//   negative  - stored in neg_samples.
//
// Fatal conditions (message on stdout, exit status 1): dimension mismatch,
// file cannot be opened, edge array allocation failure, or an edge whose
// endpoint index lies outside the corresponding node set. A record that
// cannot be parsed ends reading early with a warning and edge_cnt is reduced
// to the number of edges actually read.
//
// NOTE(review): strcpy() does not bound the copy into link_file; this assumes
// file_name always fits - confirm against the declaration of link_file.
void line_link::init_transpose(char *file_name, line_node *p_u, line_node *p_v, int negative)
{
    strcpy(link_file, file_name);
    node_u = p_u;
    node_v = p_v;
    neg_samples = negative;

    if (node_u->get_vector_dim() != node_v->get_vector_dim())
    {
        printf("ERROR: vector dimsions are not same!\n");
        exit(1);
    }
    
    dgr_u = (double *)calloc(node_u->node_size, sizeof(double));
    dgr_v = (double *)calloc(node_v->node_size, sizeof(double));
    
    // compute the number of edges (one per chunk returned by fgets)
    char str[2 * MAX_STRING + 10000];
    FILE *fi = fopen(link_file, "rb");
    if (fi == NULL)
    {
        printf("ERROR: link file not found!\n");
        printf("%s\n", link_file);
        exit(1);
    }
    edge_cnt = 0;
    while (fgets(str, sizeof(str), fi)) edge_cnt++;
    fclose(fi);
    
    // allocate spaces
    int u, v;
    double wei;
    edge_u = (int *)malloc(edge_cnt * sizeof(int));
    edge_v = (int *)malloc(edge_cnt * sizeof(int));
    edge_w = (double *)malloc(edge_cnt * sizeof(double));
    if (edge_u == NULL || edge_v == NULL || edge_w == NULL)
    {
        printf("Error: memory allocation failed!\n");
        exit(1);
    }
    graph = new std::vector<struct_neighbor>[node_u->node_size];
    nb_set = new std::set<int>[node_u->node_size];
    struct_neighbor cur_nb;
    
    // read edges
    fi = fopen(link_file, "rb");
    if (fi == NULL)
    {
        // The file was readable a moment ago, but the reopen can still fail.
        printf("ERROR: link file not found!\n");
        printf("%s\n", link_file);
        exit(1);
    }
    for (int k = 0; k != edge_cnt; k++)
    {
        // The line count above also counts blank or malformed lines. Without
        // this check a failed parse would silently reuse the previous (or
        // uninitialised) u / v / wei values.
        if (fscanf(fi, "%d %d %lf", &v, &u, &wei) != 3)
        {
            printf("WARNING: link file %s: stopped after %d parsable edges\n", link_file, k);
            edge_cnt = k;
            break;
        }
        
        if (k % 10000 == 0)
        {
            // Character 13 is a carriage return: the progress line is
            // overwritten in place.
            printf("Reading edges: %.3lf%%%c", k / (double)(edge_cnt + 1) * 100, 13);
            fflush(stdout);
        }
        
        // u and v index dgr_u, dgr_v, graph and nb_set below; reject indices
        // that would write outside those arrays.
        if (u < 0 || u >= node_u->node_size || v < 0 || v >= node_v->node_size)
        {
            printf("ERROR: edge %d in %s has a node index out of range (%d %d)!\n", k, link_file, v, u);
            exit(1);
        }
        
        // store edges
        edge_u[k] = u;
        edge_v[k] = v;
        edge_w[k] = wei;
        
        // update degrees
        dgr_u[u] += wei;
        dgr_v[v] += wei;
        
        // update graph
        cur_nb.index = v;
        cur_nb.wei = wei;
        graph[u].push_back(cur_nb);
        
        // update neighbor set
        nb_set[u].insert(v);
    }
    fclose(fi);
    
    // initialize edge sampler
    ws = ransampl_alloc(edge_cnt);
    ransampl_set(ws, edge_w);
    
    // compute the value of exp function before training
    // (one spare entry is allocated; only the first EXP_TABLE_SIZE are filled)
    expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real));
    for (int i = 0; i < EXP_TABLE_SIZE; i++) {
        expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
        expTable[i] = expTable[i] / (expTable[i] + 1);                   // Precompute f(x) = x / (x + 1)
    }
    
    // initialize the negative sampling table
    int a, i;
    double total_pow = 0, d1;
    real power = 0.75;
    
    neg_table_u = (int *)calloc(neg_table_size, sizeof(int));
    neg_table_v = (int *)calloc(neg_table_size, sizeof(int));
    
    // u side: slot a is given to vertex i; d1 is the cumulative share of
    // vertices 0..i, each weighted by degree^0.75.
    total_pow = 0;
    for (a = 0; a < node_u->node_size; a++) total_pow += pow(dgr_u[a], power);
    i = 0;
    d1 = pow(dgr_u[i], power) / (real)total_pow;
    for (a = 0; a < neg_table_size; a++) {
        neg_table_u[a] = i;
        if (a / (real)neg_table_size > d1) {
            // Advance only while another vertex exists. Incrementing first
            // and clamping afterwards read dgr_u[node_size], one element past
            // the array, once the last vertex had been reached.
            if (i + 1 < node_u->node_size) {
                i++;
                d1 += pow(dgr_u[i], power) / (real)total_pow;
            }
        }
    }
    
    // v side: same construction over dgr_v.
    total_pow = 0;
    for (a = 0; a < node_v->node_size; a++) total_pow += pow(dgr_v[a], power);
    i = 0;
    d1 = pow(dgr_v[i], power) / (real)total_pow;
    for (a = 0; a < neg_table_size; a++) {
        neg_table_v[a] = i;
        if (a / (real)neg_table_size > d1) {
            if (i + 1 < node_v->node_size) {
                i++;
                d1 += pow(dgr_v[i], power) / (real)total_pow;
            }
        }
    }

    printf("Reading edges from file: %s, DONE!\n", link_file);
    printf("Edge size: %lld\n", edge_cnt);
}