Ejemplo n.º 1
0
int main(int argc, char *argv[])
{
	Graph::vertex user = 0;	
	
	if(argc > 1){
		user = atoi(argv[1]);
	}
	ifstream in, fin;
	int *userId = new int[N];
	int *locationId = new int[N];
	int *mapbacklocId = new int[N];
	string *time = new string[N];
	float *latitude = new float[N];
       	float *longtitude =  new float[N];
	int *numofUser = new int[N];
	int *numofLocation = new int [N];
	vector<Point*> position;
		
	int n = 0, maxNumberofUserLocations = 0, maxNumberofLocationUsers = 0, maxUserId = 0, maxLocationId = 0;
	int lengthofNewlocaitonId = 0, mapedLocationId = 0;
	in.open("Gowalla_totalCheckins_small.txt");
	while(!in.eof()){
		in>>userId[n]>>time[n]>>latitude[n]>>longtitude[n]>>locationId[n];
		if(locationId[n]==0){
			break;
		}
		// map locationId to newId
		mapedLocationId = mapLocationId(mapbacklocId, lengthofNewlocaitonId, locationId[n]);
		if(mapedLocationId == -1){
			mapbacklocId[lengthofNewlocaitonId] = locationId[n];
			mapedLocationId = lengthofNewlocaitonId;
			Point *p = new Point(2);
			p->v[0] = latitude[n];
			p->v[1] = longtitude[n];
			if(p->v[0]==0&& p->v[1]==0) cout<<n<<endl;
			position.push_back(p);
			++ lengthofNewlocaitonId; 
		}


		++ numofUser[userId[n]];
		++ numofLocation[mapedLocationId];
		if(mapedLocationId > maxLocationId) maxLocationId = mapedLocationId;
		if(userId[n]>maxUserId) maxUserId = userId[n];
		if(numofUser[userId[n]] > maxNumberofUserLocations) maxNumberofUserLocations = numofUser[userId[n]];
		if(numofLocation[mapedLocationId] > maxNumberofLocationUsers){
		       	maxNumberofLocationUsers = numofLocation[mapedLocationId];
		//	cout<<maxNumberofLocation<<" "<<locationId[n]<<" "<<n<<endl;
		}
		++n;
	}
	in.close();
	//cout<<maxUserId<<" "<<maxLocationId<<endl;	
	//cout<<maxNumberofUserLocations<<" "<<maxNumberofLocationUsers<<endl;	

	int **locationUserList = new int*[maxLocationId+1];
	int **userLocationList = new int*[maxUserId+1];
	
	for(int i = 0; i <= maxLocationId; ++i){
		locationUserList[i] = new int[maxNumberofLocationUsers+1];
		locationUserList[i][0] = 0;
	}	
	for(int i = 0; i <= maxUserId; ++i){
		userLocationList[i] = new int[maxNumberofLocationUsers+1];
		userLocationList[i][0] = 0;
	}	

	for(int i = 0; i < n; ++i){
		mapedLocationId = mapLocationId(mapbacklocId, lengthofNewlocaitonId,locationId[i]);
		++ userLocationList[userId[i]][0];
		userLocationList[userId[i]][userLocationList[userId[i]][0]] = mapedLocationId;		
		++ locationUserList[mapedLocationId][0];
//		if(maxNumberofLocationUsers - locationUserList[mapedLocationId][0]<0) cout<<i<<endl;
		locationUserList[mapedLocationId][locationUserList[mapedLocationId][0]] = userId[i];		
	}

	//cout<<"ddddddddddddddddd"<<endl;	
	Graph G;
	int source, target;
	fin.open("Gowalla_edges.txt");
	while(!fin.eof()){
		fin>>source>>target;
		G.insert_edge(source, target);
		G.insert_edge(target, source);
	}
		
	Graph::vertex_set neighbors = G.out_neighbors(user);
	
	// location recommendation
	int *recommendLocation = new int[maxLocationId+1];
	memset(recommendLocation, 0, (maxLocationId+1)*sizeof(int));
	for(Graph::vertex_set::const_iterator t=neighbors.begin(); t!=neighbors.end(); ++t){
		//cout<< *t<<" ";
		if(*t > maxUserId) continue;
		for(int i = 1; i <= userLocationList[*t][0]; ++i){
			if(find(userLocationList[user], userLocationList[*t][i]) == 0){
				++ recommendLocation[userLocationList[*t][i]];
			}
		}
	}
	
	cout<<endl;
	cout<<"recommend locations for user "<<user<<": "<<endl;
	for(int i = 0; i <= maxLocationId; ++i){
		if(recommendLocation[i] > 0){
			cout<<mapbacklocId[i]<<" ";
		}
	}
	cout<<endl;

	//friends recommendation 
	int *recommendFriends = new int[maxUserId+1];
	memset(recommendFriends, 0, (maxUserId+1)*sizeof(int));
	for(Graph::vertex_set::const_iterator t=neighbors.begin(); t!=neighbors.end(); ++t){
		Graph::vertex_set nextNeighbors = G.out_neighbors(*t);
		for(Graph::vertex_set::const_iterator r=nextNeighbors.begin(); r!=nextNeighbors.end(); ++r){
			if(neighbors.find(*r)==neighbors.end()){
				++ recommendFriends[*r];
			}
		}
	}

	cout<<endl;
	cout<<"recommend friends for user "<<user<<": "<<endl;
	for(int i = 0; i <= maxUserId; ++i){
		if(recommendFriends[i] > 10 && i != user){
			cout<<i<<" ";
		}
	}
	cout<<endl;

	
	// group computation
	int *kmeansLabel = Kmeans(position, 2, GROUP_SIZE, 0.1);
	ofstream out;
	out.open("result.txt");
	for(int i = 0; i < lengthofNewlocaitonId; ++i){
			out<<position[i]->v[0]<<" "<<position[i]->v[1]<<" "<<kmeansLabel[i]<<endl;
	}
	out.close();
	
	int *userList = new int[N];
	memset(userList, 0, N*sizeof(int));
	cout<<endl;
	cout<<"clustering for find important user:"******"clusterId  userId  degree"<<endl;
	for(int k = 1; k <= GROUP_SIZE; ++k){
		for(int i = 0; i < lengthofNewlocaitonId; ++i){
			if(kmeansLabel[i] == k){
				for(int j = 1; j <= locationUserList[i][0]; ++j){
					if(find(userList, locationUserList[i][j]) == 0){
						++ userList[0];
						userList[userList[0]] = locationUserList[i][j];
					}
			
				}
			}
		}
		
		int max_degree = -1, max_degree_user = -1;
		int max_checkin = -1, max_checkin_user = -1;
		for(int i = 1; i <= userList[0]; ++i){
			int degree = G.out_degree(userList[i]);
			if(degree > max_degree){
				max_degree = degree;
				max_degree_user = userList[i];
			}
		}
		cout<<k<<" "<<max_degree_user<<" "<<max_degree<<" "<<endl;
	}

	delete [] userList;
	delete [] recommendLocation;
	delete [] userId;
	delete [] locationId;
	delete [] mapbacklocId;
	delete [] time;
	delete [] latitude;
      	delete [] longtitude;
	delete [] numofUser;
	delete [] numofLocation;
	delete [] kmeansLabel;
	for(int i = 0; i <= maxLocationId; ++i){
		delete [] locationUserList[i];
	}
	delete [] locationUserList;	
	return 0;
}
Ejemplo n.º 2
0
std::vector<std::pair<std::vector<double>, int> > generateFeatures(const tGraph<T> &g, const tGraph<T> &gPos, const tGraph<T> &gNeg, const std::vector<std::pair<std::pair<T, T>, int> > &edges) {
    std::vector<std::pair<std::vector<double>, int> > result;
    int count = 0;
    int fullSize = edges.size();
    int step = fullSize / 100;
    for (std::vector<std::pair<std::pair<unsigned int, unsigned int>, int> >::const_iterator i = edges.begin(); i != edges.end(); ++i) {
        if (count % step == 0) {
            std::cout << (count * 100) / fullSize << "% are processed" << std::endl;
        }
        ++count;
        std::vector<double> tmp;
        std::vector<unsigned int> inter;
        
        int u = i->first.first;
        int v = i->first.second;
        
        // degree features
        
        tmp.push_back(gPos.in_degree(u));
        tmp.push_back(gNeg.in_degree(v));
        tmp.push_back(gPos.out_degree(u));
        tmp.push_back(gNeg.out_degree(v));
        
        Graph::vertex_set uN = g.out_neighbors(u);
        Graph::vertex_set uIn = g.in_neighbors(u);
        Graph::vertex_set vN = g.out_neighbors(v);
        Graph::vertex_set vIn = g.in_neighbors(v);
        
        uN.insert(uIn.begin(), uIn.end());
        vN.insert(vIn.begin(), vIn.end());
        std::set_intersection(uN.begin(), uN.end(), vN.begin(), vN.end(), std::inserter(inter, inter.begin()));
        tmp.push_back(inter.size());
        tmp.push_back(g.out_degree(u));
        tmp.push_back(g.in_degree(v));
        
        // triad features
        
        std::vector<double> triad(16, 0);
        for (auto w : inter) {
            if (g.includes_edge(u, w)) {
                if (g.includes_edge(v, w)) {
                    if (gPos.includes_edge(u, w)) {
                        if (gPos.includes_edge(v, w)) {
                            ++triad[0];
                        } else if (gNeg.includes_edge(v, w)) {
                            ++triad[1];
                        } else {
                            std::cout << "Error: incorrect graph." << std::endl;
                        }
                    } else if (gNeg.includes_edge(u, w)) {
                        if (gPos.includes_edge(v, w)) {
                            ++triad[2];
                        } else if (gNeg.includes_edge(v, w)) {
                            ++triad[3];
                        } else {
                            std::cout << "Error: incorrect graph." << std::endl;
                        }
                    } else {
                        std::cout << "Error: incorrect graph." << std::endl;
                    }
                } else if (g.includes_edge(w, v)) {
                    if (gPos.includes_edge(u, w)) {
                        if (gPos.includes_edge(w, v)) {
                            ++triad[4];
                        } else if (gNeg.includes_edge(w, v)) {
                            ++triad[5];
                        } else {
                            std::cout << "Error: incorrect graph." << std::endl;
                        }
                    } else if (gNeg.includes_edge(u, w)) {
                        if (gPos.includes_edge(w, v)) {
                            ++triad[6];
                        } else if (gNeg.includes_edge(w, v)) {
                            ++triad[7];
                        }
                    } else {
                        std::cout << "Error: incorrect graph." << std::endl;
                    }
                }
            } else if (g.includes_edge(w, u)) {
                if (g.includes_edge(v, w)) {
                    if (gPos.includes_edge(w, u)) {
                        if (gPos.includes_edge(v, w)) {
                            ++triad[8];
                        } else if (gNeg.includes_edge(v, w)) {
                            ++triad[9];
                        } else {
                            std::cout << "Error: incorrect graph." << std::endl;
                        }
                    } else if (gNeg.includes_edge(w, u)) {
                        if (gPos.includes_edge(v, w)) {
                            ++triad[10];
                        } else if (gNeg.includes_edge(v, w)) {
                            ++triad[11];
                        } else {
                            std::cout << "Error: incorrect graph." << std::endl;
                        }
                    } else {
                        std::cout << "Error: incorrect graph." << std::endl;
                    }
                } else if (g.includes_edge(w, v)) {
                    if (gPos.includes_edge(w, u)) {
                        if (gPos.includes_edge(w, v)) {
                            ++triad[12];
                        } else if (gNeg.includes_edge(w, v)) {
                            ++triad[13];
                        } else {
                            std::cout << "Error: incorrect graph." << std::endl;
                        }
                    } else if (gNeg.includes_edge(w, u)) {
                        if (gPos.includes_edge(w, v)) {
                            ++triad[14];
                        } else if (gNeg.includes_edge(w, v)) {
                            ++triad[15];
                        }
                    } else {
                        std::cout << "Error: incorrect graph." << std::endl;
                    }
                }
            } else {
                std::cout << "Error: incorrect graph." << std::endl;
            }
        }
        tmp.insert(tmp.end(), triad.begin(), triad.end());
        
        result.push_back(std::make_pair(tmp, i->second));
    }
    std::cout << "100% are processed" << std::endl;
    return result;
}