コード例 #1
0
/*
 * Checks whether the current line holds a link that meets the accepted
 * criteria.
 *
 * The line must contain one of the markers "<a href", "href=" or "<a rel=".
 * The link text is then obtained through extractLink() and accepted only if
 * it contains neither "://" nor ".com" and verificareExtensie() returns 1
 * for it (per the original comments, that helper checks that the extension
 * has 3 or 4 characters).
 *
 * linkCurent: NUL-terminated line to inspect.
 * Returns 1 when the line holds an acceptable link, 0 otherwise (including
 * when the scratch buffer cannot be allocated).
 */
int vefiricaLink(char* linkCurent){

  int ok = 0;
  char *copy;

  /* All three markers led to the same processing, so test them together.
     strstr() returns a pointer: compare it with NULL, not with 0 via < / >. */
  if (strstr(linkCurent,"<a href") == NULL &&
      strstr(linkCurent,"href=") == NULL &&
      strstr(linkCurent,"<a rel=") == NULL)
    return 0;

  /* zero-filled scratch buffer that receives the extracted page.extension */
  copy = calloc(MAXLEN, sizeof(char));
  if (copy == NULL)
    return 0;

  /* extract page.extension */
  extractLink(linkCurent,copy);

  /* reject links that carry a scheme or a ".com" part, then validate the
     extension */
  if (strstr(copy,"://") == NULL && strstr(copy,".com") == NULL &&
      verificareExtensie(copy) == 1)
    ok = 1;

  free(copy);
  return ok;
}
コード例 #2
0
ファイル: pachong.c プロジェクト: sdfsdf107/sdf
/*
 * Fetches the resource identified by domain and path over TCP port 80 and
 * appends what is read from the socket to the file whose name link2fn()
 * derives from domain and path.
 *
 * res:    linked list of candidate server addresses; each one is tried in
 *         turn until a connect() succeeds.
 * domain: passed to sendRequest(), link2fn() and extractLink().
 * path:   passed to sendRequest() and link2fn().
 *
 * Returns 0 on success, -1 on any failure (no socket, no reachable address,
 * request not sent, output file not opened, read or write error).
 */
int dealurlres(Ip *res,char *domain, char *path)
{	
	int s = -1;
	char filename[2*MAX_LINK_LEN+1];
	/* Starts at -1 so that an empty address list is reported as
	   "could not connect" instead of reading an uninitialised value. */
	int ret = -1;
	struct ip* next = res;

	if ((s = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		return -1;
	}
	
	/* build the server socket address */
	struct sockaddr_in server_address;
	bzero(&server_address, sizeof(struct sockaddr_in));
	server_address.sin_family = AF_INET;
	server_address.sin_port = htons(80);

	while(next) {
		server_address.sin_addr = next->ipaddres.sin_addr;
		ret = connect(s, (struct sockaddr *)&server_address,
			      sizeof(struct sockaddr_in));
		if (ret >= 0)
			break;
		perror("connect");
		next = next->next;
		if (next) {
			/* POSIX leaves the socket state unspecified after a
			   failed connect(), so retry on a fresh socket. */
			close(s);
			if ((s = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
				perror("socket");
				return -1;
			}
		}
	}
	
	if(ret <0) {
		close(s);
		return -1;
	}

	ret = sendRequest(domain,path,s);
	if(ret <0) {
		close(s);
		return -1;
	}
	
	link2fn(domain,path,filename);
	/* open the html file write-only, creating it if needed and appending */
	int htmlfd = open(filename, O_WRONLY | O_CREAT | O_APPEND, 0644);
	if (htmlfd < 0) {
		close(s);
		return -1;
	}
	int n, need, ll = 0;
	char buf[1024] = { 0 };
	char buf1[1024] = { 0 };
	while (1) {
		/* keep the last byte of buf free so it stays NUL-terminated */
		need = sizeof(buf) - 1 - ll;
		n = read(s, buf+ll, need);
		if (n < 0) {
			if (errno == EAGAIN || errno == EINTR) {
				/* transient condition: wait briefly and retry */
				usleep(1000);
				continue;
			} else {
				fprintf(stderr, "%s %d read http response error\n",__func__,__LINE__);
				close(htmlfd);
				close(s);
				return -1;
			}
		} else if (n == 0) {	/* the whole http response has been read */
			debug_printf("%s %d get http response success==>[%s%s]\n",__func__,__LINE__,domain,path);
			break;
		} else {	/* more data may follow */
			debug_printf("%s %d from socket read:[%s] and will go on\n",__func__,__LINE__,buf);
			/* snapshot the buffer before extractLink() processes it */
			memcpy(buf1, buf, sizeof(buf));
			ll = extractLink(buf, domain);
			/* ll is used as an offset into buf on the next read; a value
			   outside the buffer would make read()/write() touch memory
			   out of bounds, so drop it. */
			if (ll < 0 || ll >= (int)sizeof(buf) - 1)
				ll = 0;
			/* NOTE(review): the byte count n-ll is kept from the original;
			   whether it matches the intended amount depends on what
			   extractLink() returns - confirm against that helper. */
			int towrite = n - ll;
			/* a non-positive count must not reach write(): it would be
			   converted to a huge size_t */
			if (towrite > 0 && write(htmlfd, buf1, towrite) < 0) {
				perror("write");
				close(htmlfd);
				close(s);
				return -1;
			}
		}
	}
	close(htmlfd);
	/* the socket was previously leaked on the success path */
	close(s);
	return 0;
}
コード例 #3
0
/* Writes msg to fExit when the global flag activO is 1, otherwise to stderr,
   then terminates the program with exit(-1). */
static void linkPagesFatal(const char *msg){
  fputs(msg, activO == 1 ? fExit : stderr);
  exit(-1);
}

/* Creates every directory named in the NULL-terminated array dirs, in order.
   A NULL array is ignored; mkdir() failures are ignored as before. */
static void linkPagesMakeDirs(char **dirs){
  if (dirs == NULL)
    return;
  for (int k = 0; dirs[k] != NULL; k++)
    mkdir(dirs[k], S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
}

/*
 * Downloads one page or file and, depending on the command-line flags,
 * follows the links found in it recursively.
 *
 * original:  request target placed in the "GET ... HTTP/1.0" line.
 * argument:  address handed to getAdress() to obtain the host name; also the
 *            prefix prepended to every link found in the page.
 * filePath:  file the received body is written to (opened with "wb").
 * directory: caller-owned buffer of at least MAXLEN bytes; it is cleared and
 *            refilled by getFileAddress() for every link that is followed.
 * rec:       current recursion depth; the function returns immediately when
 *            it is greater than 5.
 * s:         1 = copy the body to the file as raw bytes (download mode);
 *            anything else = read the body line by line, follow acceptable
 *            links and write every line to the file.
 *
 * Any failure (allocation, name resolution, socket, connect, output file)
 * is reported through linkPagesFatal() and ends the program.
 */
void linkPages(char * original, char * argument,char * filePath, char * directory,int rec, int s){
  
  /* stop condition for the recursion */
  if (rec > 5)
    return;

  int sockfd;
  /* NOTE(review): the constant is named SMTP_PORT but is used as the port of
     the server the GET request is sent to - confirm its value. */
  int port = SMTP_PORT;
  int len = 0;

  struct sockaddr_in servaddr;
  struct hostent * host;
  struct in_addr hostAddr;

  /* room for "255.255.255.255" plus the terminating NUL; the former
     10-byte buffer overflowed for longer dotted-quad addresses */
  char server_ip[16];
  char recvbuf[MAXLEN];

  /* directory hierarchy returned by getFileAddress().
     NOTE(review): ownership of the returned array is not visible here, so
     it is not freed - confirm against getFileAddress(). */
  char **d = NULL;

  /* "GET " + original + " HTTP/1.0 \n\n" + NUL always fits */
  size_t reqCap = strlen(original) + sizeof("GET  HTTP/1.0 \n\n");

  /* zero-filled working buffers */
  char *addres = calloc(MAXLEN, sizeof(char));
  char *fileApel = calloc(MAXLEN, sizeof(char));
  char *linkPrimit = calloc(MAXLEN, sizeof(char));
  char *fileNext = calloc(MAXLEN, sizeof(char));
  char *m = calloc(MAXLEN, sizeof(char));
  char *p = calloc(MAXLEN, sizeof(char));
  char *getCom = calloc(reqCap, sizeof(char));

  if (addres == NULL || fileApel == NULL || linkPrimit == NULL ||
      fileNext == NULL || m == NULL || p == NULL || getCom == NULL)
    linkPagesFatal("Eroare la alocarea memoriei.\n");

  /* derive the host name from the address received as parameter */
  getAdress(argument,addres);
  host = gethostbyname(addres);
  if (host == NULL || host->h_addrtype != AF_INET || host->h_addr_list[0] == NULL)
    linkPagesFatal("Adresa nu a putut fi rezolvata.\n");

  /* textual form of the first IP address of the host */
  memcpy(&hostAddr, host->h_addr_list[0], sizeof(hostAddr));
  snprintf(server_ip, sizeof(server_ip), "%s", inet_ntoa(hostAddr));

  if ( (sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0 )
    linkPagesFatal("Eroare la creare socket.\n");

  /* build the server address */
  memset(&servaddr, 0, sizeof(servaddr));
  servaddr.sin_family = AF_INET;
  servaddr.sin_port = htons(port);

  if (inet_aton(server_ip, &servaddr.sin_addr) == 0)
    linkPagesFatal("Adresa IP invalida.\n");
    
  /* connect to the server */
  if (connect(sockfd, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0 )
    linkPagesFatal("Eroare la conectare\n");

  /* build the command sent to the server */
  snprintf(getCom, reqCap, "GET %s HTTP/1.0 \n\n", original);

  FILE * f = fopen(filePath,"wb");
  if (f == NULL)
    linkPagesFatal("Eroare la deschiderea fisierului.\n");

  send_command(sockfd,getCom);
 
  /* skip the response header: stop at the first line of at most 2 bytes */
  while((len = Readline(sockfd,recvbuf,MAXLEN-1)) > 0){
    if(len <= 2)
      break;
    memset(recvbuf, 0, MAXLEN);
  }

  /* download mode: copy the body to the file unchanged */
  if (s == 1){
    while((len = recv(sockfd, recvbuf, MAXLEN, 0)) > 0) {
      fwrite(recvbuf, sizeof(char), len, f);
      memset(recvbuf, 0, MAXLEN);
    }
  }
  /* otherwise scan the page line by line and write it to the file */
  else{
    while(Readline(sockfd, recvbuf, MAXLEN) > 0){
      /* does the current line contain an acceptable link? */
      if(vefiricaLink(recvbuf) == 1){
        extractLink(recvbuf,p);

        if(strcmp(p,"") != 0){
          /* links that do not fit in the MAXLEN buffers are skipped rather
             than overflowing them */
          int mLen = snprintf(m, MAXLEN, "%s%s", argument, p);
          if (mLen >= 0 && mLen < MAXLEN){
            memset(directory,0,MAXLEN);
            d = getFileAddress(m,linkPrimit,fileNext,directory);

            int fLen = snprintf(fileApel, MAXLEN, "%s%s", directory, fileNext);
            if (fLen >= 0 && fLen < MAXLEN){
              int isHtml = (strstr(p,".html") != NULL);

              /* recursive mode: follow .html pages one level deeper */
              if (activR == 1 && isHtml){
                /* create the directory hierarchy */
                linkPagesMakeDirs(d);
                /* recurse only when the target file does not exist yet */
                if (access(fileApel,F_OK) != 0)
                  linkPages(m,linkPrimit,fileApel,directory,rec + 1,0);
              }
              /* -e mode: download every non-.html link */
              if (activE == 1 && !isHtml){
                /* create the directory hierarchy */
                linkPagesMakeDirs(d);
                /* s = 1 selects the raw download branch in the callee */
                if (access(fileApel,F_OK) != 0)
                  linkPages(m,linkPrimit,fileApel,directory,1,1);
              }
            }
          }
        }
      }
      /* write the current line to the file in a single call */
      fwrite(recvbuf, sizeof(char), strlen(recvbuf), f);
    }
  }

  free(addres);
  free(getCom);
  free(m);
  free(linkPrimit);
  free(fileApel);
  free(p);
  free(fileNext);

  fclose(f);
  close(sockfd);
}