/*
 * Checks whether a line of HTML carries a link that meets the required criteria.
 *
 * A line is accepted when all of the following hold:
 *   - it contains one of the markers "<a href", "href=" or "<a rel=";
 *   - the target produced by extractLink() contains neither "://" nor ".com";
 *   - verificareExtensie() returns 1 for that target (per the original
 *     comment this means the extension has 3 or 4 characters).
 *
 * linkCurent: NUL-terminated line to inspect.
 * Returns 1 when the link is accepted, 0 otherwise. 0 is also returned for a
 * NULL line or when the scratch buffer cannot be allocated.
 */
int vefiricaLink(char* linkCurent){
    int ok = 0;
    char *copy;

    if (linkCurent == NULL) {
        return 0;
    }

    /* The three markers were handled by identical branches, so test them together. */
    if (strstr(linkCurent, "<a href") == NULL &&
        strstr(linkCurent, "href=") == NULL &&
        strstr(linkCurent, "<a rel=") == NULL) {
        return 0;
    }

    /* Zero-filled scratch buffer that receives the extracted "page.extension". */
    copy = calloc(MAXLEN, sizeof *copy);
    if (copy == NULL) {
        return 0;
    }

    extractLink(linkCurent, copy);

    /* Reject targets that carry a scheme separator or a ".com" component. */
    if (strstr(copy, "://") == NULL && strstr(copy, ".com") == NULL) {
        if (verificareExtensie(copy) == 1) {
            ok = 1;
        }
    }

    free(copy);
    return ok;
}
/*
 * Connects to the first reachable address in the list `res` on TCP port 80,
 * sends a request for `path` on `domain` through sendRequest(), and appends
 * the data read from the socket to the file whose name link2fn() derives
 * from `domain` and `path`.
 *
 * res:    linked list of candidate addresses (walked through ->next).
 * domain: host name; also passed to extractLink() for every chunk read.
 * path:   resource path on that host.
 *
 * Returns 0 once the peer closes the connection, -1 on any failure
 * (no address could be connected, request not sent, output file not
 * opened, or a read error other than EAGAIN).
 */
int dealurlres(Ip *res,char *domain, char *path)
{
    int s = -1;
    int ret = -1;   /* stays -1 when the address list is empty */
    char filename[2*MAX_LINK_LEN+1];
    struct ip *next;
    struct sockaddr_in server_address;

    /* Build the server socket address; only sin_addr changes per candidate. */
    memset(&server_address, 0, sizeof server_address);
    server_address.sin_family = AF_INET;
    server_address.sin_port = htons(80);

    for (next = res; next != NULL; next = next->next) {
        /*
         * A socket is in an unspecified state after a failed connect(), so
         * every attempt gets a fresh descriptor.
         */
        s = socket(PF_INET, SOCK_STREAM, 0);
        if (s < 0) {
            perror("socket");
            return -1;
        }

        server_address.sin_addr = next->ipaddres.sin_addr;
        ret = connect(s, (struct sockaddr *)&server_address,
                      sizeof(struct sockaddr_in));
        if (ret == 0) {
            break;
        }

        perror("connect");
        close(s);
        s = -1;
    }

    if (ret < 0) {
        return -1;
    }

    ret = sendRequest(domain, path, s);
    if (ret < 0) {
        close(s);
        return -1;
    }

    link2fn(domain, path, filename);

    /* Open the html file write-only, creating it if needed and appending to it. */
    int htmlfd = open(filename, O_WRONLY | O_CREAT | O_APPEND, 0644);
    if (htmlfd < 0) {
        close(s);
        return -1;
    }

    int n, need, ll = 0;
    char buf[1024] = { 0 };
    char buf1[1024] = { 0 };

    while (1) {
        /* Keep the final byte of buf untouched so it always ends in a NUL. */
        need = sizeof(buf) - 1 - ll;
        n = read(s, buf+ll, need);
        if (n < 0) {
            if (errno == EAGAIN) {
                usleep(1000);
                continue;
            } else {
                fprintf(stderr, "%s %d read http response error\n",__func__,__LINE__);
                close(htmlfd);
                close(s);
                return -1;
            }
        } else if (n == 0) {
            /* The http response has been read completely. */
            debug_printf("%s %d get http response success==>[%s%s]\n",__func__,__LINE__,domain,path);
            break;
        } else {
            /* More data may follow; process this chunk and keep reading. */
            debug_printf("%s %d from socket read:[%s] and will go on\n",__func__,__LINE__,buf);
            /* Snapshot the chunk before extractLink() gets to see buf. */
            memcpy(buf1, buf, sizeof(buf));
            /*
             * NOTE(review): the value returned by extractLink() is used both as
             * the offset of the next read and as the amount subtracted from the
             * write length; presumably it is the number of trailing bytes kept
             * in buf for the next round - confirm against extractLink().
             */
            ll = extractLink(buf, domain);
            write(htmlfd, buf1, n-ll);
        }
    }

    close(htmlfd);
    close(s);   /* the socket was previously leaked on the success path */
    return 0;
}
/* functia parcurge paginile, apeleaza recursiv si descarca in functie de parametri dati in linia de comanda */ void linkPages(char * original, char * argument,char * filePath, char * directory,int rec, int s){ /* conditia de oprire */ if (rec > 5) return; int sockfd; int port = SMTP_PORT; int rezult, i = 0, len = 0; struct sockaddr_in servaddr; struct hostent * host; /* declarare constante */ char server_ip[10]; char sendbuf[MAXLEN]; char recvbuf[MAXLEN]; char * addres = (char *)malloc(MAXLEN * sizeof(char)); char * ipAd = (char *)malloc(MAXLEN * sizeof(char)); char *fileApel = (char *)malloc(MAXLEN * sizeof(char)); char *linkPrimit = (char *)malloc(MAXLEN * sizeof(char)); char *fileNext = (char *)malloc(MAXLEN * sizeof(char)); char *m = (char *)malloc(MAXLEN * sizeof(char)); char *p = (char *)malloc(MAXLEN * sizeof(char)); char *getCom = (char *)malloc(MAXLEN * sizeof(char)); char **d = (char **)malloc(MAXLEN * sizeof(char)); /* setare pe 0 pentru a evita erorile */ memset(getCom,0,MAXLEN); memset(p,0,MAXLEN); memset(addres,0,MAXLEN); memset(ipAd,0,MAXLEN); memset(fileApel,0,MAXLEN); memset(fileNext,0,MAXLEN); memset(linkPrimit,0,MAXLEN); memset(m,0,MAXLEN); /* calculare adresa primita ca parametru */ getAdress(argument,addres); host = gethostbyname(addres); /* calculare adresa IP */ sprintf(ipAd,"%s",inet_ntoa(*(struct in_addr*)host->h_addr_list[0])); strcpy(server_ip, ipAd); if ( (sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0 ){ if(activO == 1){ fprintf(fExit,"Eroare la creare socket.\n"); } else fprintf(stderr,"Eroare la creare socket.\n"); exit(-1); } /* formarea adresei serverului */ memset(&servaddr, 0, sizeof(servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_port = htons(port); if (inet_aton(server_ip, &servaddr.sin_addr) <= 0 ) { if(activO == 1){ fprintf(fExit, "Adresa IP invalida.\n"); } else{ fprintf(stderr, "Adresa IP invalida.\n"); } exit(-1); } /* conectare la server */ if (connect(sockfd, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0 ) 
{ if(activO == 1){ fprintf(fExit, "Eroare la conectare\n"); } else{ fprintf(stderr, "Eroare la conectare\n"); } exit(-1); } /* trimitere comanda la server */ sprintf(getCom,"GET %s HTTP/1.0 \n\n",original); FILE * f = fopen(filePath,"wb"); send_command(sockfd,getCom); i = 0; /* se elimina antetul din pagina primita */ while((len = Readline(sockfd,recvbuf,MAXLEN-1)) > 0){ i++; if(len <= 2) break; memset(recvbuf, 0, MAXLEN); } /* daca a fost trimis link de descarcare */ if (s == 1){ while((len = recv(sockfd, recvbuf, MAXLEN, 0)) > 0) { fwrite(recvbuf, sizeof(char), len, f); memset(recvbuf, 0, MAXLEN); } } /* altfel se verifica paginile .html si de scriu in fisier */ else{ while(Readline(sockfd, recvbuf, MAXLEN) > 0){ /* verifica daca linia curecta contine */ if(vefiricaLink(recvbuf) == 1){ extractLink(recvbuf,p); if(strcmp(p,"") != 0){ sprintf(m,"%s%s",argument,p); memset(directory,0,MAXLEN); d = getFileAddress(m,linkPrimit,fileNext,directory); sprintf(fileApel,"%s%s",directory,fileNext); /* numai pentru recursivitate */ if (activR == 1 && strstr(p,".html") > 0){ i = 0; /* creeaza ierarhia de directoare */ while(d[i] != NULL){ mkdir(d[i],S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); i ++; } /* verifica daca fisierul exista */ rezult = access(fileApel,F_OK); /* daca nu exista se apeleaza recursiv pentru link-urile gasite */ if(rezult != 0 && rec <= 5 ) linkPages(m,linkPrimit,fileApel,directory,rec + 1,0); } /* daca este activ -e se apeleaza recursiv pentru a se descarca */ if(activE == 1 && strstr(p,".html") <= 0){ i = 0; /* creeaza ierarhia de directoare */ while(d[i] != NULL){ mkdir(d[i],S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); i ++; } /* verifica daca fisierul exista */ rezult = access(fileApel,F_OK); /* se apeleaza recursiv cu s = 1 pentru a intra pe coditia pentru descarcare */ if(rezult != 0 ){ linkPages(m,linkPrimit,fileApel,directory,1,1); } } } } /* scrie paginile in fisier */ for(i = 0; i < strlen(recvbuf) ;i ++) fwrite(&recvbuf[i],sizeof(char),1,f); } } 
free(addres); free(ipAd); free(getCom); free(m); free(linkPrimit); free(fileApel); free(p); free(fileNext); fclose(f); close(sockfd); }