int open_log_file(const char *file) { if (NULL == file || strlen(file) == 0) { return -1; } int fd = open(file, O_CREAT | O_WRONLY | O_APPEND, 0640); if (fd < 0) { char file_tmp[1024] = {0}; strncpy(file_tmp, file, 1024); char *dir = strrchr(file_tmp, '/'); if (NULL == dir) { printf("the file must be a dir\n"); return -1; } //the dir path may be lost *dir = '\0'; if (0 == make_dir_recusive(file_tmp, 0644)) { printf("the mkdir recursive failed, the dir is %s\n", dir); return -1; } fd = open(file, O_CREAT | O_WRONLY | O_APPEND, 0640); } if (fd < 0) { return -1; } //release old log file handler if (log_file > 0) { close(log_file); } log_file = fd; log_fd_invalid = 0; return 0; }
/**
 * Build the assignment list from the page currently held in the global
 * `cache`, download any attachments, write "assignment.xml" under `path`,
 * and finally destroy both the row list and the assignment list.
 *
 * For every "<tr class=\"tr ... </tr>" row an assignment_element is
 * allocated, filled from the row's cells (name, detail page, time, deadline,
 * status, score page) and appended after `head`.
 *
 * @param curl_web_handler  CURL handle passed through to the fetch helpers.
 * @param path              Directory under which attachments and the XML
 *                          file are written.
 * @param head              Head node of the assignment list; new elements
 *                          are linked behind it and the whole list is
 *                          destroyed before returning.
 * @return TRUE on success; FALSE when an element could not be allocated or
 *         when either destroy helper reports failure.
 */
bool parse_course_assignment(CURL *curl_web_handler,char *path, p_assignment_element head)
{
    extern cache_memory cache;

    /* Split the cached page into one list entry per table row. */
    p_list_entity entity_head = basic_parse_page(cache.mem,"<tr class=\"tr","</tr>");
    p_list_entity p = entity_head->next;

    /* The cache is reset before the per-row fetches below are issued. */
    reset_cache_memory();

    p_assignment_element pre = head;
    char buf[BUFFER_MAX_SIZE];
    bool ok = TRUE;

    while (p)
    {
        p_assignment_element cur = malloc(sizeof(assignment_element));
        if (!cur)
        {
            /* Out of memory: keep what was parsed so far, report failure. */
            ok = FALSE;
            break;
        }
        /* init all char * */
        init_assignment_element(cur);

        /* Cursor that is advanced cell by cell through the row. */
        char *src = p->entity;

        /* GET NAME */
        extract_content_between_tags(p->entity, buf, "<a href","</a>");
        remove_and_convert_html(buf);
        string_trip(buf);
        cur->assignment_name = strdup(buf);

        /* GET DETAIL URL */
        extract_content_between_fix(p->entity, buf, "href=\"","\">");
        str_add_prefix(HOMEWORK_URL_PREFIX,buf);

        /* GET DETAIL */
        get_assignment_detail(curl_web_handler,buf,cur);

        /* GET TIME */
        src = extract_content_between_fix(src, buf, "<td width=\"10%\">", "</td>");
        cur->assignment_time = strdup(buf);

        /* GET DEADLINE */
        src = extract_content_between_fix(src, buf, "<td width=\"10%\">", "</td>");
        cur->assignment_deadline = strdup(buf);

        /* GET STATUS */
        src = extract_content_between_tags(src, buf, "<td width=\"15%\"", "</td>");
        if (string_trip(buf))
        {
            cur->assignment_status = strdup(buf);
        }

        /* GET SCORE URL: the first match only advances the cursor, the
         * second match is the one whose content is used. */
        src = extract_content_between_fix(src, buf, "javascript:window.location.href='","';");
        extract_content_between_fix(src, buf,"javascript:window.location.href='","'\"");
        if (str_add_prefix(HOMEWORK_URL_PREFIX, buf))
            get_assignment_score(curl_web_handler,buf,cur);

        /* Save the teacher's attachment, if any. */
        if (cur->assignment_attachment_url)
        {
            snprintf(buf, sizeof buf, "%s%c%s%c%s%c", path,
                     PATH_SPILIT_CHAR, cur->assignment_name,
                     PATH_SPILIT_CHAR, "教师作业附件",
                     PATH_SPILIT_CHAR);
            make_dir_recusive(buf);
            /* Append in place: sprintf(buf, "%s%s", buf, ...) copies between
             * overlapping objects, which is undefined behaviour. */
            size_t dir_len = strlen(buf);
            snprintf(buf + dir_len, sizeof buf - dir_len, "%s",
                     cur->assignment_attachment_name);
            cur->assignment_attachment_location = strdup(buf);
            download_file(curl_web_handler,cur->assignment_attachment_url,
                          cur->assignment_attachment_location);
        }

        /* Save the student's own hand-in, if any. */
        if (cur->my_handin_attachment_url)
        {
            snprintf(buf, sizeof buf, "%s%c%s%c%s%c", path,
                     PATH_SPILIT_CHAR, cur->assignment_name,
                     PATH_SPILIT_CHAR, "我的提交",
                     PATH_SPILIT_CHAR);
            make_dir_recusive(buf);
            size_t dir_len = strlen(buf);
            snprintf(buf + dir_len, sizeof buf - dir_len, "%s",
                     cur->my_handin_attachment_name);
            cur->my_handin_attachment_location = strdup(buf);
            download_file(curl_web_handler,cur->my_handin_attachment_url,
                          cur->my_handin_attachment_location);
        }

        /* Append to the result list and move on to the next row. */
        pre->next = cur;
        pre = cur;
        p = p->next;
    }

    snprintf(buf, sizeof buf, "%s%c%s", path, PATH_SPILIT_CHAR, "assignment.xml");
    write_all_assignment_to_xmlfile(buf,head);

    /* Always run both destroy helpers so a failure in the first one does
     * not leave the assignment list allocated. */
    if (!destroy_all_entity_memory(entity_head))
        ok = FALSE;
    if (!destroy_all_assignment_element(head))
        ok = FALSE;
    return ok;
}
// Get discussion list, main function bool parse_course_discussion(CURL *curl_web_handler,char *path, p_discussion_element head) { extern cache_memory cache; // split the memory and copy it into entity list p_list_entity entity_head = NULL; entity_head = basic_parse_page(cache.mem,"<tr class=\"tr","</tr>"); // for loop p_list_entity p = entity_head->next; // reset cache memory and waiting for next download. reset_cache_memory(); // init p_discussion_element pre = head; p_discussion_element cur = head->next; // BUFFER char buf[BUFFER_MAX_SIZE]; char num_buf[16]; // loop char *reply_url = NULL; // int discussion_id = 0; while(p) { cur = (p_discussion_element)malloc(sizeof(discussion_element)); // clear cur pointer memory init_discussion_element(cur); cur->id = discussion_id; // save a char pointer and move it, for performance char *src = p->entity; // GET TOPIC extract_content_between_tags(p->entity, buf,"<a href","</a>"); remove_and_convert_html(buf); cur->discussion_topic = strdup(buf); // GET REPLY URL WAITING, SAVE IT extract_content_between_fix(p->entity, buf, "href='","'>"); str_add_prefix(REPLY_URL_PREFIX, buf); reply_url = strdup(buf); // GET AUTHOR src = extract_content_between_tags( src, buf, "<td width=\"15%\"", "</td>"); cur->discussion_author = strdup(buf); // GET reply and click src = extract_content_between_fix( src, buf, "<td width=\"15%\">", "</td>"); memset (num_buf, 0, 16); int i = 0,j = 0; for(i = 0;i < strlen(buf); i++) { if(buf[i] == '/') { // how many reply cur->discussion_reply = atoi(num_buf); break; } num_buf[j++] = buf[i]; } cur->discussion_click = atoi(buf+i+1); // Get handin time extract_content_between_fix(src, buf, "<td width=\"20%\">", "</td>"); cur->discussion_handin_time = strdup(buf); // Get content...etc if(!get_reply(curl_web_handler,reply_url, cur)) printf("Get Reply Error, Check Network.\n"); // print //print_discussion_element(cur); // write to file if(cur->discussion_attachment_url) { sprintf(buf,"%s%c%d%c%s%c",path, 
PATH_SPILIT_CHAR,cur->id, PATH_SPILIT_CHAR, "附件", PATH_SPILIT_CHAR); make_dir_recusive(buf); sprintf(buf, "%s%s", buf, cur->discussion_attachment_name); cur->discussion_attachment_location = strdup(buf); download_file(curl_web_handler,cur->discussion_attachment_url, cur->discussion_attachment_location); } if(cur->reply_head) { sprintf(buf,"%s%c%d%c%s%c",path, PATH_SPILIT_CHAR, cur->id, PATH_SPILIT_CHAR, "回复附件", PATH_SPILIT_CHAR); download_reply_attachment(curl_web_handler,buf, cur->reply_head); } pre->next = cur; pre = cur; cur = NULL; p = p->next; discussion_id ++; } sprintf(buf, "%s%c%s", path, PATH_SPILIT_CHAR, "discussion.xml"); write_all_discussion_to_xmlfile(buf,head); if(!destroy_all_entity_memory (entity_head)) return FALSE; if(!destroy_all_discussion_element (head)) return FALSE; return TRUE; }
/**
 * Build the course-file list from the page currently held in the global
 * `cache`, download every file, write "file.xml" under `path`, and finally
 * destroy the file list.
 *
 * Up to five file-type names are read from "<td class=\"textTD\"" cells.
 * The page is then cut into one section per type at each
 * "<table id=\"table_box\"" marker, and every "<tr class=\"tr ... </tr>" row
 * of a section becomes one file_element linked behind `head`.
 *
 * @param curl_web_handler  CURL handle passed through to download_file().
 * @param path              Directory under which the per-type folders and
 *                          the XML file are written.
 * @param head              Head node of the file list; new elements are
 *                          linked behind it and the list is destroyed
 *                          before a normal return.
 * @return TRUE on success; FALSE when an element cannot be allocated or
 *         initialised, or when a destroy helper reports failure.
 */
bool parse_course_file(CURL *curl_web_handler,char *path,p_file_element head)
{
    extern cache_memory cache;

    /* Scratch buffer shared by all extraction steps. */
    char buf[BUFFER_MAX_SIZE];
    memset(buf, 0 ,BUFFER_MAX_SIZE);

    /* Names of the file types found on the page. */
    char file_type_name[6][128];
    int i = 0;
    int no_of_file_type = 0;

    /* Collect the type names; stop at the first cell that is not found. */
    char *p_cache = cache.mem;
    for (i = 0; i < 5; i++)
    {
        memset(file_type_name[i], 0, 128);
        p_cache = extract_content_between_tags(p_cache, file_type_name[i],
                                               "<td class=\"textTD\"", "</td>");
        if (!p_cache)
            break;
    }
    no_of_file_type = i;

    /* Locate the start of each type's table. Slot 0 only seeds the search
     * and is never parsed. */
    char *p_cache_start[6];
    p_cache_start[0] = cache.mem;
    for (i = 1; i <= no_of_file_type; i++)
    {
        char *table = strstr(p_cache_start[i-1] + 1, "<table id=\"table_box\"");
        if (!table)
        {
            /* Fewer tables than type names: only use the ones found
             * instead of writing through a NULL-derived pointer. */
            no_of_file_type = i - 1;
            break;
        }
        p_cache_start[i] = table;
        /* Terminate the previous section just before this table. */
        table[-1] = '\0';
    }

    p_file_element pre = head;

    for (i = 1; i <= no_of_file_type; i++)
    {
        /* Directory that receives every file of this type. */
        char file_save_prefix[BUFFER_MAX_SIZE];
        snprintf(file_save_prefix, sizeof file_save_prefix, "%s%c%s%c", path,
                 PATH_SPILIT_CHAR, file_type_name[i-1], PATH_SPILIT_CHAR);
        make_dir_recusive(file_save_prefix);

        p_list_entity entity_head = basic_parse_page(p_cache_start[i],
                                                     "<tr class=\"tr","</tr>");
        p_list_entity p = entity_head->next;

        while (p)
        {
            p_file_element cur = malloc(sizeof(file_element));
            if (!cur || !init_file_element(cur))
            {
                printf("Init Error!\n");
                /* Do not leak the element or the row list on this path. */
                free(cur);
                destroy_all_entity_memory(entity_head);
                return FALSE;
            }

            /* SET FILE TYPE */
            cur->file_type = strdup(file_type_name[i-1]);

            /* SET DOCUMENT ID */
            char *pointer_src = p->entity;
            pointer_src = extract_content_between_tags(pointer_src, buf,
                                                       "<td width=\"80\">","</td>");
            cur->file_id = atoi(buf);

            /* GET DOCUMENT ORIGINAL NAME */
            pointer_src = extract_content_between_fix(pointer_src, buf,
                                                      "getfilelink=", "&");
            cur->file_orign_name = strdup(buf);

            /* GET FILE URL (cursor intentionally not advanced) */
            extract_content_between_fix(pointer_src, buf, "href=\"","\" >");
            str_add_prefix(tsinghua_prefix,buf);
            cur->file_url = strdup(buf);

            /* GET FILE TITLE && MOVE POINTER */
            pointer_src = extract_content_between_tags(pointer_src, buf,
                                                       "<a target=\"_top\"", "</a>");
            remove_and_convert_html(buf);
            if (string_trip(buf))
            {
                remove_path_spilit_char(buf);
                cur->file_title = strdup(buf);
            }
            else
            {
                cur->file_title = NULL;
            }

            /* GET FILE ABSTRACT */
            pointer_src = extract_content_between_tags(pointer_src, buf,
                                                       "<td width=\"300\"", "</td>");
            cur->file_abstract = strdup(buf);

            /* GET FILE UPLOAD TIME */
            pointer_src = extract_content_between_tags(pointer_src, buf,
                                                       "<td width=\"100\"", "</td>");
            cur->file_up_time = strdup(buf);

            /* Local name is "<title>_<original name>". file_title may be
             * NULL (set above); passing NULL to %s is undefined, so an
             * empty title is used instead. */
            const char *title = cur->file_title ? cur->file_title : "";
            snprintf(buf, sizeof buf, "%s%s_%s", file_save_prefix, title,
                     cur->file_orign_name);
            cur->file_local_location = strdup(buf);
            download_file(curl_web_handler,cur->file_url,
                          cur->file_local_location);

            /* Append to the result list and move on to the next row. */
            pre->next = cur;
            pre = cur;
            p = p->next;
        }

        if (!destroy_all_entity_memory(entity_head))
        {
            printf("FREE EROOR @ LINE %d FILE %s",__LINE__, __FILE__);
            return FALSE;
        }
    }

    reset_cache_memory();

    snprintf(buf, sizeof buf, "%s%c%s", path, PATH_SPILIT_CHAR, "file.xml");
    write_all_file_to_xmlfile(buf,head);

    if (!destroy_all_file_element(head))
    {
        printf("FREE EROOR @ LINE %d FILE %s",__LINE__, __FILE__);
        return FALSE;
    }
    return TRUE;
}