// called by only proc_hyperlink() // get hyperlink title: <a href="/aaa/t.html"> good link </a>, "good link" is the title // html: HTML source, title: save the result(==title) // 타이틀 길이 제한 int htmlParse::get_hyperlink_title(char *html, char *title, int maxtitlelen) { char *ptr=html, *start=title; int n=0; #ifdef DEB PRN("get_hyperlink_title(): html=%X, title=%X\n", html, title); #endif if (html==0|| title==0) return 0; *title=0; while (*ptr && *ptr!='>') ptr++; if (*ptr) ptr++; ptr = StrUtil::skip_space(ptr); while (*ptr && *ptr!='<' && n < maxtitlelen-2) { *title++ = *ptr++; n++; } #ifdef DEB PRN("get_hyperlink_title(): n=%d, ptr=%X\n", n, ptr); #endif while (n>0 && ISSPACE(*(title-1)) ) { title--; n--; } #ifdef DEB PRN("get_hyperlink_title(): n=%d, ptr=%X\n", n, ptr); PRN("get_hyperlink_title(): html=%X, title=%X\n", html, title); #endif *title=0; return n; }
// Log the contents of one traversal expression: the original URL expression,
// category / document-type ids, depth, host-span flag, loop range, credentials,
// the key / include / exclude directory lists and the file-name pattern.
// `u` is dereferenced without a null check.
void TravExpr::print(TRVexpr *u)
{
    TRVexpr &rule = *u;

    PRN("URL EXPR: %s\n", rule.org_url_expr);
    //if ( rule.ctg_name ) PRN("\tCategory= %s\n", rule.ctg_name);
    //if ( rule.dtype_name ) PRN("\tDocType= %s\n", rule.dtype_name);
    PRN("\tCTG=%X, DocType=%X\n", rule.ctg_id, rule.doctype_id);
    PRN("\tDepth= %d\n", rule.depth);

    if (rule.host_span) {
        PRN("\tAll host\n");
    }

    // The loop bounds are shown as numbers or as characters depending on the loop type.
    if (rule.loop_type == INT_LOOP) {
        PRN("\t[%d..%d]\n", rule.from, rule.to);
    } else if (rule.loop_type == CHAR_LOOP) {
        PRN("\t[%c..%c]\n", rule.from, rule.to);
    }

    // NOTE(review): the password is written to the log in clear text.
    if (rule.user) {
        PRN("\tUser/Passwd: %s %s\n", rule.user, rule.passwd);
    }

    rule.key_dirs.print("KEY: ");
    rule.inc_dirs.print("INC: ");
    rule.exc_dirs.print("EXC: ");

    if (rule.filename) {
        PRN("\tFile: %s\n", rule.filename);
    }
}
void htmlParse::print_maxlen_tag(char *textbuf) { char tmpch=0; //tagStackType *maxtag = find_maxlen_tag(); tagStackType *maxtag = maxdelta_tag; PRN("tagStack: stack_top = %d\n", stack_top); if (maxtag) PRN("tagStack: tag =%s, start=%d, leng=%d\n", maxtag->tag->name, maxtag->start_pos, maxtag->len_txt); tmpch = textbuf[maxtag->start_pos + maxtag->len_txt]; textbuf[maxtag->start_pos + maxtag->len_txt] = 0; FileUtil::save_file_2_from_mem(textbuf + maxtag->start_pos, "D:/SVN/ad2005/trimmed.txt"); //PRN("%s\n", textbuf + maxtag->start_pos); textbuf[maxtag->start_pos + maxtag->len_txt] = tmpch; }
/* decode navigation data (Galileo E1B page) ----------------------------------- * * args : uint8_t *buff1 I navigation data bits (even page) * uint8_t *buff2 I navigation data bits (odd page) * sdreph_t *eph I/O sdr ephemeris structure * return : int word type *-----------------------------------------------------------------------------*/ extern int decode_page_e1b(const uint8_t *buff1, const uint8_t *buff2, sdrnav_t *nav) { int id; /* buff is 240 bits (30 bytes) of composed two page parts */ /* see Galileo SISICD Table 39, pp. 34 */ uint8_t buff[30]; memcpy(buff,buff1,15); memcpy(&buff[15],buff2,15); id=getbitu(buff,2,6); /* word type */ Ephemeris[nav->sat].PageN((id >= 7)? 999:id); switch (id) { case 0: decode_word0(buff,nav); break; case 1: decode_word1(buff,nav); break; case 2: decode_word2(buff,nav); break; case 3: decode_word3(buff,nav); break; case 4: decode_word4(buff,nav); break; case 5: decode_word5(buff,nav); break; case 6: decode_word6(buff,nav); break; case 7: case 8: case 9: break; // almanac case 10: decode_word10(buff,nav); break; case 63: break; // dummy page (we've actually seen these) default: printf("%s UNKNOWN W%d\n", PRN(nav->sat), id); break; } return id; }
/** HTML 내용을 text 형식으로 변환. return : text 길이. */ int htmlParse::html2text(char *html_mem, int html_size, char *txt_mem, int max_txt_size) { stToken tok; int tmp_txt_n=0, txt_n=0; char * tmp_txt=0; htmlTitle[0] = 0; memset(&tok, 0, sizeof(tok)); tok.src_mem = html_mem; tok.src_len = html_size; txt_mem[0] = 0; if ( (tmp_txt = new char [max_txt_size+1]) == NULL) { PRN("memory fail\n"); return 0; } tmp_txt_n = parse(&tok, tmp_txt, max_txt_size); #ifdef MAIN_TEXT_ONLY print_maxlen_tag(tmp_txt); #endif #ifdef DEB PRN("html --> tmp_txt: %d --> %d\n", html_size, tmp_txt_n); PRN("tmp_txt strlen=%d, max_txt_size=%d\n", strlen(tmp_txt), max_txt_size ); //printf("%s", tmp_txt); #endif txt_n = StrUtil::trim_text(txt_mem, tmp_txt, max_txt_size); #ifdef DEB PRN("trim text: %d --> %d\n", tmp_txt_n, txt_n); PRN("txt_mem strlen=%d\n", strlen(txt_mem) ); #endif delete [] tmp_txt; return txt_n; }
/**
	Load the EXTRACT cron jobs from the "cronjob" DB table.

	The table is sized from the row count (plus 2 spare slots), then every row
	whose job-type column (6) equals 2 is handed to insertCronJob() as six
	strings: columns 1..5 plus the trimmed column 7.

	return : number of jobs passed to insertCronJob(); 0 if the query fails.
*/
int extractCron::loadTable_DB()
{
	int n = 0;
	DbQuery query;
	char *cronjob[6];

	int tabsize = dbStore::countRows("cronjob");
	PRN("NUMBER of cronjob = %d\n", tabsize);
	if (tabsize < 0) {
		PRN("DB query fail\n");
		// Keep going as before, but never size the table from a negative count.
		tabsize = 0;
	}

	tabsize += 2;
	this->allocTable(tabsize);

	if ( query.exec(dbStore::dbConn, "select * from cronjob") < 0) {
		PRN("DB query error!\n");
		return 0;
	}

	while (query.fetch()) {
		/*
		@todo
		jobtype == 1 : HARVEST
		jobtype == 2 : EXTRACT
		*/
		const int jobtype = query.colIntVal(6);
		if (jobtype != 2)
			continue;

		for (int i=0; i<5; i++)
			cronjob[i] = query.colStrVal(i+1);
		//cronjob[5] = query.colStrVal(7);
		cronjob[5] = query.colStrVal_trimmed(7);

		this->insertCronJob( cronjob );
		n++;	// the counter was never updated before, so the result was always 0
	}

	return n;
}
void ArchiveEditable::add(const _TCHAR *fn) { ListItr li = find(fn); if(li == list_.end()) { struct _stat st; if( 0 == _tstat(fn, &st) ) { if(st.st_mode & _S_IFDIR) { // ディレクトリなら TDIR *dir; tdirent *ent; dir = topendir(fn); while(dir) { ent = treaddir(dir); if(!ent) break; if(!_tcscmp(ent->d_name, _T(".")) && !_tcscmp(ent->d_name, _T(".."))) { basic_string<_TCHAR> path(fn); (path += _T("/")) += ent->d_name; add(path.c_str()); PRN(path.c_str()); } } tclosedir(dir); } else { // 最後尾に追加 ArchiveEntry newe(fn); newe.time = st.st_mtime; newe.size = st.st_size; if(list_.size() > 0) newe.pos = list_.back().pos+list_.back().size; else newe.pos = 0; list_.push_back(EntryAttr(newe, EntryAttr::NEW)); } } } else { struct _stat st; if( 0 == _tstat(fn, &st) && (li->time != st.st_mtime || li->size != st.st_size)) { li->attr = EntryAttr::UPDATE; li->time = st.st_mtime; li->size = st.st_size; } else if(li->attr == EntryAttr::REMOVE) li->attr = EntryAttr::ALREADY; } }
int main(int argc, char *argv[]) { //declare the time period extern int monthDay[12]; extern int periodStartYear, periodStartMonth, periodStartDay, periodStartHour, periodStartMinute; extern int periodEndYear, periodEndMonth, periodEndDay, periodEndHour, periodEndMinute; FILE *fp, *tmpfp, *dest; int intbuf,qtm; qtm = 0; //type of scheduler char* scheduleType = (char*)malloc(10); //current number of array slot int eventSize = 80; //declare the event array, can be expanded its size twice to contain all events Event *event = (Event*)calloc(eventSize, sizeof(*event)); //printf("allocate calloc space + %d",sizeof(*event)); int i, j,k,l,m,havevalidday; int pid = 0; // initialize all the data in event array (may not needed) for (i=0; i<eventSize; i++) { strcpy(event[i].type, ""); strcpy(event[i].name, ""); event[i].startYear = 0; event[i].startMonth = 0; event[i].startDay = 0; event[i].startHour = 0; event[i].dueYear = 0; event[i].dueMonth = 0; event[i].dueDay = 0; event[i].dueHour = 0; event[i].duration = 0; event[i].priority = 0; event[i].handled = 0; event[i].startTimeValue = 0; event[i].dueTimeValue = 0; } printf(" ~~WELCOME TO ASKS~~\n\n"); //declare the buffer to contain the input string char input[100]; printf("Please enter:\n"); fgets(input, 100, stdin); //initialize the period of schedule char* tmp; char* tmpforrr; tmp = strtok(input, " "); tmp = strtok(NULL, "-"); periodStartYear = atoi(tmp); tmp = strtok(NULL, "-"); periodStartMonth = atoi(tmp); tmp = strtok(NULL, " "); periodStartDay = atoi(tmp); tmp = strtok(NULL, ":"); periodStartHour = atoi(tmp); tmp = strtok(NULL, " "); periodStartMinute = atoi(tmp); tmp = strtok(NULL, "-"); periodEndYear = atoi(tmp); tmp = strtok(NULL, "-"); periodEndMonth = atoi(tmp); tmp = strtok(NULL, " "); periodEndDay = atoi(tmp); tmp = strtok(NULL, ":"); periodEndHour = atoi(tmp); tmp = strtok(NULL, " "); periodEndMinute = atoi(tmp); // time table array int duryear = periodEndYear-periodStartYear+1; int timetable 
[duryear][12][31][24]; int max = (periodEndYear-periodStartYear)*1000000+periodEndMonth*10000+periodEndDay*100+periodEndHour; int min = periodStartMonth*10000+periodStartDay*100+periodStartHour; int dayAmount = 0; // initialization int tmptime; for(i=0; i<duryear; i++) { for(j=0; j<12; j++) { for(k=0; k<31; k++) { havevalidday = 0; for(l=0; l<24; l++) { tmptime = i*1000000+(j+1)*10000+(k+1)*100+l; if(tmptime<min||tmptime>max) timetable[i][j][k][l] = -2; //day do not need, disable else timetable[i][j][k][l] = -1; //valid day, give -1 if(k>monthDay[j]-1) timetable[i][j][k][l] = -2; //day do not exist, disable if(timetable[i][j][k][l] == -1) havevalidday = 1; } if(havevalidday == 1) dayAmount++; } } } int timetablebyday[dayAmount][24]; // for counting the number of used array slot to decide whether there is need to expand the array int eventCounter = 0; //initialize the formal input event and put them into correct Event data type. while(1) { printf("Please enter:\n"); char input[100]; fgets(input, 100, stdin); //decide whether this is the end of input if (input[0]=='a'&&input[1]=='d'&&input[2]=='d') { //initialize the type of event tmp = strtok(input, " "); for (i=3; i<strlen(tmp)+1; i++) { *(tmp+i-3) = *(tmp+i); } strcpy(event[eventCounter].type, tmp); //printf("%s\n",event[eventCounter].type); //initialize the name of event tmp = strtok(NULL, " "); strcpy(event[eventCounter].name, tmp); //printf("%s\n",event[eventCounter].name); if (strcmp(event[eventCounter].type, "Project") != 0) { //initialize the year of start time of this event tmp = strtok(NULL, "-"); event[eventCounter].startYear = atoi(tmp); //printf("%d\n",event[eventCounter].startYear); //initialize the month of start time of this event tmp = strtok(NULL, "-"); event[eventCounter].startMonth = atoi(tmp); //printf("%d\n",event[eventCounter].startMonth); //initialize the day of start time of this event tmp = strtok(NULL, " "); event[eventCounter].startDay = atoi(tmp); 
//printf("%d\n",event[eventCounter].startDay); //initialize the hour of start time of this event tmp = strtok(NULL, ":"); event[eventCounter].startHour = atoi(tmp); //printf("%d\n",event[eventCounter].startHour); tmp = strtok(NULL, " "); } //test whether the event is project or assignment if (strcmp(event[eventCounter].type, "Project") == 0 || strcmp(event[eventCounter].type, "Assignment") == 0) { //initialize the year of due time of this event tmp = strtok(NULL, "-"); event[eventCounter].dueYear = atoi(tmp); //initialize the month of due time of this event tmp = strtok(NULL, "-"); event[eventCounter].dueMonth = atoi(tmp); //initialize the day of due time of this event tmp = strtok(NULL, " "); event[eventCounter].dueDay = atoi(tmp); //initialize the hour of due time of this event tmp = strtok(NULL, ":"); event[eventCounter].dueHour = atoi(tmp); tmp = strtok(NULL, " "); int minute = atoi(tmp); if (minute > 0) { event[eventCounter].dueHour++; } } //initialize the duration tmp = strtok(NULL, " "); event[eventCounter].duration = atoi(tmp); //printf("%d\n",event[eventCounter].duration); tmp = strtok(NULL, " "); event[eventCounter].priority = atoi(tmp); //printf("%d\n",event[eventCounter].priority); //expand dynamic array to contain more event (may need more tests) eventCounter++; if (eventCounter == eventSize) { Event *tmpEvent = (Event*)calloc(eventSize, 80); memcpy(tmpEvent, event, eventSize*80); eventSize *= 2; event = (Event*)calloc(eventSize, 80); memcpy(event, tmpEvent, eventSize*40); free(tmpEvent); } } else if(strcmp(strtok(input, "-"), "runASKS ") == 0) { tmp = strtok(NULL, "|"); strcpy(scheduleType, tmp); // printf("---%s---\n",scheduleType); tmp = strtok(NULL, " "); tmp = strtok(NULL, " "); tmp = strtok(NULL, " "); tmp[strlen(tmp)-2] = '\0'; fp = fopen("report.dat", "wt"); tmpfp = fopen("analysis.dat","wt"); break; } //printf("%d\n", eventCounter); //testing code }//end of input module // put all class and sleep into the timetable int theYear; int theMonth; 
int theDay; int theHour; int timetablevalue; int periodStartTimeValue = periodStartMonth*10000+periodStartDay*100+periodStartHour; int periodEndTimeValue = duryear*1000000+periodEndMonth*10000+periodEndDay*100+periodEndHour; for(m=0; m<eventCounter; m++) { theYear = event[m].startYear - periodStartYear; theMonth = event[m].startMonth; theDay = event[m].startDay; theHour = event[m].startHour; if((event[m].startYear+event[m].startMonth+event[m].startDay+event[m].startHour)!=0) { // if start time has been input, calculate the starttimevalue event[m].startTimeValue = theYear*1000000+theMonth*10000+theDay*100+theHour; } else { // if start time not input, set it to the start of the period and calculate event[m].startYear = periodStartYear; event[m].startMonth = periodStartMonth; event[m].startDay = periodStartDay; event[m].startHour = periodStartHour; theYear = event[m].startYear - periodStartYear; theMonth = event[m].startMonth; theDay = event[m].startDay; theHour = event[m].startHour; event[m].startTimeValue = theYear*1000000+theMonth*10000+theDay*100+theHour; } if((event[m].dueYear+event[m].dueMonth+event[m].dueDay+event[m].dueHour)!=0) // calculate duetimevalue event[m].dueTimeValue = (event[m].dueYear-periodStartYear)*1000000+event[m].dueMonth*10000+event[m].dueDay*100+event[m].dueHour; // printf("%d %d %d %d\n",event[m].startYear,event[m].startMonth,event[m].startDay,event[m].startHour); // printf("%d %d\n ",event[m].startTimeValue,event[m].dueTimeValue); if(strcmp(event[m].type,"Class")==0||strcmp(event[m].name,"Sleep")==0) { // for class and sleep if(event[m].startTimeValue<periodStartTimeValue) { //printf("%s %d %d\n",event[m].name,event[m].startTimeValue,periodStartTimeValue); continue; } //printf("%s %s \n",event[m].type,event[m].name); for(i=theYear; i<duryear; i++) { //year for(j=0; j<12; j++) { //month for(k=0; k<31; k++) { //day for(l=0; l<24; l++) { //hour timetablevalue = (i)*1000000+(j+1)*10000+(k+1)*100+l+1;//time in this round of loop 
if(timetablevalue > event[m].startTimeValue &&timetable[i][j][k][l] == -1) { event[m].handled++; // 啊啊啊腰疼疼疼疼 //printf("handled\n"); timetable[i][j][k][l] = m; if(event[m].handled == event[m].duration) { // printf("%d %d %d %d %s\n",i+1,j+1,k+1,l+1,event[m].type); break; } }//if if(event[m].handled == event[m].duration) { break; } }//l if(event[m].handled == event[m].duration) { break; } }//k if(event[m].handled == event[m].duration) { break; } }//j if(event[m].handled == event[m].duration) { break; } }//i } } //printf("%s\n",scheduleType); if(strcmp(scheduleType, "FCFS ") == 0) { // printf("fcfs\n"); FCFS(event,eventCounter,duryear,timetable); scheduleType = "First Come First Serve"; } else if(strcmp(scheduleType, "SJF ") == 0) { // printf("sjf\n"); SJF(event,eventCounter,duryear,timetable); scheduleType = "Short Job First"; } else if(strcmp(scheduleType, "SRT ") == 0) { // printf("srt\n"); SRT(event,eventCounter,duryear,timetable); scheduleType = "Shortest Remaining Time"; } else if(strcmp(scheduleType, "PR p ") == 0) { // printf("pr p\n"); PRP(event,eventCounter,duryear,timetable); scheduleType = "priority with preemption"; } else if(strcmp(scheduleType, "PR n ") == 0) { // printf("pr n\n"); PRN(event,eventCounter,duryear,timetable); scheduleType = "priority without preemption"; } else { tmpforrr = strtok(scheduleType, " "); if(strcmp(tmpforrr,"RR ")) { qtm = atoi(strtok(NULL," ")); RRX(event,eventCounter,duryear,timetable,qtm); scheduleType = "Round Robin with quantum of "; } } /*for(i=0;i<duryear;i++){ for(j=0;j<12;j++){ // printf("month %d\n",j+1); for(k=0;k<31;k++){ // printf("day %d\n",k+1); for(l=0;l<24;l++){ if(timetable[i][j][k][l] != -2) printf("%d ",timetable[i][j][k][l]); } // printf("\n"); } } } printf("\n-----\n"); */ //prepare for multi-process int fd[2]; if(pipe(fd)<0) { printf("pipe creation error!\n"); exit(1); } pid = fork(); if(pid == 0) { //child for output... 
//printf("outer child %d %d\n",getpid(),getppid()); close(fd[1]); for(i=0; i<dayAmount; i++) { for(j=0; j<24; j++) { if(i==0&&j<periodStartHour) timetablebyday[i][j] = -2; else if(i==dayAmount-1&&j>periodEndHour) timetablebyday[i][j] = -2; else { read(fd[0],&intbuf,sizeof(int)); timetablebyday[i][j] = intbuf; //printf(" %d",timetablebyday[i][j]); } } } pid = fork(); if(pid == 0) { //child of child for output //printf("inner child %d %d\n",getpid(),getppid()); fprintf(fp, "Alex Timetable\n"); fprintf(fp, "%d-%02d-%02d %02d:%02d to %d-%02d-%02d %02d:%02d\n", periodStartYear, periodStartMonth, periodStartDay,periodStartHour, periodStartMinute, periodEndYear, periodEndMonth, periodEndDay, periodEndHour, periodEndMinute); fprintf(fp, "time\t"); for(i=0; i<duryear; i++) { for(j=0; j<12; j++) { for(k=0; k<31; k++) { havevalidday = 0; for(l=0; l<24; l++) { if(timetable[i][j][k][l] != -2) havevalidday = 1; } if(havevalidday == 1) fprintf(fp,"%d-%02d-%02d\t",i+periodStartYear,j+1,k+1); }//k }//j }//i fprintf(fp,"\n"); for(i=0; i<24; i++) { fprintf(fp,"%02d:00\t",i); for(j=0; j<dayAmount; j++) { if(timetablebyday[j][i] == -1) fprintf(fp,"N/A\t\t"); else if(timetablebyday[j][i] > -1) fprintf(fp,"%s \t",event[timetablebyday[j][i]].name); else fprintf(fp,"invalid time\t"); } fprintf(fp,"\n"); } fclose(fp); exit(0); } else { //parent of child for analysis //printf("child parent %d %d\n",getpid(),getppid()); fprintf(tmpfp,"\n\n**Summary of assignments allocation**\n\n"); fprintf(tmpfp,"Algorithms used: %s",scheduleType); if(qtm != 0) fprintf(tmpfp,"%d",qtm); fprintf(tmpfp,"\n"); int cnt=0; for(i=0; i<eventCounter; i++) { if(event[i].duration == event[i].handled) cnt++; } int turntime = 0; int tmpturntime = 0; int tmpwaittime = 0; int waittime = 0; fprintf(tmpfp,"There are %d assignments scheduled. 
Details are as follows\n",cnt); fprintf(tmpfp,"ASSIGNMENT\tSTART\t\t\tEND\t\t\tTURNAROUND TIME\n"); fprintf(tmpfp,"===========================================================================\n"); //finished calculate turnaround time and wait time for(m=0; m<eventCounter; m++) { tmpturntime = 0; tmpwaittime = 0; if(event[m].duration != event[m].handled)// not finished continue;//pass event[m].handled = 0; for(i=0; i<duryear; i++) { //year for(j=0; j<12; j++) { //month for(k=0; k<31; k++) { //day for(l=0; l<24; l++) { //hour if(timetable[i][j][k][l]==-2) continue; timetablevalue = (i)*1000000+(j+1)*10000+(k+1)*100+l;//time in this round of loop if(event[m].startTimeValue<timetablevalue+1) { tmpwaittime++; tmpturntime++; } if(timetable[i][j][k][l] == m) { if(event[m].handled==0) { event[m].startYear = i+periodStartYear; event[m].startMonth = j+1; event[m].startDay = k+1; event[m].startHour = l; } event[m].handled++; tmpwaittime--; if(event[m].handled == event[m].duration) { fprintf(tmpfp,"%s\t%d-%02d-%02d %02d:00\t%d-%02d-%02d %02d:00\t%dHrs\t\n",event[m].name,event[m].startYear,event[m].startMonth,event[m].startDay,event[m].startHour,i+periodStartYear,j+1,k+1,l,tmpturntime); turntime = turntime + tmpturntime; waittime = waittime + tmpwaittime; break; } } }//l if(event[m].handled == event[m].duration) { break; } }//k if(event[m].handled == event[m].duration) { break; } }//j if(event[m].handled == event[m].duration) { break; } }//i } fprintf(tmpfp," -end-\n"); fprintf(tmpfp,"\n===========================================================================\n"); fprintf(tmpfp,"**PERFORMANCE\n\n"); fprintf(tmpfp,"AVERAGE TURNAROUND TIME FOR ASSIGNMENTS: %d HRS\n",turntime/cnt); fprintf(tmpfp,"AVERAGE WAITINGTIME FOR ASSIGNMENTS: %d HRS\n",waittime/cnt); fprintf(tmpfp,"\n**Outstanding/Rejected List**\n\n"); m=0; for(i=0; i<eventCounter; i++) if(event[i].duration!=event[i].handled) { m++; fprintf(tmpfp,"%s %s\n",event[i].type,event[i].name); } fprintf(tmpfp,"There are %d events 
rejected.\n\n\n",m); fclose(tmpfp); //remove("analysis.dat"); waitpid(); exit(0); } } else { //parent close(fd[0]); //printf("parent %d %d\n",getpid(),getppid()); for(i=0; i<duryear; i++) { for(j=0; j<12; j++) { for(k=0; k<31; k++) { for(l=0; l<24; l++) { if(timetable[i][j][k][l] != -2) { intbuf = timetable[i][j][k][l]; // printf(" %d \n ",intbuf); write(fd[1],&intbuf,sizeof(int)); } } } } } waitpid(); sleep(1); fp = fopen("report.dat", "r"); tmpfp = fopen("analysis.dat","r"); dest = fopen(tmp,"a+"); char byte; while (!feof(fp)) { fread(&byte, sizeof(char), 1, fp); fwrite(&byte, sizeof(char), 1, dest); } close(fp); while (!feof(tmpfp)) { fread(&byte, sizeof(char), 1, tmpfp); fwrite(&byte, sizeof(char), 1, dest); } close(tmpfp); close(dest); remove("report.dat"); remove("analysis.dat"); exit(0); } }
/** HTML 문서를 parsing 하면서 text로 변환. */ int htmlParse::parse(stToken *tok, char *text, int maxlen) { int tok_val=0, prev_tok_val=0, prev_ch=0; int is_in_tag=0; uint4 tmp=0; int not_moved=0; char *start=text; stTagProc *curr_tag=0; //stTagProc **prev_tag=0; stEntityProc *ent_proc; int tag_depth=0; int is_in_BODY_tag=0; int is_in_A_tag=0; int is_in_PRE_tag=0; // 2003.11.8 int hlink_chars = 0; //int newline_delayed=0; // Optional Pair && Newline tag가 시작한 경우 TRUE //int dont_newline=0; // TRUE means "dont write newline" int pair_mark=0; int words_in_line=0; int b_tag_first_in_line=0; // 라인 첫머리에 B tag가 나온 경우 int glossary_marked=0; int is_glossary=0; tok->offset = 0; tok->line_num=1; *text = 0; maxlen -= 20; while(tok->offset < tok->src_len && (int)((uint4)text-(uint4)start) < maxlen) { tmp = tok->offset; htmlLex::get_token_mem(tok); #ifdef BUG if (tok->tok_len>=20) { printf("long tok: line=%d, tok_val=%d, CH=%c\n", tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]); fflush(stdout); printf("tok=%s\n", tok->tok_str); fflush(stdout); } #endif #ifdef DEB /* if (tok->tok_val==0 || tok->line_num==1) { printf("line=%d, tok_val=%d, CH=%c\n", tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]); printf("offset=%d, text-start=%d\n", tok->offset, text-start); } */ #endif if (text > start) { prev_ch = *(text-1); if (prev_ch=='\n') { words_in_line = 0; b_tag_first_in_line = 0; glossary_marked = 0; } } if (tok->tok_val == TOK_EOF) break; if (tok->tok_val == URL_CMNT) { //printf("%s", tok->tok_str); continue; } if (tok->offset==tmp) { #ifdef BUG //printf("parse(): offset not moved: line=%d, CH=%d(%c)\n", // tok->line_num, tok->src_mem[tok->offset-1], tok->src_mem[tok->offset-1]); #endif tok->offset++; if (++not_moved >= 2) break; else continue; } tok_val = tok->tok_val; if (tok_val == BTAG_BEGIN) { skip_to_two_token(tok, STRING, TAG_END); if (tok->tok_val != STRING) { // must be a HTML tag skip_to_token(tok, TAG_END); continue; } //prn_tabs(); //fprintf(log,"%s-->:%d :%d\n", 
tok->tok_str, tok->line_num, tok->offset); curr_tag = htmlTagEntity::tag_idx(tok->tok_str); if (curr_tag == NULL) { skip_to_token(tok, TAG_END); continue; } tag_depth++; #ifdef MAIN_TEXT_ONLY if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag) push_tag(curr_tag, tag_depth, (int)((uint4)text-(uint4)start)); if (curr_tag==htmlTagEntity::BODY_tag) { is_in_BODY_tag = 1; } #endif if (curr_tag==htmlTagEntity::A_tag) { is_in_A_tag = 1; } else if (curr_tag==htmlTagEntity::PRE_tag) { is_in_PRE_tag = 1; } else if(curr_tag==htmlTagEntity::B_tag||curr_tag==htmlTagEntity::DT_tag) { if (words_in_line==0) b_tag_first_in_line = 1; } else if (curr_tag==htmlTagEntity::TITLE_tag) { get_hyperlink_title(tok->src_mem + tok->offset , htmlTitle, HTMLTITLE_LEN); #ifdef DEB PRN("tok->scr_mem=%X, offset=%d, title=%X\n", tok->src_mem, tok->offset, htmlTitle); PRN("title: %s (%d)\n", htmlTitle, strlen(htmlTitle) ); #endif is_glossary = is_glossay_mode(htmlTitle); } else if (curr_tag==htmlTagEntity::BR_tag) { *text++ = '\n'; } if (curr_tag->proc == 0) { // 0 if (curr_tag->pair != Not_PAIR) skip_ignore_part(tok, curr_tag); else skip_to_token(tok, TAG_END); if (curr_tag->newline) { if (prev_ch != '\n') *text++ = '\n'; } continue; } if (curr_tag==htmlTagEntity::TITLE_tag) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '['; *text++ = '['; } else if (curr_tag->proc == 1) { } else if (!pair_mark && curr_tag->proc == 2) { pair_mark = 2; *text++ = '<'; } else if (!pair_mark && curr_tag->proc == 3) { pair_mark = 3; *text++ = '\''; } else if (!pair_mark && curr_tag->proc == 4) { pair_mark = 4; *text++ = '['; } else if (curr_tag->proc == 5) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '*'; *text++ = ' '; words_in_line = 0; } else if (curr_tag->proc == 6) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '\n'; } skip_to_token(tok, TAG_END); if (!pair_mark && prev_ch != ' ') *text++ = ' '; //if (!ISSPACE(*text)) *text++ = ' '; }// BTAG_BEGIN '<' else if (tok_val == 
ETAG_BEGIN) { skip_to_two_token(tok, STRING, TAG_END); if (tok->tok_val != STRING) { // must be a HTML tag skip_to_token(tok, TAG_END); continue; } curr_tag = htmlTagEntity::tag_idx(tok->tok_str); if (curr_tag == NULL) { skip_to_token(tok, TAG_END); continue; } #ifdef MAIN_TEXT_ONLY tag_depth--; if (curr_tag==htmlTagEntity::BODY_tag) { is_in_BODY_tag = 0; } if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag) end_tag(curr_tag, (int)((uint4)text-(uint4)start)); #endif if (curr_tag==htmlTagEntity::A_tag) { is_in_A_tag = 0; #ifdef MARK_A_LINK *text++ = '}'; #endif } else if (curr_tag==htmlTagEntity::PRE_tag) { is_in_PRE_tag = 0; } else if(curr_tag==htmlTagEntity::DT_tag || (curr_tag==htmlTagEntity::B_tag && (b_tag_first_in_line && is_glossary))) { if (glossary_marked==0) { *text++ = ' '; *text++ = ':'; //*text++ = ' '; b_tag_first_in_line = 0; glossary_marked = 1; } } else if (curr_tag==htmlTagEntity::TITLE_tag) { *text++ = ']'; *text++ = ']'; *text++ = '\n'; } else if (curr_tag->proc == 1) { } else if (pair_mark==2 && curr_tag->proc == 2) { pair_mark = 0; *text++ = '>'; } else if (pair_mark==3 && curr_tag->proc == 3) { pair_mark = 0; *text++ = '\''; } else if (pair_mark==4 && curr_tag->proc == 4) { pair_mark = 0; *text++ = ']'; } else if (curr_tag->proc == 5) { } else if (curr_tag->proc == 6) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '\n'; } if (curr_tag->newline) { *text++ = '\n'; } skip_to_token(tok, TAG_END); }// ETAG_BEGIN '</' else if (tok_val == STAG_END) { skip_to_token(tok, TAG_END); is_in_tag = 0; tag_depth--; } else if (tok_val == TAG_EXC) { tmp = skip_to_token(tok, TAG_END); //fprintf(log,"<- > skipped %d\n", tmp); } else if (tok_val == CMNT_BEGIN) { //fprintf(log,"Cmnt -->: %d :%d\n", tok->line_num, tok->offset); htmlLex::skip_to_cmnt_end(tok); //fprintf(log,"Cmnt <--: %d :%d\n", tok->line_num, tok->offset); } else if (tok_val == ENTITY_STR) { if (prev_ch !=' ') *text++ = ' '; //2002.12.2 ent_proc = 
htmlTagEntity::entity_idx(tok->tok_str); if (ent_proc && ent_proc->conv[0]) { #ifdef BUG //prn_ent_proc(ent_proc); #endif strcpy(text, ent_proc->conv); text += strlen(ent_proc->conv); } else { //fprintf(log,"ignored entity = %s\n", tok->tok_str); } } else if (tok_val == ENTITY_NUM) { if (prev_ch !=' ') *text++ = ' '; //2002.12.2 ent_proc = htmlTagEntity::entity_id_idx(tok->tok_realval); if (ent_proc && ent_proc->conv[0]) { #ifdef BUG //prn_ent_proc(ent_proc); #endif strcpy(text, ent_proc->conv); text += strlen(ent_proc->conv); } else { if (tok->tok_realval < 0x80) { // 2005.7.19 *text = (char)tok->tok_realval ; text++; } } } else { char *t = text; if (is_in_PRE_tag || tok->tok_len>1 || tok_val == STRING || tok_val == NUMBER) { //if (prev_tok_val == TAG_END && *text != ' ') *text++ = ' '; // 2003. 3.20 //if (prev_tok_val == TAG_END && isalnum(prev_ch) ) // *text++ = ' '; words_in_line++; strcpy(text, tok->tok_str); text += tok->tok_len; } // 2002.10.16 한글은 space로 전환 else if (tok->tok_val == FR_STR) { if (do_prn_hangul) { if (prev_ch != ' ') *text++ = ' '; strcpy(text, tok->tok_str); text += tok->tok_len; words_in_line++; } else { if (prev_ch != ' ') *text++ = ' '; } } else if (tok->tok_len==1) { if (curr_tag==htmlTagEntity::PRE_tag) { #ifdef ODD_CHAR if ( (tok_val & 0xF0) != 0x90) *text++ = tok_val; else if (tok_val==0x92) { if (prev_tok_val != 0x92) *text++ = '\''; } #else *text++ = tok_val; #endif } //else if (tok_val == '\n' || tok_val == '\r') { else if (ISSPACE(tok_val)) { if (prev_ch != ' ') *text++ = ' '; } else { if (prev_ch==':' && tok_val==':') { } else *text++ = tok_val; } } if (is_in_A_tag) hlink_chars += (int)(text - t); } prev_tok_val = tok->tok_val; }// while(1) *text = 0; if ((int)(text-start) >= maxlen-5) { PRN("parse(): too far !! 
maxlen=%d, %d\n", maxlen, text-start); } if ((int)(text-start) < maxlen-5) memset(text, 0, 4); #ifdef DEB PRN("text=%X, start=%X, text=%d, start=%d\n", text, start, (int)text % 10000, (int)start % 10000); PRN("start[0]=%d text[0]=%d\n", start[0], text[0]); #endif return ((int)text-(int)start); }
/*
 * Program entry point.
 * Passes the unquoted token sequence `Hello from main()` to the PRN macro
 * (presumably a stringizing print macro -- confirm against its definition)
 * and reports success to the caller.
 */
int main()
{
    PRN(Hello from main());

    return 0;
}
static void meta(void) { register int i; register char *s, *r; register char *prn_bufp; memcpy(set[0], "abcdefghijklmno", 15); prn_bufp = prn_buf; level = 1; stack[0] = 1; iter: if (level == 0) { PRN_LASTFLUSH(); return; } s = set[level-1]; if (level == n) { PRN('w'); PRN('r'); PRN('i'); PRN('t'); PRN('e'); PRN('l'); PRN('n'); PRN('('); PRN(s[0]); for (i = 1; i < n; i++) { PRN(','); PRN(s[i]); } PRN(')'); PRN('\n'); level--; goto iter; } r = set[level]; if ((k = stack[level - 1]) < 0) { level--; goto iter; } if (k > 0) { if (k != level) { /* printf("else "); */ PRN('e'); PRN('l'); PRN('s'); PRN('e'); PRN(' '); } /* printf("if %c < %c then\n", s[k-1], s[level]); */ PRN('i'); PRN('f'); PRN(' '); PRN(s[k-1]); PRN(' '); PRN('<'); PRN(' '); PRN(s[level]); PRN(' '); PRN('t'); PRN('h'); PRN('e'); PRN('n'); PRN('\n'); } else { /* printf("else\n"); */ PRN('e'); PRN('l'); PRN('s'); PRN('e'); PRN('\n'); PRN_CHKFLUSH(); } r[k] = s[level]; for (i = 0; i < k; i++) *r++ = *s++; for (r++; i < level; i++) *r++ = *s++; for (i++,s++; i < n; i++) *r++ = *s++; stack[level - 1]--; stack[level] = level + 1; level++; goto iter; }
/**
	Decide whether a link URL is covered by this traversal expression.

	Checks, in order:
	  1. host          : without host_span the link host must start with the root host
	  2. exclude dirs  : a path below any exclude dir is rejected
	  3. file pattern  : when a pattern is set the link file must match it
	  4. keyword dirs  : a keyword occurring in the path as a whole word accepts the link
	  5. root dir      : otherwise the link must lie below the root URL's directory
	  6. include dirs  : finally a path below any include dir accepts the link

	return : LINK_MATCH_TRUE, LINK_MATCH_FALSE or LINK_MATCH_UNDEF

	Test result. With a root such as
		"http://www.cnn.com/ASIA -D 1 -I /WORLD,/2005/SPORT"
	ASIA is recognized as a file.
*/
int TravExpr::match(URLParse &linkurl)
{
	int i, n;
	int match=LINK_MATCH_UNDEF;
	const char *ptr;
	URLst *hlink = &(linkurl.url);
	URLst *rootURL = &(rootURLparse.url);

	// host check
	if (expr.host_span==0 && hlink->host &&
		(strncmp(hlink->host, rootURL->host, rootURL->host_len)!=0)) {
		if (debug) PRN("match(): false by host\n");
		return LINK_MATCH_FALSE;
	}

	// exclude dir
	for(i=0; i<(const int)expr.exc_dirs.num(); i++) {
		if (isSubdir (hlink->path, expr.exc_dirs.str(i)) ) {
			match = LINK_MATCH_FALSE;
			if (debug) PRN("match(): false by exclude dir\n");
			return LINK_MATCH_FALSE;
		}
	}

	// wildcard match ($* $?) on the file name :: AND relation with keyword dir, inc dir
	if (expr.filename) {
		if (StrCmp::URLwildcmp(expr.filename, hlink->file)==0)
			match = LINK_MATCH_TRUE;
		else {
			match = LINK_MATCH_FALSE;
			if (debug) PRN("match(): false by filename\n");
			return LINK_MATCH_FALSE;
		}
	}

	// dir containing a keyword
	for(i=0; hlink->path!=0 && i<(const int)expr.key_dirs.num(); i++) {
		const char *key = expr.key_dirs.str(i);
		n = strlen(key);
		// Look for the keyword inside the link path. The arguments used to be
		// swapped (path searched for inside the keyword), while the checks
		// below index the result as a position in the path.
		ptr = strstr(hlink->path, key);
		// the keyword occurs in the dir name, not embedded in a longer word
		if (ptr!=0 && !isalpha((unsigned char)ptr[n]) &&
			(ptr==hlink->path || !isalpha( (unsigned char)*(ptr-1) ) ) ) {
			match = LINK_MATCH_TRUE;
			break;
		}
	}

	if (match!=LINK_MATCH_TRUE) {
		//---- only what lies below the dir of the URL expr's ROOT URL is included --> changed
		n = rootURL->dir_len;
		if (n<=1 && rootURL->file_len!=0) {
			// For a root like http://news.nationalgeographic.com/iraq.html
			// everything below '/' would be included.
			// So when the DIR is '/', TRUE is only possible if the root URL is not a file.
			match=LINK_MATCH_FALSE;
		}
		else if (n<=1 || linkurl.isSubDir(rootURLparse) ) {
			match=LINK_MATCH_TRUE;
			return LINK_MATCH_TRUE;
		}
		else
			match=LINK_MATCH_FALSE;

		if (debug) PRN("match(): root path match = %d\n", match);
	}

	// include dir
	if (match != LINK_MATCH_TRUE) {
		// a TRUE from the keyword dirs needs no further check here
		// root URL and link path differ: check the include dirs
		for(i=0; i<(const int)expr.inc_dirs.num(); i++) {
			if (isSubdir (hlink->path, expr.inc_dirs.str(i)) ) {
				match = LINK_MATCH_TRUE;
				break;
			}
		}
		// only dirs listed as include dirs qualify (2002.9.1)
		if (expr.inc_dirs.num() && match != LINK_MATCH_TRUE)
			match = LINK_MATCH_FALSE;
	}

	return match;
}