void reduce_do(char* buf, const size_t total, const std::string &out_folder){ yucha::tool::recursive_mkdir(out_folder.c_str()); size_t prev = 0; size_t split = 0; std::pair<size_t , size_t> p; MyHash h(buf); MyHashEq e(buf); MYMAP uni(3 , h , e); { size_t spaces[3]; for(size_t i = 0 ; i < total; ++i){ prev = i; unsigned short int count = 0; bool error = false; for(; buf[i] != '\n'; ++i){ //get freq if(buf[i] == '\t'){ if (count==2){ error = true; } else{ spaces[count] = i; ++count; }; }; }; if (count!=2 or error) continue; buf[spaces[0]] = ' '; const size_t end_1 = spaces[0]; const size_t end_2 = spaces[1]; const size_t tail = i; std::pair<size_t , size_t> word1(prev, end_1); std::pair<size_t , size_t> word2(end_1+1 , end_2); buf[tail] = '\0'; const unsigned long freq = atoi(&buf[end_2]); add(uni, word1, freq); add(uni, word2, freq); }; } printout(1, uni, out_folder, buf, 0); };
/* * PORTB:デコーダー1,2: p~i:PORTB 7~4 RB7:C RB6:B RB5:A RB4:G1 H~A:8~0 RB3:C RB2:B RB1:A RB0:G1 * PORTD:16~9 Anode: RD7~RD0 * PORTC:8~1 Anode: RC7~RC0 */ void main( ) { TRISB = 0x00;//PORTB output TRISC = 0x00;//PORTC output TRISD = 0x00;//PORTD output PORTB = 0x00;//portb init PORTC = 0x00;//portc init PORTD = 0x00;//portd init //メモリ足りない;配列は使えない。 unsigned int w1[16]={0x1024,0x0c24,0x0324,0xfffe,0x0122,0x8623,0xc0c1,0x6078,0x3800,0x0f00,0x01ff,0x0f00,0x3840,0x6070,0xc01c,0x8000}; unsigned int w2[16]; unsigned int tmp[16]; while(1){ copy(w1,tmp); copy(w1,w2); for(int i=0;i<30;i++){ word2(w2); } scroll(w2,tmp); } }
// la procedure ajout_mot comme son nom l'indique ajoute un mot donne // dans l'index void ajout_mot (mot **tableau,char chainn[50],int k){ mot *ptr,*ptr1,*ptr2; page *ptrpg; int z,f; if(tableau[tolower(chainn[0]) - 'a'] == NULL){ ptr =(mot *)malloc(sizeof(mot )); tableau[tolower(chainn[0]) - 'a'] = ptr ; ptr->motindex=(char *)malloc(strlen(chainn)+1); strcpy(ptr->motindex,chainn); ptr->next=NULL; ptr->prempg=(page *)malloc(sizeof(page )); ptr->prempg->nbrocc=1; ptr->prempg->numpg=k; ptr->prempg->next=NULL; ptr->derpg=(page *)malloc(sizeof(page)); ptr->derpg=ptr->prempg; } else { ptr = tableau[tolower(chainn[0]) - 'a'] ; z = word(tableau,chainn,k); printf("======================> Z :%d\n",z); switch (z) { case 3 : ptrpg = ptr->prempg; ptr->prempg=NULL; ptr->prempg=(page *)malloc(sizeof(page)); ptr->prempg->nbrocc=1; ptr->prempg->numpg=k; ptr->prempg->next = ptrpg; break ; case 2 : ptr = word2(tableau[tolower(chainn[0]) - 'a'],chainn,k); if (ptr==NULL){ printf("we have an error \n"); } ptr->derpg->nbrocc++; ptr->derpg->next=NULL; break ; case 1: ptr = word2(tableau[tolower(chainn[0]) - 'a'],chainn,k); if (ptr == NULL) { printf("we have problem"); } ptrpg=(page *)malloc(sizeof(page )); ptr->derpg->next=ptrpg; ptr->derpg=ptrpg; ptrpg->next=NULL; ptrpg->nbrocc=1; ptrpg->numpg=k; break ; case 0: ptr1 = tableau[tolower(chainn[0]) - 'a'] ; f=0; while(strcasecmp(chainn,ptr->motindex)>0){ f=1; ptr1 = ptr; ptr = ptr->next; if (ptr == NULL){ break; } } if (f==0){ ptr2 = (mot *)malloc(sizeof(mot )); tableau[tolower(chainn[0]) - 'a'] =ptr2; ptr2->next=ptr1; ptr2->motindex=(char *)malloc(strlen(chainn)+1); strcpy(ptr2->motindex,chainn); ptr2->prempg=(page *)malloc(sizeof(page )); ptr2->prempg->nbrocc=1; ptr2->prempg->numpg=k; ptr2->prempg->next=NULL; ptr2->derpg = (page * )malloc(sizeof(page)); ptr2->derpg=ptr2->prempg; }else { ptr2 = (mot *)malloc(sizeof(mot )); ptr2->motindex=(char *)malloc(strlen(chainn)+1); strcpy(ptr2->motindex,chainn); ptr2->prempg=(page *)malloc(sizeof(page )); ptr2->prempg->nbrocc=1; ptr2->prempg->numpg=k; ptr2->prempg->next=NULL; ptr2->derpg=(page *)malloc(sizeof(page)); ptr2->derpg=ptr2->prempg; ptr2->next=ptr1->next; ptr1->next=ptr2; } break ; } } }
void creat_index (mot* *tab,char name[26]) { page *ptrpg; mot *ptr,*ptr1,*ptr2; int s = 0, z = 0,f; float num_page ; for (s = 0 ; s <26 ; s++) { tab[s]=NULL; } s=0 ; // on a utilise la variable s comme compteur pour compter le nbr du ligne :) char chain[100],chain1[100]; FILE *fp; fp = fopen(name,"rt"); if (fp == NULL) { printf("\n\n\t Impossible d'ouvrir le fichier \n") ; printf(" veuillez verifier Que le Fichier %s exist et dans le meme directory\n\n\n\n",name); Sleep(5000); exit (3); } else { while (fgets (chain,100,fp)!= NULL) { // on prend une ligne de 100 carectere s = s +1 ; // on a utilise la variable s comme compteur pour compter le nbr du ligne :) if (strlen(chain) != 1 ) { // on verifie si la chaine n'est pas vide or \n only write(chain); // on ecrase et on ecrit la ligne dans un fichier tmp.txt FILE *ft ; // on ouvre le fichier tmp.txt cette fois en mode lire seulement ft = fopen("tmp.txt","rt"); while (fscanf(ft,"%s",chain1) != EOF) { float j ; j = s ; // on lit jusqu"a la fin du fichier tmp.txt Qui contient une seul ligne num_page=ceil(j/lp); if (check(chain1) !=1 && strlen(chain1) > 2 && isalpha(chain1[0])) { if(tab[tolower(chain1[0]) - 'a'] == NULL){ ptr =(mot *)malloc(sizeof(mot )); tab[tolower(chain1[0]) - 'a'] = ptr ; ptr->motindex=(char *)malloc(strlen(chain1)+1); strcpy(ptr->motindex,chain1); ptr->next=NULL; ptr->prempg=(page *)malloc(sizeof(page )); ptr->prempg->nbrocc=1; ptr->prempg->numpg=(int )num_page; ptr->prempg->next=NULL; ptr->derpg=(page *)malloc(sizeof(page)); ptr->derpg=ptr->prempg; } else { ptr = tab[tolower(chain1[0]) - 'a']; z = word(tab,chain1,(int )num_page); switch (z) { case 3 : ptrpg = ptr->prempg; ptr->prempg=(page *)malloc(sizeof(page)); ptr->prempg->nbrocc=1; ptr->prempg->numpg=(int )num_page; ptr->prempg->next = ptrpg; break ; case 2 : ptr = word2(tab[tolower(chain1[0]) - 'a'],chain1,(int )num_page); if (ptr==NULL){ printf("we have an error \n"); } ptr->derpg->nbrocc++; break ; case 1 : ptr = word2(tab[tolower(chain1[0]) - 'a'],chain1,( int )num_page); if (ptr == NULL) { printf("we have problem"); } ptrpg=(page *)malloc(sizeof(page )); ptr->derpg->next=ptrpg; ptr->derpg=ptrpg; ptrpg->next=NULL; ptrpg->nbrocc=1; ptrpg->numpg=(int )num_page; break ; case 0 : ptr1 = tab[tolower(chain1[0]) - 'a'] ; f=0; while(strcasecmp(chain1,ptr->motindex)>0){ f=1; ptr1 = ptr; ptr = ptr->next; if (ptr == NULL){ break; } } if (f==0){ ptr2 = (mot *)malloc(sizeof(mot )); tab[tolower(chain1[0]) - 'a'] =ptr2; ptr2->next=ptr1; ptr2->motindex=(char *)malloc(strlen(chain1)+1); strcpy(ptr2->motindex,chain1); ptr2->prempg=(page *)malloc(sizeof(page )); ptr2->prempg->nbrocc=1; ptr2->prempg->numpg=(int )num_page; ptr2->prempg->next=NULL; ptr2->derpg=ptr2->prempg; }else { ptr2 = (mot *)malloc(sizeof(mot )); ptr2->motindex=(char *)malloc(strlen(chain1)+1); strcpy(ptr2->motindex,chain1); ptr2->prempg=(page *)malloc(sizeof(page )); ptr2->prempg->nbrocc=1; ptr2->prempg->numpg=(int)num_page; ptr2->prempg->next=NULL; ptr2->derpg=(page *)malloc(sizeof(page)); ptr2->derpg=ptr2->prempg; ptr2->next=ptr1->next; ptr1->next=ptr2; } break ; default : printf("i guess we have a problem here \n"); break ; } } } ; }fclose(ft); } }remove("tmp.txt"); } }
void reduce_do(char* buf, const size_t total, const std::string &out_folder){ yucha::tool::recursive_mkdir(out_folder.c_str()); size_t prev = 0; size_t split = 0; std::pair<size_t , size_t> p; MyHash h(buf); MyHashEq e(buf); MYMAP uni(3 , h , e); MYMAP bi(3 , h , e); MYMAP tri(3 , h , e); { size_t spaces[3]; for(size_t i = 0 ; i < total; ++i){ prev = i; unsigned short int count = 0; unsigned short int tab_count = 0; bool error = false; for(; buf[i] != '\n'; ++i){ //get freq if (buf[i] == '\t'){ if (tab_count < 2){ buf[i] = '='; } else{ spaces[count] = i; ++count; }; ++tab_count; } else if(buf[i] == ' '){ //you can't use space buf[i] = '_'; } else if(buf[i] == ':'){ //SPLITTER is ':' if (count==3){ error = true; } else{ spaces[count] = i; ++count; }; }; }; if (count!=3 or error) continue; buf[spaces[0]] = ' '; buf[spaces[1]] = ' '; const size_t end_1 = spaces[0]; const size_t end_2 = spaces[1]; const size_t end_3 = spaces[2]; const size_t tail = i; std::pair<size_t , size_t> word1(prev, end_1); std::pair<size_t , size_t> word2(end_1+1 , end_2); std::pair<size_t , size_t> bi1(prev, end_2); std::pair<size_t , size_t> word3(end_2+1 , end_3); std::pair<size_t , size_t> bi2(end_1+1, end_3); std::pair<size_t , size_t> tri1(prev, end_3); buf[tail] = '\0'; const unsigned long freq = atoi(&buf[end_3]); add(uni, word1, freq); add(uni, word2, freq); add(uni, word3, freq); add(bi, bi1, freq); add(bi, bi2, freq); add(tri, tri1, freq); }; } printout(1, uni, out_folder, buf, 0); printout(2, bi, out_folder, buf, 10000000); printout(3, tri, out_folder, buf, 10000000); };