示例#1
0
void TCpDoc::SaveAsfaToCpd(const TStr& InFPath, const TStr& OutCpdFNm){
  // create output file
  PSOut SOut=TFOut::New(OutCpdFNm);
  // traverse files
  TStrH AccessionIdH;
  TFFile FFile(TStr::GetNrFPath(InFPath)+"*.Asfa"); TStr AsfaFNm;
  while (FFile.Next(AsfaFNm)){
    printf("Processing file '%s'\n", AsfaFNm.CStr());
    PSIn SIn=TFIn::New(AsfaFNm);
    TILx Lx(SIn, TFSet(iloRetEoln, iloExcept));
    Lx.GetSym(syLn, syEof);
    while (Lx.Sym!=syEof){
      // Query Line
      TStr QueryLnStr=Lx.Str;
      TStrV QueryStrV; QueryLnStr.SplitOnAllCh('\t', QueryStrV, false);
      IAssert(QueryStrV[0]=="Query");
      // RecordNo Line
      Lx.GetSym(syLn); TStr RecNoLnStr=Lx.Str;
      TStrV RecNoStrV; RecNoLnStr.SplitOnAllCh('\t', RecNoStrV, false);
      IAssert(RecNoStrV[0]=="RecordNo");
      //int RecN=RecNoStrV[1].GetInt();
      // fields (format: Short-Name Tab Long-Name Tab Value-String)
      TStr TitleStr, AbstractStr, PublicationYearStr, AccessionId;
      TStrV AuthorNmV; TStrV TermNmV1, TermNmV2;
      while (true){
        Lx.GetSym(syLn); TStr FldLnStr=Lx.Str;
        TStrV FldStrV; FldLnStr.SplitOnAllCh('\t', FldStrV, false);
        if (FldStrV[0]=="----"){
          if (!AccessionIdH.IsKey(AccessionId)){
            AccessionIdH.AddKey(AccessionId);
            // create & save cpd document
            PCpDoc CpDoc=TCpDoc::New();
            CpDoc->DocNm=AccessionId;
            CpDoc->DateStr=PublicationYearStr;
            CpDoc->TitleStr=TitleStr;
            CpDoc->ParStrV.Add(AbstractStr);
            CpDoc->TopCdNmV=TermNmV1;
            CpDoc->GeoCdNmV=TermNmV2;
            CpDoc->IndCdNmV=AuthorNmV;
            CpDoc->Save(*SOut);
          } else {/*printf("[%s]", AccessionId.CStr());*/}
          break;
        } else
        if (FldStrV[0]=="TI"){
          TitleStr=FldStrV[2];
        } else if (FldStrV[0]=="TI"){
          TitleStr=FldStrV[2];
        } else if (FldStrV[0]=="AU"){
          FldStrV[2].SplitOnAllCh(';', AuthorNmV);
          for (int StrN=0; StrN<AuthorNmV.Len(); StrN++){AuthorNmV[StrN].ToTrunc();}
        } else if (FldStrV[0]=="AB"){
          AbstractStr=FldStrV[2];
        } else if (FldStrV[0]=="PY"){
          PublicationYearStr=FldStrV[2];
        } else if (FldStrV[0]=="DE"){
          FldStrV[2].SplitOnAllCh(';', TermNmV1);
          for (int StrN=0; StrN<TermNmV1.Len(); StrN++){TermNmV1[StrN].ToTrunc();}
        } else if (FldStrV[0]=="CL"){
          FldStrV[2].SplitOnAllCh(';', TermNmV2);
          for (int StrN=0; StrN<TermNmV2.Len(); StrN++){TermNmV2[StrN].ToTrunc();}
        } else if (FldStrV[0]=="AN"){
          AccessionId=FldStrV[2];
        }
      }
      printf("%d\r", AccessionIdH.Len());
      Lx.GetSym(syLn, syEof);
    }
  }
}