//transform lib function //void StringParse(ifstream &ifs, string &in_file_name, vector<string> vec_str) { void StringParse(std::string &in_file_name) { //cut words sector EncodingConverter trans; std::vector<std::string> word_vec; //store word into vector std::vector<std::string>::iterator iter_word_vec; std::string line; //读入行 std::string content; //一次性读入全部 std::string out_file_name = in_file_name; //生成新的文件名,就是替换替换txt为utf8 std::ifstream ifs; //文本输入流 std::ofstream ofs; //文本输出流 out_file_name.erase(out_file_name.size() - 3, out_file_name.npos); out_file_name += "utf8"; OpenInFile(ifs, in_file_name); OpenOutFile(ofs, out_file_name); while (getline(ifs, line)) { content += line + "\n"; } ofs << trans.gbk_to_utf8(content) << std::endl; log_file << "tranform complete " << out_file_name << std::endl; ofs.close(); ifs.close(); }
/*
 * Interactive page lookup driven by an index file.
 *
 * The index file is loaded line by line into memory.  The function then reads
 * integers from std::cin until extraction fails; each integer is treated as a
 * 1-based line number into the index.  The selected index line is parsed as
 * three integers "doc_id start size", the library file is opened, positioned
 * at byte offset `start`, and lines are read while the stream position stays
 * within `start + size`.
 *
 * Numbers outside [1, number of index lines] print "out of index range" on
 * std::cout.  Index lines that do not contain three integers (or contain a
 * negative offset/size) are reported on std::cerr and skipped; previously the
 * three values were used uninitialised in that case.
 *
 * NOTE(review): the assembled page text is not returned or printed (the debug
 * print is commented out in the original), so the lookup currently has no
 * visible result.
 */
void SearchIndex(std::string &index_file_name, std::string &lib_file_name) {
    std::vector<std::string> index;
    std::string line;

    // Load the whole index into memory, one entry per line.
    std::ifstream ifs_index;
    OpenInFile(ifs_index, index_file_name);
    while (getline(ifs_index, line)) {
        index.push_back(line);
    }
    ifs_index.close();

    int num;
    while (std::cin >> num) {
        if (num < 1 || num > static_cast<int>(index.size())) {
            std::cout << "out of index range" << std::endl;
            continue;
        }

        int doc_id = 0;
        int start = 0;
        int size_ = 0;
        std::istringstream iss(index[num - 1]);
        if (!(iss >> doc_id >> start >> size_) || start < 0 || size_ < 0) {
            // Never seek with values that were not actually parsed.
            std::cerr << "malformed index entry at line " << num << std::endl;
            continue;
        }

        std::ifstream ifs_lib;
        OpenInFile(ifs_lib, lib_file_name);
        ifs_lib.seekg(start);

        // Collect the lines that fall inside [start, start + size_].
        std::string content;
        while (getline(ifs_lib, line) && ifs_lib.tellg() <= (start + size_)) {
            content += line + "\n";
        }
        //std::cout << content << std::endl;
        ifs_lib.close();
    }
}
void BuildPageLib(std::string &in_file_name, std::vector<std::string> &lib_vec) { std::string line; //读入行 std::string content; //一次性读入全部 std::string title; //title std::ofstream ofs; std::ifstream ifs; //文本输入流 OpenInFile(ifs, in_file_name); log_file << " processing file name: " << in_file_name << std::endl; getline(ifs, title, '\r'); //先取第一行作为标题 TrimR(title); content += title; while (getline(ifs, line, '\r')) { if (strncmp(line.c_str(), "\n【 标 题 】", 17) == 0) { //title.clear(); //title = std::string(text_title); title.clear(); title = line; std::cout << "titile: ~~~~~~" << line << std::endl; } /* 每行后面添加换行符 */ if (TrimR(line)) { content += line + "\n"; } else { content += line; } /* 每行后面不添加换行符, 整篇为一行 */ //content +=line; //char text_title[MAXSIZE]; //bzero(text_title, MAXSIZE); //strcpy(text_title, line.c_str()); //if (strncmp(text_title, "【 标 题 】", 16) == 0) { } std::string doc; char id[8]; bzero(id, 8); sprintf(id, "%d", (int)(lib_vec.size() + 1)); doc = "<doc><docid>" + std::string(id) + "</docid><url>" + in_file_name + "</url><title>" + title + "</title><content>" + content + "</content></doc>\n"; lib_vec.push_back(doc); ifs.close(); }
/**
 * Load the single spectrum stored in the file at `strPath`.
 *
 * Opens the file, reads one CSpectrum through ReadMHAndCharge and
 * ReadMZAndItensity, then replaces the contents of `S` with that spectrum.
 *
 * @param strPath  path of the file to open.
 * @param S        output; cleared and filled with exactly one spectrum on
 *                 success, left untouched on failure.
 * @throws runtime_error  carrying the CErrInfo text when opening or reading
 *                        fails.
 *
 * The input file is now closed before a read failure is reported; previously
 * it stayed open because CloseInFile() was only reached on the success path.
 */
void CDTASingleInput::LoadAll(string strPath, vector<CSpectrum> & S)
{
	try
	{
		OpenInFile(string(strPath));
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "LoadAll", "OpenInFile: Cannot open " + strPath + ".");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "LoadAll", "caught an unknown exception from OpenInFile: Cannot open " + strPath + ".");
		throw runtime_error(info.Get().c_str());
	}

	CSpectrum spec;
	try
	{
		ReadMHAndCharge(spec);
		ReadMZAndItensity(spec);
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "LoadAll", "ReadMHAndCharge & ReadMZandIntensity: Cannot read " + spec.m_strFilePath + ".");
		// Build the error while `e` is at hand, then release the file.
		runtime_error err(info.Get(e).c_str());
		CloseInFile();
		throw err;
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "LoadAll", "caught an unknown exception from ReadMHAndCharge & ReadMZandIntensity: Cannot read " + spec.m_strFilePath + ".");
		runtime_error err(info.Get().c_str());
		CloseInFile();
		throw err;
	}

	S.clear();
	S.push_back(spec);
	CloseInFile();
}
static int Load_Dump(void) //Загрузка дампа { HANDLE inFile; DWORD nb; if(GetName_InFile(NameFDump) < 0) return -1; //Ввод имени обрабатываемого файла if(OpenInFile(NameFDump, &inFile) < 0) return -1; //Открытие входного файла inBuf = (char *)MyAllocMem(inSize); if(inBuf == NULL) return -1; if(ReadFile(inFile, inBuf, inSize, &nb, NULL) == FALSE || nb != inSize) return ErrorSys2("Ошибка при чтении исходного файла", NameFDump); CloseFile(&inFile); //Закрыли входной файл AdrEnd = inBuf + inSize; Adr = inBuf; for(;;) //До конца файла { char *NewAdr = strchr(Adr, '\n'); if(NewAdr > AdrEnd || NewAdr == NULL) break; //Данные кончились и Больше нет ни одной полной строки *NewAdr = 0; //Ограничили строку Adr = NewAdr + 1; } return 0; }
/**
 * Read the spectrum from the file remembered in `m_strInPath`.
 *
 * Opens the file, fills `spec` through ReadMHAndCharge and
 * ReadMZAndItensity, and closes the file again.
 *
 * @param spec  output spectrum.
 * @param idx   not used by this implementation.
 * @throws runtime_error  carrying the CErrInfo text when the file cannot be
 *                        opened; exceptions from the read calls propagate
 *                        unchanged.
 *
 * The file is now closed when a read call throws; previously the exception
 * skipped CloseInFile() and left the file open.
 */
void CDTASingleInput::LoadNext(CSpectrum & spec, int & idx)
{
	try
	{
		OpenInFile(string(m_strInPath));
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "LoadNext", "OpenInFile: Cannot open " + m_strInPath + ".");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "LoadNext", "caught an unknown exception from OpenInFile: Cannot open " + m_strInPath + ".");
		throw runtime_error(info.Get().c_str());
	}

	try
	{
		ReadMHAndCharge(spec);
		ReadMZAndItensity(spec);
	}
	catch(...)
	{
		// Release the file, then let the original exception continue.
		CloseInFile();
		throw;
	}

	CloseInFile();
}
/**
 * Prepare for loading from `strPath`.
 *
 * Checks that the file can be opened, remembers the path in `m_strInPath`,
 * reports a total of one spectrum through `nTotal`, and closes the file
 * again.
 *
 * @param strPath  path of the input file.
 * @param nTotal   output; set to 1.
 * @throws runtime_error  carrying the CErrInfo text when the file cannot be
 *                        opened.
 *
 * The error context now names this function ("StartLoad"); it previously said
 * "StartWrite", which pointed readers of the message at the wrong routine.
 */
void CDTASingleInput::StartLoad(string strPath, int & nTotal)
{
	try
	{
		OpenInFile(strPath);
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "StartLoad", "OpenInFile: " + strPath + " failed.");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "StartLoad", "caught an unknown exception from OpenInFile: " + strPath + " failed.");
		info.Append("strPath=" + strPath);
		throw runtime_error(info.Get().c_str());
	}

	m_strInPath = strPath;
	nTotal = 1;

	// The open above is only a check; the file is reopened when data is read.
	CloseInFile();
}
/* convert file ipInFN to DOS format text and write to file ipOutFN
 *
 * The conversion is written to a temporary file created by
 * MakeTempFileFrom(ipOutFN, ...), which is renamed to ipOutFN on success and
 * unlinked on failure.  When ipFlag->KeepDate is set, the access and
 * modification times of ipInFN are copied to the result.
 *
 * When ipFlag->Force is 0 and regfile(ipInFN) is non-zero, NO_REGFILE is set
 * in ipFlag->status and nothing is converted; otherwise ipFlag->status is
 * reset to 0.
 *
 * A failed rename now yields -1 even when ipFlag->Quiet is set; before, Quiet
 * suppressed the error code together with the message.
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertUnixToDosNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag)
{
  int RetVal = 0;
  FILE *InF = NULL;
  FILE *TempF = NULL;
  char *TempPath = NULL;  /* stays NULL if MakeTempFileFrom does not set it */
  struct stat StatBuf;
  struct utimbuf UTimeBuf;
#ifndef NO_FCHMOD
  mode_t mask;
#endif
#ifdef NO_MKSTEMP
  FILE* fd;
#else
  int fd;
#endif

  if ((ipFlag->Force == 0) && regfile(ipInFN))
  {
    ipFlag->status |= NO_REGFILE ;
    return -1;
  }
  else
    ipFlag->status = 0 ;

  /* retrieve ipInFN file date stamp */
  if (stat(ipInFN, &StatBuf))
    RetVal = -1;

#ifdef NO_MKSTEMP
  if((fd = MakeTempFileFrom(ipOutFN, &TempPath))==NULL) {
#else
  if((fd = MakeTempFileFrom (ipOutFN, &TempPath)) < 0) {
#endif
    perror(_("unix2dos: Failed to open temporary output file"));
    RetVal = -1;
  }

#ifdef DEBUG
  fprintf(stderr, _("unix2dos: using %s as temporary file\n"), TempPath);
#endif

  /* can open in file? */
  if ((!RetVal) && ((InF=OpenInFile(ipInFN)) == NULL))
    RetVal = -1;

  /* can open output file? */
#ifdef NO_MKSTEMP
  if ((!RetVal) && (InF) && ((TempF=fd) == NULL))
#else
  if ((!RetVal) && (InF) && ((TempF=OpenOutFile(fd)) == NULL))
#endif
  {
    fclose (InF);
    InF = NULL;
    RetVal = -1;
  }

#ifndef NO_FCHMOD
  /* preserve original mode as modified by umask */
  mask = umask(0);
  umask(mask);
  if (!RetVal && fchmod(fd, StatBuf.st_mode & ~mask))
    RetVal = -1;
#endif

  /* conversion successful? */
  if ((!RetVal) && (ConvertUnixToDos(InF, TempF, ipFlag)))
    RetVal = -1;

  /* can close in file? */
  if ((InF) && (fclose(InF) == EOF))
    RetVal = -1;

  /* can close output file? */
  if ((TempF) && (fclose(TempF) == EOF))
    RetVal = -1;

#ifdef NO_MKSTEMP
  /* TempF is the very same stream as fd once it has been assigned, and it
   * was closed just above; only close fd here when TempF never took it. */
  if ((fd != NULL) && (TempF == NULL))
    fclose(fd);
#else
  if(fd>=0)
    close(fd);
#endif

  if ((!RetVal) && (ipFlag->KeepDate))
  {
    UTimeBuf.actime = StatBuf.st_atime;
    UTimeBuf.modtime = StatBuf.st_mtime;
    /* can change output file time to in file time? */
    if (utime(TempPath, &UTimeBuf) == -1)
      RetVal = -1;
  }

  /* any error? then remove the temporary file */
  if ((RetVal) && (TempPath) && (unlink(TempPath)))
    RetVal = -1;

  /* can rename temporary file to output file? */
  if (!RetVal)
  {
#ifdef NEED_REMOVE
    remove(ipOutFN);
#endif
    if (rename(TempPath, ipOutFN) == -1)
    {
      /* Quiet only silences the message, not the failure itself */
      if (!ipFlag->Quiet)
      {
        fprintf(stderr, _("unix2dos: problems renaming '%s' to '%s'\n"), TempPath, ipOutFN);
        fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
      }
      RetVal = -1;
    }
  }
  free(TempPath);
  return RetVal;
}

/* convert file ipInFN to DOS format text
 *
 * The conversion is written to a temporary file created by
 * MakeTempFileFrom(ipInFN, ...), which replaces ipInFN on success and is
 * unlinked on failure.  Where fchmod is available the temporary file gets the
 * mode of ipInFN, falling back to owner read/write.  When ipFlag->KeepDate is
 * set, the original access and modification times are kept.
 *
 * When ipFlag->Force is 0 and regfile(ipInFN) is non-zero, NO_REGFILE is set
 * in ipFlag->status and nothing is converted; otherwise ipFlag->status is
 * reset to 0.
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertUnixToDosOldFile(char* ipInFN, CFlag *ipFlag)
{
  int RetVal = 0;
  FILE *InF = NULL;
  FILE *TempF = NULL;
  char *TempPath = NULL;  /* stays NULL if MakeTempFileFrom does not set it */
  struct stat StatBuf;
  struct utimbuf UTimeBuf;
#ifndef NO_FCHMOD
  mode_t mode = S_IRUSR | S_IWUSR;
#endif
#ifdef NO_MKSTEMP
  FILE* fd;
#else
  int fd;
#endif

  if ((ipFlag->Force == 0) && regfile(ipInFN))
  {
    ipFlag->status |= NO_REGFILE ;
    return -1;
  }
  else
    ipFlag->status = 0 ;

  /* retrieve ipInFN file date stamp */
  if (stat(ipInFN, &StatBuf))
    RetVal = -1;
#ifndef NO_FCHMOD
  else
    mode = StatBuf.st_mode;
#endif

#ifdef NO_MKSTEMP
  if((fd = MakeTempFileFrom(ipInFN, &TempPath))==NULL) {
#else
  if((fd = MakeTempFileFrom(ipInFN, &TempPath)) < 0) {
#endif
    perror(_("unix2dos: Failed to open temporary output file"));
    RetVal = -1;
  }

#ifndef NO_FCHMOD
  /* try the original mode first, then fall back to owner read/write */
  if (!RetVal && fchmod (fd, mode) && fchmod (fd, S_IRUSR | S_IWUSR))
    RetVal = -1;
#endif

#ifdef DEBUG
  fprintf(stderr, _("unix2dos: using %s as temporary file\n"), TempPath);
#endif

  /* can open in file? */
  if ((!RetVal) && ((InF=OpenInFile(ipInFN)) == NULL))
    RetVal = -1;

  /* can open output file? */
#ifdef NO_MKSTEMP
  if ((!RetVal) && (InF) && ((TempF=fd) == NULL))
#else
  if ((!RetVal) && (InF) && ((TempF=OpenOutFile(fd)) == NULL))
#endif
  {
    fclose (InF);
    InF = NULL;
    RetVal = -1;
  }

  /* conversion successful? */
  if ((!RetVal) && (ConvertUnixToDos(InF, TempF, ipFlag)))
    RetVal = -1;

  /* can close in file? */
  if ((InF) && (fclose(InF) == EOF))
    RetVal = -1;

  /* can close output file? */
  if ((TempF) && (fclose(TempF) == EOF))
    RetVal = -1;

#ifdef NO_MKSTEMP
  /* TempF is the very same stream as fd once it has been assigned, and it
   * was closed just above; only close fd here when TempF never took it. */
  if ((fd != NULL) && (TempF == NULL))
    fclose(fd);
#else
  if(fd>=0)
    close(fd);
#endif

  if ((!RetVal) && (ipFlag->KeepDate))
  {
    UTimeBuf.actime = StatBuf.st_atime;
    UTimeBuf.modtime = StatBuf.st_mtime;
    /* can change output file time to in file time? */
    if (utime(TempPath, &UTimeBuf) == -1)
      RetVal = -1;
  }

  /* any error? then remove the temporary file */
  if ((RetVal) && (TempPath) && (unlink(TempPath)))
    RetVal = -1;

#ifdef NEED_REMOVE
  if (!RetVal)
    remove(ipInFN);
#endif
  /* can rename output file to in file? */
  if ((!RetVal) && (rename(TempPath, ipInFN) == -1))
  {
    if (!ipFlag->Quiet)
    {
      fprintf(stderr, _("unix2dos: problems renaming '%s' to '%s'\n"), TempPath, ipInFN);
      fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
    }
    RetVal = -1;
  }
  free(TempPath);
  return RetVal;
}

/* convert stdin to DOS format text and write to stdout
 *
 * Forces NewFile = 1, Quiet = 1, KeepDate = 0 and Force = 1 in ipFlag before
 * converting.
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertUnixToDosStdio(CFlag *ipFlag)
{
  ipFlag->NewFile = 1;
  ipFlag->Quiet = 1;
  ipFlag->KeepDate = 0;
  ipFlag->Force = 1;

#ifdef WIN32

  /* stdin and stdout are by default text streams. We need
   * to set them to binary mode. Otherwise an LF will
   * automatically be converted to CR-LF on DOS/Windows.
   * Erwin */

  /* 'setmode' was deprecated by MicroSoft
   * since Visual C++ 2005. Use '_setmode' instead. */

  _setmode(fileno(stdout), O_BINARY);
  _setmode(fileno(stdin), O_BINARY);
  return (ConvertUnixToDos(stdin, stdout, ipFlag));
#elif defined(MSDOS) || defined(__OS2__)
  setmode(fileno(stdout), O_BINARY);
  setmode(fileno(stdin), O_BINARY);
  return (ConvertUnixToDos(stdin, stdout, ipFlag));
#else
  return (ConvertUnixToDos(stdin, stdout, ipFlag));
#endif
}