Пример #1
0
/**
 * Transform-library helper: converts a GBK text file to UTF-8.
 *
 * The output file name is derived from in_file_name by dropping its last
 * three characters (e.g. "txt") and appending "utf8". The input is read line
 * by line, the lines are joined with '\n', the whole text is passed through
 * EncodingConverter::gbk_to_utf8 in one call and the result is written to the
 * output file followed by std::endl. A completion line is written to log_file.
 *
 * @param in_file_name  path of the file to convert. Names shorter than three
 *                      characters are kept whole and only get "utf8"
 *                      appended (previously this case threw
 *                      std::out_of_range from std::string::erase).
 */
void StringParse(std::string &in_file_name) {
	EncodingConverter trans;

	// Number of trailing characters replaced by "utf8".
	const std::string::size_type kSuffixLen = 3;

	// Build the output name; guard the subtraction so that a short name
	// cannot wrap around to a huge unsigned position.
	std::string out_file_name = in_file_name;
	if (out_file_name.size() >= kSuffixLen) {
		out_file_name.erase(out_file_name.size() - kSuffixLen);
	}
	out_file_name += "utf8";

	std::ifstream ifs; // text input stream
	std::ofstream ofs; // text output stream
	OpenInFile(ifs, in_file_name);
	OpenOutFile(ofs, out_file_name);

	// Slurp the whole file so that it is converted in a single call.
	std::string line;    // current input line
	std::string content; // complete input text
	while (getline(ifs, line)) {
		content += line;
		content += "\n";
	}

	ofs << trans.gbk_to_utf8(content) << std::endl;
	log_file << "tranform complete " << out_file_name << std::endl;

	ofs.close();
	ifs.close();
}
Пример #2
0
/*
 * Look up pages interactively by the number typed on standard input; the
 * index file gives the location of every page inside the page library.
 *
 * Every line of index_file_name is loaded into memory. For each integer read
 * from std::cin (1-based line number into the index) the matching line is
 * parsed as three integers "doc_id start size_"; the library file is then
 * opened, positioned at offset `start`, and lines are collected while the
 * read position stays within start + size_. The loop ends when std::cin
 * stops yielding integers.
 *
 * NOTE(review): the collected text is not used yet (the print statement is
 * commented out), so the function currently has no visible result besides
 * its diagnostics.
 *
 * index_file_name  path of the index file.
 * lib_file_name    path of the page library file.
 */
	void SearchIndex(std::string &index_file_name, std::string &lib_file_name) {
		std::vector<std::string> index;
		std::string line;

		// Load the complete index, one entry per line.
		std::ifstream ifs_index;
		OpenInFile(ifs_index, index_file_name);
		while (getline(ifs_index, line)) {
			index.push_back(line);
		}
		ifs_index.close();

		int num;
		while (std::cin >> num) {
			if (num < 1 || num > static_cast<int>(index.size())) {
				std::cout << "out of index range" << std::endl;
				continue;
			}

			int doc_id = 0;
			int start = 0;
			int size_ = 0;
			std::istringstream iss(index[num - 1]);
			// Reject entries that do not parse; otherwise start/size_ would be
			// used without ever having been assigned a meaningful value.
			if (!(iss >> doc_id >> start >> size_) || start < 0 || size_ < 0) {
				std::cout << "malformed index entry" << std::endl;
				continue;
			}
			//std::cout << doc_id << start << size_;

			// Widen before adding so that start + size_ cannot overflow int.
			const std::streamoff end_pos = static_cast<std::streamoff>(start) + size_;

			std::ifstream ifs_lib;
			OpenInFile(ifs_lib, lib_file_name);
			ifs_lib.seekg(start);
			std::string content;
			while (getline(ifs_lib, line)
					&& static_cast<std::streamoff>(ifs_lib.tellg()) <= end_pos) {
				content += line + "\n";
			}
			//std::cout << content << std::endl;
			ifs_lib.close();
		}
	}
Пример #3
0
/**
 * Reads one source document and appends it to the page library as a
 * "<doc>...</doc>" record.
 *
 * The file is split on '\r'. The first piece is taken as the initial title
 * and also starts the content. Every following piece is appended to the
 * content, followed by "\n" when TrimR() returns true for it. A piece that
 * starts with the title marker replaces the title (it is stored as read,
 * before trimming) and is echoed to std::cout.
 *
 * The record's docid is lib_vec.size() + 1 and its url is in_file_name.
 *
 * @param in_file_name  path of the document to read.
 * @param lib_vec       page library; the new record is pushed to the back.
 */
void BuildPageLib(std::string &in_file_name, std::vector<std::string> &lib_vec) {
	std::string line;    // current '\r'-delimited piece
	std::string content; // accumulated document text
	std::string title;   // document title
	std::ifstream ifs;   // text input stream

	OpenInFile(ifs, in_file_name);
	log_file << " processing file name: " << in_file_name << std::endl;

	// The first piece serves as the title until a marker line overrides it.
	getline(ifs, title, '\r');
	TrimR(title);
	content += title;

	while (getline(ifs, line, '\r')) {
		// 17 is the byte length of the marker literal below.
		if (strncmp(line.c_str(), "\n【 标  题 】", 17) == 0) {
			title = line;
			std::cout << "titile: ~~~~~~" << line << std::endl;
		}

		// Terminate the piece with a newline only when TrimR() reports true.
		if (TrimR(line)) {
			content += line + "\n";
		} else {
			content += line;
		}
	}

	// 16 bytes hold any int in decimal (sign + 10 digits + NUL); the former
	// 8-byte buffer overflowed once the id reached eight digits.
	char id[16] = {0};
	sprintf(id, "%d", (int)(lib_vec.size() + 1));

	std::string doc;
	doc = "<doc><docid>" + std::string(id) + "</docid><url>" + in_file_name + "</url><title>" + title + "</title><content>" + content + "</content></doc>\n";
	lib_vec.push_back(doc);

	ifs.close();
}
Пример #4
0
/**
 * Loads the single spectrum stored in the file strPath.
 *
 * Opens the file, reads the header (ReadMHAndCharge) and the peak list
 * (ReadMZAndItensity) into one CSpectrum, replaces the contents of S with
 * that spectrum and closes the file.
 *
 * @param strPath  path of the input file.
 * @param S        output; cleared and filled with exactly one spectrum. It is
 *                 left untouched when an error occurs.
 * @throws runtime_error  when opening or reading fails; the message is built
 *                        by CErrInfo from the underlying exception.
 */
void CDTASingleInput::LoadAll(string strPath, vector<CSpectrum> & S)
{

	try
	{
		OpenInFile(string(strPath));
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "LoadAll", "OpenInFile: Cannot open " + strPath + ".");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "LoadAll", "caught an unknown exception from OpenInFile: Cannot open " + strPath + ".");
		throw runtime_error(info.Get().c_str());
	}

	CSpectrum spec;
	try
	{
		ReadMHAndCharge(spec);
		ReadMZAndItensity(spec);
	}
	catch(exception & e)
	{
		// Release the input file before reporting; otherwise it would stay
		// open after a failed read. A failure of the cleanup itself is
		// ignored so that the original error is the one reported.
		// NOTE(review): assumes CloseInFile() may be called here — confirm.
		try { CloseInFile(); } catch(...) {}
		CErrInfo info("CDTASingleInput", "LoadAll", "ReadMHAndCharge & ReadMZandIntensity: Cannot read " + spec.m_strFilePath + ".");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		// Same best-effort cleanup as above.
		try { CloseInFile(); } catch(...) {}
		CErrInfo info("CDTASingleInput", "LoadAll", "caught an unknown exception from ReadMHAndCharge & ReadMZandIntensity: Cannot read " + spec.m_strFilePath + ".");
		throw runtime_error(info.Get().c_str());
	}

	S.clear();
	S.push_back(spec);

	CloseInFile();
}
/*
 * Loads the dump file into inBuf and splits it into lines.
 *
 * Asks for the file name, opens the file, reads inSize bytes into a freshly
 * allocated buffer and closes the file. Every '\n' inside the buffer is then
 * replaced by a NUL byte. On return AdrEnd points one past the buffer and
 * Adr points just behind the last complete line.
 *
 * The scan uses memchr bounded by AdrEnd: the buffer holds raw file bytes
 * and is not NUL-terminated, so an unbounded strchr could read (and the old
 * "NewAdr > AdrEnd" test could write) past its end. Unlike strchr, the scan
 * also continues past NUL bytes contained in the file.
 *
 * Returns 0 on success, -1 when the name cannot be obtained, the file cannot
 * be opened or the buffer cannot be allocated, and the value of ErrorSys2()
 * when reading fails. The file handle is closed on every path after a
 * successful open.
 */
static int Load_Dump(void)                                   //Load the dump
{
   HANDLE inFile;
   DWORD nb;
   if(GetName_InFile(NameFDump) < 0) return -1;              //Ask for the name of the file to process
   if(OpenInFile(NameFDump, &inFile) < 0) return -1;         //Open the input file
   inBuf = (char *)MyAllocMem(inSize);
   if(inBuf == NULL)
   {  CloseFile(&inFile);                                    //Do not leak the handle when allocation fails
      return -1;
   }
   if(ReadFile(inFile, inBuf, inSize, &nb, NULL) == FALSE || nb != inSize)
   {  //Report first so that closing cannot disturb the system error state
      int rc = ErrorSys2("Ошибка при чтении исходного файла", NameFDump);
      CloseFile(&inFile);
      return rc;
   }
   CloseFile(&inFile);                                       //Input file is no longer needed
   AdrEnd = inBuf + inSize;
   Adr = inBuf;
   while(Adr < AdrEnd)                                       //Until the end of the data
   {  char *NewAdr = (char *)memchr(Adr, '\n', (size_t)(AdrEnd - Adr));
      if(NewAdr == NULL) break;                              //No further complete line
      *NewAdr = 0;                                           //Terminate the line
      Adr = NewAdr + 1;
   }
   return 0;
}
Пример #6
0
/**
 * Loads the spectrum from the file remembered in m_strInPath.
 *
 * Opens m_strInPath, fills spec via ReadMHAndCharge and ReadMZAndItensity and
 * closes the file again.
 *
 * @param spec  output spectrum.
 * @param idx   not used by this implementation.
 * @throws runtime_error  when the file cannot be opened. Exceptions raised by
 *                        the read helpers propagate unchanged.
 */
void CDTASingleInput::LoadNext(CSpectrum & spec, int & idx)
{

	try
	{
		OpenInFile(string(m_strInPath));
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "LoadNext", "OpenInFile: Cannot open " + m_strInPath + ".");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "LoadNext", "caught an unknown exception from OpenInFile: Cannot open " + m_strInPath + ".");
		throw runtime_error(info.Get().c_str());
	}

	try
	{
		ReadMHAndCharge(spec);
		ReadMZAndItensity(spec);
	}
	catch(...)
	{
		// Do not leave the input file open when reading fails. A failure of
		// the cleanup itself is ignored so the read error is what propagates.
		// NOTE(review): assumes CloseInFile() may be called here — confirm.
		try { CloseInFile(); } catch(...) {}
		throw;
	}

	CloseInFile();
}
Пример #7
0
/**
 * Prepares loading from strPath.
 *
 * The file is opened only to verify that it can be opened and is closed
 * again right away; the path is remembered in m_strInPath for later use.
 *
 * @param strPath  path of the input file.
 * @param nTotal   output; always set to 1.
 * @throws runtime_error  when the file cannot be opened. The error context
 *                        now names this method ("StartLoad") instead of the
 *                        copy-pasted "StartWrite".
 */
void CDTASingleInput::StartLoad(string strPath, int & nTotal)
{
	try
	{
		OpenInFile(strPath);
	}
	catch(exception & e)
	{
		CErrInfo info("CDTASingleInput", "StartLoad", "OpenInFile: " + strPath + " failed.");
		throw runtime_error(info.Get(e).c_str());
	}
	catch(...)
	{
		CErrInfo info("CDTASingleInput", "StartLoad", "caught an unknown exception from OpenInFile: " + strPath + " failed.");
		info.Append("strPath=" + strPath);
		throw runtime_error(info.Get().c_str());
	}
	m_strInPath = strPath;
	nTotal = 1;

	CloseInFile();
}
Пример #8
0
/* Convert file ipInFN to DOS format text and write the result to file ipOutFN.
 *
 * The converted text is first written to a temporary file created by
 * MakeTempFileFrom(ipOutFN, ...). On success the temporary file is renamed to
 * ipOutFN; on any error it is removed.
 *
 * ipInFN   input file name
 * ipOutFN  output file name
 * ipFlag   conversion flags; Force, KeepDate and Quiet are read here and
 *          status is reset (or gets NO_REGFILE added when the input is
 *          rejected by regfile()).
 *
 * RetVal: 0 if success
 *         -1 otherwise (including a failed rename, also when Quiet is set)
 */
int ConvertUnixToDosNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag)
{
  int RetVal = 0;
  FILE *InF = NULL;
  FILE *TempF = NULL;
  char *TempPath = NULL;   /* stays NULL if no temporary name was produced */
  struct stat StatBuf;
  struct utimbuf UTimeBuf;
#ifndef NO_FCHMOD
  mode_t mask;
#endif
#ifdef NO_MKSTEMP
  FILE* fd;
#else
  int fd;
#endif

  if ((ipFlag->Force == 0) && regfile(ipInFN))
  {
    ipFlag->status |= NO_REGFILE ;
    return -1;
  }
  else
    ipFlag->status = 0 ;

  /* retrieve ipInFN file date stamp */
  if (stat(ipInFN, &StatBuf))
    RetVal = -1;

#ifdef NO_MKSTEMP
  if((fd = MakeTempFileFrom(ipOutFN, &TempPath))==NULL) {
#else
  if((fd = MakeTempFileFrom (ipOutFN, &TempPath)) < 0) {
#endif
    perror(_("unix2dos: Failed to open temporary output file"));
    RetVal = -1;
  }

#ifdef DEBUG
  fprintf(stderr, _("unix2dos: using %s as temporary file\n"), TempPath);
#endif

  /* can open in file? */
  if ((!RetVal) && ((InF=OpenInFile(ipInFN)) == NULL))
    RetVal = -1;

  /* can open output file? */
#ifdef NO_MKSTEMP
  if ((!RetVal) && (InF) && ((TempF=fd) == NULL))
#else
  if ((!RetVal) && (InF) && ((TempF=OpenOutFile(fd)) == NULL))
#endif
  {
    fclose (InF);
    InF = NULL;
    RetVal = -1;
  }

#ifndef NO_FCHMOD
  /* preserve original mode as modified by umask */
  mask = umask(0);
  umask(mask);
  if (!RetVal && fchmod(fd, StatBuf.st_mode & ~mask))
    RetVal = -1;
#endif

  /* conversion sucessful? */
  if ((!RetVal) && (ConvertUnixToDos(InF, TempF, ipFlag)))
    RetVal = -1;

   /* can close in file? */
  if ((InF) && (fclose(InF) == EOF))
    RetVal = -1;

  /* can close output file?
   * Once TempF exists it owns the temporary file (with NO_MKSTEMP it is the
   * very same FILE* as fd), so closing TempF must be the only close;
   * closing fd as well would release the same resource twice.
   * NOTE(review): for the mkstemp build this assumes OpenOutFile() wraps fd
   * fdopen-style, so that fclose() also closes the descriptor - confirm. */
  if (TempF)
  {
    if (fclose(TempF) == EOF)
      RetVal = -1;
  }
  else
  {
    /* no stream took ownership: release the raw temporary file handle */
#ifdef NO_MKSTEMP
    if(fd!=NULL)
      fclose(fd);
#else
    if(fd>=0)
      close(fd);
#endif
  }

  if ((!RetVal) && (ipFlag->KeepDate))
  {
    UTimeBuf.actime = StatBuf.st_atime;
    UTimeBuf.modtime = StatBuf.st_mtime;
    /* can change output file time to in file time? */
    if (utime(TempPath, &UTimeBuf) == -1)
      RetVal = -1;
  }

  /* any error? then remove the temporary file, if there is one */
  if ((RetVal) && (TempPath) && (unlink(TempPath)))
    RetVal = -1;

  /* can rename temporary file to output file? */
  if (!RetVal)
  {
#ifdef NEED_REMOVE
    remove(ipOutFN);
#endif
    if (rename(TempPath, ipOutFN) == -1)
    {
      /* Quiet only silences the message; the failure is always reported
       * through the return value, as in ConvertUnixToDosOldFile. */
      if (!ipFlag->Quiet)
      {
        fprintf(stderr, _("unix2dos: problems renaming '%s' to '%s'\n"), TempPath, ipOutFN);
        fprintf(stderr, _("          output file remains in '%s'\n"), TempPath);
      }
      RetVal = -1;
    }
  }
  free(TempPath);
  return RetVal;
}


/* Convert file ipInFN to DOS format text, replacing the file in place.
 *
 * The converted text is first written to a temporary file created by
 * MakeTempFileFrom(ipInFN, ...). On success the temporary file is renamed to
 * ipInFN; on any error it is removed and ipInFN is left as it was.
 *
 * ipInFN  name of the file to convert
 * ipFlag  conversion flags; Force, KeepDate and Quiet are read here and
 *         status is reset (or gets NO_REGFILE added when the input is
 *         rejected by regfile()).
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertUnixToDosOldFile(char* ipInFN, CFlag *ipFlag)
{
  int RetVal = 0;
  FILE *InF = NULL;
  FILE *TempF = NULL;
  char *TempPath = NULL;   /* stays NULL if no temporary name was produced */
  struct stat StatBuf;
  struct utimbuf UTimeBuf;
#ifndef NO_FCHMOD
  mode_t mode = S_IRUSR | S_IWUSR;
#endif
#ifdef NO_MKSTEMP
  FILE* fd;
#else
  int fd;
#endif

  if ((ipFlag->Force == 0) && regfile(ipInFN))
  {
    ipFlag->status |= NO_REGFILE ;
    return -1;
  }
  else
    ipFlag->status = 0 ;

  /* retrieve ipInFN file date stamp */
  if (stat(ipInFN, &StatBuf))
    RetVal = -1;
#ifndef NO_FCHMOD
  else
    mode = StatBuf.st_mode;
#endif

#ifdef NO_MKSTEMP
  if((fd = MakeTempFileFrom(ipInFN, &TempPath))==NULL) {
#else
  if((fd = MakeTempFileFrom(ipInFN, &TempPath)) < 0) {
#endif
    perror(_("unix2dos: Failed to open temporary output file"));
    RetVal = -1;
  }

#ifndef NO_FCHMOD
  /* give the temporary file the mode of the original; if that is refused,
   * fall back to owner read/write */
  if (!RetVal && fchmod (fd, mode) && fchmod (fd, S_IRUSR | S_IWUSR))
    RetVal = -1;
#endif

#ifdef DEBUG
  fprintf(stderr, _("unix2dos: using %s as temporary file\n"), TempPath);
#endif

  /* can open in file? */
  if ((!RetVal) && ((InF=OpenInFile(ipInFN)) == NULL))
    RetVal = -1;

  /* can open output file? */
#ifdef NO_MKSTEMP
  if ((!RetVal) && (InF) && ((TempF=fd) == NULL))
#else
  if ((!RetVal) && (InF) && ((TempF=OpenOutFile(fd)) == NULL))
#endif
  {
    fclose (InF);
    InF = NULL;
    RetVal = -1;
  }

  /* conversion sucessful? */
  if ((!RetVal) && (ConvertUnixToDos(InF, TempF, ipFlag)))
    RetVal = -1;

   /* can close in file? */
  if ((InF) && (fclose(InF) == EOF))
    RetVal = -1;

  /* can close output file?
   * Once TempF exists it owns the temporary file (with NO_MKSTEMP it is the
   * very same FILE* as fd), so closing TempF must be the only close;
   * closing fd as well would release the same resource twice.
   * NOTE(review): for the mkstemp build this assumes OpenOutFile() wraps fd
   * fdopen-style, so that fclose() also closes the descriptor - confirm. */
  if (TempF)
  {
    if (fclose(TempF) == EOF)
      RetVal = -1;
  }
  else
  {
    /* no stream took ownership: release the raw temporary file handle */
#ifdef NO_MKSTEMP
    if(fd!=NULL)
      fclose(fd);
#else
    if(fd>=0)
      close(fd);
#endif
  }

  if ((!RetVal) && (ipFlag->KeepDate))
  {
    UTimeBuf.actime = StatBuf.st_atime;
    UTimeBuf.modtime = StatBuf.st_mtime;
    /* can change output file time to in file time? */
    if (utime(TempPath, &UTimeBuf) == -1)
      RetVal = -1;
  }

  /* any error? then remove the temporary file, if there is one */
  if ((RetVal) && (TempPath) && (unlink(TempPath)))
    RetVal = -1;

#ifdef NEED_REMOVE
  if (!RetVal)
    remove(ipInFN);
#endif
  /* can rename output file to in file? */
  if ((!RetVal) && (rename(TempPath, ipInFN) == -1))
  {
    if (!ipFlag->Quiet)
    {
      fprintf(stderr, _("unix2dos: problems renaming '%s' to '%s'\n"), TempPath, ipInFN);
      fprintf(stderr, _("          output file remains in '%s'\n"), TempPath);
    }
    RetVal = -1;
  }
  free(TempPath);
  return RetVal;
}


/* Filter mode: convert standard input to DOS format text on standard output.
 *
 * Forces the flags NewFile=1, Quiet=1, KeepDate=0 and Force=1 in *ipFlag
 * before converting.
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertUnixToDosStdio(CFlag *ipFlag)
{
    ipFlag->NewFile = 1;
    ipFlag->Quiet = 1;
    ipFlag->KeepDate = 0;
    ipFlag->Force = 1;

    /* On DOS-like systems stdin and stdout are text streams by default and
     * would turn every LF into CR-LF on their own, so both are switched to
     * binary mode before converting. */
#if defined(WIN32)
    /* Microsoft deprecated 'setmode' with Visual C++ 2005;
     * '_setmode' is its replacement. */
    _setmode(fileno(stdout), O_BINARY);
    _setmode(fileno(stdin), O_BINARY);
#elif defined(MSDOS) || defined(__OS2__)
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stdin), O_BINARY);
#endif

    return (ConvertUnixToDos(stdin, stdout, ipFlag));
}