Example #1
0
/*
 * strtrim
 * Runs strrtrim() and then strltrim() on the caller's buffer. The values
 * returned by both helpers are discarded; the pointer handed back is always
 * the original s.
 */
char *strtrim(char *s)
{
    (void) strrtrim(s);     /* trailing side first */
    (void) strltrim(s);     /* then the leading side */
    return s;
}
Example #2
0
/*
 * ParseDirective
 * Note: macro directives are handled before recording
 */
void ParseDirective(const char *cline)
{
	char *line = strdup(strltrim(cline) + 1);	// skip '.' and allow strtok
//printf("'%s'\n", file->line);
//printf("'%s'\n", line);
	bool valid_directive = false;
	//bool done_directive = true;
//printf("Line: [%s]\n", cline);
	
	/*
	 * macro ending directives
	 */

	if (DIRECTIVE("endm", PARSE_MACRO_DIRECTIVES))
	{
		current_macro = 0;
		parse_directives = PARSE_ALL_DIRECTIVES;
	}
	else if (DIRECTIVE("endr", PARSE_REPT_DIRECTIVES))
	{
		current_macro = 0;
		parse_directives = PARSE_ALL_DIRECTIVES;
		while (repeat-- > 0) MacroExecute("_rept");
	}

	/*
	 * record to macro
	 */
	if (current_macro)
	{
		MacroLine(cline);		// record full line
		goto exit;				// only process macro ending directives
	}

	/*
	 * macro starting directives
	 */
	if (DIRECTIVE("macro", PARSE_MACRO_DIRECTIVES) || DIRECTIVE("macroicase", PARSE_MACRO_DIRECTIVES))
	{
		char *name = strtok((char *)strskipspace(line), delim_chars);
		current_macro = FindMacro(name);
		if (pass != PASS_ASM)
		{
			if (current_macro) { eprintf("Macro name already defined.\n"); eexit(); }
			current_macro = NewMacro(name, DIRECTIVE("macroicase", PARSE_MACRO_DIRECTIVES));
EEKS{printf("new macro at %p\n", current_macro);}
			char *paramname;
			while ((paramname = strtok(0, delim_chars)))
			{
				if (isspace2(paramname[0])) paramname = strskipspace(paramname);
				if (strchr(endline_chars, *paramname)) break;

				current_macro->AddParameter(paramname);
			}
		}
		parse_directives = PARSE_MACRO_DIRECTIVES;
	}
Example #3
0
/**
 * Return the next (non-commented) line from the host-file.
 * Format is:
 *  ip-address host-name [alias..] {\n | # ..}
 *
 * Lines whose first non-blank character is '#', ';' or a newline are
 * skipped, as are lines that lack a host-name or whose first token is
 * rejected by isaddr().
 *
 * Returns NULL with h_errno = NO_RECOVERY when netdb_init() fails or the
 * host-file is not open, NULL when fgets() reports end-of-file or an error,
 * and otherwise the result of fill_hostent() for the parsed entry.
 */
struct hostent * W32_CALL gethostent (void)
{
  struct _hostent h;
  char  *tok, *ip, *name, *alias;
  char   buf [2*MAX_HOSTLEN];
  int    i;

  if (!netdb_init() || !hostFile)
  {
    h_errno = NO_RECOVERY;
    return (NULL);
  }

  /* Read until a usable "ip name" line is found */
  while (1)
  {
    if (!fgets(buf,sizeof(buf),hostFile))
       return (NULL);

    tok = strltrim (buf);
    if (*tok == '#' || *tok == ';' || *tok == '\n')
       continue;

    ip   = strtok (tok, " \t");
    name = strtok (NULL, " \t\n");
    if (ip && name && isaddr(ip))
       break;
  }

  if (hostClose)
     endhostent();

  memset (&h, 0, sizeof(h));
  if (!strcmp(ip,"0.0.0.0"))   /* inet_addr() maps 0 -> INADDR_NONE */
       h.h_address[0] = INADDR_ANY;
  else h.h_address[0] = inet_addr (ip);

  h.h_num_addr = 1;
  h.h_name = name;
  alias    = strtok (NULL, " \t\n");

  for (i = 0; alias && i < MAX_HOST_ALIASES; i++)
  {
    /* Dimensioned with the same constant that bounds the loop, so index i
     * can never run past the array (it was sized with MAX_NETENT_ALIASES).
     */
    static char aliases [MAX_HOST_ALIASES][MAX_HOSTLEN];

    /* A comment marker ends the alias list */
    if (*alias == '#' || *alias == ';')
       break;

    h.h_aliases[i] = StrLcpy (aliases[i], alias, sizeof(aliases[i]));
    alias = strtok (NULL, " \t\n");
  }
  return fill_hostent (&h);
}
Example #4
0
/*
 * Return the next (non-commented) line from the network-file
 * Format is:
 *   name [=] net [alias..] {\n | # ..}
 *
 * e.g.
 *   loopback     127
 *   arpanet      10   arpa
 *
 * Lines whose first non-blank character is '#', ';' or a newline are
 * skipped, as are lines without both a name and a net token.
 *
 * Returns NULL when netdb_init() fails, when the network-file is not open,
 * or when fgets() reports end-of-file or an error; otherwise returns the
 * result of fill_netent() for the parsed entry.
 */
struct netent * W32_CALL getnetent (void)
{
  struct _netent n;
  char  *name, *net, *alias;
  char   buf [2*MAX_NAMELEN], *tok;
  int    i;

  /* Never hand a NULL stream to fgets(); same guard gethostent() applies
   * to its own file handle.
   */
  if (!netdb_init() || !networkFile)
     return (NULL);

  /* Read until a usable "name net" line is found */
  while (1)
  {
    if (!fgets(buf,sizeof(buf),networkFile))
       return (NULL);

    tok = strltrim (buf);
    if (*tok == '#' || *tok == ';' || *tok == '\n')
       continue;

    name = strtok (tok, " \t");
    net  = strtok (NULL, "= \t\n");   /* '=' between name and net is optional */
    if (name && net)
       break;
  }

  if (networkClose)
     endnetent();

  memset (&n, 0, sizeof(n));
  n.n_net  = inet_network (net);
  n.n_name = name;
  alias    = strtok (NULL, " \t\n");

  for (i = 0; alias && i < MAX_NETENT_ALIASES; i++)
  {
    static char aliases [MAX_NETENT_ALIASES][MAX_NAMELEN];

    /* A comment marker ends the alias list */
    if (*alias == '#' || *alias == ';')
       break;

    n.n_aliases[i] = StrLcpy (aliases[i], alias, sizeof(aliases[i]));
    alias = strtok (NULL, " \t\n");
  }
  return fill_netent (&n);
}
Example #5
0
File: str.c Project: amikoren/tnapy
/*
 * Apply strltrim() to s, feed its result to strrtrim(), and return whatever
 * strrtrim() returns.
 */
char *strtrim(char *s)
{
    char *after_left = strltrim(s);

    return strrtrim(after_left);
}
Example #6
0
/*
 * Apply strrtrim() to str, feed its result to strltrim(), and return
 * whatever strltrim() returns.
 */
static char *strtrim(char *str)
{
	char *after_right = strrtrim(str);

	return strltrim(after_right);
}
Example #7
0
// Passes str through strrtrim() and then strltrim(), both with the same
// `trim` argument, and returns the result of the strltrim() call.
char *CDateTime::strtrim(char *str, const char *trim)
{
	char *after_right = strrtrim(str, trim);
	return strltrim(after_right, trim);
}
Example #8
0
/*
 * Passes str through strrtrim() and then strltrim(), both with the same
 * `trim` argument, and returns the result of the strltrim() call.
 */
char    *strtrim(char *str, const char *trim)
{
    char *after_right = strrtrim(str, trim);

    return strltrim(after_right, trim);
}
Example #9
0
/*
 * Checks that strltrim() applied to each of the four fixture strings
 * (str_a .. str_d) compares equal to the matching *_EXPECTED_VALUE_LT
 * constant.
 */
void test_left_side_trim (void) {
  CU_ASSERT (strcmp (STR_A_EXPECTED_VALUE_LT, strltrim (str_a)) == 0);
  CU_ASSERT (strcmp (STR_B_EXPECTED_VALUE_LT, strltrim (str_b)) == 0);
  CU_ASSERT (strcmp (STR_C_EXPECTED_VALUE_LT, strltrim (str_c)) == 0);
  /* Terminated like its siblings so the statement does not depend on how
   * the CU_ASSERT macro happens to expand. */
  CU_ASSERT (strcmp (STR_D_EXPECTED_VALUE_LT, strltrim (str_d)) == 0);
}
Example #10
0
/*
 * Command-line entry point. Three modes are selected by the arguments:
 *
 *   imgtest <articleurl> <oldurl>
 *       Resolve <oldurl> against <articleurl> with _expandlinks() and print
 *       the result.
 *   -start <minutes> <n>
 *       Load both config files, then loop forever calling
 *       m_article.start(atoi(<n>)) and sleeping <minutes> * 60 seconds
 *       between rounds.
 *   <url> <sourcepagename>
 *       Fetch one page, run the extractor with the rule set registered
 *       under <sourcepagename>, apply the per-site clean-ups below and print
 *       title, image URLs, publish time, introduction and content.
 *
 * Returns 1 when no mode argument is given; every other path returns 0.
 */
int main(int argc, char* argv[])
{
	/* Every mode reads argv[1]; without this guard a bare invocation hands
	 * a NULL pointer to strcmp(). */
	if(argc < 2)
	{
		printf("missing arguments\n");
		return 1;
	}
	/* NOTE(review): credentials are hard-coded in the source; consider
	 * loading them from configuration instead. */
	QINIU_ACCESS_KEY = "sn7d6X2kmRQKkNyO0_ZY_Hz2utVrXIeEmc8QutVC";
	QINIU_SECRET_KEY = "jeQSWafTp7kczgR4qVa-erKOaHk0_qcvMNacxO8E";
	curl_global_init(CURL_GLOBAL_ALL);
	Qiniu_Global_Init(-1);                  /* global init; call once per process */
	if(strcmp(argv[1],"imgtest") == 0 && argc == 4)
	{
		string oldurl,newurl,articleurl;
		articleurl = argv[2];
		oldurl     = argv[3];
		newurl =  _expandlinks(articleurl,oldurl);
		/* print the expanded link (the input URL used to be printed here) */
		printf("newurl=%s\n",newurl.c_str());
	}
	if(strcmp(argv[1],"-start") == 0 && argc == 4)
	{
		int ntime = atoi(argv[2]);
		int nprocesstimes = 1;
		ArticleManage m_article;
		theLog.SetLogFilePath(GetFullPath());
		if(!ReadConfigFile((GetFullPath()+"/sqlconfig.conf").c_str(),p_config))
		{
			return 0;
		}
		if(!ReadSpiderRulerConfigFile((GetFullPath()+"/spiderruler.conf").c_str(),p_spiderruler))
		{
			return 0;
		}
		/* runs until the process is killed */
		while(1)
		{
			m_article.start(atoi(argv[3]));
			theLog.WriteLog(LOG_LEVEL_SYS,"the proc ArticleExtract %d times completed!",nprocesstimes);
			nprocesstimes++;
			sleep(ntime * 60);
		}
	}

	if(argc == 3 && strcmp(argv[1],"-start") != 0 )
	{
		if(!ReadSpiderRulerConfigFile((GetFullPath()+"/spiderruler.conf").c_str(),p_spiderruler))
		{
			return 0;
		}
		string page;
		list<string> ImgStrList;
		page.clear();
		const char* url = argv[1];
		string sourcepagename = argv[2];
		printf("url=%s\n",url);
		string urlstr = url;
		if(0 == sourcepagename.compare("zatu"))
		{
			/* zatu URLs are converted from utf-8 to gbk before fetching */
			string strtmp = "";
			iconv_string("utf-8","gbk", urlstr.c_str(), urlstr.length(),strtmp,1);
			urlstr = strtmp;
			printf("urlstr=%s\n",urlstr.c_str());
		}

		int method = 0;
		if(!getPage(urlstr.c_str(), method,page))
		{
			printf("不能获取URL内容\n");
			return 0;
		}
		string content;
		string Introduction;
		string publishtime;
		string titlestr;
		string contentimg;
		page = mainpagetagclean(page);
		if(page.length() < 2048)
		{
			printf("the page source length too short ! \n");
			return 0;
		}
		/* length() is a size_t; cast so the argument matches the conversion */
		printf("page length=%lu\n",(unsigned long)page.length());

		HtmlExtract sorceExtract(page,p_spiderruler[sourcepagename]);
		sorceExtract.Extract();
		titlestr	= sorceExtract.GetTitle();
		boost::regex title_reg("((?i)(&nbsp;))");
		titlestr = boost::regex_replace(titlestr,title_reg,"");
		printf("title=%s\n",titlestr.c_str());
		content			= sorceExtract.ArticleContent;
		publishtime		= sorceExtract.GetDateTime();
		Introduction	= sorceExtract.Introduction;
		strltrim(publishtime);
		publishtime		= publish_time_deal(publishtime);

		/*
		 * Per-site clean-up of the extracted body
		 */
		if(0 == sourcepagename.compare("geekpark"))  // GeekPark: drop the tags div
		{
			boost::regex reg14("((?i)(<div\\s{1,4}(id=\"tags\").*?</div>))");
			content = boost::regex_replace(content,reg14,"");
		}
		if(0 == sourcepagename.compare("cuntuba"))  // cuntuba: drop divs other than class="cont"
		{
			boost::regex reg14("((?i)(<div\\s{1,4}(?!class=\"cont\").*?</div>))");
			content = boost::regex_replace(content,reg14,"");
		}
		if(0 == sourcepagename.compare("macx"))  // macx: drop foreign divs and all anchors
		{
			boost::regex reg14("((?i)(<div\\s{1,4}(?!class=\"v2-t_fsz\").*?</div>))");
			content = boost::regex_replace(content,reg14,"");
			reg14.assign("(?i)(<a[^>]*>.*?</a>)");
			content = boost::regex_replace(content,reg14,"");
		}
		if(0 == sourcepagename.compare("leiphone"))  // leiphone: page-tail clean-up
		{
			boost::regex reg14("((?i)(<div\\s{1,4}(?!class=\"post_content\").*?</div>))");
			content = boost::regex_replace(content,reg14,"");
			reg14.assign("(?i)(<div>.*?</div>)");
			content = boost::regex_replace(content,reg14,"");
		}
		if(0 == sourcepagename.compare("tech163"))  // NetEase Tech: page-tail clean-up
		{
			boost::regex reg14("((?i)(<div\\s{1,4}(?!id=\"endtext\").*?</div>))");
			content = boost::regex_replace(content,reg14,"");
		}
		if(0 == sourcepagename.compare("pingwest"))  // pingwest: page-tail clean-up
		{
			boost::regex reg15("((?i)(<div.*?>[^<]+</div>))");
			content = boost::regex_replace(content,reg15,"");
		}
		if(0 == sourcepagename.compare("zatu"))  // zatu: page-tail clean-up
		{
			boost::regex reg14("((?i)(<div\\s{1,4}(?!class=\"format_text entry-content\").*?</div>))");
			content = boost::regex_replace(content,reg14,"");
		}
		if(0 == sourcepagename.compare("jandan"))
		{
			boost::regex reg11("((?i)(<a[^>]*>.*?</a>))");
			content = boost::regex_replace(content,reg11,"");
			boost::regex reg12("((?i)(<span[^>]*>.*?</span>))");
			content = boost::regex_replace(content,reg12,"");
		}
		if(0 == sourcepagename.compare("guaixun"))
		{
			boost::regex reg13("((?i)(<div style=\"position:absolute.*?</div>))");
			content = boost::regex_replace(content,reg13,"");
		}

		content = maincontenttagclean(content);
		strltrim(content);
		boost::smatch m;
		boost::regex reg8;
		/*
		 * These sites carry the real image URL in a different attribute:
		 * strip the existing src="..." and rename that attribute to src.
		 */
		if(sourcepagename == "sinablogit")
		{
			reg8.assign("(?i)( src\\s{0,2}=\\s{0,2}\"([^\"]*)\")");
			content = boost::regex_replace(content,reg8,"");
			reg8.assign("(?i)( real_src =)");
			content = boost::regex_replace(content,reg8," src=");
		}
		else if(sourcepagename == "aqee")
		{
			reg8.assign("(?i)( src\\s{0,2}=\\s{0,2}\"([^\"]*)\")");
			content = boost::regex_replace(content,reg8,"");
			reg8.assign("(?i)( data-original=)");
			content = boost::regex_replace(content,reg8," src=");
		}
		else if(sourcepagename == "macx")
		{
			reg8.assign("(?i)( src\\s{0,2}=\\s{0,2}\"([^\"]*)\")");
			content = boost::regex_replace(content,reg8,"");
			reg8.assign("(?i)( zoomfile=)");
			content = boost::regex_replace(content,reg8," src=");
		}
		if(0 == sourcepagename.compare("cuntuba"))  // cuntuba: single quotes -> double quotes
		{
			reg8.assign("(?i)(\')");
			content = boost::regex_replace(content,reg8,"\"");
		}
		reg8.assign("(?i)( src\\s{0,2}=\\s{0,2}\"([^\"]*)\")");

		/* Work on a copy with every tag except <img ...> removed */
		contentimg = content;
		boost::regex reg("((?i)<((?!img)[^>]*>))");
		contentimg = boost::regex_replace(contentimg,reg,"");

		std::string::const_iterator start = contentimg.begin();
		std::string::const_iterator end = contentimg.end();
		try
		{
			while(boost::regex_search(start,end,m,reg8))
			{
				if (m[0].matched)
				{
					string tempurl(m[0].first,m[0].second);
					string regurl;
					/* keep only the text between the first and last double quote */
					tempurl = tempurl.substr(tempurl.find_first_of('\"',0)+1,tempurl.find_last_of('\"')-tempurl.find_first_of('\"',0)-1);
					regurl =  _expandlinks(urlstr,tempurl);
					if(0 != tempurl.length())
					{
						/* the raw URL is used as the pattern; an invalid one
						 * throws and is handled by the catch below */
						boost::regex tempurl_reg(tempurl);
						content = boost::regex_replace(content,tempurl_reg,regurl);
						ImgStrList.push_back(regurl);
					}
					start = m[0].second;
				}
			}
		}
		catch (const boost::bad_expression& e)
		{
			theLog.WriteLog(LOG_LEVEL_ERROR,"cann't create regex with %s!",urlstr.c_str());
		}
		list<string>::iterator it;
		for( it = ImgStrList.begin(); it != ImgStrList.end(); it++)
		{
			printf("%s\n",(*it).c_str());
		}
		if( 0 == Introduction.length() )
		{
			/* Fall back to the first <p>...</p> of the body */
			boost::smatch m1;
			boost::regex intro_reg("(?i)(<p>.*?</p>)");
			std::string::const_iterator intro_start = content.begin();
			std::string::const_iterator intro_end = content.end();
			if(boost::regex_search(intro_start,intro_end,m1,intro_reg) && m1[0].matched)
			{
				Introduction = m1[0].str();
			}

			/* ...and failing that, to everything before the first CR or LF */
			if( 0 == Introduction.length() )
			{
				string::size_type pos = content.find_first_of("\x0d\x0a",0);
				if(string::npos != pos)
				{
					Introduction = content.substr(0,pos);
				}
			}
		}
		Introduction = Introductioncontenttagclean(Introduction);
		strltrim(Introduction);
		if( 0 == p_spiderruler[sourcepagename].summaryisinbody.compare("yes") )
		{
			content = Introduction + content;
		}
		printf("publishtime=%s\n",publishtime.c_str());
		printf("Introduction=%s\n",Introduction.c_str());
		printf("content=%s\n",content.c_str());
	}
	Qiniu_Global_Cleanup();                 /* global cleanup; call once at process exit */
	return 0;


}
Example #11
0
/*
 * line_parse
 * Resets the my.t_* task fields to defaults and refills them from the
 * record currently held by the yPARSE reader.
 *
 * Two layouts are handled:
 *   - a single field: the whole record is popped into a local buffer and
 *     split at a space into my.t_schedule and my.t_command;
 *   - several fields: schedule, then tracker (only when more than two
 *     fields), then flags (only when more than three fields), then command
 *     are popped one after another.
 *
 * Returns 0 and sets my.t_ready to 'y' on success. On failure returns the
 * current value of rce, a negative code that starts at -10 and is
 * decremented before most checks; my.t_ready is left at '-'.
 */
char
line_parse              (void)
{
   /*---(locals)-----------+-----------+-*/
   char        rce         =  -10;
   int         rc          =    0;
   int         x_fields    =    0;
   char        x_recd      [LEN_RECD];
   int         x_pos       =    0;
   /*---(header)-------------------------*/
   DEBUG_INPT  yLOG_enter   (__FUNCTION__);
   /*---(prepare)------------------------*/
   /* start from defaults so a failed parse never leaves stale values */
   my.t_ready = '-';
   strlcpy (my.t_tracker , "n/a"      , LEN_TRACKER);
   strlcpy (my.t_schedule, ""         , LEN_RECD);
   strlcpy (my.t_flags   , "- - - - -", LEN_FLAGS);
   strlcpy (my.t_command , ""         , LEN_COMMAND);
   /*---(field count)--------------------*/
   rc = yPARSE_ready (&x_fields);
   DEBUG_INPT   yLOG_value   ("ready"     , rc);
   --rce;  if (rc != 'y') {
      DEBUG_INPT  yLOG_exit    (__FUNCTION__);
      return rce;
   }
   DEBUG_INPT   yLOG_value   ("fields"    , x_fields);
   --rce; if (x_fields < 1) {
      DEBUG_INPT  yLOG_exit    (__FUNCTION__);
      return rce;
   }
   /*---(adjust original record)---------*/
   if (x_fields == 1) {
      DEBUG_INPT   yLOG_note    ("found original style record (six space delimited parts)");
      rc = yPARSE_popstr  (x_recd);
      DEBUG_INPT   yLOG_value   ("original"  , rc);
      --rce;  if (rc < 0) {
         DEBUG_INPT  yLOG_exitr   (__FUNCTION__, rce);
         return rce;
      }
      /* presumably ySTR_SINGLE collapses runs of spaces so the delimiter count below is reliable -- TODO confirm */
      strltrim (x_recd, ySTR_SINGLE, LEN_RECD);
      x_fields = strldcnt (x_recd, ' ', LEN_RECD);
      DEBUG_INPT   yLOG_value   ("x_fields"  , x_fields);
      /* NOTE(review): no --rce before this check, so it returns the same code as the popstr failure above */
      if (x_fields < 5) {
         DEBUG_INPT  yLOG_exitr   (__FUNCTION__, rce);
         return rce;
      }
      /* NOTE(review): x_pos is used as an index without a range check -- confirm strldpos cannot return a negative value here */
      x_pos = strldpos (x_recd, ' ', 5, LEN_RECD);
      DEBUG_INPT   yLOG_value   ("x_pos"     , x_pos);
      /* cut the record at x_pos: text before it is the schedule, text after it is the command */
      x_recd [x_pos] = '\0';
      strlcpy (my.t_schedule, x_recd, LEN_RECD);
      DEBUG_INPT   yLOG_info    ("t_schedule", my.t_schedule);
      strlcpy (my.t_command , x_recd + x_pos + 1, LEN_COMMAND);
      DEBUG_INPT   yLOG_info    ("t_command" , my.t_command);
      my.t_ready = 'y';
      /*---(complete)-----------------------*/
      DEBUG_INPT  yLOG_exit    (__FUNCTION__);
      return 0;
   }
   /*---(schedule string)----------------*/
   rc = yPARSE_popstr  (my.t_schedule);
   /* NOTE(review): the trim runs before rc is checked */
   strltrim (my.t_schedule, ySTR_SINGLE, LEN_RECD);
   DEBUG_INPT   yLOG_value   ("schedule"  , rc);
   --rce;  if (rc < 0) {
      DEBUG_INPT  yLOG_exitr   (__FUNCTION__, rce);
      return rce;
   }
   DEBUG_INPT   yLOG_info    ("t_schedule", my.t_schedule);
   /*---(tracker/title)------------------*/
   /* optional: keeps the "n/a" default when the record has two fields */
   if (x_fields > 2) {
      rc = yPARSE_popstr  (my.t_tracker);
      DEBUG_INPT   yLOG_value   ("tracker"   , rc);
      --rce;  if (rc < 0) {
         DEBUG_INPT  yLOG_exitr   (__FUNCTION__, rce);
         return rce;
      }
      DEBUG_INPT   yLOG_info    ("tracker"   , my.t_tracker);
   }
   /*---(run-time flags)-----------------*/
   /* optional: keeps the "- - - - -" default when the record has three fields or fewer */
   if (x_fields > 3) {
      rc = yPARSE_popstr  (my.t_flags);
      DEBUG_INPT   yLOG_value   ("flags"     , rc);
      --rce;  if (rc < 0) {
         DEBUG_INPT  yLOG_exitr   (__FUNCTION__, rce);
         return rce;
      }
      DEBUG_INPT   yLOG_info    ("t_flags"   , my.t_flags);
   }
   /*---(command)------------------------*/
   rc = yPARSE_popstr  (my.t_command);
   DEBUG_INPT   yLOG_value   ("command"   , rc);
   --rce;  if (rc < 0) {
      DEBUG_INPT  yLOG_exitr   (__FUNCTION__, rce);
      return rce;
   }
   DEBUG_INPT   yLOG_info    ("command"   , my.t_command);
   /*---(set ready)----------------------*/
   my.t_ready = 'y';
   /*---(complete)-----------------------*/
   DEBUG_INPT  yLOG_exit    (__FUNCTION__);
   return 0;
}