// . intentionally a no-op that always reports success
// . this function used to open a pair of iconv descriptors (charset -> UTF-8
//   and UTF-8 -> charset) for every supported charset id, but that loop sat
//   behind an unconditional "return true" and could never execute
// . the note left with that early return asks why descriptors should be
//   opened here at all when gbiconv_open() is called directly from ucToAny()
//   and other functions
// . the unreachable code was removed rather than kept because it was also
//   wrong: it tested gbiconv_open() results with "< 0" although the failure
//   value it checked elsewhere is (iconv_t)-1, and it never used or released
//   the descriptors it obtained
// . returns true always
bool openIconvDescriptors() {
	return true;
}
// . sets m_qbuf1[] and m_qbuf2[] // . m_qbuf1[] is the advanced query // . m_qbuf2[] is the query to be used for spell checking // . returns false and set g_errno on error bool SearchInput::setQueryBuffers ( HttpRequest *hr ) { m_sbuf1.reset(); m_sbuf2.reset(); m_sbuf3.reset(); short qcs = csUTF8; if (m_queryCharset && m_queryCharsetLen){ // we need to convert the query string to utf-8 qcs = get_iana_charset(m_queryCharset, m_queryCharsetLen); if (qcs == csUnknown) { //g_errno = EBADCHARSET; //g_msg = "(error: unknown query charset)"; //return false; qcs = csUTF8; } } // prepend sites terms long numSites = 0; char *csStr = NULL; numSites = 0; csStr = get_charset_str(qcs); /* if ( m_sites && m_sites[0] ) { char *s = m_sites; char *t; long len; m_sbuf1.pushChar('(');// *p++ = '('; loop: // skip white space while ( *s && ! is_alnum_a(*s) ) s++; // bail if done if ( ! *s ) goto done; // get length of it t = s; while ( *t && ! is_wspace_a(*t) ) t++; len = t - s; // add site: term //if ( p + 12 + len >= pend ) goto toobig; if ( numSites > 0 ) m_sbuf1.safeStrcpy ( " UOR " ); m_sbuf1.safeStrcpy ( "site:" ); //p += ucToUtf8(p, pend-p,s, len, csStr, 0,0); m_sbuf1.safeMemcpy ( s , len ); //memcpy ( p , s , len ); p += len; // *p++ = ' '; m_sbuf1.pushChar(' '); s = t; numSites++; goto loop; done: m_sbuf1.safePrintf(") | "); // inc totalLen m_sitesQueryLen = m_sitesLen + (numSites * 10); } */ // prepend char *qp = hr->getString("prepend",NULL,NULL); if( qp && qp[0] ) { //if( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //p += sprintf( p, "+gblang:%li |", m_gblang ); m_sbuf1.safePrintf( "%s", qp ); } // append site: term if ( m_siteLen > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //memcpy ( p , "+site:" , 6 ); p += 6; m_sbuf1.safePrintf("+site:"); //memcpy ( p , m_site , m_siteLen ); p += m_siteLen; m_sbuf1.safeMemcpy(m_site,m_siteLen); } if ( m_familyFilter ) { if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); 
m_sbuf1.safePrintf("gbisadult:0 | "); } // append gblang: term if( m_gblang > 0 ) { //if( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //p += sprintf( p, "+gblang:%li |", m_gblang ); m_sbuf1.safePrintf( "+gblang:%li |", m_gblang ); } // bookmark here so we can copy into st->m_displayQuery below //long displayQueryOffset = m_sbuf1.length(); // append url: term if ( m_urlLen > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //memcpy ( p , "+url:" , 5 ); p += 5; m_sbuf1.safeStrcpy ( "+url:"); //memcpy ( p , m_url , m_urlLen ); p += m_urlLen; m_sbuf1.safeMemcpy ( m_url , m_urlLen ); } // append url: term if ( m_linkLen > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //memcpy ( p , "+link:" , 6 ); p += 6; m_sbuf1.safeStrcpy ( "+link:"); //memcpy ( p , m_link , m_linkLen ); p += m_linkLen; m_sbuf1.safeMemcpy ( m_link , m_linkLen ); } // append the natural query if ( m_queryLen > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //p += ucToUtf8(p, pend-p, m_query, m_queryLen, csStr, 0,0); m_sbuf1.safeMemcpy ( m_query , m_queryLen ); //memcpy ( p , m_query , m_queryLen ); p += m_queryLen; // add to spell checked buf, too //if ( p2 > pstart2 ) *p2++ = ' '; if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); //p2 +=ucToUtf8(p2, pend2-p2, m_query, m_queryLen, csStr, 0,0); m_sbuf2.safeMemcpy ( m_query , m_queryLen ); //memcpy ( p2 , m_query , m_queryLen ); p2 += m_queryLen; } if ( m_query2Len > 0 ) { //if ( p3 > pstart3 ) *p3++ = ' '; if ( m_sbuf3.length() ) m_sbuf3.pushChar(' '); //p3+=ucToUtf8(p3, pend3-p3, m_query2, m_query2Len, csStr,0,0); m_sbuf3.safeMemcpy ( m_query2 , m_query2Len ); } //if (g_errno == EILSEQ){ // illegal character seq // log("query: bad char set"); // g_errno = 0; // if (qcs == csUTF8) {qcs = csISOLatin1;goto doOver;} // if (qcs != csISOLatin1) {qcs = csUTF8;goto doOver;} //} // append quoted phrases to query if ( 
m_quoteLen1 > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //*p++ = '+'; //*p++ = '\"'; m_sbuf1.safeStrcpy("+\""); //p += ucToUtf8(p, pend-p, m_quote1, m_quoteLen1, csStr, 0,0); m_sbuf1.safeMemcpy ( m_quote1 , m_quoteLen1 ); //memcpy ( p , m_quote1 , m_quoteLen1 ); p += m_quoteLen1 ; //*p++ = '\"'; m_sbuf1.safeStrcpy("\""); // add to spell checked buf, too //if ( p2 > pstart2 ) *p2++ = ' '; if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); //*p2++ = '+'; //*p2++ = '\"'; m_sbuf2.safeStrcpy("+\""); //p2+=ucToUtf8(p2, pend2-p2, m_quote1, m_quoteLen1, csStr,0,0); m_sbuf2.safeMemcpy ( m_quote1 , m_quoteLen1 ); //memcpy ( p2 , m_quote1 , m_quoteLen1 ); p2 += m_quoteLen1 ; //*p2++ = '\"'; m_sbuf2.safeStrcpy("\""); } //if (g_errno == EILSEQ){ // illegal character seq // g_errno = 0; // if (qcs == csUTF8) {qcs = csISOLatin1;goto doOver;} // if (qcs != csISOLatin1) {qcs = csUTF8;goto doOver;} //} if ( m_quoteLen2 > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //*p++ = '+'; //*p++ = '\"'; m_sbuf1.safeStrcpy("+\""); //p += ucToUtf8(p, pend-p, m_quote2, m_quoteLen2, csStr, 0,0); m_sbuf1.safeMemcpy ( m_quote2 , m_quoteLen2 ); //memcpy ( p , m_quote2 , m_quoteLen2 ); p += m_quoteLen2 ; //*p++ = '\"'; m_sbuf1.safeStrcpy("\""); // add to spell checked buf, too //if ( p2 > pstart2 ) *p2++ = ' '; if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); //*p2++ = '+'; //*p2++ = '\"'; m_sbuf2.safeStrcpy("+\""); //p2+=ucToUtf8(p2, pend2-p2, m_quote2, m_quoteLen2, csStr,0,0); m_sbuf2.safeMemcpy ( m_quote2 , m_quoteLen2 ); //memcpy ( p2 , m_quote2 , m_quoteLen2 ); p2 += m_quoteLen2 ; //*p2++ = '\"'; m_sbuf2.safeStrcpy("\""); } //if (g_errno == EILSEQ){ // illegal character seq // g_errno = 0; // if (qcs == csUTF8) {qcs = csISOLatin1;goto doOver;} // if (qcs != csISOLatin1) {qcs = csUTF8;goto doOver;} //} // append plus terms if ( m_plusLen > 0 ) { char *s = m_plus; char *send = m_plus + m_plusLen; //if ( p > pstart && p < 
pend ) *p++ = ' '; //if ( p2 > pstart2 && p2 < pend2) *p2++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); while (s < send) { while (isspace(*s) && s < send) s++; char *s2 = s+1; if (*s == '\"') { // if there's no closing quote just treat // the end of the line as such while (*s2 != '\"' && s2 < send) s2++; if (s2 < send) s2++; } else { while (!isspace(*s2) && s2 < send) s2++; } if (s2 < send) break; //if (p < pend) *p++ = '+'; //if (p2 < pend2) *p2++ = '+'; m_sbuf1.pushChar('+'); m_sbuf2.pushChar('+'); //p += ucToUtf8(p, pend-p, s, s2-s, csStr, 0,0); //p2 += ucToUtf8(p2, pend2-p2, s, s2-s, csStr, 0,0); m_sbuf1.safeMemcpy ( s , s2 - s ); m_sbuf2.safeMemcpy ( s , s2 - s ); /* if (g_errno == EILSEQ) { // illegal character seq g_errno = 0; if (qcs == csUTF8) { qcs = csISOLatin1; goto doOver; } if (qcs != csISOLatin1) { qcs = csUTF8; goto doOver; } } */ s = s2 + 1; if (s < send) { //if (p < pend) *p++ = ' '; //if (p2 < pend2) *p2++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); } } } // append minus terms if ( m_minusLen > 0 ) { char *s = m_minus; char *send = m_minus + m_minusLen; //if ( p > pstart && p < pend ) *p++ = ' '; //if ( p2 > pstart2 && p2 < pend2) *p2++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); while (s < send) { while (isspace(*s) && s < send) s++; char *s2 = s+1; if (*s == '\"') { // if there's no closing quote just treat // the end of the line as such while (*s2 != '\"' && s2 < send) s2++; if (s2 < send) s2++; } else { while (!isspace(*s2) && s2 < send) s2++; } if (s2 < send) break; //if (p < pend) *p++ = '-'; //if (p2 < pend2) *p2++ = '-'; m_sbuf1.pushChar('-'); m_sbuf2.pushChar('-'); //p += ucToUtf8(p, pend-p, s, s2-s, csStr, 0,0); //p2 += ucToUtf8(p2, pend2-p2, s, s2-s, csStr, 0,0); m_sbuf1.safeMemcpy ( s , s2 - s ); m_sbuf2.safeMemcpy ( s , s2 - s ); /* if (g_errno == EILSEQ) { // 
illegal character seq g_errno = 0; if (qcs == csUTF8) { qcs = csISOLatin1; goto doOver; } if (qcs != csISOLatin1) { qcs = csUTF8; goto doOver; } } */ s = s2 + 1; if (s < send) { //if (p < pend) *p++ = ' '; //if (p2 < pend2) *p2++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); if ( m_sbuf2.length() ) m_sbuf2.pushChar(' '); } } } // append gbkeyword:numinlinks if they have &mininlinks=X, X>0 long minInlinks = m_hr->getLong("mininlinks",0); if ( minInlinks > 0 ) { //if ( p > pstart ) *p++ = ' '; if ( m_sbuf1.length() ) m_sbuf1.pushChar(' '); //char *str = "gbkeyword:numinlinks"; //long len = gbstrlen(str); //memcpy ( p , str , len ); //p += len; m_sbuf1.safePrintf ( "gbkeyword:numinlinks"); } // null terms if ( ! m_sbuf1.pushChar('\0') ) return false; if ( ! m_sbuf2.pushChar('\0') ) return false; if ( ! m_sbuf3.pushChar('\0') ) return false; // the natural query m_displayQuery = m_sbuf2.getBufStart();// + displayQueryOffset; if ( ! m_displayQuery ) m_displayQuery = ""; while ( *m_displayQuery == ' ' ) m_displayQuery++; m_displayQueryLen = gbstrlen(m_displayQuery);//p-m_displayQuery //log("query: got query %s",m_sbuf1.getBufStart()); //log("query: got display query %s",m_displayQuery); // urlencoded display query urlEncode(m_qe, MAX_QUERY_LEN*2, m_displayQuery, m_displayQueryLen); ////////// // // show DMOZ BREADCRUMB if doing a // "gbpcatid:<catid> |" (Search restricted to category) // "gbcatid:<catid>" (DMOZ urls in that topic, c=dmoz3) // ////////// long pcatId = -1; long dcatId = -1; // get the final query char *q =m_sbuf1.getBufStart(); if ( q ) sscanf(q,"gbpcatid:%li",&pcatId); if ( q ) sscanf(q,"gbcatid:%li",&dcatId); // pick the one that is valid long catId = -1; if ( pcatId >= 0 ) catId = pcatId; if ( dcatId >= 0 ) catId = dcatId; ////// // // save catid into the state m_catId = catId; // /////// // are we a right to left language like hebrew? if ( catId > 0 && g_categories->isIdRTL(catId) ) m_isRTL = true; else m_isRTL = false; return true; }