Exemple #1
0
int main(){

  /* tally[p][q] holds how often lowercase letter q was read immediately
     after lowercase letter p.  Every cell starts at zero. */
  int tally[26][26] = {{0}};
  int row, col;
  int prev, cur;

  /* Walk the input one character at a time, remembering the previous one. */
  prev = getchar();
  for(;;){
    cur = getchar();
    if (cur == EOF)
      break;
    /* prev and cur are character codes; subtracting 'a' turns each of them
       into the 0..25 index of the matching row / column. */
    if (isLower(prev) && isLower(cur))
      tally[prev - 'a'][cur - 'a'] += 1;
    prev = cur;
  }

  /* Report only the letter pairs that were actually seen. */
  for(row = 0; row < 26; row++){
    for(col = 0; col < 26; col++){
      if (tally[row][col] <= 0)
        continue;
      printf("count[%c][%c] = %d\n", row + 'a', col + 'a', tally[row][col]);
    }
  }
  return 0;
}
/* Compare two names character by character while ignoring how their words
   are separated: an underscore is skipped, and an uppercase letter that
   directly follows a lowercase letter is compared as its lowercase form.
   Succeeds only when both names are non-empty, start with the same
   character and run out at the same time. */
static bool
differentSeparated(register char *s1, register char *s2)
{ register char a, b;

  /* empty input or a different first character can never match */
  if ( *s1 == EOS || *s1 != *s2 )
    fail;

  a = *++s1;
  b = *++s2;

  while( a && a == b )
  { /* fetch the next significant character of the first name */
    a = *++s1;
    if ( a == '_' )
      a = *++s1;
    else if ( isLower(s1[-1]) && isUpper(a) )
      a = makeLower(a);

    /* and of the second name, using the same rules */
    b = *++s2;
    if ( b == '_' )
      b = *++s2;
    else if ( isLower(s2[-1]) && isUpper(b) )
      b = makeLower(b);
  }

  if ( a == EOS && b == EOS )
    succeed;

  fail;
}
// Memoised recursive search starting at cell (si, sj).
// Returns 1 plus the largest result among the four orthogonal neighbours that
// isLower(si, sj, ni, nj) accepts, or 1 when no neighbour is accepted.
// Results are stored in cache[][]; -1 there means "not computed yet".
int solve(int si, int sj)
{
    if (cache[si][sj] != -1)
        return cache[si][sj];

    // Row/column offsets for up, down, left, right.
    static const int kStep[4][2] = { { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };

    int longest = 1;
    for (int dir = 0; dir < 4; ++dir)
    {
        int ni = si + kStep[dir][0];
        int nj = sj + kStep[dir][1];

        if (isLower(si, sj, ni, nj))
        {
            int candidate = 1 + solve(ni, nj);
            if (candidate > longest)
                longest = candidate;
        }
    }

    cache[si][sj] = longest;
    return longest;
}
/*
 * Rotate a letter by n positions inside its own alphabet (Caesar shift).
 * n may be any integer; it is first reduced to the range 0..25.
 * Characters that are neither lowercase nor uppercase are returned unchanged.
 * NOTE(review): the tests below compare against exactly 1, so this relies on
 * isLower()/isUpper() returning 1 (not merely non-zero) for a match - confirm.
 */
char shiftChar(char c, int n){
	int lowerFlag = isLower(c);
	int upperFlag = isUpper(c);

	/* Bring the shift into 0..25, whatever its sign or magnitude. */
	n %= 26;
	if (n < 0) {
		n += 26;
	}

	if (lowerFlag == 1) {
		/* Running past 'z' wraps around to the start of the alphabet. */
		if ((c + n) > 'z') {
			return c + n - ('z' - 'a' + 1);
		}
		return c + n;
	}

	if (upperFlag == 1) {
		/* Same wrap-around rule for the uppercase alphabet. */
		if ((c + n) > 'Z') {
			return c + n - ('Z' - 'A' + 1);
		}
		return c + n;
	}

	return c;
}
// Returns the other-case code point (via UCD_OTHERCASE) when ch is lowercase;
// any other code point is returned unchanged.
int Unicode::toUpper(int ch)
{
	return isLower(ch)
		? static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch)))
		: ch;
}
Exemple #6
0
// Returns the other-case code point (via _pcre_ucp_othercase) when ch is
// lowercase; any other code point is returned unchanged.
int Unicode::toUpper(int ch)
{
	// Only lowercase code points are converted.
	if (!isLower(ch))
		return ch;

	return static_cast<int>(_pcre_ucp_othercase(static_cast<unsigned>(ch)));
}
/*
 * Copy the leading component of 'src' (up to EOS or SLASHC) into 'dst',
 * provided it consists only of non-lowercase letters, digits, '$' and '_'.
 * If that name is non-empty and is set in the environment, 'dst' is
 * overwritten with the variable's value (leading blanks skipped) and the
 * length of the name is returned.  In every other case 0 is returned; 'dst'
 * may then hold the part of the name that was copied before giving up.
 * NOTE(review): the value is copied with strcpy(), so 'dst' must be large
 * enough for it - the size is not visible here.
 */
static int
leading_uc(char *dst, char *src)
{
    char *name = dst;
    char *value;
    int ch;
    int ok;
    int len;

    for (ch = *src; ch != EOS && ch != SLASHC; ch = *++src) {
	if (isAlpha(ch))
	    ok = !isLower(ch);
	else
	    ok = (strchr("0123456789$_", ch) != 0);
	if (!ok)
	    return (0);
	/* keep the partial name terminated after every character */
	*dst++ = ch;
	*dst = EOS;
    }
    *dst = EOS;

    if (!(*name))
	return (0);

    value = getenv(name);
    if (value == 0)
	return (0);

    /* measure the name before it is overwritten by the value */
    len = strlen(name);
    value = skip_blanks(value);
    (void) strcpy(name, value);
    return (len);
}
Exemple #8
0
/* Change the letter case of 'str' in place according to 'type' and return 'str'.
     mscLower      - lowercase the whole string
     mscUpper      - uppercase the whole string
     mscCapitalize - lowercase everything, then uppercase the first character
     mscUpLower    - lowercase the string only if it holds no lowercase character
     mscLoUpper    - uppercase the string only if it holds no uppercase character
     mscInvert     - uppercase characters become lowercase, all others go
                     through ToUpper
   A null or empty string, or any other 'type', is returned untouched.
*/
char *MYRTLEXP StrCase( char *str,msCaseTypes type )
  {  char *p;

     if ( !str || *str == 0 )
       return str;

     switch( type ){
       case mscLower:
         StrLwr( str );
         break;

       case mscUpper:
         StrUpr( str );
         break;

       case mscCapitalize:
         StrLwr( str );
         *str = ToUpper(*str);
         break;

       case mscUpLower:
         /* any lowercase character means the string is left as it is */
         for ( p = str; *p; p++ )
           if ( isLower(*p) )
             return str;
         StrLwr( str );
         break;

       case mscLoUpper:
         /* any uppercase character means the string is left as it is */
         for ( p = str; *p; p++ )
           if ( isUpper(*p) )
             return str;
         StrUpr( str );
         break;

       case mscInvert:
         for ( p = str; *p; p++ ) {
           if ( isUpper(*p) )
             *p = ToLower(*p);
            else
             *p = ToUpper(*p);
         }
         break;
     }

 return str;
}
Exemple #9
0
/*
 * Shell command: parse params as a "0x"-prefixed hexadecimal literal and
 * print its value with ttprintIntln().
 *
 * The digits after the prefix are reversed in place (params is modified) so
 * that index i carries the place value 16^i.  Any character that is not one
 * of 0-9, a-f or A-F makes the command print "improperly formed hex" and
 * return without printing a number.
 *
 * NOTE(review): the length is taken as strLen(params)-1, exactly as before;
 * this presumably means strLen() counts one more than the number of
 * characters to convert - confirm against strLen().
 */
void sh_htoi(char* params){
	int len = strLen(params)-1;
	if(len < 3 || params[0] != '0' || params[1] != 'x'){
		ttprintln("htoi param must start with `0x`");
		return;
	}
	char* digits = params + 2;
	reverseInPlace(digits);
	len -= 2;

	/* Unsigned arithmetic: exact integer place values instead of pow(), and
	   no undefined behaviour if the literal is wider than an int. */
	unsigned int sum = 0;
	unsigned int weight = 1;
	int i;
	for(i = 0; i < len; i++){
		char ch = digits[i];
		unsigned int value;
		if(ch >= '0' && ch <= '9'){
			value = (unsigned int)(ch - '0');
		}
		else if(ch >= 'a' && ch <= 'f'){
			value = (unsigned int)(ch - 'a') + 10u;
		}
		else if(ch >= 'A' && ch <= 'F'){
			value = (unsigned int)(ch - 'A') + 10u;
		}
		else {
			/* Previously only bad letters were rejected; other stray
			   characters were silently added as (ch - '0'). */
			ttprintln("improperly formed hex");
			return;
		}
		sum += weight * value;
		weight *= 16u;
	}
	ttprintIntln((int)sum);
}
Exemple #10
0
//
// Report whether the string holds no lowercase character.
// Returns false at the first character for which isLower() is true and
// true otherwise (so an empty string yields true).
//
bool STR_String::IsUpper() const
{
	int idx = 0;
	while (idx < this->m_len)
	{
		if (isLower(this->m_data[idx]))
			return false;
		++idx;
	}

	return true;
}
Exemple #11
0
//
// Report whether the string holds no lowercase character.
// The scan stops at the first character for which isLower() is true;
// an empty string yields true.
//
bool STR_String::IsUpper() const
{
	bool noLowerSeen = true;

	for (int pos = 0; noLowerSeen && pos < Len; ++pos)
	{
		if (isLower(pData[pos]))
			noLowerSeen = false;
	}

	return noLowerSeen;
}
// Separate letter cases in a[low..high] (both bounds inclusive), in place:
// afterwards every uppercase character sits to the right of every character
// that is not uppercase.  The relative order inside each group is not kept.
//
// Characters that are neither upper- nor lowercase (digits, punctuation) are
// kept in the left group.  The earlier version stopped both scans on such
// characters and swapped them back and forth forever; it also read a[high]
// before checking the bounds.
void func1(char a[], int low, int high)
{
    while (low < high)
    {
        // Shrink from the right while the character already belongs there.
        while (low < high && isUpper(a[high])) high--;
        // Grow from the left past everything that is not uppercase.
        while (low < high && !isUpper(a[low])) low++;
        // Either the scans met (a harmless self-swap) or a[low] is uppercase
        // and a[high] is not, so the swap puts both on their correct side.
        swap(a[low], a[high]);
    }
}
Exemple #13
0
// Decide what to do with the characters collected in circularBuffer[0..waited)
// after a line ended in a hyphen: either write them out through Segment.Put
// (with or without the hyphen) or hand the decision to hyphenate().
//
// Reads the members waited, circularBuffer, allLower and dropHyphen, and may
// clear dropHyphen. flgs is only passed on to Segment.Put / hyphenate.
void paragraph::considerHyphenation(flags & flgs)
    {
    bool vowel = false;          // an isVowel() character was seen in the scanned part
    bool nonAlphaFound = false;  // scan ended on a character that is neither punctuation nor semi-punctuation
    int cnt = 0;                 // number of characters looked at after the leading spaces
    int j = 0;
    int punkpos = -1;            // value of cnt at the first isPunct() character, -1 if none
    bool locAllLower = true;     // every alphabetic character scanned was lowercase
    bool locAllUpper = true;     // every alphabetic character scanned was uppercase
    // Normalise leading flat spaces in the buffer to plain blanks.
    for(j = 0;j != waited && isFlatSpace(circularBuffer[j]);++j)
        circularBuffer[j] = ' ';
    // Scan the remainder; the scan stops at the first character that is
    // neither alphabetic nor '-'.
    for(;j != waited;++j)
        {
        ++cnt;
        int k = circularBuffer[j];
        if(isAlpha(k))
            {
            if(!isLower(k))
                locAllLower = false;
            if(!isUpper(k))
                locAllUpper = false;
            }
        else if(k != '-')
            {
            if(punkpos < 0 && isPunct(k))
                punkpos = cnt;
            else if(!isSemiPunct(k))
                nonAlphaFound = true;
            break;
            }
        if(isVowel(k))
            vowel = true;
        }
    if(dropHyphen)
        { // Require agreement of case
        // allLower is a member that is not set in this function; the scanned
        // text must match it (all lowercase, or otherwise all uppercase) for
        // dropHyphen to stay set.
        if(allLower)
            {
            if(!locAllLower)
                dropHyphen = false;
            }
        else
            {
            if(!locAllUpper)
                dropHyphen = false;
            }
        }
    // Emit the buffered text directly when at least two characters were
    // scanned without hitting punctuation or another non-letter, or when the
    // scan stopped exactly on a punctuation character.
    if((!nonAlphaFound && cnt >= 2 && punkpos < 0) || punkpos == cnt)
        {
        // The hyphen is written unless it may be dropped and a vowel was seen.
        if(!dropHyphen || !vowel)
            Segment.Put(file,'-',flgs);
        for(j = 0;j != waited;++j)
            Segment.Put(file,circularBuffer[j],flgs);
        }
    else
        hyphenate(flgs);
    }
Exemple #14
0
/*
 * Map a mark character to an index: lowercase letters give 0..25 ('a' is 0),
 * digits give 26..35 ('0' is 26).  Any other character gives -1.
 */
static int
nmark2inx(int c)
{
    int inx = -1;

    if (isDigit(c))
	inx = 26 + (c - '0');
    else if (isLower(c))
	inx = c - 'a';

    return inx;
}
/*
 * Small manual test driver: prints the results of isUpper/isLower on sample
 * characters and of shiftChar on a non-letter, then runs both the in-place
 * and the allocating cipher routine with a shift of -10 and finally test().
 */
int main (int argc, char *argv[]){
	//int c = sizeof("one");
	int a = isUpper('a');
	int b = isLower('a');
	int c = isUpper('Z');
	int d = isLower('Z');
	char r = shiftChar('.', 1);
	printf("%d %d %d %d\n", a,b,c,d);
	printf("%c\n", r);
	
	char *message = strdup("M! K fobi pkcd, iod kd dswoc pbecdbkdsxq vkxqekqo.");
	printf("message    : %s\n", message);
	encryptInPlace(message,-10);
	printf("ciphertext : %s\n\n", message);
	
	char *message1 = strdup("M! K fobi pkcd, iod kd dswoc pbecdbkdsxq vkxqekqo.");
	printf("message    : %s\n", message1);
	char *ciphertext = encrpytNewMemory(message1,-10);
	printf("ciphertext : %s\n", ciphertext);
	/* The buffer used to be printed a second time after free(), which reads
	   released memory; that print is gone and every buffer is released only
	   once it is no longer needed. */
	free(ciphertext);
	free(message1);
	free(message);
	test();
	return 0;
}
Exemple #16
0
/*
 * Upper case region. Zap all of the lower
 * case characters in the region to upper case. Use
 * the region code to set the limits. Scan the buffer,
 * doing the changes. Call "lineSetChanged" to ensure that
 * redisplay is done in all buffers. Bound to
 * "C-X C-U".
 *
 * The arguments f and n are not used in the body.
 * Returns the (<= 0) status of getregion() or bufferSetEdit() when either
 * of them fails, otherwise meTRUE. The current window's dot (line, offset
 * and line number) is saved on entry and restored before returning.
 */
int
upperRegion(int f, int n)
{
    meLine *line ;
    int   loffs ;
    long  lline ;
    register char  c;
    register int   s;
    meRegion         region;
    
    if((s=getregion(&region)) <= 0)
        return (s);
    if((s=bufferSetEdit()) <= 0)               /* Check we can change the buffer */
        return s ;
    /* Remember where the dot is, then move it to the start of the region */
    line = frameCur->windowCur->dotLine ;
    loffs = frameCur->windowCur->dotOffset ;
    lline = frameCur->windowCur->dotLineNo ;
    frameCur->windowCur->dotLine = region.line ;
    frameCur->windowCur->dotOffset = region.offset ;
    frameCur->windowCur->dotLineNo = region.lineNo ;
    /* Visit region.size positions, using the dot as the cursor */
    while (region.size--)
    {
        if((c = meLineGetChar(frameCur->windowCur->dotLine, frameCur->windowCur->dotOffset)) == '\0')
        {
            /* A '\0' here is treated as the end of the line: step to the next line */
            frameCur->windowCur->dotLine = meLineGetNext(frameCur->windowCur->dotLine);
            frameCur->windowCur->dotOffset = 0;
            frameCur->windowCur->dotLineNo++ ;
        }
        else
        {
            if(isLower(c))
            {
                lineSetChanged(WFMAIN);
#if MEOPT_UNDO
                /* Record the replacement for undo before the character changes */
                meUndoAddRepChar() ;
#endif
                /* c is known to be lower case here, so the toggle upper-cases it */
                c = toggleCase(c) ;
                meLineSetChar(frameCur->windowCur->dotLine, frameCur->windowCur->dotOffset, c);
            }
            (frameCur->windowCur->dotOffset)++ ;
        }
    }
    /* Put the dot back where it was */
    frameCur->windowCur->dotLine = line ;
    frameCur->windowCur->dotOffset = loffs ;
    frameCur->windowCur->dotLineNo = lline ;
    return meTRUE ;
}
Exemple #17
0
// Insert a new node holding newArray into the list that starts at s.
// The list is walked while isLower(next->move, newArray) holds, and the new
// node is linked in right after the last node that satisfied it.
//   s        - list head; its items counter is incremented
//   nG       - value stored in the new node's G
//   newArray - board copied into the new node's move
//   prev     - stored as the new node's prev; prev->move is passed to CountH
//   current  - set to the newly created node
void Fifteen::Set::add(Set* s, int nG, TGameArray newArray, Set* prev, Set* &current)
{
	s->items++;

	// Locate the node after which the new element is inserted.
	Set* pos = s;
	for (; pos->next != NULL; pos = pos->next)
	{
		if (!isLower(pos->next->move, newArray))
			break;
	}

	// Build the node completely, then link it into the chain.
	Set* node = new Set();
	node->G = nG;
	node->H = CountH(newArray, prev->move);
	node->next = pos->next;
	node->prev = prev;
	CopyTGameArray(node->move, newArray);

	pos->next = node;
	current = node;
}
Exemple #18
0
/*
 * Filter one line of standard input: lowercase letters are printed in
 * uppercase, characters accepted by isNum() are handed to toChinese(), and
 * everything else is echoed unchanged.  Processing stops after the newline
 * has been handled, or as soon as the input ends.
 */
int main( void )
{
	char c;
	
	do {
		/* scanf yields 1 only when a character was stored; on end of input
		   or a read error c would be stale and the loop would never end. */
		if ( scanf("%c", &c) != 1 ) {
			break;
		}
		if ( isLower(c) ) {
			printf("%c", toUpper(c));
		} else if ( isNum(c) ) {
			toChinese(c);
		} else {
			printf("%c", c);
		}
	} while ( c != '\n' );	

	return 0;
}
Exemple #19
0
// Returns the default CSS class for this float, computing and caching it in
// defaultcssclass_ on first use. The class is "float-" followed by the float
// type with non-ASCII-alphabetic characters replaced by '_' and all letters
// lowercased.
string Floating::defaultCSSClass() const
{ 
	if (defaultcssclass_.empty()) {
		string const name = floattype_;
		string sanitized;
		for (string::size_type i = 0; i < name.size(); ++i) {
			char const ch = name[i];
			if (!isAlphaASCII(ch)) {
				sanitized += "_";
				continue;
			}
			if (isLower(ch))
				sanitized += ch;
			else
				sanitized += support::lowercase(ch);
		}
		// are there other characters we need to remove?
		defaultcssclass_ = "float-" + sanitized;
	}
	return defaultcssclass_;
}
Exemple #20
0
char* strstr_uc(char* haystack, char* needle, size_t needlesize) {
	if(!haystack || !needle || !needlesize) return NULL;
	char diff;
	size_t i;
	char* save;
	while(*haystack) {
		save = haystack;
		for(i = 0; i < needlesize; i++) {
			if(!*haystack) return NULL;
			if(isLower(haystack)) diff = -ludiff;
			else diff = 0;
			if(*(haystack) + diff != needle[i]) goto next;
			haystack++;
		}
		return save;
		next:
		haystack++;
	}
	return NULL;
}
/* Copy the first sub-word of the name at `s' into `store' (terminated with
   EOS); the first character is stored through makeLower().  A sub-word ends
   at the end of the name, at an underscore, or just before an uppercase
   letter that follows a lowercase one.  Returns the position where the next
   sub-word starts: past the underscore, at the uppercase letter, or at the
   terminating EOS. */
static char *
subWord(register char *s, register char *store)
{ *store++ = makeLower(*s);

  for(s++; *s != EOS; s++)
  { if ( *s == '_' )
    { s++;				/* the separator itself is skipped */
      break;
    }
    if ( isLower(s[-1]) && isUpper(s[0]) )
      break;				/* lower-to-upper transition */

    *store++ = *s;
  }

  *store = EOS;
  return s;
}
Exemple #22
0
/*
 * Read one line from standard input, flip the case of its first character
 * (lowercase becomes uppercase and vice versa) and print the line.
 * Returns 1 when no input could be read.
 */
int main()
{
    //a = 97
    //z = 122
    //A = 65
    //Z = 90

    char input[50];
    int i;

    puts("Enter type a single letter");
    /* fgets is bounded by the buffer size; gets() could write past the end of
       input and is no longer part of the language. */
    if (fgets(input, sizeof input, stdin) == NULL)
    {
        return 1;
    }
    /* Unlike gets, fgets keeps the newline; drop it because puts adds one. */
    for (i = 0; input[i] != '\0'; i++)
    {
        if (input[i] == '\n')
        {
            input[i] = '\0';
            break;
        }
    }

    /* Test for truthiness rather than "== true", so that a helper returning
       some other non-zero value is still honoured. */
    if (isLower(input[0]))
    {
        input[0] -= 'a' - 'A';
    }
    else if (isUpper(input[0]))
    {
        input[0] += 'a' - 'A';
    }
    puts(input);
    return 0;
}
Exemple #23
0
// Reports whether board t is already stored in the list starting at s.
// The walk continues only while isLower(node->move, t) holds; it returns true
// as soon as a visited node's board equals t in all SIZE x SIZE cells, and
// false once the walk ends.
bool Fifteen::Set::isAlready(Set* s, TGameArray t)
{
	for (Set* node = s; (node != NULL) && (isLower(node->move, t)); node = node->next)
	{
		// Cell-by-cell comparison of the stored board with t.
		bool identical = true;
		for (int row = 0; identical && row < SIZE; ++row)
		{
			for (int col = 0; col < SIZE; ++col)
			{
				if (!(node->move[row][col] == t[row][col]))
				{
					identical = false;
					break;
				}
			}
		}
		if (identical)
		{
			return true;
		}
	}
	return false;
}
Exemple #24
0
// Fill this User from a passwd database entry.
// A null entry marks the user as non-existent and zeroes the numeric ids;
// the remaining fields are not touched in that case.
// When the full name (pw_gecos) starts or ends with a comma, a copy of the
// login name is used as the full name instead.
void User::load(struct passwd *entry)
{
    if (!entry) {
        exists_ = false;
        id_ = 0;
        groupId_ = 0;
        return;
    }

    exists_ = true;
    id_ = entry->pw_uid;
    groupId_ = entry->pw_gid;
    loginName_ = entry->pw_name;
    fullName_ = entry->pw_gecos;

    if (fullName_->count() > 0 &&
        ((fullName_->at(0) == ',') || (fullName_->at(fullName_->count() - 1) == ',')))
    {
        fullName_ = loginName_->copy();
        // NOTE(review): lowercasing a character that is already lowercase
        // looks like a no-op; capitalising the first letter may have been
        // intended - confirm before changing.
        if (isLower(fullName_->at(0)))
            fullName_->at(0) = downcase(fullName_->at(0));
        // fullName_ << " Anonymous";
    }

    home_ = entry->pw_dir;
    shell_ = entry->pw_shell;
}
Exemple #25
0
// Write one character to the output while trying to undo word wrapping.
//
// Inside an HTML tag, or when Option.wordUnwrap is off, ch goes straight to
// Segment.Put. Otherwise:
//   * punctuation, semi-punctuation and hyphen counters in flgs are updated;
//   * a line break ('\n' or '\r') is handled according to the previous
//     non-space character `last`: after a hyphen the last word in
//     circularBuffer is inspected and, if it looks like a word fragment, the
//     function starts "waiting" (wait != 0) and collects the following
//     characters instead of writing them;
//   * while waiting, characters are stored in circularBuffer[waited++] until
//     a flat space arrives (considerHyphenation) or the buffer is full
//     (hyphenate).
// The tail of the function maintains `last`, flushes pending hyphens and
// records ch in the circular buffer of recent characters when not waiting.
void paragraph::PutHandlingWordWrap(const wint_t ch,flags & flgs) // Called from GetPut, GetPutBullet and doTheSegmentation
    {
    if(flgs.inhtmltag || !Option.wordUnwrap)
        {
        Segment.Put(file,ch,flgs); // //
        }
    else
        {
        // Classify the character and bump the matching counter.
        if(isSentencePunct(ch))
            {
            ++flgs.punctuationFound;
            }
        else if(isSemiPunct(ch))
            {
            ++flgs.semiPunctuationFound;
            }
        else if(ch == '-')
            {
            ++flgs.hyphenFound;
            }

        if(ch == '\n' || ch == '\r')
            {
            // Line break: what happens depends on the previous non-space character.
            switch(last)
                {
                case '\n':
                    // Second line break in a row: flush anything that was waiting.
                    if(wait)
                        {
                        hyphenate(flgs);
                        wait = 0;
                        flgs.punctuationFound = 0;
                        flgs.hyphenFound = 0;
                        spaceAfterHyphen = false;
                        }
                    flgs.semiPunctuationFound = 0;
                    Segment.Put(file,'\n',flgs);
                    break;
                case '-':
                    {
                    // Line ends in a hyphen: examine the word before it.
                    int k;
                    int nonSpaceCount = 0;
                    bool nonAlphaFound = false;
                    dropHyphen = false;
                    int i;
                    // skip previous spaces
                    for( i = ind(lastWordIndex-1)
                       ;    i != lastWordIndex 
                         && isSpace(circularBuffer[ind(i)])
                       ; i = dec(i)
                       )
                        ;
                    allLower = allUpper = true;
                    bool Upper = false; 
                    /* This variable introduces "lag" of one iteration in 
                    changing the value of allLower. In that way, the case of 
                    first character in the string (the one last checked)
                    doesn't matter.
                    Example:
                    blah blah. Da-
                    vid did it.
                    */

                    for (
                        ;    i != lastWordIndex 
                          && ( k = circularBuffer[ind(i)]
                             , !isSpace(k)
                             )
                        ; i = dec(i)
                        )  // Look at casing of last word in retrograde fashion.
                        {
                        ++nonSpaceCount;
                        if(Upper)
                            allLower = false;
                        if(isAlpha(k) && k != '-')
                            {
                            if(!isLower(k))
                                Upper = true;
                            if(!isUpper(k))
                                allUpper = false;
                            }
                        else if(k != '-')
                            {
                            nonAlphaFound = true;
                            break;
                            }
                        if(isVowel(k))  // We need at least one vowel to admit
                            //interpretation of hyphen as the 
                            // effect of word wrap.
                            dropHyphen = true;
                        }
                    if(dropHyphen && !allLower && !allUpper)
                        dropHyphen = false; // Mixed case -> keep hyphen
                    if(!nonAlphaFound && nonSpaceCount >= 2)
                        {
                        /**/
                        // Start waiting: the following characters are collected
                        // from index 0 of circularBuffer, up to its capacity.
                        lastWordIndex = 0;
                        wait = (sizeof(circularBuffer)/sizeof(circularBuffer[0]));
                        waited = 0;
                        }
                    break;
                    }
                default:
                    {
                    if(wait)
                        {
                        considerHyphenation(flgs);
                        wait = 0;
                        flgs.punctuationFound = 0;
                        flgs.semiPunctuationFound = 0;
                        flgs.hyphenFound = 0;
                        spaceAfterHyphen = false;
                        }
                    Segment.Put(file,'\n',flgs); // Treat newline as a blank
                    }
                }
            }
        else
            {
            // Any character other than a line break.
            if(wait)
                {
                if(!isFlatSpace(ch))
                    {
                    if(waited == wait)
                        {
                        // Buffer is full: give up waiting and flush.
                        hyphenate(flgs);
                        wait = 0;
                        flgs.punctuationFound = 0;
                        flgs.semiPunctuationFound = 0;
                        flgs.hyphenFound = 0;
                        spaceAfterHyphen = false;
                        Segment.Put(file,ch,flgs);
                        }
                    else
                        {
                        circularBuffer[waited++] = (wchar_t)ch;
                        }
                    }
                else if(waited > 0)
                    {
                    // A space after at least one collected character ends the wait.
                    considerHyphenation(flgs);
                    wait = 0;
                    flgs.punctuationFound = 0;
                    flgs.semiPunctuationFound = 0;
                    flgs.hyphenFound = 0;
                    spaceAfterHyphen = false;
                    Segment.Put(file,' ',flgs);
                    }
                }
            else
                {
                // Hyphens are held back (counted in flgs.hyphenFound) and
                // written later together with the character that follows them.
                if(!flgs.hyphenFound)
                    Segment.Put(file,ch,flgs);
                }
            }
        }
    if(!isFlatSpace(ch))
        {
        if(ch != '\n' && !isSentencePunct(ch) && !wait)
            flgs.punctuationFound = 0;

        if(ch != '\n' && !isSemiPunct(ch) && !wait)
            flgs.semiPunctuationFound = 0;

        if(ch != '\n' && ch != '-' && flgs.hyphenFound && !wait) // A-bomb
            {
            // Write out the hyphens that were held back, then the character itself.
            int k;
            for(k = 0;k < flgs.hyphenFound;++k)
                Segment.Put(file,'-',flgs);
            if(spaceAfterHyphen)
                Segment.Put(file,' ',flgs);
            spaceAfterHyphen = false;
            flgs.hyphenFound = 0;
            Segment.Put(file,ch,flgs);
            }
        last = ch; ///
        }
    else if(flgs.hyphenFound && last != '\n')
        {
        spaceAfterHyphen = true;
        }

    if(!wait)
        {
        // Record ch in the circular history, collapsing runs of flat spaces
        // into a single entry.
        if(  !isFlatSpace(ch) 
          || !isFlatSpace(circularBuffer[ind(lastWordIndex-1)])
          )
            {
            circularBuffer[lastWordIndex] = (wchar_t)ch;
            lastWordIndex = inc(lastWordIndex);
            }
        }
    }
Exemple #26
0
	// Less-than comparison; the result is whatever isLower(source) reports.
	bool NE_DLL NEKey::operator<(const NEKey& source) const
	{
		const bool precedes = isLower(source);
		return precedes;
	}
Exemple #27
0
// True when c is a letter, i.e. isLower(c) or isUpper(c) holds.
bool isAlpha(char c){
  if (isLower(c)) {
    return true;
  }
  return isUpper(c);
}
/* Never was great formulating =P */
// Scan pBuffer for the words in WordList and run the actions configured for
// each word that is detected.
//
//   pPlayer - player who wrote the text; may be NULL, in which case the
//             log / RC-message / ban actions are skipped
//   pBuffer - text to check; FILTERA_REPLACE and FILTERA_WARN modify it
//   pCheck  - index into each word's check[] array; words whose entry is
//             false are skipped
//
// Returns true when at least one detection was recorded in `found`.
//
// NOTE(review): the actions run only from the mismatch branch below, i.e. when
// a comparison fails after at least word->precision characters have matched.
// A pattern that matches through its last character does not trigger them in
// that pass, and `start` / `wc` are then carried over to the next offset.
bool CWordFilter::apply(CPlayer *pPlayer, CBuffer &pBuffer, int pCheck)
{
	// logsave / rctell make sure the log entry and the RC message are emitted at most once per call.
	bool logsave = false, rctell = false;
	CBuffer start;      // characters matched so far
	CStringList found;  // every detected fragment
	int pos = 0, wc = 0; // wc: number of characters matched so far

	for (int i = 0; i < WordList.count(); i++)
	{
		WordMatch *word = (WordMatch *)WordList[i];
		if (!word->check[pCheck])
			continue;

		for (int j = 0; j < pBuffer.length(); j++)
		{
			for (int k = 0; k < word->match.length(); k++)
			{
				// NOTE(review): j + k can exceed pBuffer.length() - 1; whether that is
				// safe depends on CBuffer::operator[], which is not visible here.
				char c1 = pBuffer[j + k];
				char c2 = word->match[k];
				// '&&' binds tighter than '||', so this reads as
				//   (c2 != '?' && isUpper(c2) && c2 != c1) || (isLower(c2) && toLower(c2) != toLower(c1))
				// Uppercase pattern characters must match exactly, lowercase ones
				// match case-insensitively, and other pattern characters never
				// count as a mismatch.
				if (c2 != '?' && (isUpper(c2) && c2 != c1) || (isLower(c2) && toLower(c2) != toLower(c1)))
				{
					if (wc >= word->precision)
					{
						found.add(start);

						// Run every action that is enabled for this word.
						for (int l = 0; l < (int)sizeof(word->action); l++)
						{
							if (!word->action[l])
								continue;

							switch (l)
							{
								case FILTERA_LOG:
									if (logsave)
										break;

									logsave = true;
									if (pPlayer != NULL)
										errorOut("wordfilter.txt", CBuffer() << pPlayer->accountName << " has used rude words while chatting: " << start);
								break;

								case FILTERA_REPLACE:
									// Overwrite with '*' from offset j up to the next space,
									// or for start.length() characters when there is no space.
									pos = pBuffer.find(' ', j);
									pos = (pos == -1 ? start.length() : pos-j+1);
									for (int m = 0; m < pos; m++)
										pBuffer.replace(j + m, '*');
								break;

								case FILTERA_TELLRC:
									if (rctell)
										break;

									rctell = true;
									if (pPlayer != NULL)
										sendRCPacket(CPacket() << (char)DRCLOG << pPlayer->accountName << " has used rude words while chatting: " << start);
								break;

								case FILTERA_WARN:
									// Replace the whole message with the word's own warning,
									// falling back to the filter-wide one.
									pBuffer = (word->warnmessage.length() > 0 ? word->warnmessage : warnmessage);
								break;

								case FILTERA_JAIL: // kinda useless...?
								break;

								case FILTERA_BAN:
									if (pPlayer != NULL)
									{
										CBuffer pLog = CBuffer() << "\n" << getTimeStr(0) << "\n" << pPlayer->accountName << " has used rude words while chatting: " << start;
										pPlayer->setBan(pLog, true);
									}
								break;
							}
						}
					}

					// Mismatch: forget the partial match and try the next offset.
					start.clear();
					wc = 0;
					break;
				}

				start.writeChar(c1);
				wc++;
			}
		}
	}

	return (found.count() > 0);
}
Exemple #29
0
// Pass one character on to pRegularizationFnc while deciding whether a dot
// that followed a number, or an abbreviation, ends a sentence.
//
//   file - output stream handed through to pRegularizationFnc
//   ch   - character to write; may be rewritten to ' ' or '*' first
//   flgs - shared state flags; several are read and updated here
//
// Member state used: last (previous character seen by this function),
// trailingDotFollowingNumber (a dot after a number is being held back) and
// ensureEmptyLine (a '\n' must precede the next non-newline character).
void dots::Put3(STROEM * file,wint_t ch,flags & flgs) // called from PutN, Put2 and GetPut
    {
    /* Put3 generally causes a newline (ch=='\n') to be written.
    Exception: inside htmltags.
    */

    if(flgs.inhtmltag)
        {
        flgs.in_abbreviation = false;
        flgs.person_name = not_a_name;
        flgs.number_final_dot = false;
        // This code writes the complete HTML-tag
        if(Option.suppressHTML)
            {
            ch = ' ';
            }
        else
            {
            if(ch)
                {
                if(isSpace(ch))
                    {
                    // White space inside a tag is only remembered; a single
                    // blank is written before the next non-space character.
                    last = ' ';
                    //return; // 20100107
                    }
                else 
                    {
                    if(isFlatSpace(last))// last == ' ' ||  last == 0xA0 || last == 0x3000)
                        {
                        pRegularizationFnc(file,' ',flgs);
                        }
                    pRegularizationFnc(file,ch,flgs);
                    }
                }
            }
        }
    else
        {
        if(isFlatSpace(ch))//20100106 // ch == 0xA0) // 20071112
            ch = ' ';
        else if((unsigned int)ch < ' ') // replace tabs by spaces and all other non-white space by an asterisk
            {
            if(ch != '\n')
                ch = '*';
            }

        if(ch == ' ')
            {
            // A blank is never written here; it is only recorded in 'last'.
            if(last != '\n') // Spaces at the beginning of a line are ignored. Only spaces after words are recorded in 'last'.
                {
                last = ' ';
                }
            return;
            }

        if(ch == '\n')
            {
            if(trailingDotFollowingNumber)
                {
                if(Option.tokenize)
                    pRegularizationFnc(file, Option.tokenSplit,flgs); // insert blank before dot if number followed by dot is at the end of the line
                pRegularizationFnc(file,'.',flgs);
                trailingDotFollowingNumber = false;
                flgs.in_abbreviation = false;
                }
            }
        else if(isFlatSpace(last))// last == ' ' || last == 0xA0)
            {
            // First character of a new token: decide whether the pending
            // separator is written as a blank or as a line break.
            wint_t lastToWrite = ' ';
            if(!isLower(ch)) // Might be an indication that a new sentence starts here. 
                // Check preceding token for trailing dot that might be a
                // sentence delimiter after all.
                {
                if(trailingDotFollowingNumber) // ... in 1999. Next month ...   ch=='N', last is ' '
                    //            ^ Not written from here
                    {// Regard dot as sentence delimiter after all
                    if(Option.tokenize)
                        pRegularizationFnc(file, Option.tokenSplit,flgs); // Insert blank before dot if number followed by dot is followed by capitalised word.
                    pRegularizationFnc(file,'.',flgs);
                    trailingDotFollowingNumber = false;
                    lastToWrite = '\n'; // Number seems to be the last word of the previous sentence. Fake history.
                    // ... in 1999.
                    // Next month ...
                    flgs.in_abbreviation = false;
                    }
                else if(flgs.in_abbreviation /*&& flgs.newSegment*/)
                    {
                    switch(flgs.person_name)
                        {
                        case initial: 
                            flgs.person_name = not_a_name;
                            break;
                        case not_a_name:
                        default: // Skema 1. Affald fra husholdninger --> Skema 1. | Affald fra husholdninger       20040420
                            if(!flgs.expectCapitalizedWord)
                                lastToWrite = '\n'; // Abbreviation seems to be the last word of the previous sentence
                            break;
                        }
                    }
                }
            else if(trailingDotFollowingNumber)
                { // Now we suppose that the dot trailing the number is part of that number.
                pRegularizationFnc(file,'.',flgs);
                trailingDotFollowingNumber = false;
                }
            // A blank separator is written only if something is already on the
            // line; any other separator is always written.
            if((lastToWrite != ' ' && lastToWrite != 0xA0) || flgs.writtentoline)
                pRegularizationFnc(file,lastToWrite,flgs);
            flgs.writtentoline = (lastToWrite == ' ' || lastToWrite == 0xA0);
            if(Option.emptyline && flgs.in_abbreviation && !flgs.writtentoline)
                { // Make sure to send \n next time
                ensureEmptyLine = true;
                }
            flgs.in_abbreviation = false;
            }

        if(flgs.number_final_dot == 1 && !(flgs.person_name == initial)) // This can only be the case if ch == separating character
            {
            // Hold the dot back; it is written later, once the next character
            // shows how it should be treated.
            trailingDotFollowingNumber = true;
            }
        else
            {
            if(ch != '\n' && ensureEmptyLine)
                {
                pRegularizationFnc(file,'\n',flgs);
                }
            ensureEmptyLine = false;
            pRegularizationFnc(file,ch,flgs);
            flgs.writtentoline = ch != '\n';
            if(Option.emptyline && flgs.in_abbreviation && !flgs.writtentoline)
                { // Make sure to send \n next time
                ensureEmptyLine = true;
                }
            }
        }

    last = ch;
    //    flgs.newSegment = false;
    }
Exemple #30
0
/*
static bool testRoman(const char * rom)
    {
    int result;
    parseRoman(128);
    for(const char * p = rom;*p;++p)
        {
        result = parseRoman(*p);
        if(result == T)
            return true;
        else if(result == F)
            return false;
        }
    result = parseRoman(0);
    if(result == T)
        return true;
    else
        return false;
    }

static void testRomans()
    {
    const char * numbers[] =
        {""
        ,"i"
        ,"ii"
        ,"iii"
        ,"iv"
        ,"v"
        ,"vi"
        ,"vii"
        ,"viii"
        ,"ix"
        ,"x"
        ,"xi"
        ,"xiv"
        ,"xix"
        ,"mmcdxcviii"
        ,"MMCDXCVIII"
        ,"ci"
        ,"mi"
        ,"viv"
        ,"viiii"
        ,"xxc"
        ,"ic"
        ,"lil"
        ,"mil"
        ,"MMMCDXLIV"
        ,"MMMCDxliv"
        ,0
        };
    for(const char ** q = numbers;*q;++q)
        {
        printf("%s\t:",*q);
        if(testRoman(*q))
            printf("OK\n");
        else
            printf("..\n");
        }
    }
*/
// Update the per-line statistics kept in this textSource for one more input
// character.
//
//   ch   - the character just read
//   flgs - shared flags; only read here (hyphenFound, semiPunctuationFound,
//          punctuationFound, in_abbreviation, person_name, writtentoline)
//
// Members maintained: hyphens (length of the current run of '-'),
// nrNonSpaceBytes, nrStartCaps / nrNoStartCaps (words starting with / without
// an uppercase letter), allcaps, wordComing, lastCharIsSemipunct, and the
// "line is a number" tracking in allNumber / lowerRoman / upperRoman / arabic.
// parseRoman(128) is called at each point where a new token starts (presumably
// a reset - confirm in parseRoman), parseRoman(0) at the end of a token, and
// parseRoman(ch) for each candidate Roman digit; a result of F is treated as
// "not a valid Roman numeral".
void textSource::updateFlags(wint_t ch,flags & flgs)
    {

    if(ch == '-')
        hyphens++;
    else
        hyphens = 0;
    if(!nrNonSpaceBytes)
        {
        // No non-space character has been counted yet: (re)initialise the
        // statistics.
        if(!isUpper(ch) && !allNumber)
/*
1. 
omejitev  odvisno  od.

==>

1 .
omejitev odvisno od .

because the 'header' 1. seems to be a list (or section) number. 
It is not likely starting a sentence, even though the first
character is lower case.
*/
            WriteParAfterHeadingOrField = false;
        wordComing = true;
        lastCharIsSemipunct = false;
       //evidently trivial: nrNonSpaceBytes = 0;
        nrNoStartCaps = 0;
        if(  flgs.hyphenFound
          || flgs.semiPunctuationFound
          || (  (  (  !flgs.punctuationFound
                   && !isUpper(ch)
                   )
                || flgs.in_abbreviation
                || flgs.person_name == initial
                ) 
             && flgs.writtentoline
             )
          )
            {
            /*
            h har netop været vært for en institut-
            dag på Institut for Medier, Erkendelse
            */
            nrStartCaps = -10; // Smaller chance that line starting here is a headline.
            allcaps = false;
            }
        else
            {
            nrStartCaps = 0;
            allcaps = true;
            }

        allNumber = !isFlatSpace(ch);
        lowerRoman = false;
        upperRoman = false;
        arabic = false;
        parseRoman(128);
        }
    if(isFlatSpace(ch))
        {
        // A space ends the current token; check whether it was a valid Roman numeral.
        wordComing = true;
        int result = parseRoman(0);
        if(result == F)
            {
            upperRoman = false;
            lowerRoman = false;
            }
        parseRoman(128);
        }
    else 
        {
        lastCharIsSemipunct = isSemiPunct(ch);
        if(!isLower(ch))
            {
            if(wordComing)
                {
                if(isUpper(ch))
                    ++nrStartCaps;
                }
            if(allNumber)
                {
                // Only uppercase Roman digits keep the "all number" state alive here.
                if(!lowerRoman && !arabic && strchr("IVXLCDM",ch))
                    {
                    int result = parseRoman(ch);
                    allNumber = upperRoman = (result != F);
                    }
                else
                    {
                    allNumber = false;
                    }
                }
            }
        else
            {
            // NOTE(review): this branch (isLower(ch) true) also tests for digits
            // and punctuation, so isLower presumably accepts more than lowercase
            // letters - confirm against its definition.
            if(!strchr("ivxlcdm-/().:0123456789",ch))
                allNumber = false;
            if(allNumber)
                {
                if(strchr("-/().:",ch))
                    {
                    // A separator starts a new number part; its kind is undecided again.
                    lowerRoman = false;
                    upperRoman = false;
                    arabic = false;
                    }
                else
                    {
                    // A number part may not mix upper Roman, lower Roman and Arabic digits.
                    if(upperRoman)
                        {
                        allNumber = false;
                        }
                    else if(!arabic && strchr("ivxlcdm",ch))
                        {
                        int result = parseRoman(ch);
                        allNumber = lowerRoman = (result != F);                            
                        }
                    else if(!lowerRoman && strchr("0123456789",ch))
                        {
                        arabic = true;
                        }
                    else
                        {
                        allNumber = false;
                        }
                    }
                }
            if(wordComing && !allNumber) // 'iv. The Big Dipper' should be o.k.
                {
                if(!isUpper(ch))
                    ++nrNoStartCaps;
                }
            if(!allNumber)
                allcaps = false; // 'iv. THE BIG DIPPER' should be o.k.
            }
        nrNonSpaceBytes++;
        wordComing = false;
        }
    }