Ejemplo n.º 1
0
void ATemplateNodeHandler_DADA::_loadStaticData()
{
  AXmlElement::CONST_CONTAINER nodes;

  m_Services.useConfiguration().getConfigRoot().find("AOS_DadaData/dataset", nodes);
  AXmlElement::CONST_CONTAINER::iterator it;
  for (it = nodes.begin(); it != nodes.end(); ++it)
  {
    AString strSet;
    (*it)->getAttributes().get(ASW("name",4), strSet);
    if (strSet.isEmpty())
      ATHROW_EX(*it, AException::InvalidData, ASWNL("AOS_DadaData/dataset missing 'name' parameter"));

    ADadaDataHolder *pddh = new ADadaDataHolder();
    pddh->readData(m_Services, *it);
    m_Objects.insert(strSet, pddh, true);
  }

  nodes.clear();
  m_Services.useConfiguration().getConfigRoot().find(ASW("AOS_DadaData/template",21), nodes);
  it = nodes.begin();
  while (it != nodes.end())
  {
    AString str;
    (*it)->emitContent(str);
    
    AString strName;
    if ((*it)->getAttributes().get(ASW("name",4), strName))
    {
      AFilename filename(m_Services.useConfiguration().getAosBaseDataDirectory(), str, false);
      if (AFileSystem::exists(filename))
      {
        AFile_Physical file(filename, ASW("r", 1));
        file.open();

        str.clear();
        while (AConstant::npos != file.readLine(str))
        {
          if ('#' != str.at(0, '\x0'))
          {
            m_Templates[strName].push_back(str);
          }
          str.clear();
        }
      }
      else
        m_Services.useLog().add(ARope("AOS_DadaData: Missing file: ")+filename, ALog::EVENT_WARNING);
    }
    else
      m_Services.useLog().add(ASWNL("AOS_DadaData: AOS_DadaData/template missing 'name' attribute"), ALog::EVENT_FAILURE);

    ++it;
  }
}
Ejemplo n.º 2
0
void AWordUtility::getSoundexForm(const AString& source, AString& result, size_t minSize)
{
  result.clear();
  if (source.isEmpty())
    return;

  //a_First character appended as is
  AString str(source);
  str.makeLower();
  size_t sourceSize = str.getSize();

  size_t pos = 0;
  while (pos < sourceSize)
  {
    switch(str.at(pos))
    {
      case 'a':
      case 'e':
      case 'i':
      case 'o':
      case 'u':
      case 'h':
      case 'w':
      case 'y':
        break;
//---      
      case 'p':
        if (pos == 0 && sourceSize > 2)
        {
          if ('s' == str.at(pos+1))
          {
            break;  //a_ps => s at word start
          }
          else if ('h' == str.at(pos+1))
          {
            ++pos; //a_pf => f, fallthrough
          }
        }
      case 'b':
      case 'f':
      case 'v':
        if (!result.isEmpty())
        {
          if (str.at(pos) != result.at(result.getSize()-1))
            result.append('1');
        }
        else
            result.append('1');
        break;
//---      
      case 'd':
        if (pos+1 < sourceSize)
        {
          if ('g' == str.at(pos+1))
            break;        //a_dg => g
        }                 //a_Fallthough from d to t
      case 't':
        if (pos+1 < sourceSize && 'c' == str.at(pos+1))
        {
          if (pos+2 < sourceSize && 'h' == str.at(pos+2))
          {
            ++pos;
            break;  //a_tch => ch
          }
        }
        if (!result.isEmpty())
        {
          if (str.at(pos) != result.at(result.getSize()-1))
            result.append('3');
        }
        else
            result.append('3');
        break;
//---      
      case 'g':
        if (pos+1 < sourceSize)
        {
          if ('h' == str.at(pos+1))
          {
            if (pos+2 < sourceSize)
            {
              if ('t' == str.at(pos+2))
              {
                ++pos;
                break;   //a_ght => t
              }
              else
                break;   //a_gh => h
            }
          }
          else if ('n' == str.at(pos+1))
          {
            break;       //a_gn => n
          }
        } //a_Fallthrough

      case 'k':
        if (pos+1 < sourceSize && 'n' == str.at(pos+1))
        {
          break;       //a_kn => n
        }  //a_Fallthrough

      case 'c':
      case 'j':
      case 'q':
      case 's':
      case 'x':
      case 'z':
        if (!result.isEmpty())
        {
          if (str.at(pos) != result.at(result.getSize()-1))
            result.append('2');
        }
        else
            result.append('2');
        break;
//---      
      case 'l':
        if (!result.isEmpty())
        {
          if (str.at(pos) != result.at(result.getSize()-1))
            result.append('4');
        }
        else
            result.append('4');
        break;
//---      
      case 'm':
        if ('b' == str.at(pos+1, '\x0'))
        {
          ++pos;  //a_mb => m
        } //a_ Fallthrough
      case 'n':
        if (!result.isEmpty())
        {
          if (str.at(pos) != result.at(result.getSize()-1))
            result.append('5');
        }
        else
            result.append('5');
//---      
      case 'r':
        if (!result.isEmpty())
        {
          if (str.at(pos) != result.at(result.getSize()-1))
            result.append('6');
        }
        else
            result.append('6');
        break;
    }
    ++pos;
  }

  if (result.getSize() < minSize)
    result.setSize(minSize, '0');
}
Ejemplo n.º 3
0
//a_Builds a heuristic English plural of a word
//a_  one  - singular form (the checks use lower case letters only)
//a_  many - cleared first, then receives the plural
//a_Rules, selected by the last character (words shorter than 3 characters just get 's'):
//a_  ..is        -> ..es      (analysis -> analyses)
//a_  ..us        -> ..i       (cactus -> cacti, radius -> radii)
//a_  ..s, z, x   -> +es
//a_  ..sh, ..ch  -> +es, any other ..h -> +s
//a_  vowel+y     -> +s, otherwise ..y -> ..ies (vowels are taken from sstr_Vowels)
//a_  anything else -> +s
void AWordUtility::getPlural(const AString& one, AString& many)
{
  many.clear();
  
  //a_Words of size 1 or 2, just append s and return
  if (one.getSize() < 3)
  {
    many.assign(one);
    many.append('s');
    return;
  }
  
  switch(one.last())
  {
    case 's':
    {
      char c = one.at(one.getSize()-2);
      if ('i' == c)
      {
        //a_"is" -> "es", replace the next to last character
        many.assign(one);
        many.set('e', many.getSize()-2);
      }
      else if ('u' == c)
      {
        //a_"us" -> "i", copy everything before the ending
        one.peek(many, 0, one.getSize()-2);
        many.append('i');
      }
      else
      {
        many.assign(one);
        many.append("es", 2);
      }
    }
    break;
    
    case 'z':
    case 'x':
      many.assign(one);
      many.append("es", 2);
    break;

    case 'h':
    {
      char c = one.at(one.getSize()-2);
      many.assign(one);
      if ('s' == c || 'c' == c)
        many.append("es", 2);   //a_"sh" and "ch" take "es"
      else
        many.append('s');
    }
    break;

    case 'y':
    {
      char c = one.at(one.getSize()-2);
      if (AConstant::npos != sstr_Vowels.find(c))
      {
        //a_vowel+'y', add 's'
        many.assign(one);
        many.append('s');
      }
      else
      {
        //a_consonant+'y', convert 'y' to 'ies'
        one.peek(many, 0, one.getSize()-1);
        many.append("ies", 3);
      }
    }
    break;

    default:
      many.assign(one);
      many.append('s');
    break;
  }
  
}
Ejemplo n.º 4
0
/*
  Sound conversions performed by getPhoneticForm (input is lower-cased first)

Pre-process
  Before   After
  ght      t

Process ending (tested in this order against the end of the word)
  Before   After
  e        (remove)
  es       s
  ies      s
  ie       i
  cy       si
  ng       n
  eau      o
  y        i
  ue       i
  NOTE(review): rules are applied one after another on the same buffer, so an
  earlier rule appears to pre-empt later ones that share its suffix (e.g. "e"
  before "ie"/"ue", "es" before "ies") - confirm the intended order.

Process
  Before   After
  ch       C
  ck       k
  ea       I
  gh       g
  gn       n
  ie       i
  kh       k
  kn       n
  ks       x
  mb       m
  nc       nk
  oo       U
  ou       u
  ph       f
  ps       s
  q(~u)    k    (q without u)
  qu       kw
  sh       S
  tio      Su
  th       Z
  wh       w
  zh       Z
  NOTE(review): an "ugh -> f" rule was listed here originally but is not
  implemented by the code.

Remove duplicates
  The first of two equal adjacent sounds is dropped

Post-process (@-any character found in msstr_SoundVowels)
  ang@     anj@


Phonetics:
 ch = C  (chair = Cair)
 sh = S  (bash = baS)
 zh = Z  (zhivago = Zivago)
 oo = U  (boot = bUt)
*/
//a_  source - word to convert, not modified
//a_  result - cleared first, then receives the phonetic form; stays empty for an empty source
void AWordUtility::getPhoneticForm(const AString& source, AString& result)
{
  AString work(source);
  work.makeLower();

  //a_Placeholder written where characters are removed, skipped by the later passes
  const char IGNORE_CHAR = '_';
  result.clear();
  size_t workSize = work.getSize();
  if (!workSize)
    return;
  
  //a_STEP 1
  //a_Preprocess (before and after data must be same size)
  //a_These replacements take precedence
  int i;
  const int iiPreCount = 1;
  const AString preBefore[iiPreCount] = { "ght" };
  const AString preAfter[iiPreCount]  = { "t__" };
  for (i=0; i<iiPreCount; ++i)
    work.replace(preBefore[i], preAfter[i]);

  //a_STEP 2
  //a_Ending (data in reverse) (before and after data must be same size)
  //a_The word is reversed so that an ending can be tested as a match at offset 0
  const int iiEndingCount = 9;
  const AString endBefore[iiEndingCount] = {  "e", "se", "sei", "ei", "yc", "gn", "uae", "y", "eu" };
  const AString endAfter[iiEndingCount]  = {  "_", "s_", "s_i", "_i", "is", "_n", "__o", "i", "_i" };
  work.reverse();
  for (i=0; i<iiEndingCount; ++i)
  {
    if (0 == work.find(endBefore[i]))
    {
      work.overwrite(0, endAfter[i]);
    }
  }
  work.reverse();


  //a_STEP 3
  //a_Iterate and process sounds
  //a_workSize is still valid, steps 1 and 2 only substitute same sized data
  AString temp;
  u4 pos = 0;
  while(pos < workSize)
  {
    switch(work.at(pos))
    {
      //a_ ch->C, ck->k
      case 'c':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('C');
          ++pos;
        }
        else if ('k' == work.at(pos+1, '\x0'))
        {
          temp.append('k');
          ++pos;
        }
        else
          temp.append('c');

      break;

      //a_ ea->I
      case 'e':
        if ('a' == work.at(pos+1, '\x0'))
        {
          temp.append('I');
          ++pos;
        }
        else
          temp.append('e');

      break;

      //a_ gn->n, gh->g (ght was already reduced to t in step 1)
      case 'g':
        if ('n' == work.at(pos+1, '\x0'))
        {
          temp.append('n');
          ++pos;
        }
        else if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('g');
          ++pos;
        }
        else
          temp.append('g');
      break;

      //a_ ie->i
      case 'i':
        if ('e' == work.at(pos+1, '\x0'))
        {
          temp.append('i');
          ++pos;
        }
        else
          temp.append('i');

      break;

      //a_kh->k, kn->n, ks->x
      case 'k':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('k');
          ++pos;
        }
        else if ('n' == work.at(pos+1, '\x0'))
        {
          temp.append('n');
          ++pos;
        }
        else if ('s' == work.at(pos+1, '\x0'))
        {
          temp.append('x');
          ++pos;
        }
        else
          temp.append('k');
      break;

      //a_nc->nk
      case 'n':
        if ('c' == work.at(pos+1, '\x0'))
        {
          temp.append("nk");
          ++pos;
        }
        else
          temp.append('n');
      break;

      //a_ mb->m
      case 'm':
        if ('b' == work.at(pos+1, '\x0'))
        {
          temp.append('m');
          ++pos;
        }
        else
          temp.append('m');
      break;

      //a_ oo->U, ou->u
      case 'o':
        if ('o' == work.at(pos+1, '\x0'))
        {
          temp.append('U');
          ++pos;
        }
        else if ('u' == work.at(pos+1, '\x0'))
        {
          temp.append('u');
          ++pos;
        }
        else
          temp.append('o');
      break;

      //a_ ph->f, ps->s
      case 'p':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('f');
          ++pos;
        }
        else if ('s' == work.at(pos+1, '\x0'))
        {
          temp.append('s');
          ++pos;
        }
        else
          temp.append('p');
      break;

      //a_ qu->kw, q without u -> k
      case 'q':
        if ('u' == work.at(pos+1, '\x0'))
        {
          temp.append("kw", 2);
          ++pos;
        }
        else
          temp.append('k');
      break;

      //a_ sh->S
      case 's':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('S');
          ++pos;
        }
        else
          temp.append('s');
      break;

      //a_ th->Z, tio->Su
      case 't':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('Z');
          ++pos;
        }
        else if ('i' == work.at(pos+1, '\x0') && 'o' == work.at(pos+2, '\x0'))
        {
          temp.append("Su");
          pos += 2;
        }
        else
          temp.append('t');
      break;

      //a_ wh->w
      case 'w':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('w');
          ++pos;
        }
        else
          temp.append('w');
      break;

      //a_ zh->Z
      case 'z':
        if ('h' == work.at(pos+1, '\x0'))
        {
          temp.append('Z');
          ++pos;
        }
        else
          temp.append('z');
      break;

      //a_Skip underscores
      case IGNORE_CHAR:
        break;

      default:
        temp.append(work.at(pos));
    }
    
    ++pos;
  }

  //a_STEP 4
  //a_Remove duplicates (by inserting underscore which will be ignored later)
  //a_The first character of an equal adjacent pair is the one replaced
  workSize = temp.getSize();
  pos = 0;
  while(pos < workSize)
  {
    if (temp.at(pos) == temp.at(pos+1, '\x0'))
      temp.use(pos) = IGNORE_CHAR;
    ++pos;
  }

  //a_STEP 5
  //a_Post process
  pos = 0;
  workSize = temp.getSize();
  while(pos < workSize)
  {
    switch(temp.at(pos))
    {
      //a_ ang@->anj@
      case 'a':
      {
        bool isConverted = false;
        if ('n' == temp.at(pos+1, '\x0') && 'g' == temp.at(pos+2, '\x0'))
        {
          char c = temp.at(pos+3,'\x0');
          if (c && AConstant::npos != msstr_SoundVowels.find(c))
          {
            //a_Emit "anj" and skip the n and g, the vowel is handled on the next pass
            result.append("anj", 3);
            pos += 2;
            isConverted = true;
          }
        }

        //a_Any 'a' that did not start a converted "ang@" must still be emitted
        if (!isConverted)
          result.append('a');
      }
      break;

      case IGNORE_CHAR:
        break;

      default:
        result.append(temp.at(pos));
    }

    ++pos;
  }
}