Click here to Skip to main content
15,867,308 members
Articles / Desktop Programming / MFC
Tip/Trick

Finding a substring in a text

Rate me:
Please Sign up or sign in to vote.
3.50/5 (2 votes)
15 Aug 2011CPOL 24.3K   3   1
How to find a substring in a text, forward and backward, with Case Sensitive and Match Whole Word options.

Here's how to find a substring in a text, forward and backward, with Case Sensitive and Match Whole Word options (the main function is RichEditFind). If bMatchCase is false, both the text and the search string are converted to lower case so that the match is case-insensitive (if it is true, the text is not altered). If bWholeWord is true, the function looks for a match whose previous and next characters are both separators. A separator is any character that is not alphanumeric (a-z, A-Z, 0-9). If bWholeWord is false, the previous and next characters are ignored (they can be alphanumeric). nPosition specifies the position at which the search starts. When bReverse is false, CString's Find function is used to find the substring in the text; otherwise the custom ReverseFind function shown below is used (because CString's own ReverseFind can only search for a single character, not a substring).


C++
// Returns TRUE when ch lies in one of the ranges A-Z, a-z or 0-9.
// The character is taken as int, which is the same integral promotion
// the comparisons would apply to a TCHAR anyway.
static BOOL IsWordCharacter(int ch)
{
   if (ch >= _T('A') && ch <= _T('Z'))
      return TRUE;
   if (ch >= _T('a') && ch <= _T('z'))
      return TRUE;
   if (ch >= _T('0') && ch <= _T('9'))
      return TRUE;
   return FALSE;
}

// Whole-word test for a match of strFindText located at index nPosition
// of strRichEdit.
//
// Returns TRUE when neither the character immediately before the match
// nor the character immediately after it is a letter or digit (A-Z, a-z,
// 0-9). A neighbor that does not exist, because the match touches the
// start or the end of the text, counts as a separator.
// Returns FALSE as soon as one neighbor is a letter or digit.
BOOL IsSeparator(CString strRichEdit, CString strFindText, int nPosition)
{
   const int nMatchLength = strFindText.GetLength();
   const int nTextLength = strRichEdit.GetLength();

   // Left neighbor: only present when the match does not start the text.
   if (nPosition > 0 && IsWordCharacter(strRichEdit.GetAt(nPosition - 1)))
      return FALSE;

   // Right neighbor: first character past the end of the match, if any.
   const int nAfterMatch = nPosition + nMatchLength;
   if (nAfterMatch < nTextLength && IsWordCharacter(strRichEdit.GetAt(nAfterMatch)))
      return FALSE;

   // Both sides are separators (or the text boundary).
   return TRUE;
}
 
// Searches lpszData backwards for the substring lpszSub.
//
// lpszData  text to search
// lpszSub   substring to look for
// startpos  highest index at which a match may begin; a negative value
//           (conventionally -1) or a value past the last possible match
//           start means "search from the end of the text"
//
// Returns the index of the last occurrence of lpszSub that begins at or
// before startpos, or -1 when there is none or when either string is
// empty.
int ReverseFind(LPCTSTR lpszData, LPCTSTR lpszSub, int startpos)
{
   const int lenSub = lstrlen( lpszSub );
   const int len = lstrlen( lpszData );

   // Nothing can match if either string is empty or the substring is
   // longer than the text.
   if (lenSub <= 0 || len <= 0 || lenSub > len)
      return -1;

   // A match cannot begin after this index, so there is no point in
   // testing later positions.
   const int lastStart = len - lenSub;
   if (startpos < 0 || startpos > lastStart)
      startpos = lastStart;

   // Walk down to and including index 0 so that a match at the very
   // beginning of the text is found as well.
   for (int pos = startpos; pos >= 0; --pos)
   {
      if (_tcsncmp(lpszSub, lpszData + pos, lenSub) == 0)
         return pos;
   }
   return -1;
}
 
// Finds strFindText in strRichEdit, honoring the Case Sensitive and
// Match Whole Word options.
//
// strRichEdit  text to search (taken by value; lowered in place for a
//              case-insensitive search)
// strFindText  substring to look for (taken by value for the same reason)
// nPosition    index at which the search starts; a negative value selects
//              the last character of the text
// bReverse     TRUE searches backwards from nPosition with ReverseFind,
//              FALSE searches forwards with CString::Find
// bMatchCase   FALSE lowers both strings before searching
// bWholeWord   TRUE accepts only matches for which IsSeparator returns
//              TRUE
//
// Returns the index of the match, or -1 when nothing is found.
int RichEditFind(CString strRichEdit, CString strFindText,
   int nPosition, BOOL bReverse, BOOL bMatchCase, BOOL bWholeWord)
{
   if (nPosition < 0)
   {
      nPosition = strRichEdit.GetLength() - 1;
      if (nPosition < 0)
         return -1; // empty text: avoid passing a negative start index on
   }

   if (!bMatchCase) // case-insensitive: compare lowered copies
   {
      strRichEdit.MakeLower();
      strFindText.MakeLower();
   }

   int nRetVal = bReverse
      ? ReverseFind(strRichEdit, strFindText, nPosition)
      : strRichEdit.Find(strFindText, nPosition);

   if (!bWholeWord)
      return nRetVal;

   // Whole-word search: step through the candidates in search direction
   // until one is delimited by separators on both sides.
   while (nRetVal != -1)
   {
      if (IsSeparator(strRichEdit, strFindText, nRetVal))
         return nRetVal;

      if (bReverse)
      {
         // Index 0 is the last candidate. Going on with -1 would make
         // ReverseFind restart from the end of the text and loop forever.
         if (nRetVal == 0)
            break;
         nRetVal = ReverseFind(strRichEdit, strFindText, nRetVal - 1);
      }
      else
      {
         nRetVal = strRichEdit.Find(strFindText, nRetVal + 1);
      }
   }
   return -1;
}

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer NXP Semiconductors
Romania Romania
My professional background includes knowledge of analyst programmer for Microsoft Visual C++, Microsoft Visual C#, Microsoft Visual Basic, Sun Java, assembly for Intel 80x86 microprocessors, assembly for PIC microcontrollers (produced by Microchip Inc.), relational databases (MySQL, Oracle, SQL Server), concurrent version systems, bug tracking systems, web design (HTML5, CSS3, XML, PHP/MySQL, JavaScript).

Comments and Discussions

 
GeneralReason for my vote of 2 Grossly inefficient code Pin
Member 793689722-Aug-11 15:52
Member 793689722-Aug-11 15:52 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.