Introduction
In many of my Identity Management (IdM) projects, I face the predicament of "dirty data". The term "dirty data" describes incorrect or misleading data residing within a data-source.
Self-service data-sources (such as web-portals, phone directories, etc.) are the biggest producers of inconsistently entered data, which is understandable when any user is allowed to modify his/her data manually with few guidelines and little data verification.
There are many different types of user-provided data that Identity Management professionals will face; one of the most common data types whose entry is "outsourced" to the end-user is the user's phone number(s). In the end, all synchronized data sources may consume that data, which can lead to processing difficulties if/when an application expects a more consistent data format.
What Problem Does this Solution Solve
Re-formatting and normalizing irregularly entered/stored North American phone numbers.
How Does this Help
Having uniform phone numbers allows a programmer, system administrator, or any other IT professional to store clean data in the receiving data-source.
Using the Code
Here in North America we are lucky to have a uniform phone numbering plan (from a programmer's standpoint), known as the North American Numbering Plan (NANP); NANP makes parsing a phone number relatively easy. This article covers only the North American phone number format and does not attempt to parse the formats of any other phone system. Applying this custom format provider directly to other types of phone numbers could produce unpredictable results. However, you can extend this code to process other types of phone numbers by adding methods that recognize the formats specific to your local phone system. A good example would be the French phone system, which is consistent in its numbering rules and can therefore be handled by a format provider relatively easily.
How Does the Code Actually Work
The implementation of the IFormatProvider interface is rather well documented on the MSDN site. This particular application of IFormatProvider works with several predefined "codes" to select among several desired output formats of the string.
Understood Formats
{0:a}
Example: 1-555-563-3434 (Hyphenated)
{0:c}
Example: 15555633434 (Numeric only)
{0:d}
Example: +1 (555) 123-4567 (Default)
{0:de}
Example: +1 (555) 563-3434 Ext. 5555 (Default with Extension)
{0:e}
Example: 1-555-563-3434 Ext. 5555 (Extension)
{0:s}
Example: 1 555 563 3434 (Space)
The code provided below will demonstrate how to use the Lost and Found Identity Phone formatter:
using LostAndFoundIdentity.Text;
namespace Console
{
    /// <summary>
    /// Console demo that runs a set of irregularly entered phone numbers through
    /// every format code understood by <c>LafiPhoneFormatProvider</c> and prints,
    /// for each code, the formatted result and whether it differs from the input.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: formats each sample value with each format code and writes
        /// the results to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string[] values = new string[] {
                "1 555 123-4567",
                "555 123-4567",
                "1234567",
                "+1 555 543-22-34",
                "1(555)5633-434",
                "555-5555 ext55",
                "1 564 6654634 ex 5555"
            };

            // Format codes are listed in the order in which they are printed.
            string[] formatCodes = new string[] { "a", "c", "d", "de", "e", "s" };

            // One provider instance is reused for every call instead of
            // constructing a new one for each format of each value.
            LafiPhoneFormatProvider provider = new LafiPhoneFormatProvider();

            foreach (string value in values)
            {
                System.Console.WriteLine("Input string: " + value);

                foreach (string code in formatCodes)
                {
                    // Builds "{0:a}", "{0:c}", ... so the provider receives the code.
                    string result = string.Format(provider, "{0:" + code + "}", value);
                    System.Console.WriteLine("{:" + code + "} " + result + "\t\t" + "isModified: " +
                        !value.Equals(result));
                }

                System.Console.WriteLine("- - - - - ");
            }
        }
    }
}
Resulting Output
Input string: 1 555 123-4567
{:a} 1-555-123-4567 isModified: True
{:c} 15551234567 isModified: True
{:d} +1 (555) 123-4567 isModified: True
{:de} +1 (555) 123-4567 isModified: True
{:e} 1-555-123-4567 isModified: True
{:s} 1 555 123 4567 isModified: True
- - - - -
Input string: 555 123-4567
{:a} 555-123-4567 isModified: True
{:c} 5551234567 isModified: True
{:d} (555) 123-4567 isModified: True
{:de} (555) 123-4567 isModified: True
{:e} 555-123-4567 isModified: True
{:s} 555 123 4567 isModified: True
- - - - -
Input string: 1234567
{:a} 123-4567 isModified: True
{:c} 1234567 isModified: False
{:d} 123-4567 isModified: True
{:de} 123-4567 isModified: True
{:e} 123-4567 isModified: True
{:s} 123 4567 isModified: True
- - - - -
Input string: +1 555 543-22-34
{:a} 1-555-543-2234 isModified: True
{:c} 15555432234 isModified: True
{:d} +1 (555) 543-2234 isModified: True
{:de} +1 (555) 543-2234 isModified: True
{:e} 1-555-543-2234 isModified: True
{:s} 1 555 543 2234 isModified: True
- - - - -
Input string: 1(555)5633-434
{:a} 1-555-563-3434 isModified: True
{:c} 15555633434 isModified: True
{:d} +1 (555) 563-3434 isModified: True
{:de} +1 (555) 563-3434 isModified: True
{:e} 1-555-563-3434 isModified: True
{:s} 1 555 563 3434 isModified: True
- - - - -
Input string: 555-5555 ext55
{:a} 555-5555 ext55 isModified: False
{:c} 555-5555 ext55 isModified: False
{:d} 555-5555 ext55 isModified: False
{:de} 555-5555 Ext. 55 isModified: True
{:e} 555-5555 Ext. 55 isModified: True
{:s} 555-5555 ext55 isModified: False
- - - - -
Input string: 1 564 6654634 ex 5555
{:a} 1 564 6654634 ex 5555 isModified: False
{:c} 1 564 6654634 ex 5555 isModified: False
{:d} 1 564 6654634 ex 5555 isModified: False
{:de} +1 (564) 665-4634 Ext. 5555 isModified: True
{:e} 1-564-665-4634 Ext. 5555 isModified: True
{:s} 1 564 6654634 ex 5555 isModified: False
- - - - -
Points of Interest
This "Lost And Found Identity" phone format provider is implemented as an implementation of the IFormatProvider interface, which allows it to be used in many applications beyond my original intention of an add-on for Microsoft's Identity Lifecycle Manager 2007/Forefront Identity Manager 2010. I can see this format provider being easily adapted for PowerShell data-processing or any other data-processing/"brushing".
What is Going On Inside the Code Snippets?
This code calls the LafiPhoneFormatProvider class (provided below), which takes care of recognizing the user's input and the desired format, and of reformatting the provided string.
LafiPhoneFormatProvider class
[assembly: System.CLSCompliant(true)]
namespace LostAndFoundIdentity.Text
{
using System;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Text.RegularExpressions;
/// <summary>
/// Custom format provider that re-formats North American (NANP) phone numbers.
/// Pass an instance to <c>string.Format</c> together with one of the format codes
/// (case-insensitive):
/// <c>a</c> hyphenated, <c>c</c> digits only, <c>d</c> default layout
/// ("+1 (555) 123-4567"), <c>de</c> default layout with extension,
/// <c>e</c> hyphenated with extension, <c>s</c> space separated.
/// Input whose digit count is not 7, 10 or 11 is returned unchanged.
/// </summary>
[SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly",
    MessageId = "Lafi", Justification = "'Lafi' stands for Lost And Found Identity")]
public class LafiPhoneFormatProvider : ICustomFormatter, IFormatProvider
{
    // Matches an extension suffix such as " ext55", "x 12" or " ex. 5555";
    // capture group 6 holds the extension digits.
    private const string ExtensionFormula =
        "((\\s{1,2})?(e|ext|ex|extn|extension|x)(\\.)?(\\s{1,2})?)(\\d+)";

    // The patterns never change, so they are built once instead of on every call.
    private static readonly Regex ExtensionPattern = new Regex(
        ExtensionFormula, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    private static readonly Regex NonDigitPattern = new Regex("[\\D]");

    /// <summary>
    /// Returns this instance when a custom formatter is requested.
    /// </summary>
    /// <param name="formatType">The type of formatter being requested.</param>
    /// <returns>
    /// This instance if <paramref name="formatType"/> is <see cref="ICustomFormatter"/>;
    /// otherwise <c>null</c>.
    /// </returns>
    public object GetFormat(Type formatType)
    {
        if (formatType == typeof(ICustomFormatter))
        {
            return this;
        }

        return null;
    }

    /// <summary>
    /// Formats <paramref name="arg"/> as a phone number according to the format code.
    /// </summary>
    /// <param name="format">
    /// One of the codes a, c, d, de, e, s (any casing). When <c>null</c> (a plain
    /// "{0}" placeholder) the argument's string form is returned unchanged.
    /// </param>
    /// <param name="arg">The value to format; <c>null</c> yields an empty string.</param>
    /// <param name="formatProvider">Not used.</param>
    /// <returns>The formatted phone number, or the original text when it cannot be parsed.</returns>
    /// <exception cref="FormatException">The format code is not supported.</exception>
    public string Format(string format, object arg, IFormatProvider formatProvider)
    {
        // string.Format passes null arguments through to the custom formatter.
        if (arg == null)
        {
            return string.Empty;
        }

        string result = arg.ToString() ?? string.Empty;

        // The null check has to come before ToUpperInvariant(); a "case null"
        // inside the switch can never be reached because the call throws first.
        if (format == null)
        {
            return result;
        }

        switch (format.ToUpperInvariant())
        {
            case "A":
                return FormatPhone(result, "-");
            case "C":
                return FormatPhone(result, string.Empty);
            case "D":
                return FormatPhone(result);
            case "DE":
                return FormatWithExtension(result, null);
            case "E":
                return FormatWithExtension(result, "-");
            case "S":
                return FormatPhone(result, " ");
            default:
                throw new FormatException(
                    "'" + format + "' is not a supported format type.");
        }
    }

    // Formats the number with the extension (if any) split off and re-appended as
    // " Ext. N". A null separator selects the default "+1 (555) 123-4567" layout.
    private static string FormatWithExtension(string value, string separator)
    {
        Match match = ExtensionPattern.Match(value);
        if (!match.Success)
        {
            return ApplyLayout(value, separator);
        }

        string extension = match.Groups[6].Value;
        string phone = ExtensionPattern.Replace(value, string.Empty);
        return ApplyLayout(phone, separator) + " Ext. " + extension;
    }

    // Dispatches to the default layout (null separator) or the separator layout.
    private static string ApplyLayout(string value, string separator)
    {
        return separator == null ? FormatPhone(value) : FormatPhone(value, separator);
    }

    // Joins the recognized parts of the number with the given separator;
    // returns the input unchanged when the digit count is not 7, 10 or 11.
    private static string FormatPhone(string value, string separator)
    {
        string countryCode;
        string areaCode;
        string firstThree;
        string lastFour;
        if (!TrySplit(value, out countryCode, out areaCode, out firstThree, out lastFour))
        {
            return value;
        }

        string formatted = firstThree + separator + lastFour;
        if (areaCode.Length > 0)
        {
            formatted = areaCode + separator + formatted;
        }

        if (countryCode.Length > 0)
        {
            formatted = countryCode + separator + formatted;
        }

        return formatted;
    }

    // Produces the default layout: "123-4567", "(555) 123-4567" or
    // "+1 (555) 123-4567"; returns the input unchanged when the digit count
    // is not 7, 10 or 11.
    private static string FormatPhone(string value)
    {
        string countryCode;
        string areaCode;
        string firstThree;
        string lastFour;
        if (!TrySplit(value, out countryCode, out areaCode, out firstThree, out lastFour))
        {
            return value;
        }

        string formatted = firstThree + "-" + lastFour;
        if (areaCode.Length > 0)
        {
            formatted = "(" + areaCode + ") " + formatted;
        }

        if (countryCode.Length > 0)
        {
            formatted = "+" + countryCode + " " + formatted;
        }

        return formatted;
    }

    // Splits the digits of value into country code (1 digit), area code (3),
    // first three and last four. Parts that are absent for the given length are
    // set to the empty string. Returns false for any length other than 7, 10, 11.
    private static bool TrySplit(
        string value,
        out string countryCode,
        out string areaCode,
        out string firstThree,
        out string lastFour)
    {
        countryCode = string.Empty;
        areaCode = string.Empty;
        firstThree = string.Empty;
        lastFour = string.Empty;

        string digits = GetNumericValue(value);
        int length = digits.Length;
        if (length != 7 && length != 10 && length != 11)
        {
            return false;
        }

        // Parts are taken from the right so one set of offsets serves all lengths.
        lastFour = digits.Substring(length - 4, 4);
        firstThree = digits.Substring(length - 7, 3);
        if (length >= 10)
        {
            areaCode = digits.Substring(length - 10, 3);
        }

        if (length == 11)
        {
            countryCode = digits.Substring(0, 1);
        }

        return true;
    }

    // Removes every non-digit character in a single pass.
    private static string GetNumericValue(string value)
    {
        return NonDigitPattern.Replace(value, string.Empty);
    }
}
}
History
- 15th June, 2009: Initial post