C# - Light and Fast CSV Parser






4.83/5 (14 votes)
Lightweight yet functional CSV parser with custom delimiters and qualifiers; records are returned lazily via yield return.
Introduction
Parsing CSV files may sound like an easy task, but in reality it is not that trivial. Below is a CsvParser
class implementation that I use in my own projects. It supports the following features that I find critical:
- Custom Delimiter and Qualifier characters
- Supports quoting notation (allows delimiter character to be part of a value)
- Supports quote escaping (allows quote character to be part of a value)
- Supports both '\n' and '\r\n' line endings
- Designed to return IEnumerable via yield return (no memory buffers)
- Designed to return Header and the rest of lines separately (using Tuple)
Source Code
/// <summary>
/// Minimal streaming CSV parser with a configurable field delimiter and
/// qualifier (quote) character. Records are produced lazily, one per input line.
/// </summary>
public static class CsvParser
{
    /// <summary>
    /// Splits a sequence into its first element and a lazy sequence of the remaining elements.
    /// </summary>
    /// <param name="source">Sequence to split.</param>
    /// <returns>
    /// A tuple of (head, tail). For an empty source the head is <c>default(T)</c>
    /// and the tail is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    private static Tuple<T, IEnumerable<T>> HeadAndTail<T>(this IEnumerable<T> source)
    {
        if (source == null)
            throw new ArgumentNullException("source");

        var en = source.GetEnumerator();
        T head;
        try
        {
            // Do not read Current when there is no first element; its value is unspecified then.
            head = en.MoveNext() ? en.Current : default(T);
        }
        catch
        {
            // The tail will never be handed out, so release the enumerator here.
            en.Dispose();
            throw;
        }

        return Tuple.Create(head, EnumerateTail(en));
    }

    /// <summary>
    /// Yields whatever the given enumerator has left and disposes it once the
    /// enumeration finishes or is abandoned. The enumerator is shared, so the
    /// resulting sequence is meant to be consumed a single time.
    /// </summary>
    private static IEnumerable<T> EnumerateTail<T>(IEnumerator<T> en)
    {
        try
        {
            while (en.MoveNext())
                yield return en.Current;
        }
        finally
        {
            en.Dispose();
        }
    }

    /// <summary>
    /// Parses CSV text held in a string.
    /// </summary>
    /// <param name="content">The CSV text.</param>
    /// <param name="delimiter">Character that separates fields.</param>
    /// <param name="qualifier">Character that opens and closes a quoted field.</param>
    /// <returns>A lazily evaluated sequence of records; each record is a list of field values.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    public static IEnumerable<IList<string>>
        Parse(string content, char delimiter, char qualifier)
    {
        if (content == null)
            throw new ArgumentNullException("content");

        return ParseContent(content, delimiter, qualifier);
    }

    /// <summary>
    /// Iterator behind <c>Parse(string, ...)</c>. The reader is created and disposed
    /// inside the iterator so that it stays open while records are being enumerated;
    /// returning the lazy sequence from inside a using block would dispose the reader
    /// before the first record is read.
    /// </summary>
    private static IEnumerable<IList<string>>
        ParseContent(string content, char delimiter, char qualifier)
    {
        using (var reader = new StringReader(content))
        {
            foreach (var record in ParseRecords(reader, delimiter, qualifier))
                yield return record;
        }
    }

    /// <summary>
    /// Parses CSV from a reader and returns the first record (the header)
    /// separately from the remaining records.
    /// </summary>
    /// <param name="reader">Source of CSV text. It is not disposed by this method.</param>
    /// <param name="delimiter">Character that separates fields.</param>
    /// <param name="qualifier">Character that opens and closes a quoted field.</param>
    /// <returns>
    /// Item1 is the first record (null when the input has no records); Item2 lazily
    /// yields the remaining records and should be enumerated once, while the reader is still open.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static Tuple<IList<string>, IEnumerable<IList<string>>>
        ParseHeadAndTail(TextReader reader, char delimiter, char qualifier)
    {
        return HeadAndTail(Parse(reader, delimiter, qualifier));
    }

    /// <summary>
    /// Parses CSV from a reader, yielding one record per line.
    /// </summary>
    /// <param name="reader">Source of CSV text. It is not disposed by this method.</param>
    /// <param name="delimiter">Character that separates fields.</param>
    /// <param name="qualifier">Character that opens and closes a quoted field.</param>
    /// <returns>A lazily evaluated sequence of records; each record is a list of field values.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static IEnumerable<IList<string>>
        Parse(TextReader reader, char delimiter, char qualifier)
    {
        // Validate eagerly; inside an iterator the check would be deferred to the first MoveNext.
        if (reader == null)
            throw new ArgumentNullException("reader");

        return ParseRecords(reader, delimiter, qualifier);
    }

    /// <summary>
    /// Character-by-character state machine that turns the reader's text into records.
    /// Lines end at '\n' or "\r\n"; lines without any content produce no record.
    /// </summary>
    private static IEnumerable<IList<string>>
        ParseRecords(TextReader reader, char delimiter, char qualifier)
    {
        var inQuote = false;
        var record = new List<string>();
        var sb = new StringBuilder();

        while (reader.Peek() != -1)
        {
            var readChar = (char) reader.Read();

            if (readChar == '\n' || (readChar == '\r' && (char) reader.Peek() == '\n'))
            {
                // If it's a \r\n combo consume the \n part and throw it away.
                if (readChar == '\r')
                    reader.Read();

                if (inQuote)
                {
                    // A line break inside a quoted field belongs to the value; keep it as written.
                    if (readChar == '\r')
                        sb.Append('\r');
                    sb.Append('\n');
                }
                else
                {
                    // Close the pending field; an empty trailing field counts only
                    // when the line already has at least one field.
                    if (record.Count > 0 || sb.Length > 0)
                    {
                        record.Add(sb.ToString());
                        sb.Clear();
                    }

                    if (record.Count > 0)
                        yield return record;

                    // Start a fresh list so the record just handed out is never mutated.
                    record = new List<string>(record.Count);
                }
            }
            else if (sb.Length == 0 && !inQuote)
            {
                // Nothing has been collected for the current field yet.
                if (readChar == qualifier)
                    inQuote = true;
                else if (readChar == delimiter)
                {
                    record.Add(sb.ToString());
                    sb.Clear();
                }
                else if (char.IsWhiteSpace(readChar))
                {
                    // Ignore leading whitespace
                }
                else
                    sb.Append(readChar);
            }
            else if (readChar == delimiter)
            {
                if (inQuote)
                    sb.Append(delimiter); // delimiter inside quotes is plain data
                else
                {
                    record.Add(sb.ToString());
                    sb.Clear();
                }
            }
            else if (readChar == qualifier)
            {
                if (inQuote)
                {
                    if ((char) reader.Peek() == qualifier)
                    {
                        // A doubled qualifier is an escaped qualifier character.
                        reader.Read();
                        sb.Append(qualifier);
                    }
                    else
                        inQuote = false;
                }
                else
                    sb.Append(readChar); // qualifier in the middle of an unquoted field is kept literally
            }
            else
                sb.Append(readChar);
        }

        // Flush the last record when the input does not end with a line break.
        if (record.Count > 0 || sb.Length > 0)
            record.Add(sb.ToString());

        if (record.Count > 0)
            yield return record;
    }
}
Using the Code
Here is an example of reading a CSV file. The following code snippet parses the first 5 records and prints them to the Console
in the form of key/value pairs:
const string fileName = @"C:\Temp\file.csv";

using (var stream = File.OpenRead(fileName))
using (var reader = new StreamReader(stream))
{
    // Item1 is the header record, Item2 lazily yields the remaining records.
    var data = CsvParser.ParseHeadAndTail(reader, ',', '"');
    var header = data.Item1;
    var lines = data.Item2;

    // The header is null when the file contains no records; nothing to print then.
    if (header != null)
    {
        // The records are read lazily, so they must be consumed while the reader is still open.
        foreach (var line in lines.Take(5))
        {
            // A record may hold fewer fields than the header; never index past its end.
            var fieldCount = Math.Min(header.Count, line.Count);
            for (var i = 0; i < fieldCount; i++)
            {
                if (!string.IsNullOrEmpty(line[i]))
                    Console.WriteLine("{0}={1}", header[i], line[i]);
            }

            // Blank line between records.
            Console.WriteLine();
        }
    }
}

// Keep the console window open until Enter is pressed.
Console.ReadLine();
History
- 27th September, 2014: Initial version