Really late to answer here, but I was curious what a slightly more rigorous exploration would yield. The method used to analyze/select your population, for such a small population (8K items), with such a simple filter (starts with 'a'), should be largely irrelevant. Even with a population of 100M items, the worst I could do was about 2 seconds.
Instead, it's far more likely that your problem lies in the generation/reading of your items.
I am guessing you must have an exceptionally slow CSV reader. For a population as small as 8K items, unless each item is very large, I would expect fast performance from even the worst CSV reader of which I am aware.
Results and test harness follow...
Results:
StartsWithA: 00:00:31.3843456
GetMatchesIndexCount: 00:00:00.4149453
GetMatchesIndexListAdHoc: 00:00:00.4930803
GetMatchesIndexListPreAllocated: 00:00:00.4762712
GetMatchesIndexListTruncated: 00:00:00.4896025
GetMatchesForeachCount: 00:00:00.4298655
GetMatchesForeachListAdHoc: 00:00:00.4599720
GetMatchesForeachListPreAllocated: 00:00:00.4488830
GetMatchesForeachListTruncated: 00:00:02.0583127
GetMatchesLinqArray: 00:00:00.5453610
GetMatchesLinqList: 00:00:00.4848105
Program:
using System;
using System.Linq;
using System.Text;
using System.Collections.Generic;
using System.Diagnostics;
namespace PrefixBenchmark
{
    /// <summary>
    /// Console benchmark that generates a large array of short random strings and times
    /// several ways of selecting the entries that start with <c>'a'</c>.
    /// </summary>
    /// <remarks>
    /// Each strategy is run exactly once, with no warm-up pass and no forced garbage
    /// collection between runs, so individual timings include whatever JIT or GC work
    /// happens to fall inside that run.
    /// </remarks>
    public class Program
    {
        /// <summary>On average, one in this many generated samples starts with <c>'a'</c>.</summary>
        public const int OddsOfA = 26;

        /// <summary>Number of sample strings generated for the benchmark.</summary>
        public const int SampleCount = 100000000;

        /// <summary>Maximum length, in characters, of a generated sample.</summary>
        public const int MaximumLength = 5;

        /// <summary>
        /// Generates the sample population, then times every selection strategy against it,
        /// printing one "name: elapsed" line per step.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            var stopwatch = new Stopwatch();

            stopwatch.Restart();
            string[] sequence = StartsWithA(SampleCount, new Random(), OddsOfA, MaximumLength);
            stopwatch.Stop();
            Console.WriteLine($"{nameof(StartsWithA)}: {stopwatch.Elapsed}");

            Time(nameof(GetMatchesIndexCount), stopwatch, sequence, GetMatchesIndexCount);
            Time(nameof(GetMatchesIndexListAdHoc), stopwatch, sequence, GetMatchesIndexListAdHoc);
            Time(nameof(GetMatchesIndexListPreAllocated), stopwatch, sequence, GetMatchesIndexListPreAllocated);
            Time(nameof(GetMatchesIndexListTruncated), stopwatch, sequence, GetMatchesIndexListTruncated);
            Time(nameof(GetMatchesForeachCount), stopwatch, sequence, GetMatchesForeachCount);
            Time(nameof(GetMatchesForeachListAdHoc), stopwatch, sequence, GetMatchesForeachListAdHoc);
            Time(nameof(GetMatchesForeachListPreAllocated), stopwatch, sequence, GetMatchesForeachListPreAllocated);
            Time(nameof(GetMatchesForeachListTruncated), stopwatch, sequence, GetMatchesForeachListTruncated);
            Time(nameof(GetMatchesLinqArray), stopwatch, sequence, GetMatchesLinqArray);
            Time(nameof(GetMatchesLinqList), stopwatch, sequence, GetMatchesLinqList);
        }

        /// <summary>
        /// Runs <paramref name="test"/> once against <paramref name="sequence"/>, measures it
        /// with <paramref name="stopwatch"/>, and writes "<paramref name="name"/>: elapsed"
        /// to the console.
        /// </summary>
        /// <typeparam name="T">Result type produced by the strategy under test.</typeparam>
        /// <param name="name">Label printed in front of the elapsed time.</param>
        /// <param name="stopwatch">Stopwatch to reuse; it is restarted before the call.</param>
        /// <param name="sequence">Population handed to the strategy.</param>
        /// <param name="test">Strategy to measure.</param>
        /// <returns>Whatever <paramref name="test"/> returned.</returns>
        private static T Time<T>(string name, Stopwatch stopwatch, string[] sequence,
            Func<string[], T> test)
        {
            stopwatch.Restart();
            T result = test(sequence);
            stopwatch.Stop();
            Console.WriteLine($"{name}: {stopwatch.Elapsed}");
            return result;
        }

        /// <summary>Counts the matches using an indexed <c>for</c> loop.</summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>Number of entries that start with <c>'a'</c>.</returns>
        private static int GetMatchesIndexCount(string[] sequence)
        {
            int length = sequence.Length;
            int count = 0;
            for (int index = 0; index < length; index++)
            {
                if (sequence[index].StartsWith('a'))
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Collects the matches with an indexed <c>for</c> loop into a list created with the
        /// default capacity.
        /// </summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesIndexListAdHoc(string[] sequence)
        {
            int length = sequence.Length;
            var list = new List<string>();
            for (int index = 0; index < length; index++)
            {
                string candidate = sequence[index];
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Collects the matches with an indexed <c>for</c> loop into a list whose capacity is
        /// preset to the size of the whole input.
        /// </summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesIndexListPreAllocated(string[] sequence)
        {
            int length = sequence.Length;
            var list = new List<string>(length);
            for (int index = 0; index < length; index++)
            {
                string candidate = sequence[index];
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Same as <see cref="GetMatchesIndexListPreAllocated"/>, but calls
        /// <see cref="List{T}.TrimExcess"/> on the list before returning it.
        /// </summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesIndexListTruncated(string[] sequence)
        {
            int length = sequence.Length;
            var list = new List<string>(length);
            for (int index = 0; index < length; index++)
            {
                string candidate = sequence[index];
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            list.TrimExcess();
            return list;
        }

        /// <summary>Counts the matches using a <c>foreach</c> loop.</summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>Number of entries that start with <c>'a'</c>.</returns>
        private static int GetMatchesForeachCount(string[] sequence)
        {
            int count = 0;
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Collects the matches with a <c>foreach</c> loop into a list created with the
        /// default capacity.
        /// </summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesForeachListAdHoc(string[] sequence)
        {
            var list = new List<string>();
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Collects the matches with a <c>foreach</c> loop into a list whose capacity is
        /// preset to the size of the whole input.
        /// </summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesForeachListPreAllocated(string[] sequence)
        {
            var list = new List<string>(sequence.Length);
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Same as <see cref="GetMatchesForeachListPreAllocated"/>, but calls
        /// <see cref="List{T}.TrimExcess"/> on the list before returning it.
        /// </summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesForeachListTruncated(string[] sequence)
        {
            var list = new List<string>(sequence.Length);
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            list.TrimExcess();
            return list;
        }

        /// <summary>Selects the matches with LINQ <c>Where</c> and materializes an array.</summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static string[] GetMatchesLinqArray(string[] sequence) =>
            sequence
                .Where(candidate => candidate.StartsWith('a'))
                .ToArray();

        /// <summary>Selects the matches with LINQ <c>Where</c> and materializes a list.</summary>
        /// <param name="sequence">Strings to scan.</param>
        /// <returns>The entries that start with <c>'a'</c>, in source order.</returns>
        private static List<string> GetMatchesLinqList(string[] sequence) =>
            sequence
                .Where(candidate => candidate.StartsWith('a'))
                .ToList();

        /// <summary>
        /// Generates <paramref name="sampleCount"/> random sample strings.
        /// </summary>
        /// <param name="sampleCount">Number of strings to generate.</param>
        /// <param name="random">Random source used for every sample.</param>
        /// <param name="odds">A sample starts with <c>'a'</c> with probability 1 in <paramref name="odds"/>.</param>
        /// <param name="maximumLength">Maximum length of each sample; expected to be at least 1.</param>
        /// <returns>An array of <paramref name="sampleCount"/> generated strings.</returns>
        protected static string[] StartsWithA(int sampleCount, Random random, int odds,
            int maximumLength)
        {
            string[] samples = new string[sampleCount];
            for (int index = 0; index < sampleCount; index++)
            {
                samples[index] = StartsWithA(random, odds, maximumLength);
            }

            return samples;
        }

        /// <summary>
        /// Generates one sample: a first character that is <c>'a'</c> (1 in
        /// <paramref name="odds"/>) or <c>'b'</c>, followed by <c>'b'</c> characters, for a
        /// total length between 1 and <paramref name="maximumLength"/>.
        /// </summary>
        /// <param name="random">Random source.</param>
        /// <param name="odds">A sample starts with <c>'a'</c> with probability 1 in <paramref name="odds"/>.</param>
        /// <param name="maximumLength">Maximum total length of the sample; expected to be at least 1.</param>
        /// <returns>The generated string.</returns>
        protected static string StartsWithA(Random random, int odds, int maximumLength)
        {
            // Total length INCLUDING the first character, so the result never exceeds
            // maximumLength. The same value presizes the builder.
            int length = random.Next(maximumLength) + 1;
            var builder = new StringBuilder(length);
            builder.Append(IsSelected(random, odds) ? 'a' : 'b');

            // Start at 1: position 0 has already been written above.
            for (int index = 1; index < length; index++)
            {
                builder.Append('b');
            }

            return builder.ToString();
        }

        /// <summary>
        /// Returns <c>true</c> when a draw from <c>random.Next(odds)</c> comes up 0, i.e. with
        /// probability 1 in <paramref name="odds"/>.
        /// </summary>
        /// <param name="random">Random source.</param>
        /// <param name="odds">Exclusive upper bound passed to <see cref="Random.Next(int)"/>.</param>
        /// <returns><c>true</c> if the draw was 0; otherwise <c>false</c>.</returns>
        protected static bool IsSelected(Random random, int odds) =>
            random.Next(odds) == 0;
    }
}