我一直在寻找Contains
,Exists
和Any
方法之间的性能基准测试。我只想出于好奇而发现这一点,因为我总是在这些中感到困惑。关于SO的许多问题描述了这些方法的定义,例如:
List<T>
所以我决定自己做。我将其添加为答案。对结果有任何更多见解是最受欢迎的。我还对数组进行了基准测试以查看结果
根据文件:
List.Exists(对象方法)
确定List(T)是否包含与指定谓词定义的条件匹配的元素。
IEnumerable.Any(扩展方法)
确定序列的任何元素是否满足条件。
List.Contains(对象方法)
确定元素是否在List中。
标杆:
码:
LINQ extension methods - Any() vs. Where() vs. Exists()
结果
static void Main(string[] args)
{
ContainsExistsAnyShort();
ContainsExistsAny();
}
private static void ContainsExistsAny()
{
Console.WriteLine("***************************************");
Console.WriteLine("********* ContainsExistsAny ***********");
Console.WriteLine("***************************************");
List<int> list = new List<int>(6000000);
Random random = new Random();
for (int i = 0; i < 6000000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
find(list, arr);
}
private static void ContainsExistsAnyShort()
{
Console.WriteLine("***************************************");
Console.WriteLine("***** ContainsExistsAnyShortRange *****");
Console.WriteLine("***************************************");
List<int> list = new List<int>(2000);
Random random = new Random();
for (int i = 0; i < 2000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
find(list, arr);
}
private static void find(List<int> list, int[] arr)
{
Random random = new Random();
int[] find = new int[10000];
for (int i = 0; i < 10000; i++)
{
find[i] = random.Next(6000000);
}
Stopwatch watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Contains: {0:N0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Exists(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Exists: {0:N0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Any: {0:N0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("Array/Contains: {0:N0}ms", watch.ElapsedMilliseconds);
Console.WriteLine("Arrays do not have Exists");
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("Array/Any: {0:N0}ms", watch.ElapsedMilliseconds);
}
最快的方法是使用***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 96ms
List/Exists: 146ms
List/Any: 381ms
Array/Contains: 34ms
Arrays do not have Exists
Array/Any: 410ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 257,996ms
List/Exists: 379,951ms
List/Any: 884,853ms
Array/Contains: 72,486ms
Arrays do not have Exists
Array/Any: 1,013,303ms
。 HashSet
的Contains
是O(1)。
我拿了你的代码并为HashSet
添加了一个基准.HashSet<int>
的性能成本几乎为零。
HashSet<int> set = new HashSet<int>(list);
结果
void Main()
{
ContainsExistsAnyShort();
ContainsExistsAny();
}
private static void ContainsExistsAny()
{
Console.WriteLine("***************************************");
Console.WriteLine("********* ContainsExistsAny ***********");
Console.WriteLine("***************************************");
List<int> list = new List<int>(6000000);
Random random = new Random();
for (int i = 0; i < 6000000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);
find(list, arr, set);
}
private static void ContainsExistsAnyShort()
{
Console.WriteLine("***************************************");
Console.WriteLine("***** ContainsExistsAnyShortRange *****");
Console.WriteLine("***************************************");
List<int> list = new List<int>(2000);
Random random = new Random();
for (int i = 0; i < 2000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);
find(list, arr, set);
}
private static void find(List<int> list, int[] arr, HashSet<int> set)
{
Random random = new Random();
int[] find = new int[10000];
for (int i = 0; i < 10000; i++)
{
find[i] = random.Next(6000000);
}
Stopwatch watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Contains: {0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Exists(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Exists: {0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Any: {0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("Array/Contains: {0}ms", watch.ElapsedMilliseconds);
Console.WriteLine("Arrays do not have Exists");
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("Array/Any: {0}ms", watch.ElapsedMilliseconds);
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
set.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("HashSet/Contains: {0}ms", watch.ElapsedMilliseconds);
}
值得一提的是,这种比较有点不公平,因为***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 65ms
List/Exists: 106ms
List/Any: 222ms
Array/Contains: 20ms
Arrays do not have Exists
Array/Any: 281ms
HashSet/Contains: 0ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 120522ms
List/Exists: 250445ms
List/Any: 653530ms
Array/Contains: 40801ms
Arrays do not have Exists
Array/Any: 522371ms
HashSet/Contains: 3ms
类不拥有Array
方法,它通过连续的Contains()
使用IEnumerable<T>
的扩展方法,因此它没有针对Enumerator
实例进行优化;另一方面,Array
有自己的实现,完全针对所有尺寸进行了优化。
为了比较公平,你可以使用为HashSet<T>
实例实现的静态方法int Array.IndexOf()
,即使它使用Array
循环稍微高于for
。
话虽如此,Enumerator
的表现类似于HashSet<T>.Contains()
的小集合,我想说,最多5个元素,对大型集合更有效。