IsEq benchmarks

2025-12-29 06:28:19 +00:00 · 2022-06-11 05:00:40 +04:00
parent 02265fd5ab
commit f7fe64f386
4 changed files with 94 additions and 2 deletions
--- a/.res/bench-is-equal-10-million.png
+++ b/.res/bench-is-equal-10-million.png
--- a/README.md
+++ b/README.md
@ -75,10 +75,12 @@ if mySet.Contains(5) {

 mySet.Remove(4)

-// Intersections
 myOtherSet := nset.NewNSet[uint32]()
 myOtherSet.AddMany(0, 1, 2, 4, 14)

+println("Are the two sets equal:", myOtherSet.IsEq(mySet))  //False
+
+// Intersections
 println("There is intersection:", myOtherSet.HasIntersection(mySet))    //True

 intersection := mySet.GetIntersection(myOtherSet)
@ -95,7 +97,7 @@ println(myOtherSet.ContainsAll(0, 1, 2, 4, 14, 256, 300))  //True

 ## Benchmarks

-NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` depending on the operation and data size.
+NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` (and by as much as `8130x` when checking equality) depending on the operation and data size.

 In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test certain use cases.
 To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster.
@ -130,6 +132,12 @@ myMap := make(map[uint16], 100)

 Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.

+Another case where NSet really shines is checking whether two sets are equal.
+Below is a benchmark that checks whether two NSets/maps with 10 million elements each are equal (they are equal, which is the worst case because no early exit is possible).
+![Benchmarking IsEq with 10,000,000 elements](./.res/bench-is-equal-10-million.png)
+
+Here NSet finishes in `0.1ms`, while the map takes almost a second (`813ms`).
+
 ## How NSet works

 NSet works by using a single bit to indicate whether a number exists or not.
--- a/nset.go
+++ b/nset.go
@ -199,6 +199,35 @@ func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] {
 	return outSet
 }

+// IsEq reports whether n and otherSet compare equal.
+//
+// The comparison is done on the underlying storage: the total storage unit
+// count, then every bucket's storage unit count, then every storage unit of
+// every bucket must match. Any difference in storage unit counts yields false
+// without looking at the stored bits.
+//
+// NOTE(review): as a consequence, two sets holding the same values but with
+// differently sized storage are reported as not equal; confirm this is intended.
+//
+// A set is always equal to itself, and a non-nil set is never equal to a nil one.
+func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool {
+
+	// Same pointer: nothing to compare. This also covers both sides being nil.
+	if n == otherSet {
+		return true
+	}
+
+	// Exactly one side is nil; dereferencing it below would panic.
+	if n == nil || otherSet == nil {
+		return false
+	}
+
+	if n.StorageUnitCount != otherSet.StorageUnitCount {
+		return false
+	}
+
+	//Equal storage unit count doesn't mean all buckets have same size, so we check per bucket.
+	//This cheap pass runs before any data is compared so size mismatches exit early.
+	for i := range n.Buckets {
+		if n.Buckets[i].StorageUnitCount != otherSet.Buckets[i].StorageUnitCount {
+			return false
+		}
+	}
+
+	for i := range n.Buckets {
+
+		b1 := &n.Buckets[i]
+		b2 := &otherSet.Buckets[i]
+
+		for j := range b1.Data {
+			if b1.Data[j] != b2.Data[j] {
+				return false
+			}
+		}
+	}
+
+	return true
+}
+
 func (n *NSet[T]) HasIntersection(otherSet *NSet[T]) bool {

 	for i := 0; i < len(n.Buckets); i++ {
--- a/nset_test.go
+++ b/nset_test.go
@ -75,6 +75,14 @@ func TestNSet(t *testing.T) {
 	unionedSet := nset.UnionSets(n6, n7)
 	AllTrue(t, !n6.Contains(math.MaxUint32), !n7.ContainsAny(4, 7, 100, 1000), unionedSet.ContainsAll(4, 7, 100, 1000, math.MaxUint32), unionedSet.StorageUnitCount == n6.StorageUnitCount+n7OldStorageUnitCount)

+	//Equality
+	AllTrue(t, !n6.IsEq(n7))
+
+	n7.Union(n6)
+	AllTrue(t, !n6.IsEq(n7))
+
+	n6.Union(n7)
+	AllTrue(t, n6.IsEq(n7))
 }

 func TestNSetFullRange(t *testing.T) {
@ -380,3 +388,50 @@ func BenchmarkMapDeleteRand(b *testing.B) {
 		delete(hMap, randVal)
 	}
 }
+
+// BenchmarkNSetIsEq measures NSet.IsEq on two sets that each hold every value in [0, maxBenchSize).
+// Both sets are filled identically, so each call is expected to report equality.
+func BenchmarkNSetIsEq(b *testing.B) {
+
+	s1 := nset.NewNSet[uint32]()
+	s2 := nset.NewNSet[uint32]()
+	for i := uint32(0); i < maxBenchSize; i++ {
+		s1.Add(i)
+		s2.Add(i)
+	}
+
+	// Setup is done; drop whatever time it took from the measurement.
+	b.ResetTimer()
+
+	// Keep the result so the call can't be optimized away, and so a wrong answer is caught.
+	areEq := true
+	for i := 0; i < b.N; i++ {
+		areEq = s1.IsEq(s2) && areEq
+	}
+
+	if !areEq {
+		b.Fatal("expected identically filled sets to be equal")
+	}
+}
+
+// BenchmarkMapIsEq is the built-in map counterpart of BenchmarkNSetIsEq: it measures an
+// equality check between two map-based sets that each hold every key in [0, maxBenchSize).
+func BenchmarkMapIsEq(b *testing.B) {
+
+	m1 := map[uint32]struct{}{}
+	m2 := map[uint32]struct{}{}
+	for i := uint32(0); i < maxBenchSize; i++ {
+		m1[i] = struct{}{}
+		m2[i] = struct{}{}
+	}
+
+	// mapsAreEq reports whether lhs and rhs hold exactly the same keys.
+	mapsAreEq := func(lhs, rhs map[uint32]struct{}) bool {
+
+		if len(lhs) != len(rhs) {
+			return false
+		}
+
+		// Sizes match, so it is enough that every key of lhs also exists in rhs.
+		for k := range lhs {
+			if _, ok := rhs[k]; !ok {
+				return false
+			}
+		}
+
+		return true
+	}
+
+	// Setup is done; drop whatever time it took from the measurement.
+	b.ResetTimer()
+
+	// Keep the result so the call can't be optimized away, and so a wrong answer is caught.
+	areEq := true
+	for i := 0; i < b.N; i++ {
+		areEq = mapsAreEq(m1, m2) && areEq
+	}
+
+	if !areEq {
+		b.Fatal("expected identically filled maps to be equal")
+	}
+}