IsEq benchmarks

This commit is contained in:
bloeys
2022-06-11 05:00:40 +04:00
parent 02265fd5ab
commit f7fe64f386
4 changed files with 94 additions and 2 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -75,10 +75,12 @@ if mySet.Contains(5) {
mySet.Remove(4)
// Intersections
myOtherSet := nset.NewNSet[uint32]()
myOtherSet.AddMany(0, 1, 2, 4, 14)
println("Are the two sets equal:", myOtherSet.IsEq(mySet)) //False
// Intersections
println("There is intersection:", myOtherSet.HasIntersection(mySet)) //True
intersection := mySet.GetIntersection(myOtherSet)
@ -95,7 +97,7 @@ println(myOtherSet.ContainsAll(0, 1, 2, 4, 14, 256, 300)) //True
## Benchmarks
NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` depending on the operation and data size.
NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` (and by as much as `8130x` when checking equality), depending on the operation and data size.
In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test certain use cases.
To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster.
@ -130,6 +132,12 @@ myMap := make(map[uint16], 100)
Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.
Another case where NSet really shines is checking if two sets are equal.
Below is a benchmark that checks whether two NSets/maps, each holding 10 million elements, are equal (they are equal, which is the worst case because nothing allows an early exit).
![Benchmarking IsEq with 10,000,000 elements](./.res/bench-is-equal-10-million.png)
Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
## How NSet works
NSet works by using a single bit to indicate whether a number exists or not.

29
nset.go
View File

@ -199,6 +199,35 @@ func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] {
return outSet
}
// IsEq reports whether n and otherSet contain exactly the same elements.
//
// Equality is decided by the stored bits only, not by how much storage each
// set has allocated: two sets with the same members are equal even if one of
// them has extra, all-zero storage units (for example after a large value was
// added and later removed).
//
// NOTE(review): this assumes a storage unit equal to zero holds no elements
// (one bit per element) - confirm against the Bucket definition.
func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool {

	//A set is always equal to itself
	if n == otherSet {
		return true
	}

	for i := 0; i < len(n.Buckets); i++ {

		shorter := n.Buckets[i].Data
		longer := otherSet.Buckets[i].Data
		if len(shorter) > len(longer) {
			shorter, longer = longer, shorter
		}

		//Storage units present on both sides must match exactly
		for j := range shorter {
			if shorter[j] != longer[j] {
				return false
			}
		}

		//Storage units present on only one side must not hold any elements
		for _, unit := range longer[len(shorter):] {
			if unit != 0 {
				return false
			}
		}
	}

	return true
}
func (n *NSet[T]) HasIntersection(otherSet *NSet[T]) bool {
for i := 0; i < len(n.Buckets); i++ {

View File

@ -75,6 +75,14 @@ func TestNSet(t *testing.T) {
unionedSet := nset.UnionSets(n6, n7)
AllTrue(t, !n6.Contains(math.MaxUint32), !n7.ContainsAny(4, 7, 100, 1000), unionedSet.ContainsAll(4, 7, 100, 1000, math.MaxUint32), unionedSet.StorageUnitCount == n6.StorageUnitCount+n7OldStorageUnitCount)
//Equality
AllTrue(t, !n6.IsEq(n7))
n7.Union(n6)
AllTrue(t, !n6.IsEq(n7))
n6.Union(n7)
AllTrue(t, n6.IsEq(n7))
}
func TestNSetFullRange(t *testing.T) {
@ -380,3 +388,50 @@ func BenchmarkMapDeleteRand(b *testing.B) {
delete(hMap, randVal)
}
}
// BenchmarkNSetIsEq measures NSet.IsEq on two sets that were both filled with
// every value in [0, maxBenchSize). The sets are equal, so no comparison can
// exit early.
func BenchmarkNSetIsEq(b *testing.B) {

	s1 := nset.NewNSet[uint32]()
	s2 := nset.NewNSet[uint32]()
	for i := uint32(0); i < maxBenchSize; i++ {
		s1.Add(i)
		s2.Add(i)
	}

	//Keep set construction out of the measurement
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		//Using the result keeps the call from being optimized away and
		//verifies that we are really measuring the 'equal' path
		if !s1.IsEq(s2) {
			b.Fatal("sets with identical contents were reported as not equal")
		}
	}
}
// BenchmarkMapIsEq measures a hand-written equality check on two Go maps used
// as sets, both filled with every value in [0, maxBenchSize). It is the map
// counterpart of BenchmarkNSetIsEq.
func BenchmarkMapIsEq(b *testing.B) {

	m1 := map[uint32]struct{}{}
	m2 := map[uint32]struct{}{}
	for i := uint32(0); i < maxBenchSize; i++ {
		m1[i] = struct{}{}
		m2[i] = struct{}{}
	}

	//Two key sets are equal when they have the same size and every key of
	//one is present in the other
	mapsAreEq := func(m1, m2 map[uint32]struct{}) bool {

		if len(m1) != len(m2) {
			return false
		}

		for k := range m1 {
			if _, ok := m2[k]; !ok {
				return false
			}
		}

		return true
	}

	//Keep map construction out of the measurement
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		//Using the result keeps the call from being optimized away and
		//verifies that we are really measuring the 'equal' path
		if !mapsAreEq(m1, m2) {
			b.Fatal("maps with identical keys were reported as not equal")
		}
	}
}