diff --git a/.res/bench-is-equal-10-million.png b/.res/bench-is-equal-10-million.png
new file mode 100755
index 0000000..671d265
Binary files /dev/null and b/.res/bench-is-equal-10-million.png differ
diff --git a/README.md b/README.md
index 811bd65..1a3b2ca 100644
--- a/README.md
+++ b/README.md
@@ -75,10 +75,12 @@ if mySet.Contains(5) {
 
 mySet.Remove(4)
 
-// Intersections
 myOtherSet := nset.NewNSet[uint32]()
 myOtherSet.AddMany(0, 1, 2, 4, 14)
 
+println("Are the two sets equal:", myOtherSet.IsEq(mySet)) //False
+
+// Intersections
 println("There is intersection:", myOtherSet.HasIntersection(mySet)) //True
 
 intersection := mySet.GetIntersection(myOtherSet)
@@ -95,7 +97,7 @@ println(myOtherSet.ContainsAll(0, 1, 2, 4, 14, 256, 300)) //True
 
 ## Benchmarks
 
-NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` depending on the operation and data size.
+NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` (and by as much as `8130x` when checking equality), depending on the operation and data size.
 
 In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test certain use cases. To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster.
 
@@ -130,6 +132,12 @@ myMap := make(map[uint16], 100)
 
 Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.
 
+Another case where NSet really shines is checking whether two sets are equal.
+Below is a benchmark that checks whether two NSets/maps with 10 million elements each are equal (they are equal, which is the worst case, since no early exit is possible).
+![Benchmarking IsEq with 10,000,000 elements](./.res/bench-is-equal-10-million.png)
+
+Here NSet finishes in `0.1ms`, while Map takes almost a second at `813ms`.
+
 ## How NSet works
 
 NSet works by using a single bit to indicate whether a number exists or not.
diff --git a/nset.go b/nset.go
index f13b0be..c4194e0 100644
--- a/nset.go
+++ b/nset.go
@@ -199,6 +199,50 @@ func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] {
 	return outSet
 }
 
+// IsEq reports whether n and otherSet contain exactly the same elements.
+//
+// Elements are compared through the raw storage units of each bucket, so no
+// individual element is ever visited. Storage units that only one of the two
+// sets has allocated must be zero (i.e. hold no elements); this keeps two sets
+// with the same elements equal even when their allocated storage differs.
+// A nil otherSet is never equal to n.
+func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool {
+
+	if otherSet == nil {
+		return false
+	}
+
+	//A set is always equal to itself, so skip the scan entirely
+	if n == otherSet {
+		return true
+	}
+
+	for i := 0; i < len(n.Buckets); i++ {
+
+		short := n.Buckets[i].Data
+		long := otherSet.Buckets[i].Data
+		if len(short) > len(long) {
+			short, long = long, short
+		}
+
+		//Storage units present in both buckets must match exactly
+		for j := 0; j < len(short); j++ {
+			if short[j] != long[j] {
+				return false
+			}
+		}
+
+		//Storage units present in only one bucket must not hold any elements
+		for j := len(short); j < len(long); j++ {
+			if long[j] != 0 {
+				return false
+			}
+		}
+	}
+
+	return true
+}
+
 func (n *NSet[T]) HasIntersection(otherSet *NSet[T]) bool {
 
 	for i := 0; i < len(n.Buckets); i++ {
diff --git a/nset_test.go b/nset_test.go
index 8f10eda..f4fc0d8 100755
--- a/nset_test.go
+++ b/nset_test.go
@@ -75,6 +75,14 @@ func TestNSet(t *testing.T) {
 
 	unionedSet := nset.UnionSets(n6, n7)
 	AllTrue(t, !n6.Contains(math.MaxUint32), !n7.ContainsAny(4, 7, 100, 1000), unionedSet.ContainsAll(4, 7, 100, 1000, math.MaxUint32), unionedSet.StorageUnitCount == n6.StorageUnitCount+n7OldStorageUnitCount)
+	//Equality
+	AllTrue(t, !n6.IsEq(n7))
+
+	n7.Union(n6)
+	AllTrue(t, !n6.IsEq(n7))
+
+	n6.Union(n7)
+	AllTrue(t, n6.IsEq(n7))
 }
 
 func TestNSetFullRange(t *testing.T) {
@@ -380,3 +388,85 @@ func BenchmarkMapDeleteRand(b *testing.B) {
 		delete(hMap, randVal)
 	}
 }
+
+// BenchmarkNSetIsEq measures IsEq on two NSets that hold the same
+// maxBenchSize elements, so every storage unit has to be compared.
+func BenchmarkNSetIsEq(b *testing.B) {
+
+	b.StopTimer()
+	s1 := nset.NewNSet[uint32]()
+	s2 := nset.NewNSet[uint32]()
+	for i := uint32(0); i < maxBenchSize; i++ {
+		s1.Add(i)
+		s2.Add(i)
+	}
+	b.StartTimer()
+
+	//Keep the result alive so the call can't be optimized away
+	eq := false
+	for i := 0; i < b.N; i++ {
+		eq = s1.IsEq(s2)
+	}
+
+	if !eq {
+		b.Fatal("expected the two sets to be equal")
+	}
+}
+
+// BenchmarkMapIsEq is the map counterpart of BenchmarkNSetIsEq: it compares
+// two maps that hold the same maxBenchSize keys.
+func BenchmarkMapIsEq(b *testing.B) {
+
+	b.StopTimer()
+	m1 := map[uint32]struct{}{}
+	m2 := map[uint32]struct{}{}
+	for i := uint32(0); i < maxBenchSize; i++ {
+		m1[i] = struct{}{}
+		m2[i] = struct{}{}
+	}
+	b.StartTimer()
+
+	//Two maps hold the same keys when they have the same size and every key of one is in the other
+	mapsAreEq := func(m1, m2 map[uint32]struct{}) bool {
+
+		if len(m1) != len(m2) {
+			return false
+		}
+
+		for k := range m1 {
+			if _, ok := m2[k]; !ok {
+				return false
+			}
+		}
+
+		return true
+	}
+
+	//Keep the result alive so the call can't be optimized away
+	eq := false
+	for i := 0; i < b.N; i++ {
+		eq = mapsAreEq(m1, m2)
+	}
+
+	if !eq {
+		b.Fatal("expected the two maps to be equal")
+	}
+}
+
+// TestNSetIsEqStorage checks that IsEq compares elements rather than allocated storage.
+func TestNSetIsEqStorage(t *testing.T) {
+
+	s1 := nset.NewNSet[uint32]()
+	s2 := nset.NewNSet[uint32]()
+	s1.AddMany(1, 2, 3)
+	s2.AddMany(1, 2, 3)
+	AllTrue(t, s1.IsEq(s2), s2.IsEq(s1), s1.IsEq(s1), !s1.IsEq(nil))
+
+	//A set with an extra element is not equal, in either direction
+	s2.Add(math.MaxUint32)
+	AllTrue(t, !s1.IsEq(s2), !s2.IsEq(s1))
+
+	//Removing the element again makes them equal, whatever storage s2 still holds
+	s2.Remove(math.MaxUint32)
+	AllTrue(t, s1.IsEq(s2), s2.IsEq(s1))
+}