diff --git a/.res/bench-getAllElements-1-million.png b/.res/bench-getAllElements-1-million.png new file mode 100755 index 0000000..d454ff6 Binary files /dev/null and b/.res/bench-getAllElements-1-million.png differ diff --git a/README.md b/README.md index 1a3b2ca..8d7655c 100644 --- a/README.md +++ b/README.md @@ -134,9 +134,18 @@ Map benefits from sizing while NSet isn't affected, but in both cases NSet remai Another case where NSet really shines is checking if two sets are equal. Below is a benchmark that checks whether two NSets/maps with 10 Million elements in each are equal (They are equal, which is the worst case). +Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`. ![Benchmarking IsEq with 10,000,000 elements](./.res/bench-is-equal-10-million.png) -Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`. +Next we have `GetAllElements`, which simply returns an array of all the elements of NSet/Map (note that this is dangerous with NSet; see [Memory characteristics](#memory-characteristics)). +![Benchmarking GetAllElements with 1,000,000 elements](.res/bench-getAllElements-1-million.png) + +With `GetAllElements` NSet is faster when its elements are closer together (or you have many numbers), but gets a lot slower when +dealing with a few random numbers. This is because you might get two numbers like `1` and `1_000_000` which NSet +will store in two far-apart places with a lot of nothing in between. In a map these will be stored close together. + +With 1M ordered elements NSet takes `~2ms` and map `~9ms`, but with 1M random elements NSet takes `~129ms` +while map takes `~9ms`. Map scales with the number of elements, while NSet is also affected by how the numbers are distributed. 
## How NSet works diff --git a/nset.go b/nset.go index c4194e0..00cb457 100644 --- a/nset.go +++ b/nset.go @@ -2,6 +2,7 @@ package nset import ( "fmt" + "math/bits" "reflect" "strings" ) @@ -18,7 +19,7 @@ const ( ) //IntsIf is limited to uint32 because we can store ALL 4 Billion uint32 numbers -//in 256MB with NSet (instead of the normal 16GB for an array of all uint32s). +//in 512MB with NSet (instead of the normal 16GB for an array of all uint32s). //But if we allow uint64 (or int, since int can be 64-bit) users can easily put a big 64-bit number and use more RAM than maybe Google and crash. type IntsIf interface { uint8 | uint16 | uint32 @@ -171,32 +172,69 @@ func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] { b1 := &n.Buckets[i] b2 := &otherSet.Buckets[i] - //bucketIndexBits are the bits removed from the original value to use for bucket indexing. - //We will use this to restore the original value 'x' once an intersection is detected - bucketIndexBits := T(i << n.shiftAmount) - for j := 0; j < len(b1.Data) && j < len(b2.Data); j++ { + newB := &outSet.Buckets[i] + for j := uint32(0); j < b1.StorageUnitCount && j < b2.StorageUnitCount; j++ { if b1.Data[j]&b2.Data[j] == 0 { continue } - mask := StorageType(1 << 0) //This will be used to check set bits. Numbers will be reconstructed only for set bits - commonBits := b1.Data[j] & b2.Data[j] //Bits that are set on both storage units (aka the intersection) - firstStorageUnitValue := T(j*StorageTypeBits) | bucketIndexBits //StorageUnitIndex = noBucketBitsX / StorageTypeBits. So: noBucketBitsX = StorageUnitIndex * StorageTypeBits; Then: x = noBucketBitsX | bucketIndexBits - for k := T(0); k < StorageTypeBits; k++ { + if newB.StorageUnitCount < j+1 { + storageUnitsToAdd := j + 1 - newB.StorageUnitCount + newB.Data = append(newB.Data, make([]StorageType, storageUnitsToAdd)...) 
- if commonBits&mask > 0 { - outSet.Add(firstStorageUnitValue + k) - // fmt.Printf("Bucket=%d, Storage unit=%d, bitPos=%d, value=%d\n", i, j, k, firstStorageUnitValue+k) + newB.StorageUnitCount += storageUnitsToAdd + outSet.StorageUnitCount += storageUnitsToAdd + } + + newB.Data[j] = b1.Data[j] & b2.Data[j] + } + } + + return outSet +} + +//GetAllElements returns all the numbers added to NSet. +//NOTE: Be careful with this if you have a lot of elements in NSet because NSet is compressed while the returned array is not. +//In the worst case (all uint32s stored) the returned array will be ~4.2 billion elements and will use 16+ GB of RAM. +func (n *NSet[T]) GetAllElements() []T { + + elements := make([]T, 0) + + for i := 0; i < BucketCount; i++ { + + //bucketIndexBits are the bits removed from the original value to use for bucket indexing. + //We will use this to restore the original value 'x' once a set bit is found in a storage unit + bucketIndexBits := T(i << n.shiftAmount) + + b1 := &n.Buckets[i] + for j := 0; j < len(b1.Data); j++ { + + storageUnit := b1.Data[j] + onesCount := bits.OnesCount64(uint64(storageUnit)) + if onesCount == 0 { + continue + } + elementsToAdd := make([]T, 0, onesCount) + + mask := StorageType(1 << 0) //This will be used to check set bits. Numbers will be reconstructed only for set bits + firstStorageUnitValue := T(j*StorageTypeBits) | bucketIndexBits //StorageUnitIndex = noBucketBitsX / StorageTypeBits. So: noBucketBitsX = StorageUnitIndex * StorageTypeBits; Then: x = noBucketBitsX | bucketIndexBits + + for k := T(0); onesCount > 0 && k < StorageTypeBits; k++ { + + if storageUnit&mask > 0 { + elementsToAdd = append(elementsToAdd, firstStorageUnitValue+k) + onesCount-- } mask <<= 1 } + elements = append(elements, elementsToAdd...) 
} } - return outSet + return elements } func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool { diff --git a/nset_test.go b/nset_test.go index f4fc0d8..7d5f6de 100755 --- a/nset_test.go +++ b/nset_test.go @@ -55,7 +55,11 @@ func TestNSet(t *testing.T) { n5.AddMany(0, 1, 63, 64, math.MaxUint32) n4n5 := n4.GetIntersection(n5) - AllTrue(t, n4n5.ContainsAll(0, 1, 64, math.MaxUint32), !n4n5.Contains(63)) + + n4n5Twin := nset.NewNSet[uint32]() + n4n5Twin.AddMany(0, 1, 64, math.MaxUint32) + + AllTrue(t, n4n5.ContainsAll(0, 1, 64, math.MaxUint32), !n4n5.Contains(63), n4n5Twin.IsEq(n4n5)) //Union n6 := nset.NewNSet[uint32]() @@ -83,6 +87,13 @@ func TestNSet(t *testing.T) { n6.Union(n7) AllTrue(t, n6.IsEq(n7)) + + //GetAllElements + n8 := nset.NewNSet[uint32]() + n8.AddMany(0, 1, 55, 1000, 10000) + + n8Elements := n8.GetAllElements() + AllTrue(t, len(n8Elements) == 5, n8Elements[0] == 0, n8Elements[1] == 1, n8Elements[2] == 55, n8Elements[3] == 1000, n8Elements[4] == 10000) } func TestNSetFullRange(t *testing.T) { @@ -435,3 +446,116 @@ func BenchmarkMapIsEq(b *testing.B) { mapsAreEq(m1, m2) } } + +func BenchmarkNSetGetIntersection(b *testing.B) { + + b.StopTimer() + s1 := nset.NewNSet[uint32]() + s2 := nset.NewNSet[uint32]() + for i := uint32(0); i < maxBenchSize; i++ { + s1.Add(i) + s2.Add(i) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + s1.GetIntersection(s2) + } +} + +var elementCount int + +func BenchmarkNSetGetAllElements(b *testing.B) { + + b.StopTimer() + + s1 := nset.NewNSet[uint32]() + for i := uint32(0); i < 1000_000; i++ { + s1.Add(i) + } + b.StartTimer() + + var elements []uint32 + for i := 0; i < b.N; i++ { + elements = s1.GetAllElements() + } + + elementCount = len(elements) +} + +func BenchmarkMapGetAllElements(b *testing.B) { + + b.StopTimer() + + m1 := map[uint32]struct{}{} + for i := uint32(0); i < 1000_000; i++ { + m1[i] = struct{}{} + } + b.StartTimer() + + getElementsFunc := func(m map[uint32]struct{}) []uint32 { + + e := make([]uint32, 0, len(m)) + for 
k := range m { + e = append(e, k) + } + + return e + } + + var elements []uint32 + for i := 0; i < b.N; i++ { + elements = getElementsFunc(m1) + } + + elementCount = len(elements) +} + +func BenchmarkNSetGetAllElementsRand(b *testing.B) { + + b.StopTimer() + + rand.Seed(RandSeed) + s1 := nset.NewNSet[uint32]() + for i := uint32(0); i < 1000_000; i++ { + s1.Add(rand.Uint32()) + } + b.StartTimer() + + var elements []uint32 + for i := 0; i < b.N; i++ { + elements = s1.GetAllElements() + } + + elementCount = len(elements) +} + +func BenchmarkMapGetAllElementsRand(b *testing.B) { + + b.StopTimer() + + rand.Seed(RandSeed) + + m1 := map[uint32]struct{}{} + for i := uint32(0); i < 1000_000; i++ { + m1[rand.Uint32()] = struct{}{} + } + + getElementsFunc := func(m map[uint32]struct{}) []uint32 { + + e := make([]uint32, 0, len(m)) + for k := range m { + e = append(e, k) + } + + return e + } + b.StartTimer() + + var elements []uint32 + for i := 0; i < b.N; i++ { + elements = getElementsFunc(m1) + } + + elementCount = len(elements) +}