Optimize GetIntersections+GetAllElements+bench+readme

This commit is contained in:
bloeys
2022-06-11 07:38:50 +04:00
parent f7fe64f386
commit 5a6e13b9f1
4 changed files with 186 additions and 15 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

View File

@ -134,9 +134,18 @@ Map benefits from sizing while NSet isn't affected, but in both cases NSet remai
Another case where NSet really shines is checking if two sets are equal. Another case where NSet really shines is checking if two sets are equal.
Below is a benchmark that checks whether two NSets/maps with 10 Million elements in each are equal (They are equal, which is the worst case). Below is a benchmark that checks whether two NSets/maps with 10 Million elements in each are equal (They are equal, which is the worst case).
Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
![Benchmarking IsEq with 10,000,000 elements](./.res/bench-is-equal-10-million.png) ![Benchmarking IsEq with 10,000,000 elements](./.res/bench-is-equal-10-million.png)
Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`. Next we have `GetAllElements`, which simply returns an array of all the elements of NSet/Map (note this is dangerous in NSet. See [Memory characteristics](#memory-characteristics)).
![Benchmarking GetAllElements with 1,000,000 elements](.res/bench-getAllElements-1-million.png)
With `GetAllElements`, NSet is faster when its elements are close together (or when you have many numbers), but it gets a lot slower when
dealing with a few random numbers. This is because you might get two numbers like `1` and `1_000_000`, which NSet
stores in two far-apart places with a lot of empty space in between. In a map these are stored close together.
With 1M ordered elements NSet takes `~2ms` and map takes `~9ms`, but with 1M random elements NSet takes `~129ms`
while map still takes `~9ms`. Map scales with the number of elements, while NSet is also affected by how the numbers are distributed.
## How NSet works ## How NSet works

64
nset.go
View File

@ -2,6 +2,7 @@ package nset
import ( import (
"fmt" "fmt"
"math/bits"
"reflect" "reflect"
"strings" "strings"
) )
@ -18,7 +19,7 @@ const (
) )
//IntsIf is limited to uint32 because we can store ALL 4 Billion uint32 numbers //IntsIf is limited to uint32 because we can store ALL 4 Billion uint32 numbers
//in 256MB with NSet (instead of the normal 16GB for an array of all uint32s). //in 512MB with NSet (instead of the normal 16GB for an array of all uint32s).
//But if we allow uint64 (or int, since int can be 64-bit) users can easily put a big 64-bit number and use more RAM than maybe Google and crash. //But if we allow uint64 (or int, since int can be 64-bit) users can easily put a big 64-bit number and use more RAM than maybe Google and crash.
type IntsIf interface { type IntsIf interface {
uint8 | uint16 | uint32 uint8 | uint16 | uint32
@ -171,32 +172,69 @@ func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] {
b1 := &n.Buckets[i] b1 := &n.Buckets[i]
b2 := &otherSet.Buckets[i] b2 := &otherSet.Buckets[i]
//bucketIndexBits are the bits removed from the original value to use for bucket indexing. newB := &outSet.Buckets[i]
//We will use this to restore the original value 'x' once an intersection is detected for j := uint32(0); j < b1.StorageUnitCount && j < b2.StorageUnitCount; j++ {
bucketIndexBits := T(i << n.shiftAmount)
for j := 0; j < len(b1.Data) && j < len(b2.Data); j++ {
if b1.Data[j]&b2.Data[j] == 0 { if b1.Data[j]&b2.Data[j] == 0 {
continue continue
} }
mask := StorageType(1 << 0) //This will be used to check set bits. Numbers will be reconstructed only for set bits if newB.StorageUnitCount < j+1 {
commonBits := b1.Data[j] & b2.Data[j] //Bits that are set on both storage units (aka the intersection) storageUnitsToAdd := j + 1 - newB.StorageUnitCount
firstStorageUnitValue := T(j*StorageTypeBits) | bucketIndexBits //StorageUnitIndex = noBucketBitsX / StorageTypeBits. So: noBucketBitsX = StorageUnitIndex * StorageTypeBits; Then: x = noBucketBitsX | bucketIndexBits newB.Data = append(newB.Data, make([]StorageType, storageUnitsToAdd)...)
for k := T(0); k < StorageTypeBits; k++ {
if commonBits&mask > 0 { newB.StorageUnitCount += storageUnitsToAdd
outSet.Add(firstStorageUnitValue + k) outSet.StorageUnitCount += storageUnitsToAdd
// fmt.Printf("Bucket=%d, Storage unit=%d, bitPos=%d, value=%d\n", i, j, k, firstStorageUnitValue+k) }
newB.Data[j] = b1.Data[j] & b2.Data[j]
}
}
return outSet
}
//GetAllElements returns all the added numbers added to NSet.
//NOTE: Be careful with this if you have a lot of elements in NSet because NSet is compressed while the returned array is not.
//In the worst case (all uint32s stored) the returned array will be ~4.2 billion elements and will use 16+ GBs of RAM.
func (n *NSet[T]) GetAllElements() []T {
elements := make([]T, 0)
for i := 0; i < BucketCount; i++ {
//bucketIndexBits are the bits removed from the original value to use for bucket indexing.
//We will use this to restore the original value 'x' once an intersection is detected
bucketIndexBits := T(i << n.shiftAmount)
b1 := &n.Buckets[i]
for j := 0; j < len(b1.Data); j++ {
storageUnit := b1.Data[j]
onesCount := bits.OnesCount64(uint64(storageUnit))
if onesCount == 0 {
continue
}
elementsToAdd := make([]T, 0, onesCount)
mask := StorageType(1 << 0) //This will be used to check set bits. Numbers will be reconstructed only for set bits
firstStorageUnitValue := T(j*StorageTypeBits) | bucketIndexBits //StorageUnitIndex = noBucketBitsX / StorageTypeBits. So: noBucketBitsX = StorageUnitIndex * StorageTypeBits; Then: x = noBucketBitsX | bucketIndexBits
for k := T(0); onesCount > 0 && k < StorageTypeBits; k++ {
if storageUnit&mask > 0 {
elementsToAdd = append(elementsToAdd, firstStorageUnitValue+k)
onesCount--
} }
mask <<= 1 mask <<= 1
} }
elements = append(elements, elementsToAdd...)
} }
} }
return outSet return elements
} }
func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool { func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool {

View File

@ -55,7 +55,11 @@ func TestNSet(t *testing.T) {
n5.AddMany(0, 1, 63, 64, math.MaxUint32) n5.AddMany(0, 1, 63, 64, math.MaxUint32)
n4n5 := n4.GetIntersection(n5) n4n5 := n4.GetIntersection(n5)
AllTrue(t, n4n5.ContainsAll(0, 1, 64, math.MaxUint32), !n4n5.Contains(63))
n4n5Twin := nset.NewNSet[uint32]()
n4n5Twin.AddMany(0, 1, 64, math.MaxUint32)
AllTrue(t, n4n5.ContainsAll(0, 1, 64, math.MaxUint32), !n4n5.Contains(63), n4n5Twin.IsEq(n4n5))
//Union //Union
n6 := nset.NewNSet[uint32]() n6 := nset.NewNSet[uint32]()
@ -83,6 +87,13 @@ func TestNSet(t *testing.T) {
n6.Union(n7) n6.Union(n7)
AllTrue(t, n6.IsEq(n7)) AllTrue(t, n6.IsEq(n7))
//GetAllElements
n8 := nset.NewNSet[uint32]()
n8.AddMany(0, 1, 55, 1000, 10000)
n8Elements := n8.GetAllElements()
AllTrue(t, len(n8Elements) == 5, n8Elements[0] == 0, n8Elements[1] == 1, n8Elements[2] == 55, n8Elements[3] == 1000, n8Elements[4] == 10000)
} }
func TestNSetFullRange(t *testing.T) { func TestNSetFullRange(t *testing.T) {
@ -435,3 +446,116 @@ func BenchmarkMapIsEq(b *testing.B) {
mapsAreEq(m1, m2) mapsAreEq(m1, m2)
} }
} }
//BenchmarkNSetGetIntersection times GetIntersection on two NSets that both
//contain every value in [0, maxBenchSize). Building the sets is excluded from the timing.
func BenchmarkNSetGetIntersection(b *testing.B) {

	b.StopTimer()

	left, right := nset.NewNSet[uint32](), nset.NewNSet[uint32]()
	for v := uint32(0); v < maxBenchSize; v++ {
		left.Add(v)
		right.Add(v)
	}

	b.StartTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		left.GetIntersection(right)
	}
}
//elementCount receives the length of the slice produced by the GetAllElements benchmarks,
//so each benchmark's result ends up stored in a package-level variable.
var elementCount int
//BenchmarkNSetGetAllElements times GetAllElements on an NSet holding the
//consecutive values 0..999,999. Filling the set is excluded from the timing.
func BenchmarkNSetGetAllElements(b *testing.B) {

	b.StopTimer()

	set := nset.NewNSet[uint32]()
	for v := uint32(0); v < 1000_000; v++ {
		set.Add(v)
	}

	b.StartTimer()

	var all []uint32
	for remaining := b.N; remaining > 0; remaining-- {
		all = set.GetAllElements()
	}

	//Store the result length in the package-level variable
	elementCount = len(all)
}
//BenchmarkMapGetAllElements is the map counterpart of BenchmarkNSetGetAllElements:
//it times copying all keys of a map holding 0..999,999 into a slice.
func BenchmarkMapGetAllElements(b *testing.B) {

	b.StopTimer()

	set := make(map[uint32]struct{})
	for v := uint32(0); v < 1000_000; v++ {
		set[v] = struct{}{}
	}

	b.StartTimer()

	//collectKeys copies every key of src into a slice sized to len(src)
	collectKeys := func(src map[uint32]struct{}) []uint32 {
		keys := make([]uint32, 0, len(src))
		for key := range src {
			keys = append(keys, key)
		}
		return keys
	}

	var all []uint32
	for remaining := b.N; remaining > 0; remaining-- {
		all = collectKeys(set)
	}

	//Store the result length in the package-level variable
	elementCount = len(all)
}
//BenchmarkNSetGetAllElementsRand times GetAllElements on an NSet filled with
//1,000,000 pseudo-random uint32 draws. Duplicate draws collapse, so the set may hold fewer elements.
//Filling the set is excluded from the timing.
func BenchmarkNSetGetAllElementsRand(b *testing.B) {

	b.StopTimer()

	//Use a private generator seeded with RandSeed instead of the deprecated rand.Seed,
	//so the benchmark stays reproducible without reseeding the process-wide generator
	rng := rand.New(rand.NewSource(RandSeed))

	s1 := nset.NewNSet[uint32]()
	for i := uint32(0); i < 1000_000; i++ {
		s1.Add(rng.Uint32())
	}

	b.StartTimer()

	var elements []uint32
	for i := 0; i < b.N; i++ {
		elements = s1.GetAllElements()
	}

	//Store the result length in the package-level variable
	elementCount = len(elements)
}
//BenchmarkMapGetAllElementsRand is the map counterpart of BenchmarkNSetGetAllElementsRand:
//it times copying all keys of a map filled with 1,000,000 pseudo-random uint32 draws into a slice.
//Duplicate draws collapse, so the map may hold fewer keys. Setup is excluded from the timing.
func BenchmarkMapGetAllElementsRand(b *testing.B) {

	b.StopTimer()

	//Use a private generator seeded with RandSeed instead of the deprecated rand.Seed,
	//so the benchmark stays reproducible without reseeding the process-wide generator
	rng := rand.New(rand.NewSource(RandSeed))

	m1 := map[uint32]struct{}{}
	for i := uint32(0); i < 1000_000; i++ {
		m1[rng.Uint32()] = struct{}{}
	}

	//getElementsFunc copies every key of m into a slice sized to len(m)
	getElementsFunc := func(m map[uint32]struct{}) []uint32 {
		e := make([]uint32, 0, len(m))
		for k := range m {
			e = append(e, k)
		}
		return e
	}

	b.StartTimer()

	var elements []uint32
	for i := 0; i < b.N; i++ {
		elements = getElementsFunc(m1)
	}

	//Store the result length in the package-level variable
	elementCount = len(elements)
}