diff --git a/.res/bench-10-million.png b/.res/bench-10-million.png deleted file mode 100755 index e05967d..0000000 Binary files a/.res/bench-10-million.png and /dev/null differ diff --git a/.res/bench-100-million.png b/.res/bench-100-million.png new file mode 100755 index 0000000..360b67a Binary files /dev/null and b/.res/bench-100-million.png differ diff --git a/.res/bench-100.png b/.res/bench-100.png index edd4e6f..129b28b 100755 Binary files a/.res/bench-100.png and b/.res/bench-100.png differ diff --git a/README.md b/README.md index 0ce517c..de84d43 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ get intersections. - [Usage](#usage) - [Benchmarks](#benchmarks) - [How NSet works](#how-nset-works) - - [A note on memory usage](#a-note-on-memory-usage) + - [Memory characteristics](#memory-characteristics) ## When to use NSet @@ -59,8 +59,8 @@ func ExistsInArray(myArray []int, item int) bool { To install run `go get github.com/bloeys/nset` Then usage is very simple: -```go +```go mySet := nset.NewNSet[uint32]() mySet.Add(0) @@ -78,24 +78,33 @@ mySet.Remove(4) ## Benchmarks -NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `1.6x to 64x` depending on the operation and data size. +NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` depending on the operation and data size. -Benchmark with 100 elements: +In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test certain use cases. +To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster. + +Adding all uint32 to the map would eat tons of RAM, so we limit both NSet and Map to 10 Million values (0->10M). But because +NSet is optimized for this, there are two additional benchmarks that are only for NSet: `NSetAddRandNoSizeLimit` and `NSetContainsRandFullRange`. 
+ +NSetAddRandNoSizeLimit removes the limit on the values so NSet will potentially get 10s or 100s of millions of random values. +Even with no limit, NSet outperforms the Map that's limited to 10M by ~200%. + +NSetContainsRandFullRange adds all 4 billion uint32 values to NSet then randomly checks if they exist. This is by far +the most extreme test, but is still faster than access on a map with 400x fewer values. A less loaded NSet performs better, +but the difference between best case and worst case NSet is minor and doesn't increase much as the storage increases. + +Benchmark with 100 elements (Ignore NSetAddRandNoSizeLimit and NSetContainsRandFullRange): ![Benchmark of 100 elements](./.res/bench-100.png) -Benchmark with 10,000,000 elements: +Benchmark with 100,000,000 elements: -![Benchmark of 10,000,000 elements](./.res/bench-10-million.png) +![Benchmark of 100,000,000 elements](./.res/bench-100-million.png) -As can be seen from the benchmarks, NSet has almost no change in its performance even with 10 million elements, while the -hash map slows down a lot as the size grows. NSet practically doesn't allocate at all. But it should be noted that -allocation can happen when adding a number bigger than all previously entered numbers. +As can be seen from the benchmarks, NSet shows relatively little change in its performance even with 100 million elements, while the +hash map slows down a lot as the size grows. -Benchmarks that have 'Rand' in them mean that access patterns are randomized which can cause cache invalidation. -To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster. - -Benchmarks that have `Presized` in them means that the data structure was fully allocated before usage, like: +NSet also allocates less, and in fact will only allocate when adding a number bigger than all previously entered numbers. 
```go //This map already has space for ~100 elements and so doesn't need to resize, which is costly @@ -113,10 +122,21 @@ These bit flags are stored as an array of uint64, where the `0` uses the first b Now assume we have added the numbers `1`, `2` and `3`, then we add number `65`. The first 3 numbers fit in the first uint64 integer of the array, but `65` doesn't so at this point the array is expanded until we have enough 65 bits or more, so 1 more integer is added and the second bit of the second integer is set. -### A note on memory usage +### Memory characteristics This setup gives us very high add/get/remove efficiency, but in some cases can produce worse memory usage. For example, if you make an empty set -then add `5000` NSet will be forced to create 78 integers and then set one bit on the last integer. So if you have a few huge numbers (a number in the millions or billions) then you will be using more memory than a hash map or an array. +then add the number `5000` NSet will be forced to create 78 integers and then set one bit on the last integer. So if you have a few huge numbers (a number in the millions or billions) then you will be using more memory than a hash map or an array. -But if your numbers are smaller and/or closer together then you will have **a lot better** memory efficiency. An array storing all -4 billion uint32 integers will use 16GBs of memory, while NSet with all 4 billion will only use 256MB. +But if your numbers are smaller and/or closer together then you will have **a lot better** memory efficiency. A normal array storing all +4 billion uint32 integers will use `16 GB` of memory, while NSet can store all 4 billion integers using only `512 MB`. + +To improve the worst case scenario, which happens when someone just adds the number $2^{32}-1$ (MaxUint32) and nothing else (which uses 512 MB), NSet +is split into 128 `buckets`, where each bucket can represent a maximum of $2^{25}$ (~33 million) values. 
+ +The upper 7 bits of a value are used to select a bucket, then the number is placed in a position in that bucket depending on its value +and excluding the bits used by the bucket. + +With this the worst case (e.g. adding MaxUint32) will only increase usage by **up to** `4 MB`. + +> tldr: NSet will use a max of 512 MB when storing all uint32 (as opposed to 16GB if you used an array/map), but it might reach this max before +> adding all uint32 numbers. diff --git a/nset.go b/nset.go index 2182072..25160f4 100644 --- a/nset.go +++ b/nset.go @@ -2,14 +2,20 @@ package nset import ( "fmt" + "reflect" "strings" ) var _ fmt.Stringer = &NSet[uint8]{} +type BucketType uint8 type StorageType uint64 -const StorageTypeBits = 64 +const ( + BucketCount = 128 + StorageTypeBits = 64 + BucketIndexingBits = 7 +) //IntsIf is limited to uint32 because we can store ALL 4 Billion uint32 numbers //in 256MB with NSet (instead of the normal 16GB for an array of all uint32s). @@ -18,31 +24,43 @@ type IntsIf interface { uint8 | uint16 | uint32 } -type NSet[T IntsIf] struct { +type Bucket struct { Data []StorageType - StorageUnitCount uint64 + StorageUnitCount uint32 +} + +type NSet[T IntsIf] struct { + Buckets [BucketCount]Bucket + //StorageUnitCount the number of uint64 integers that are used to indicate presence of numbers in the set + StorageUnitCount uint32 + shiftAmount T } func (n *NSet[T]) Add(x T) { + bucket := n.GetBucketFromValue(x) unitIndex := n.GetStorageUnitIndex(x) - if unitIndex >= n.Size() { - storageUnitsToAdd := unitIndex - n.Size() + 1 - n.Data = append(n.Data, make([]StorageType, storageUnitsToAdd)...) + if unitIndex >= bucket.StorageUnitCount { + + storageUnitsToAdd := unitIndex - bucket.StorageUnitCount + 1 + bucket.Data = append(bucket.Data, make([]StorageType, storageUnitsToAdd)...) 
+ n.StorageUnitCount += storageUnitsToAdd + bucket.StorageUnitCount += storageUnitsToAdd } - n.Data[unitIndex] |= 1 << (x % StorageTypeBits) + bucket.Data[unitIndex] |= n.GetBitMask(x) } func (n *NSet[T]) Remove(x T) { + b := n.GetBucketFromValue(x) unitIndex := n.GetStorageUnitIndex(x) - if unitIndex >= n.Size() { + if unitIndex >= b.StorageUnitCount { return } - n.Data[unitIndex] ^= 1 << (x % StorageTypeBits) + b.Data[unitIndex] ^= n.GetBitMask(x) } func (n *NSet[T]) Contains(x T) bool { @@ -72,67 +90,77 @@ func (n *NSet[T]) ContainsAll(values ...T) bool { } func (n *NSet[T]) isSet(x T) bool { + b := n.GetBucketFromValue(x) unitIndex := n.GetStorageUnitIndex(x) - return unitIndex < n.Size() && n.Data[unitIndex]&(1<<(x%StorageTypeBits)) != 0 + return unitIndex < b.StorageUnitCount && b.Data[unitIndex]&n.GetBitMask(x) != 0 } -func (n *NSet[T]) GetStorageUnitIndex(x T) uint64 { - return uint64(x) / StorageTypeBits +func (n *NSet[T]) GetBucketFromValue(x T) *Bucket { + return &n.Buckets[n.GetBucketIndex(x)] } -func (n *NSet[T]) GetStorageUnit(x T) StorageType { - return n.Data[x/StorageTypeBits] +func (n *NSet[T]) GetBucketIndex(x T) BucketType { + //Use the top 'n' bits as the index to the bucket + return BucketType(x >> n.shiftAmount) } -//Size returns the number of storage units -func (n *NSet[T]) Size() uint64 { - return n.StorageUnitCount +func (n *NSet[T]) GetStorageUnitIndex(x T) uint32 { + //The top 'n' bits are used to select the bucket so we need to remove them before finding storage + //unit and bit mask. This is done by shifting left by 7 (BucketIndexingBits) which removes the top 'n' bits, + //then shifting right by 7 which puts the bits back to their original place, but now + //the top 'n' bits are zeros. 
+ return uint32( + ((x << BucketIndexingBits) >> BucketIndexingBits) / StorageTypeBits) } -func (n *NSet[T]) ElementCap() uint64 { - return uint64(len(n.Data) * StorageTypeBits) +func (n *NSet[T]) GetBitMask(x T) StorageType { + //Removes top 'n' bits + return 1 << (((x << BucketIndexingBits) >> BucketIndexingBits) % StorageTypeBits) } //String returns a string of the storage as bytes separated by spaces. A comma is between each storage unit func (n *NSet[T]) String() string { b := strings.Builder{} - b.Grow(len(n.Data)*StorageTypeBits + len(n.Data)*2) + b.Grow(int(n.StorageUnitCount*StorageTypeBits + n.StorageUnitCount*2)) - for i := 0; i < len(n.Data); i++ { + for i := 0; i < len(n.Buckets); i++ { - x := n.Data[i] - shiftAmount := StorageTypeBits - 8 - for shiftAmount >= 0 { + bucket := &n.Buckets[i] + for j := 0; j < len(bucket.Data); j++ { - byteToShow := uint8(x >> shiftAmount) - if shiftAmount > 0 { - b.WriteString(fmt.Sprintf("%08b ", byteToShow)) - } else { - b.WriteString(fmt.Sprintf("%08b", byteToShow)) + x := bucket.Data[j] + shiftAmount := StorageTypeBits - 8 + for shiftAmount >= 0 { + + byteToShow := uint8(x >> shiftAmount) + if shiftAmount > 0 { + b.WriteString(fmt.Sprintf("%08b ", byteToShow)) + } else { + b.WriteString(fmt.Sprintf("%08b", byteToShow)) + } + + shiftAmount -= 8 } - - shiftAmount -= 8 + b.WriteString(", ") } - b.WriteString(", ") } return b.String() } -func NewNSet[T IntsIf]() NSet[T] { +func NewNSet[T IntsIf]() *NSet[T] { - return NSet[T]{ - Data: make([]StorageType, 1), - StorageUnitCount: 1, + n := &NSet[T]{ + Buckets: [BucketCount]Bucket{}, + StorageUnitCount: 0, + //We use this to either extract or clear the top 'n' bits, as they are used to select the bucket + shiftAmount: T(reflect.TypeOf(*new(T)).Bits()) - BucketIndexingBits, } -} -//NewNSetWithMax creates a set that already has capacity to hold till at least largestNum without resizing. 
-//Note that this is NOT the count of elements you want to store, instead you input the largest value you want to store. You can store larger values as well. -func NewNSetWithMax[T IntsIf](largestNum T) NSet[T] { - return NSet[T]{ - Data: make([]StorageType, largestNum/StorageTypeBits+1), - StorageUnitCount: uint64(largestNum/StorageTypeBits + 1), + for i := 0; i < len(n.Buckets); i++ { + n.Buckets[i].Data = make([]StorageType, 0) } + + return n } diff --git a/nset_test.go b/nset_test.go index c9489c5..5830525 100755 --- a/nset_test.go +++ b/nset_test.go @@ -1,6 +1,8 @@ package nset_test import ( + "fmt" + "math" "math/rand" "testing" @@ -13,27 +15,61 @@ const ( ) var ( - dump int + dump int + fullRangeNSet *nset.NSet[uint32] ) func TestNSet(t *testing.T) { n := nset.NewNSet[uint32]() - IsEq(t, 1, cap(n.Data)) - n.Add(0) n.Add(1) n.Add(63) + n.Add(math.MaxUint32) - AllTrue(t, n.Contains(0), n.Contains(1), n.Contains(63), !n.Contains(10), !n.Contains(599)) + AllTrue(t, n.Contains(0), n.Contains(1), n.Contains(63), n.Contains(math.MaxUint32), !n.Contains(10), !n.Contains(599)) AllTrue(t, n.ContainsAll(0, 1, 63), !n.ContainsAll(9, 0, 1), !n.ContainsAll(0, 1, 63, 99)) AllTrue(t, n.ContainsAny(0, 1, 63), n.ContainsAny(9, 99, 999, 1), !n.ContainsAny(9, 99, 999)) + IsEq(t, nset.BucketCount-1, n.GetBucketIndex(math.MaxUint32)) + IsEq(t, math.MaxUint32/64/nset.BucketCount, n.GetStorageUnitIndex(math.MaxUint32)) + n.Remove(1) AllTrue(t, n.Contains(0), n.Contains(63), !n.Contains(1)) +} + +func TestNSetFullRange(t *testing.T) { + + if fullRangeNSet == nil { + + fullRangeNSet = nset.NewNSet[uint32]() + println("Adding all uint32 to NSet...") + for i := uint32(0); i < math.MaxUint32; i++ { + fullRangeNSet.Add(i) + if i%1_000_000_000 == 0 { + fmt.Printf("i=%d billion\n", i) + } + } + fullRangeNSet.Add(math.MaxUint32) + } + + n := fullRangeNSet + IsEq(t, 67_108_864, n.StorageUnitCount) + for i := 0; i < len(n.Buckets); i++ { + + b := &n.Buckets[i] + IsEq(t, 524288, 
b.StorageUnitCount) + + for j := 0; j < len(b.Data); j++ { + if b.Data[j] != math.MaxUint64 { + t.Errorf("Error: storage unit is NOT equal to MaxUint64 (i=%d,j=%d)! Expected math.MaxUint64 but got '%08b'\n", + i, + j, + b.Data[j]) + } + } + } - n = nset.NewNSetWithMax[uint32](100) - IsEq(t, 2, cap(n.Data)) } func AllTrue(t *testing.T, values ...bool) bool { @@ -85,6 +121,16 @@ func BenchmarkNSetAddRand(b *testing.B) { } } +func BenchmarkNSetAddRandNoSizeLimit(b *testing.B) { + + n := nset.NewNSet[uint32]() + + rand.Seed(RandSeed) + for i := 0; i < b.N; i++ { + n.Add(rand.Uint32()) + } +} + func BenchmarkMapAddRand(b *testing.B) { hMap := map[uint32]struct{}{} @@ -95,44 +141,6 @@ func BenchmarkMapAddRand(b *testing.B) { } } -func BenchmarkNSetAddPresized(b *testing.B) { - - n := nset.NewNSetWithMax[uint32](maxBenchSize - 1) - - for i := uint32(0); i < uint32(b.N); i++ { - n.Add(i % maxBenchSize) - } -} - -func BenchmarkMapAddPresized(b *testing.B) { - - hMap := make(map[uint32]struct{}, maxBenchSize-1) - - for i := uint32(0); i < uint32(b.N); i++ { - hMap[i%maxBenchSize] = struct{}{} - } -} - -func BenchmarkNSetAddPresizedRand(b *testing.B) { - - n := nset.NewNSetWithMax[uint32](maxBenchSize - 1) - - rand.Seed(RandSeed) - for i := 0; i < b.N; i++ { - n.Add(rand.Uint32() % maxBenchSize) - } -} - -func BenchmarkMapAddPresizedRand(b *testing.B) { - - hMap := make(map[uint32]struct{}, maxBenchSize-1) - - rand.Seed(RandSeed) - for i := 0; i < b.N; i++ { - hMap[rand.Uint32()%maxBenchSize] = struct{}{} - } -} - func BenchmarkNSetContains(b *testing.B) { //Init @@ -202,6 +210,39 @@ func BenchmarkNSetContainsRand(b *testing.B) { dump = found } +func BenchmarkNSetContainsRandFullRange(b *testing.B) { + + //Init + if fullRangeNSet == nil { + + b.StopTimer() + + fullRangeNSet = nset.NewNSet[uint32]() + println("Preparing full range NSet...") + for i := uint32(0); i < math.MaxUint32; i++ { + fullRangeNSet.Add(i) + } + fullRangeNSet.Add(math.MaxUint32) + + b.StartTimer() + } + + 
n := fullRangeNSet + + //Work + found := 0 + rand.Seed(RandSeed) + for i := 0; i < b.N; i++ { + + randVal := rand.Uint32() + if n.Contains(randVal) { + found++ + } + } + + dump = found +} + func BenchmarkMapContainsRand(b *testing.B) { //Init