Mirror of https://github.com/bloeys/nset.git (synced 2025-12-29 06:28:19 +00:00)

Commit: Bucketed NSet
Binary image files changed (not shown):
- image replaced: before 90 KiB
- .res/bench-100-million.png (new executable file): 126 KiB
- image updated: before 88 KiB, after 124 KiB
README.md (54 lines changed)
@@ -13,7 +13,7 @@ get intersections.
- [Usage](#usage)
- [Benchmarks](#benchmarks)
- [How NSet works](#how-nset-works)
- [A note on memory usage](#a-note-on-memory-usage)
- [Memory characteristics](#memory-characteristics)

## When to use NSet

@@ -59,8 +59,8 @@ func ExistsInArray(myArray []int, item int) bool {
To install run `go get github.com/bloeys/nset`

Then usage is very simple:

```go
mySet := nset.NewNSet[uint32]()

mySet.Add(0)
@@ -78,24 +78,33 @@ mySet.Remove(4)

## Benchmarks

NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `1.6x to 64x` depending on the operation and data size.
NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` depending on the operation and data size.

Benchmark with 100 elements:
In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test certain use cases.
To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster.

Adding all uint32 values to the map would eat tons of RAM, so we limit both NSet and the map to 10 million values (0->10M). But because
NSet is optimized for this, there are two additional benchmarks that are only for NSet: `NSetAddRandNoSizeLimit` and `NSetContainsRandFullRange`.

NSetAddRandNoSizeLimit removes the limit on the values, so NSet will potentially get tens or hundreds of millions of random values.
Even with no limit, NSet outperforms the map that's limited to 10M by ~200%.

NSetContainsRandFullRange adds all 4 billion uint32 values to NSet and then randomly checks whether they exist. This is by far
the most extreme test, but it is still faster than access on a map with 400x fewer values. A less loaded NSet performs better,
but the difference between the best-case and worst-case NSet is minor and doesn't increase much as the storage grows.

Benchmark with 100 elements (ignore NSetAddRandNoSizeLimit and NSetContainsRandFullRange):



Benchmark with 10,000,000 elements:
Benchmark with 100,000,000 elements:




As can be seen from the benchmarks, NSet has almost no change in its performance even with 10 million elements, while the
hash map slows down a lot as the size grows. NSet practically doesn't allocate at all. But it should be noted that
allocation can happen when adding a number bigger than all previously entered numbers.
As can be seen from the benchmarks, NSet shows relatively little change in its performance even with 100 million elements, while the
hash map slows down a lot as the size grows.

Benchmarks that have 'Rand' in them mean that access patterns are randomized which can cause cache invalidation.
To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster.

Benchmarks that have `Presized` in them means that the data structure was fully allocated before usage, like:
NSet also allocates less, and in fact will only allocate when adding a number bigger than all previously entered numbers.

```go
//This map already has space for ~100 elements and so doesn't need to resize, which is costly
@@ -113,10 +122,21 @@ These bit flags are stored as an array of uint64, where the `0` uses the first b
Now assume we have added the numbers `1`, `2` and `3`, then we add the number `65`. The first 3 numbers fit in the first uint64 integer of the array, but `65` doesn't,
so at this point the array is expanded until it has 65 bits or more, so 1 more integer is added and the second bit of the second integer is set.

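As a standalone illustration of that indexing (a minimal sketch, not the library code itself): value `x` lives in storage unit `x / 64`, at bit `x % 64`, and the unit array grows on demand.

```go
package main

import "fmt"

// Minimal sketch of the bit-flag indexing described above (not the actual
// library code): value x lives in uint64 number x/64, at bit x%64.
func main() {
	data := make([]uint64, 1) // start with one 64-bit storage unit

	add := func(x uint32) {
		unit := x / 64
		// Grow until the target unit exists (this is what happens when 65 is added).
		for uint32(len(data)) <= unit {
			data = append(data, 0)
		}
		data[unit] |= 1 << (x % 64)
	}

	for _, x := range []uint32{1, 2, 3, 65} {
		add(x)
	}

	fmt.Printf("unit 0: %064b\n", data[0]) // bits 1, 2 and 3 set
	fmt.Printf("unit 1: %064b\n", data[1]) // bit 1 set, because 65 % 64 == 1
}
```
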
### A note on memory usage
### Memory characteristics

This setup gives us very high add/get/remove efficiency, but in some cases can produce worse memory usage. For example, if you make an empty set
then add `5000` NSet will be forced to create 78 integers and then set one bit on the last integer. So if you have a few huge numbers (a number in the millions or billions) then you will be using more memory than a hash map or an array.
then add the number `5000`, NSet will be forced to create 78 integers and then set one bit on the last integer. So if you have a few huge numbers (a number in the millions or billions) then you will be using more memory than a hash map or an array.

But if your numbers are smaller and/or closer together then you will have **a lot better** memory efficiency. An array storing all
4 billion uint32 integers will use 16GBs of memory, while NSet with all 4 billion will only use 256MB.
But if your numbers are smaller and/or closer together then you will have **a lot better** memory efficiency. A normal array storing all
4 billion uint32 integers will use `16 GB` of memory, while NSet can store all 4 billion integers using only `512 MB`.

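As a quick sanity check of those two figures (simple arithmetic, not library code): an array stores 4 bytes per value, while NSet stores 1 bit per possible value.

```go
package main

import "fmt"

// Quick check of the figures above: a plain array of every uint32 value stores
// 2^32 values at 4 bytes each, while NSet stores one bit per possible value.
func main() {
	const numValues = 1 << 32 // all possible uint32 values

	arrayBytes := uint64(numValues) * 4 // 4 bytes per uint32
	nsetBytes := uint64(numValues) / 8  // 1 bit per value

	fmt.Printf("array: %d GiB\n", arrayBytes>>30) // 16
	fmt.Printf("nset:  %d MiB\n", nsetBytes>>20)  // 512
}
```
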
To improve the worst-case scenario, which happens when someone just adds the number $2^{32}-1$ and nothing else (which uses 512 MB), NSet
is split into 128 `buckets`, where each bucket can represent a maximum of $2^{25}$ (~33 million) values.

The upper 7 bits of a value are used to select a bucket, then the number is placed at a position in that bucket based on its value,
excluding the bits used for bucket selection.

With this the worst case (e.g. adding MaxUint32) will only increase usage by **up to** `4 MB` (one fully allocated bucket, i.e. 512 MB / 128).

> tldr: NSet will use a max of 512 MB when storing all uint32 (as opposed to 16 GB if you used an array/map), but it might reach this max before
> adding all uint32 numbers.

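As a standalone sketch of that bucket/position split for uint32 (the same arithmetic appears in `GetBucketIndex`, `GetStorageUnitIndex` and `GetBitMask` in the nset.go diff below):

```go
package main

import (
	"fmt"
	"math"
)

// Sketch of the bucket/position split for a uint32 value: the top 7 bits pick
// one of 128 buckets, and the remaining 25 bits pick a uint64 storage unit and
// a bit inside that bucket.
func main() {
	const bucketIndexingBits = 7
	x := uint32(math.MaxUint32)

	bucketIdx := x >> (32 - bucketIndexingBits)             // top 7 bits -> bucket 127
	rest := (x << bucketIndexingBits) >> bucketIndexingBits // clear the top 7 bits
	unitIdx := rest / 64                                    // which uint64 inside the bucket
	bit := rest % 64                                        // which bit inside that uint64

	fmt.Println(bucketIdx, unitIdx, bit) // 127 524287 63
}
```
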
nset.go (114 lines changed)
@@ -2,14 +2,20 @@ package nset

import (
	"fmt"
	"reflect"
	"strings"
)

var _ fmt.Stringer = &NSet[uint8]{}

type BucketType uint8
type StorageType uint64

const StorageTypeBits = 64
const (
	BucketCount        = 128
	StorageTypeBits    = 64
	BucketIndexingBits = 7
)

//IntsIf is limited to uint32 because we can store ALL 4 Billion uint32 numbers
//in 256MB with NSet (instead of the normal 16GB for an array of all uint32s).
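
The new constants are tied together; a small illustrative snippet (assumed arithmetic, not part of nset.go):

```go
package main

import "fmt"

// How the new constants relate for uint32 (illustrative arithmetic, not part
// of nset.go): 7 indexing bits give 2^7 buckets, and the remaining 25 bits
// give 2^25 possible values per bucket.
func main() {
	const bucketIndexingBits = 7

	fmt.Println(1 << bucketIndexingBits)        // 128, i.e. BucketCount
	fmt.Println(1 << (32 - bucketIndexingBits)) // 33554432 (~33 million) values per bucket
}
```
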
@@ -18,31 +24,43 @@ type IntsIf interface {
	uint8 | uint16 | uint32
}

type NSet[T IntsIf] struct {
type Bucket struct {
	Data []StorageType
	StorageUnitCount uint64
	StorageUnitCount uint32
}

type NSet[T IntsIf] struct {
	Buckets [BucketCount]Bucket
	//StorageUnitCount is the number of uint64 integers that are used to indicate the presence of numbers in the set
	StorageUnitCount uint32
	shiftAmount T
}

func (n *NSet[T]) Add(x T) {

	bucket := n.GetBucketFromValue(x)
	unitIndex := n.GetStorageUnitIndex(x)
	if unitIndex >= n.Size() {
		storageUnitsToAdd := unitIndex - n.Size() + 1
		n.Data = append(n.Data, make([]StorageType, storageUnitsToAdd)...)
	if unitIndex >= bucket.StorageUnitCount {

		storageUnitsToAdd := unitIndex - bucket.StorageUnitCount + 1
		bucket.Data = append(bucket.Data, make([]StorageType, storageUnitsToAdd)...)

		n.StorageUnitCount += storageUnitsToAdd
		bucket.StorageUnitCount += storageUnitsToAdd
	}

	n.Data[unitIndex] |= 1 << (x % StorageTypeBits)
	bucket.Data[unitIndex] |= n.GetBitMask(x)
}

func (n *NSet[T]) Remove(x T) {

	b := n.GetBucketFromValue(x)
	unitIndex := n.GetStorageUnitIndex(x)
	if unitIndex >= n.Size() {
	if unitIndex >= b.StorageUnitCount {
		return
	}

	n.Data[unitIndex] ^= 1 << (x % StorageTypeBits)
	b.Data[unitIndex] ^= n.GetBitMask(x)
}

func (n *NSet[T]) Contains(x T) bool {
@@ -72,67 +90,77 @@ func (n *NSet[T]) ContainsAll(values ...T) bool {
}

func (n *NSet[T]) isSet(x T) bool {
	b := n.GetBucketFromValue(x)
	unitIndex := n.GetStorageUnitIndex(x)
	return unitIndex < n.Size() && n.Data[unitIndex]&(1<<(x%StorageTypeBits)) != 0
	return unitIndex < b.StorageUnitCount && b.Data[unitIndex]&n.GetBitMask(x) != 0
}

func (n *NSet[T]) GetStorageUnitIndex(x T) uint64 {
	return uint64(x) / StorageTypeBits
func (n *NSet[T]) GetBucketFromValue(x T) *Bucket {
	return &n.Buckets[n.GetBucketIndex(x)]
}

func (n *NSet[T]) GetStorageUnit(x T) StorageType {
	return n.Data[x/StorageTypeBits]
func (n *NSet[T]) GetBucketIndex(x T) BucketType {
	//Use the top 'n' bits as the index to the bucket
	return BucketType(x >> n.shiftAmount)
}

//Size returns the number of storage units
func (n *NSet[T]) Size() uint64 {
	return n.StorageUnitCount
func (n *NSet[T]) GetStorageUnitIndex(x T) uint32 {
	//The top 'n' bits are used to select the bucket, so we need to remove them before finding the storage
	//unit and bit mask. This is done by shifting left by BucketIndexingBits, which removes the top 'n' bits,
	//then shifting right by the same amount, which puts the bits back in their original place, but now
	//the top 'n' bits are zeros.
	return uint32(
		((x << BucketIndexingBits) >> BucketIndexingBits) / StorageTypeBits)
}

func (n *NSet[T]) ElementCap() uint64 {
	return uint64(len(n.Data) * StorageTypeBits)
func (n *NSet[T]) GetBitMask(x T) StorageType {
	//Removes the top 'n' bits
	return 1 << (((x << BucketIndexingBits) >> BucketIndexingBits) % StorageTypeBits)
}

//String returns a string of the storage as bytes separated by spaces. A comma is between each storage unit
func (n *NSet[T]) String() string {

	b := strings.Builder{}
	b.Grow(len(n.Data)*StorageTypeBits + len(n.Data)*2)
	b.Grow(int(n.StorageUnitCount*StorageTypeBits + n.StorageUnitCount*2))

	for i := 0; i < len(n.Data); i++ {
	for i := 0; i < len(n.Buckets); i++ {

		x := n.Data[i]
		shiftAmount := StorageTypeBits - 8
		for shiftAmount >= 0 {
		bucket := &n.Buckets[i]
		for j := 0; j < len(bucket.Data); j++ {

			byteToShow := uint8(x >> shiftAmount)
			if shiftAmount > 0 {
				b.WriteString(fmt.Sprintf("%08b ", byteToShow))
			} else {
				b.WriteString(fmt.Sprintf("%08b", byteToShow))
			x := bucket.Data[j]
			shiftAmount := StorageTypeBits - 8
			for shiftAmount >= 0 {

				byteToShow := uint8(x >> shiftAmount)
				if shiftAmount > 0 {
					b.WriteString(fmt.Sprintf("%08b ", byteToShow))
				} else {
					b.WriteString(fmt.Sprintf("%08b", byteToShow))
				}

				shiftAmount -= 8
			}

			shiftAmount -= 8
			b.WriteString(", ")
		}
		b.WriteString(", ")
	}

	return b.String()
}

func NewNSet[T IntsIf]() NSet[T] {
func NewNSet[T IntsIf]() *NSet[T] {

	return NSet[T]{
		Data:             make([]StorageType, 1),
		StorageUnitCount: 1,
	n := &NSet[T]{
		Buckets:          [BucketCount]Bucket{},
		StorageUnitCount: 0,
		//We use this to either extract or clear the top 'n' bits, as they are used to select the bucket
		shiftAmount: T(reflect.TypeOf(*new(T)).Bits()) - BucketIndexingBits,
	}
}

//NewNSetWithMax creates a set that already has capacity to hold till at least largestNum without resizing.
//Note that this is NOT the count of elements you want to store, instead you input the largest value you want to store. You can store larger values as well.
func NewNSetWithMax[T IntsIf](largestNum T) NSet[T] {
	return NSet[T]{
		Data:             make([]StorageType, largestNum/StorageTypeBits+1),
		StorageUnitCount: uint64(largestNum/StorageTypeBits + 1),
	for i := 0; i < len(n.Buckets); i++ {
		n.Buckets[i].Data = make([]StorageType, 0)
	}

	return n
}

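For reference, the `shiftAmount` computed in the new `NewNSet` works out to the bit width of `T` minus the 7 bucket-indexing bits; a small illustrative snippet using the same `reflect` call:

```go
package main

import (
	"fmt"
	"reflect"
)

// Illustration of the shiftAmount set in NewNSet: the bit width of the value
// type minus the 7 bits used for bucket indexing.
func main() {
	const bucketIndexingBits = 7

	fmt.Println(reflect.TypeOf(uint8(0)).Bits() - bucketIndexingBits)  // 1
	fmt.Println(reflect.TypeOf(uint16(0)).Bits() - bucketIndexingBits) // 9
	fmt.Println(reflect.TypeOf(uint32(0)).Bits() - bucketIndexingBits) // 25
}
```
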
nset_test.go (129 lines changed)
@@ -1,6 +1,8 @@
package nset_test

import (
	"fmt"
	"math"
	"math/rand"
	"testing"

@@ -13,27 +15,61 @@ const (
)

var (
	dump int
	dump          int
	fullRangeNSet *nset.NSet[uint32]
)

func TestNSet(t *testing.T) {

	n := nset.NewNSet[uint32]()
	IsEq(t, 1, cap(n.Data))

	n.Add(0)
	n.Add(1)
	n.Add(63)
	n.Add(math.MaxUint32)

	AllTrue(t, n.Contains(0), n.Contains(1), n.Contains(63), !n.Contains(10), !n.Contains(599))
	AllTrue(t, n.Contains(0), n.Contains(1), n.Contains(63), n.Contains(math.MaxUint32), !n.Contains(10), !n.Contains(599))
	AllTrue(t, n.ContainsAll(0, 1, 63), !n.ContainsAll(9, 0, 1), !n.ContainsAll(0, 1, 63, 99))
	AllTrue(t, n.ContainsAny(0, 1, 63), n.ContainsAny(9, 99, 999, 1), !n.ContainsAny(9, 99, 999))

	IsEq(t, nset.BucketCount-1, n.GetBucketIndex(math.MaxUint32))
	IsEq(t, math.MaxUint32/64/nset.BucketCount, n.GetStorageUnitIndex(math.MaxUint32))

	n.Remove(1)
	AllTrue(t, n.Contains(0), n.Contains(63), !n.Contains(1))
}

func TestNSetFullRange(t *testing.T) {

	if fullRangeNSet == nil {

		fullRangeNSet = nset.NewNSet[uint32]()
		println("Adding all uint32 to NSet...")
		for i := uint32(0); i < math.MaxUint32; i++ {
			fullRangeNSet.Add(i)
			if i%1_000_000_000 == 0 {
				fmt.Printf("i=%d billion\n", i)
			}
		}
		fullRangeNSet.Add(math.MaxUint32)
	}

	n := fullRangeNSet
	IsEq(t, 67_108_864, n.StorageUnitCount)
	for i := 0; i < len(n.Buckets); i++ {

		b := &n.Buckets[i]
		IsEq(t, 524288, b.StorageUnitCount)

		for j := 0; j < len(b.Data); j++ {
			if b.Data[j] != math.MaxUint64 {
				t.Errorf("Error: storage unit is NOT equal to MaxUint64 (i=%d,j=%d)! Expected math.MaxUint64 but got '%08b'\n",
					i,
					j,
					b.Data[j])
			}
		}
	}

	n = nset.NewNSetWithMax[uint32](100)
	IsEq(t, 2, cap(n.Data))
}

func AllTrue(t *testing.T, values ...bool) bool {
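
The expected values `67_108_864` and `524288` asserted in `TestNSetFullRange` above follow directly from the layout; a quick arithmetic check (illustrative, not part of the test file):

```go
package main

import "fmt"

// Where the expected values in TestNSetFullRange come from: all 2^32 possible
// uint32 values, one bit each, packed into uint64 storage units and spread
// over 128 buckets.
func main() {
	totalUnits := uint64(1<<32) / 64 // 67,108,864 storage units in total
	perBucket := totalUnits / 128    // 524,288 storage units per bucket

	fmt.Println(totalUnits, perBucket)
}
```
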
@@ -85,6 +121,16 @@ func BenchmarkNSetAddRand(b *testing.B) {
	}
}

func BenchmarkNSetAddRandNoSizeLimit(b *testing.B) {

	n := nset.NewNSet[uint32]()

	rand.Seed(RandSeed)
	for i := 0; i < b.N; i++ {
		n.Add(rand.Uint32())
	}
}

func BenchmarkMapAddRand(b *testing.B) {

	hMap := map[uint32]struct{}{}
@@ -95,44 +141,6 @@ func BenchmarkMapAddRand(b *testing.B) {
	}
}

func BenchmarkNSetAddPresized(b *testing.B) {

	n := nset.NewNSetWithMax[uint32](maxBenchSize - 1)

	for i := uint32(0); i < uint32(b.N); i++ {
		n.Add(i % maxBenchSize)
	}
}

func BenchmarkMapAddPresized(b *testing.B) {

	hMap := make(map[uint32]struct{}, maxBenchSize-1)

	for i := uint32(0); i < uint32(b.N); i++ {
		hMap[i%maxBenchSize] = struct{}{}
	}
}

func BenchmarkNSetAddPresizedRand(b *testing.B) {

	n := nset.NewNSetWithMax[uint32](maxBenchSize - 1)

	rand.Seed(RandSeed)
	for i := 0; i < b.N; i++ {
		n.Add(rand.Uint32() % maxBenchSize)
	}
}

func BenchmarkMapAddPresizedRand(b *testing.B) {

	hMap := make(map[uint32]struct{}, maxBenchSize-1)

	rand.Seed(RandSeed)
	for i := 0; i < b.N; i++ {
		hMap[rand.Uint32()%maxBenchSize] = struct{}{}
	}
}

func BenchmarkNSetContains(b *testing.B) {

	//Init
@@ -202,6 +210,39 @@ func BenchmarkNSetContainsRand(b *testing.B) {
	dump = found
}

func BenchmarkNSetContainsRandFullRange(b *testing.B) {

	//Init
	if fullRangeNSet == nil {

		b.StopTimer()

		fullRangeNSet = nset.NewNSet[uint32]()
		println("Preparing full range NSet...")
		for i := uint32(0); i < math.MaxUint32; i++ {
			fullRangeNSet.Add(i)
		}
		fullRangeNSet.Add(math.MaxUint32)

		b.StartTimer()
	}

	n := fullRangeNSet

	//Work
	found := 0
	rand.Seed(RandSeed)
	for i := 0; i < b.N; i++ {

		randVal := rand.Uint32()
		if n.Contains(randVal) {
			found++
		}
	}

	dump = found
}

func BenchmarkMapContainsRand(b *testing.B) {

	//Init