13 Commits
v1.10.0 ... dev

Author SHA1 Message Date
ab047eb986 Fix NSet.SetBits in GetDifference 2024-09-09 07:55:06 +04:00
17b8c38162 Add GetDifference method 2024-09-09 07:47:36 +04:00
95b3af20e8 Merge pull request #3 from bloeys/bit-counter
Implement NSet.Len() that returns number of set bits + Bug fixes + Small optimizations
2024-07-22 00:43:38 +04:00
ecf82c0dfb Replace deprecated rand.Seed 2024-07-22 00:38:55 +04:00
f5a31814c7 Add SetBits test for Full uint32 set 2024-07-22 00:21:04 +04:00
a666d180ec Bug in IsEq 2024-07-21 23:40:06 +04:00
932c843282 Keep count of set bits to offer .Len+optimizations 2024-07-21 23:01:03 +04:00
567e541e64 Improve readme 2023-01-02 03:42:42 +04:00
f47e5b3300 Better isEqual bench 2022-06-13 01:36:04 +04:00
d4b9c6d3c7 Intersection benchmarks 2022-06-11 08:13:34 +04:00
c55e5b0f01 Docs and bench 2022-06-11 07:59:03 +04:00
5a6e13b9f1 Optimize GetIntersections+GetAllElements+bench+readme 2022-06-11 07:38:50 +04:00
f7fe64f386 IsEq benchmarks 2022-06-11 05:00:40 +04:00
7 changed files with 800 additions and 60 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
.res/bench-union-10-million.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

View File

@ -6,12 +6,19 @@ By 'set' we mean something like a hash map, but instead of key/value pairs there
You can do the normal operations of add, check if item exists, and delete, but you can also do things like union sets and
get intersections.
NSet is generally faster than the built-in Go `map` by ~50% to ~3900%, and even more in certain cases. Not only is NSet faster,
but it can store *all* 4 billion uint32 values using only `512MB`, while a map needs `16GB`, meaning NSet uses almost `97%` *less* memory.
**Contents**:
- [NSet](#nset)
- [When to use NSet](#when-to-use-nset)
- [Usage](#usage)
- [Benchmarks](#benchmarks)
- [Equality](#equality)
- [Extracting elements](#extracting-elements)
- [Intersection](#intersection)
- [Union](#union)
- [How NSet works](#how-nset-works)
- [Memory characteristics](#memory-characteristics)
@ -75,10 +82,12 @@ if mySet.Contains(5) {
mySet.Remove(4)
// Intersections
myOtherSet := nset.NewNSet[uint32]()
myOtherSet.AddMany(0, 1, 2, 4, 14)
println("Are the two sets equal:", myOtherSet.IsEq(mySet)) //False
// Intersections
println("There is intersection:", myOtherSet.HasIntersection(mySet)) //True
intersection := mySet.GetIntersection(myOtherSet)
@ -95,9 +104,9 @@ println(myOtherSet.ContainsAll(0, 1, 2, 4, 14, 256, 300)) //True
## Benchmarks
NSet is faster than the built-in Go hash map in all operations (add, check, delete) by `~50% to ~3900%` depending on the operation and data size.
NSet is generally faster than the built-in Go hash map by `~50% to ~3900%` (and even `8130x` when checking equality), depending on the operation and data size.
In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test certain use cases.
In the benchmarks below, ones that have 'Rand' in the name mean that access patterns are randomized to test worst case scenarios.
To make sure the test is fair the seed is the same for both Go Map and NSet. Here both suffer slowdowns but NSet remains faster.
Adding all uint32 to the map would eat tons of RAM, so we limit both NSet and Map to 10 Million values (0->10M). But because
@ -130,21 +139,58 @@ myMap := make(map[uint16], 100)
Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.
### Equality
Another case where NSet really shines is checking if two sets are equal.
Below is a benchmark that checks whether two NSets/maps with 10 Million elements in each are equal (They are equal, which is the worst case).
Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`. With a few large numbers NSet suffers a bit, but its performance then holds steady or even improves with more elements, while map degrades greatly.
![Benchmarking IsEq with 10 and 100 million elements](./.res/bench-is-equal-10-100-mil.png)
Next we have `GetAllElements`, which simply returns an array of all the elements of NSet/Map (note this is dangerous in NSet. See [Memory characteristics](#memory-characteristics)).
![Benchmarking GetAllElements with 10,000,000 elements](.res/bench-getAllElements-10-million.png)
### Extracting elements
With `GetAllElements` NSet is faster when its elements are closer together value wise (or if you have many numbers), but gets a lot slower when
dealing with a few random numbers with a big difference between them. This is because you might get two numbers like `1` and `1_000_000` which NSet
will store in two far away places with a lot of nothing in between. In a map these will be stored close together.
With 10M ordered elements NSet takes `~31ms` and map `~97ms`, but with a random 10M elements NSet takes `~525ms`
while map takes `~95ms`. Map scales with the amount of elements, while NSet is affected by number distribution as well.
Similar to getting elements is intersection:
### Intersection
![Benchmarking GetIntersection with 10,000,000 elements](./.res/bench-getIntersection-10-million.png)
Here NSet is always many times faster, but the effect of number distribution on NSet's performance is clear, while map's performance
only scales with number of elements.
### Union
![Benchmarking GetUnion with 10,000,000 elements](./.res/bench-union-10-million.png)
With unions NSet is a clear winner in all cases: for 10M elements NSet takes between `~0.37ms` and `~180ms`, while
map takes `~1959ms`, which is at least ~10x slower.
## How NSet works
NSet works by using a single bit to indicate whether a number exists or not.
These bit flags are stored as an array of uint64, where the `0` uses the first bit of the first uint64,
`1` uses the second bit of the first uint64 in the array and so on. So each uint64 represents 64 numbers.
NSet works by using a single bit to indicate whether a number exists or not, and these bit flags are stored as an array of uint64.
For example, the number `0` will use the first bit of the first uint64,
`1` uses the second bit of the first uint64 in the array and so on. As such, each uint64 represents 64 numbers.
Now assume we have added the numbers `1`, `2` and `3`, then we add number `65`. The first 3 numbers fit in the first uint64 integer of the array, but `65` doesn't
so at this point the array is expanded until we have enough 65 bits or more, so 1 more integer is added and the second bit of the second integer is set.
so at this point the array is expanded until it holds at least 66 bits (one bit for each number from `0` to `65`), so 1 more integer is added and the second bit of the second integer is set.
### Memory characteristics
This setup gives us very high add/get/remove efficiency, but in some cases can produce worse memory usage. For example, if you make an empty set
then add the number `5000`, NSet will be forced to create 79 integers (indices 0 to 78) and then set one bit on the last integer. So if you have a few huge numbers (a number in the millions or billions) then you will be using more memory than a hash map or an array.
But if your numbers are smaller and/or closer together then you will have **a lot better** memory efficiency. A normal array storing all
But if your numbers are smaller and/or closer together then you will have *much better* memory efficiency. A normal array storing all
4 billion uint32 integers will use `16 GB` of memory, while NSet can store all 4 billion integers using only `512 MB`.
To improve the worst case scenario, which happens when someone just adds the number $2^{32}-1$ (the largest uint32) and nothing else (which uses 512 MB), NSet
@ -153,7 +199,7 @@ is split into 128 `buckets`, where each bucket can represent a maximum of $2^{25
The upper 7 bits of a value are used to select a bucket, then the number is placed in a position in that bucket depending on its value
and excluding the bits used by the bucket.
With this the worst case (e.g. adding MaxUint32) will only increase usage by **up to** `16 MB`.
With this the worst case (e.g. adding MaxUint32) will only increase usage by *up to* `4 MB` (512 MB split evenly across 128 buckets).
> tldr: NSet will use a max of 512 MB when storing all uint32 (as opposed to 16GB if you used an array/map), but it might reach this max before
> adding all uint32 numbers.

222
nset.go
View File

@ -1,9 +1,12 @@
package nset
import (
"bytes"
"fmt"
"math/bits"
"reflect"
"strings"
"unsafe"
)
var _ fmt.Stringer = &NSet[uint8]{}
@ -13,12 +16,16 @@ type StorageType uint64
const (
	// BucketCount is the number of buckets each set is split into.
	// It equals 2^BucketIndexingBits.
	BucketCount = 128
	// StorageTypeBits is the number of bits used per storage unit in each bucket.
	//
	// NOTE: this must be a power of 2, otherwise FastModPower2 will break and must be replaced by a normal x%y
	// NOTE: GetStorageUnitIndex must be adjusted if this value is changed
	StorageTypeBits = 64
	// BucketIndexingBits is the number of top bits of a value that are used to select its bucket.
	BucketIndexingBits = 7
)
// IntsIf is limited to uint32 because we can store ALL 4 Billion uint32 numbers
//in 256MB with NSet (instead of the normal 16GB for an array of all uint32s).
// in 512MB with NSet (instead of the normal 16GB for an array of all uint32s).
// But if we allow uint64 (or int, since int can be 64-bit) users can easily put a big 64-bit number and use more RAM than maybe Google and crash.
type IntsIf interface {
uint8 | uint16 | uint32
@ -34,6 +41,7 @@ type NSet[T IntsIf] struct {
//StorageUnitCount the number of uint64 integers that are used to indicate presence of numbers in the set
StorageUnitCount uint32
shiftAmount T
SetBits uint64
}
func (n *NSet[T]) Add(x T) {
@ -50,7 +58,11 @@ func (n *NSet[T]) Add(x T) {
bucket.StorageUnitCount += storageUnitsToAdd
}
bucket.Data[unitIndex] |= n.GetBitMask(x)
oldStorage := bucket.Data[unitIndex]
newStorage := oldStorage | n.GetBitMask(x)
bucket.Data[unitIndex] = newStorage
n.SetBits += uint64(bits.OnesCount64(uint64(^oldStorage) & uint64(newStorage)))
}
func (n *NSet[T]) AddMany(values ...T) {
@ -70,9 +82,12 @@ func (n *NSet[T]) AddMany(values ...T) {
bucket.StorageUnitCount += storageUnitsToAdd
}
bucket.Data[unitIndex] |= n.GetBitMask(x)
}
oldStorage := bucket.Data[unitIndex]
newStorage := oldStorage | n.GetBitMask(x)
bucket.Data[unitIndex] = newStorage
n.SetBits += uint64(bits.OnesCount64(uint64(^oldStorage) & uint64(newStorage)))
}
}
func (n *NSet[T]) Remove(x T) {
@ -83,7 +98,11 @@ func (n *NSet[T]) Remove(x T) {
return
}
b.Data[unitIndex] ^= n.GetBitMask(x)
oldStorage := b.Data[unitIndex]
newStorage := oldStorage &^ n.GetBitMask(x)
b.Data[unitIndex] = newStorage
n.SetBits -= uint64(bits.OnesCount64(uint64(oldStorage) & uint64(^newStorage)))
}
func (n *NSet[T]) Contains(x T) bool {
@ -128,18 +147,26 @@ func (n *NSet[T]) GetBucketIndex(x T) BucketType {
}
// GetStorageUnitIndex returns the index, inside the bucket that x belongs to,
// of the storage unit holding the bit for x.
func (n *NSet[T]) GetStorageUnitIndex(x T) uint32 {
	// The top BucketIndexingBits bits only select the bucket, so they are cleared
	// before locating the storage unit: shifting left by BucketIndexingBits drops
	// them, and shifting right by the same amount puts the remaining bits back in
	// their original place with zeros on top.
	//
	// StorageTypeBits is 64 (a power of 2), so dividing by it is replaced with a
	// right shift by 6. The result equals:
	//   uint32(((x << BucketIndexingBits) >> BucketIndexingBits) / StorageTypeBits)
	// The shift amount must be updated if StorageTypeBits ever changes.
	return uint32(((x << BucketIndexingBits) >> BucketIndexingBits) >> 6)
}
// GetBitMask returns a storage unit value in which only the bit representing x is set.
func (n *NSet[T]) GetBitMask(x T) StorageType {
	// Clear the top BucketIndexingBits bits (they only select the bucket), then
	// reduce modulo StorageTypeBits to get the bit position inside the storage unit.
	// FastModPower2 is valid here because StorageTypeBits is a power of 2.
	return 1 << FastModPower2(((x<<BucketIndexingBits)>>BucketIndexingBits), StorageTypeBits)
}
// Union does n1=Union(n1, n2), so the current set will be updated
// such that its a union of its old value and the passed set
func (n *NSet[T]) Union(otherSet *NSet[T]) {
for i := 0; i < BucketCount; i++ {
@ -157,11 +184,18 @@ func (n *NSet[T]) Union(otherSet *NSet[T]) {
}
for j := 0; j < len(b1.Data) && j < len(b2.Data); j++ {
b1.Data[j] |= b2.Data[j]
oldStorage := b1.Data[j]
newStorage := oldStorage | b2.Data[j]
b1.Data[j] = newStorage
n.SetBits += uint64(bits.OnesCount64(uint64(^oldStorage) & uint64(newStorage)))
}
}
}
// GetIntersection returns a new set that's the intersection between
// this set and the passed set
func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] {
outSet := NewNSet[T]()
@ -171,34 +205,151 @@ func (n *NSet[T]) GetIntersection(otherSet *NSet[T]) *NSet[T] {
b1 := &n.Buckets[i]
b2 := &otherSet.Buckets[i]
//bucketIndexBits are the bits removed from the original value to use for bucket indexing.
//We will use this to restore the original value 'x' once an intersection is detected
bucketIndexBits := T(i << n.shiftAmount)
for j := 0; j < len(b1.Data) && j < len(b2.Data); j++ {
newB := &outSet.Buckets[i]
for j := uint32(0); j < b1.StorageUnitCount && j < b2.StorageUnitCount; j++ {
if b1.Data[j]&b2.Data[j] == 0 {
continue
}
mask := StorageType(1 << 0) //This will be used to check set bits. Numbers will be reconstructed only for set bits
commonBits := b1.Data[j] & b2.Data[j] //Bits that are set on both storage units (aka the intersection)
firstStorageUnitValue := T(j*StorageTypeBits) | bucketIndexBits //StorageUnitIndex = noBucketBitsX / StorageTypeBits. So: noBucketBitsX = StorageUnitIndex * StorageTypeBits; Then: x = noBucketBitsX | bucketIndexBits
for k := T(0); k < StorageTypeBits; k++ {
if newB.StorageUnitCount < j+1 {
storageUnitsToAdd := j + 1 - newB.StorageUnitCount
newB.Data = append(newB.Data, make([]StorageType, storageUnitsToAdd)...)
if commonBits&mask > 0 {
outSet.Add(firstStorageUnitValue + k)
// fmt.Printf("Bucket=%d, Storage unit=%d, bitPos=%d, value=%d\n", i, j, k, firstStorageUnitValue+k)
}
mask <<= 1
newB.StorageUnitCount += storageUnitsToAdd
outSet.StorageUnitCount += storageUnitsToAdd
}
newStorage := b1.Data[j] & b2.Data[j]
newB.Data[j] = newStorage
outSet.SetBits += uint64(bits.OnesCount64(uint64(newStorage)))
}
}
return outSet
}
// GetDifference builds a new set holding every element of this set
// that is absent from the passed set. Neither input set is modified.
//
// For example, if s1=(1,2,3,4,5) and s2=(1,3,4), the output is
// s3=Diff(s1,s2)=(2,5)
func (n *NSet[T]) GetDifference(otherSet *NSet[T]) *NSet[T] {
	result := NewNSet[T]()

	for bucketIdx := 0; bucketIdx < BucketCount; bucketIdx++ {
		src := &n.Buckets[bucketIdx]
		other := &otherSet.Buckets[bucketIdx]
		dst := &result.Buckets[bucketIdx]

		// The output bucket always gets the same number of storage units as this set's bucket
		dst.StorageUnitCount = src.StorageUnitCount
		dst.Data = make([]StorageType, dst.StorageUnitCount)
		result.StorageUnitCount += src.StorageUnitCount

		// Number of storage units that exist in both buckets
		shared := src.StorageUnitCount
		if other.StorageUnitCount < shared {
			shared = other.StorageUnitCount
		}

		// For shared units keep only the bits that the other set does not have
		for u := uint32(0); u < shared; u++ {
			remaining := src.Data[u] &^ other.Data[u]
			dst.Data[u] = remaining
			result.SetBits += uint64(bits.OnesCount64(uint64(remaining)))
		}

		// Units past the end of the other bucket have nothing to subtract, so they are copied as-is
		if shared < src.StorageUnitCount {
			copy(dst.Data[shared:], src.Data[shared:])
			for _, kept := range dst.Data[shared:] {
				result.SetBits += uint64(bits.OnesCount64(uint64(kept)))
			}
		}
	}

	return result
}
// GetAllElements returns every number stored in the set as a slice.
// Values are produced bucket by bucket, and within a storage unit from the lowest bit to the highest.
//
// NOTE: Be careful with this if you have a lot of elements in NSet because NSet is compressed while the returned array is not.
// In the worst case (all uint32s stored) the returned array will be ~4.2 billion elements and will use 16+ GBs of RAM.
func (n *NSet[T]) GetAllElements() []T {
	out := make([]T, 0, n.SetBits)
	if n.SetBits == 0 {
		return out
	}

	for bucketIdx := 0; bucketIdx < BucketCount; bucketIdx++ {
		// These are the bits that were removed from the original value to select the bucket.
		// OR-ing them back in restores the original value.
		topBits := T(bucketIdx << n.shiftAmount)

		data := n.Buckets[bucketIdx].Data
		for unitIdx := 0; unitIdx < len(data); unitIdx++ {
			// StorageUnitIndex = noBucketBitsX / StorageTypeBits, so the first value
			// covered by this unit is unitIdx*StorageTypeBits (plus the bucket bits)
			base := T(unitIdx*StorageTypeBits) | topBits

			// Walk the set bits from lowest to highest; `pending &= pending - 1` clears the lowest set bit
			for pending := uint64(data[unitIdx]); pending != 0; pending &= pending - 1 {
				out = append(out, base+T(bits.TrailingZeros64(pending)))
			}
		}
	}

	return out
}
// IsEq reports whether this set and otherSet contain exactly the same values.
//
// Only the stored values matter: two sets with the same values are equal even if
// one of them has allocated more storage units (e.g. because a large value was
// added and later removed).
func (n *NSet[T]) IsEq(otherSet *NSet[T]) bool {

	// Sets with a different number of values can never be equal, so this is the cheapest exit
	if n.SetBits != otherSet.SetBits {
		return false
	}

	for i := 0; i < len(n.Buckets); i++ {

		short := n.Buckets[i].Data
		long := otherSet.Buckets[i].Data
		if len(short) > len(long) {
			short, long = long, short
		}

		// Storage units that exist in only one of the buckets must not hold any value
		for _, unit := range long[len(short):] {
			if unit != 0 {
				return false
			}
		}

		// Taking &short[0] panics on an empty slice, and there is nothing left to compare anyway
		if len(short) == 0 {
			continue
		}

		// Compare the overlapping storage units as raw bytes so bytes.Equal can do it in bulk
		byteLen := len(short) * int(unsafe.Sizeof(short[0]))
		if !bytes.Equal(
			unsafe.Slice((*byte)(unsafe.Pointer(&short[0])), byteLen),
			unsafe.Slice((*byte)(unsafe.Pointer(&long[0])), byteLen),
		) {
			return false
		}
	}

	return true
}
func (n *NSet[T]) HasIntersection(otherSet *NSet[T]) bool {
for i := 0; i < len(n.Buckets); i++ {
@ -267,9 +418,20 @@ func (n *NSet[T]) Copy() *NSet[T] {
}
// Len returns the number of values stored (i.e. bits set to 1).
// It is the same as NSet.SetBits: the stored counter is returned as-is and
// no bits are counted when Len is called.
func (n *NSet[T]) Len() uint64 {
	return n.SetBits
}
func UnionSets[T IntsIf](set1, set2 *NSet[T]) *NSet[T] {
newSet := NewNSet[T]()
// This is an optimization that makes it so that we only need to count bits
// when doing union with set2
newSet.SetBits = set1.SetBits
for i := 0; i < BucketCount; i++ {
b1 := &set1.Buckets[i]
@ -288,18 +450,26 @@ func UnionSets[T IntsIf](set1, set2 *NSet[T]) *NSet[T] {
newSet.StorageUnitCount += bucketSize
//Union fields of both sets on the new set
for j := 0; j < len(b1.Data); j++ {
newB.Data[j] |= b1.Data[j]
}
copy(newB.Data, b1.Data)
for j := 0; j < len(b2.Data); j++ {
newB.Data[j] |= b2.Data[j]
oldStorage := newB.Data[j]
newStorage := oldStorage | b2.Data[j]
newB.Data[j] = newStorage
newSet.SetBits += uint64(bits.OnesCount64(uint64(^oldStorage) & uint64(newStorage)))
}
}
return newSet
}
// FastModPower2 computes x%y with a bit mask instead of a division.
// The result is only correct when y is a power of 2.
func FastModPower2[T uint8 | uint16 | uint32 | uint64](x, y T) T {
	// For a power of 2, y-1 has exactly the bits below y's single set bit turned on
	mask := y - 1
	return x & mask
}
func NewNSet[T IntsIf]() *NSet[T] {
n := &NSet[T]{

View File

@ -22,10 +22,23 @@ var (
func TestNSet(t *testing.T) {
n1 := nset.NewNSet[uint32]()
// Double add/remove of the same value is not only important to test SetBits, but also
// to test for bugs where double adding/removing incorrectly flips bits (checked using Contains())
n1.Add(0)
AllTrue(t, n1.Len() == 1)
n1.Add(0)
AllTrue(t, n1.Len() == 1)
n1.Add(1)
AllTrue(t, n1.Len() == 2)
n1.Add(63)
AllTrue(t, n1.Len() == 3)
n1.Add(math.MaxUint32)
AllTrue(t, n1.Len() == 4)
AllTrue(t, n1.Contains(0), n1.Contains(1), n1.Contains(63), n1.Contains(math.MaxUint32), !n1.Contains(10), !n1.Contains(599))
AllTrue(t, n1.ContainsAll(0, 1, 63), !n1.ContainsAll(9, 0, 1), !n1.ContainsAll(0, 1, 63, 99))
@ -35,7 +48,12 @@ func TestNSet(t *testing.T) {
IsEq(t, math.MaxUint32/64/nset.BucketCount, n1.GetStorageUnitIndex(math.MaxUint32))
nCopy := n1.Copy()
n1.Remove(1)
AllTrue(t, n1.Len() == 3)
n1.Remove(1)
AllTrue(t, n1.Len() == 3)
AllTrue(t, n1.Contains(0), n1.Contains(63), !n1.Contains(1), nCopy.ContainsAll(0, 1, 63, math.MaxUint32))
@ -55,26 +73,60 @@ func TestNSet(t *testing.T) {
n5.AddMany(0, 1, 63, 64, math.MaxUint32)
n4n5 := n4.GetIntersection(n5)
AllTrue(t, n4n5.ContainsAll(0, 1, 64, math.MaxUint32), !n4n5.Contains(63))
n4n5Twin := nset.NewNSet[uint32]()
n4n5Twin.AddMany(0, 1, 64, math.MaxUint32)
AllTrue(t, n4n5.Len() == 4, n4n5.Len() == n4n5Twin.Len(), n4n5.ContainsAll(0, 1, 64, math.MaxUint32), !n4n5.Contains(63), n4n5Twin.IsEq(n4n5))
// Union
n6 := nset.NewNSet[uint32]()
n6.AddMany(4, 7, 100, 1000)
n6.AddMany(1, 4, 7, 100, 1000)
n7 := nset.NewNSet[uint32]()
n7.AddMany(math.MaxUint32)
n7.AddMany(1, math.MaxUint32)
n7OldStorageUnitCount := n7.StorageUnitCount
n7.Union(n6)
AllTrue(t, n6.ContainsAll(4, 7, 100, 1000), !n6.Contains(math.MaxUint32), n7.ContainsAll(4, 7, 100, 1000, math.MaxUint32), n7.StorageUnitCount == n7OldStorageUnitCount+n6.StorageUnitCount)
AllTrue(t, n6.Len() == 5, n7.Len() == 6, n6.ContainsAll(1, 4, 7, 100, 1000), !n6.Contains(math.MaxUint32), n7.ContainsAll(1, 4, 7, 100, 1000, math.MaxUint32), n7.StorageUnitCount == n7OldStorageUnitCount+n6.StorageUnitCount-1)
// UnionSets
n7 = nset.NewNSet[uint32]()
n7.AddMany(math.MaxUint32)
n7.AddMany(4, math.MaxUint32)
unionedSet := nset.UnionSets(n6, n7)
AllTrue(t, !n6.Contains(math.MaxUint32), !n7.ContainsAny(4, 7, 100, 1000), unionedSet.ContainsAll(4, 7, 100, 1000, math.MaxUint32), unionedSet.StorageUnitCount == n6.StorageUnitCount+n7OldStorageUnitCount)
AllTrue(t, unionedSet.Len() == 6, !n6.Contains(math.MaxUint32), !n7.ContainsAny(7, 100, 1000), unionedSet.ContainsAll(4, 7, 100, 1000, math.MaxUint32), unionedSet.StorageUnitCount == n6.StorageUnitCount+n7OldStorageUnitCount-1)
// Equality
AllTrue(t, !n6.IsEq(n7))
n7.Union(n6)
AllTrue(t, !n6.IsEq(n7))
n6.Union(n7)
AllTrue(t, n6.IsEq(n7))
// GetAllElements
n8 := nset.NewNSet[uint32]()
n8.AddMany(0, 1, 55, 1000, 10000)
n8Elements := n8.GetAllElements()
AllTrue(t, len(n8Elements) == 5, n8Elements[0] == 0, n8Elements[1] == 1, n8Elements[2] == 55, n8Elements[3] == 1000, n8Elements[4] == 10000)
// GetDifference
nDiff1 := nset.NewNSet[uint32]()
nDiff1.AddMany(1, 2, 3)
nDiff2 := nset.NewNSet[uint32]()
nDiff2.AddMany(1, 3, 4)
nDiff3 := nDiff1.GetDifference(nDiff2)
AllTrue(t, nDiff3.SetBits == 1, nDiff3.StorageUnitCount == 1, nDiff3.Contains(2), !nDiff3.ContainsAny(1, 3, 4))
nDiff1.AddMany(1, 2, 3, 4, 5, math.MaxUint32)
nDiff3 = nDiff1.GetDifference(nDiff2)
AllTrue(t, nDiff3.SetBits == 3, nDiff3.ContainsAll(2, 5, math.MaxUint32), !nDiff3.ContainsAny(1, 3, 4))
}
func TestNSetFullRange(t *testing.T) {
@ -90,6 +142,7 @@ func TestNSetFullRange(t *testing.T) {
}
}
fullRangeNSet.Add(math.MaxUint32)
AllTrue(t, fullRangeNSet.SetBits == math.MaxUint32+1)
}
n := fullRangeNSet
@ -101,7 +154,7 @@ func TestNSetFullRange(t *testing.T) {
for j := 0; j < len(b.Data); j++ {
if b.Data[j] != math.MaxUint64 {
t.Errorf("Error: storage unit is NOT equal to MaxUint64 (i=%d,j=%d)! Expected math.MaxUint64 but got '%08b'\n",
t.Fatalf("Error: storage unit is NOT equal to MaxUint64 (i=%d,j=%d)! Expected math.MaxUint64 but got '%08b'\n",
i,
j,
b.Data[j])
@ -111,11 +164,12 @@ func TestNSetFullRange(t *testing.T) {
}
func AllTrue(t *testing.T, values ...bool) bool {
func AllTrue(t *testing.T, values ...bool) (success bool) {
for i := 0; i < len(values); i++ {
if !values[i] {
t.Errorf("Expected 'true' but got 'false'\n")
t.Fatalf("Expected 'true' but got 'false'\n")
return false
}
}
@ -128,7 +182,7 @@ func IsEq[T comparable](t *testing.T, expected, val T) bool {
return true
}
t.Errorf("Expected '%v' but got '%v'\n", expected, val)
t.Fatalf("Expected '%v' but got '%v'\n", expected, val)
return false
}
@ -154,7 +208,7 @@ func BenchmarkNSetAddRand(b *testing.B) {
n := nset.NewNSet[uint32]()
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
n.Add(rand.Uint32() % maxBenchSize)
}
@ -164,7 +218,7 @@ func BenchmarkNSetAddRandNoSizeLimit(b *testing.B) {
n := nset.NewNSet[uint32]()
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
n.Add(rand.Uint32())
}
@ -174,7 +228,7 @@ func BenchmarkMapAddRand(b *testing.B) {
hMap := map[uint32]struct{}{}
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
hMap[rand.Uint32()%maxBenchSize] = struct{}{}
}
@ -237,7 +291,7 @@ func BenchmarkNSetContainsRand(b *testing.B) {
//Work
found := 0
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
randVal := rand.Uint32()
@ -270,7 +324,7 @@ func BenchmarkNSetContainsRandFullRange(b *testing.B) {
//Work
found := 0
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
randVal := rand.Uint32()
@ -295,7 +349,7 @@ func BenchmarkMapContainsRand(b *testing.B) {
//Work
found := 0
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
randVal := rand.Uint32()
@ -353,7 +407,7 @@ func BenchmarkNSetDeleteRand(b *testing.B) {
b.StartTimer()
//Work
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
randVal := rand.Uint32()
@ -373,10 +427,480 @@ func BenchmarkMapDeleteRand(b *testing.B) {
b.StartTimer()
//Work
rand.Seed(RandSeed)
rand := rand.New(rand.NewSource(RandSeed))
for i := 0; i < b.N; i++ {
randVal := rand.Uint32()
delete(hMap, randVal)
}
}
// BenchmarkNSetIsEq measures NSet.IsEq on two sets that both hold the values
// [0, maxBenchSize). The sets are equal, so IsEq cannot exit early.
func BenchmarkNSetIsEq(b *testing.B) {

	b.StopTimer()

	s1 := nset.NewNSet[uint32]()
	s2 := nset.NewNSet[uint32]()
	for i := uint32(0); i < maxBenchSize; i++ {
		s1.Add(i)
		s2.Add(i)
	}

	b.StartTimer()

	// The result is kept so the call can't be optimized away, and checked so that
	// a wrong (early exiting) IsEq fails the benchmark instead of looking fast
	equal := true
	for i := 0; i < b.N; i++ {
		equal = s1.IsEq(s2)
	}

	if !equal {
		b.Fatal("expected both sets to be equal")
	}
}
// BenchmarkMapIsEq measures a key-by-key equality check of two Go maps that
// both hold the keys [0, maxBenchSize).
func BenchmarkMapIsEq(b *testing.B) {

	b.StopTimer()

	m1 := map[uint32]struct{}{}
	m2 := map[uint32]struct{}{}
	for i := uint32(0); i < maxBenchSize; i++ {
		m1[i] = struct{}{}
		m2[i] = struct{}{}
	}

	// Two maps are equal when they have the same size and every key of the first exists in the second
	mapsAreEq := func(m1, m2 map[uint32]struct{}) bool {

		if len(m1) != len(m2) {
			return false
		}

		for k := range m1 {
			if _, ok := m2[k]; !ok {
				return false
			}
		}

		return true
	}

	b.StartTimer()

	// The result is kept so the call can't be optimized away, and checked so the
	// benchmark is known to have measured the full (equal) comparison
	equal := true
	for i := 0; i < b.N; i++ {
		equal = mapsAreEq(m1, m2)
	}

	if !equal {
		b.Fatal("expected both maps to be equal")
	}
}
// BenchmarkNSetIsEqRand measures NSet.IsEq on two sets filled with the same
// maxBenchSize pseudo-random uint32 values (seeded with RandSeed).
func BenchmarkNSetIsEqRand(b *testing.B) {

	b.StopTimer()

	// Named rng so the local generator doesn't shadow the rand package
	rng := rand.New(rand.NewSource(RandSeed))

	s1 := nset.NewNSet[uint32]()
	s2 := nset.NewNSet[uint32]()
	for i := uint32(0); i < maxBenchSize; i++ {
		r := rng.Uint32()
		s1.Add(r)
		s2.Add(r)
	}

	b.StartTimer()

	// The result is kept so the call can't be optimized away, and checked so that
	// a wrong (early exiting) IsEq fails the benchmark instead of looking fast
	equal := true
	for i := 0; i < b.N; i++ {
		equal = s1.IsEq(s2)
	}

	if !equal {
		b.Fatal("expected both sets to be equal")
	}
}
// BenchmarkMapIsEqRand measures a key-by-key equality check of two Go maps filled
// with the same maxBenchSize pseudo-random uint32 keys (seeded with RandSeed).
func BenchmarkMapIsEqRand(b *testing.B) {

	b.StopTimer()

	// Named rng so the local generator doesn't shadow the rand package
	rng := rand.New(rand.NewSource(RandSeed))

	m1 := map[uint32]struct{}{}
	m2 := map[uint32]struct{}{}
	for i := uint32(0); i < maxBenchSize; i++ {
		r := rng.Uint32()
		m1[r] = struct{}{}
		m2[r] = struct{}{}
	}

	// Two maps are equal when they have the same size and every key of the first exists in the second
	mapsAreEq := func(m1, m2 map[uint32]struct{}) bool {

		if len(m1) != len(m2) {
			return false
		}

		for k := range m1 {
			if _, ok := m2[k]; !ok {
				return false
			}
		}

		return true
	}

	b.StartTimer()

	// The result is kept so the call can't be optimized away, and checked so the
	// benchmark is known to have measured the full (equal) comparison
	equal := true
	for i := 0; i < b.N; i++ {
		equal = mapsAreEq(m1, m2)
	}

	if !equal {
		b.Fatal("expected both maps to be equal")
	}
}
// BenchmarkNSetIsEqRand100Mil measures NSet.IsEq on two sets filled with the same
// 100 million pseudo-random uint32 values (seeded with RandSeed).
func BenchmarkNSetIsEqRand100Mil(b *testing.B) {

	b.StopTimer()

	// Named rng so the local generator doesn't shadow the rand package
	rng := rand.New(rand.NewSource(RandSeed))

	s1 := nset.NewNSet[uint32]()
	s2 := nset.NewNSet[uint32]()
	for i := uint32(0); i < 100_000_000; i++ {
		r := rng.Uint32()
		s1.Add(r)
		s2.Add(r)
	}

	b.StartTimer()

	// The result is kept so the call can't be optimized away, and checked so that
	// a wrong (early exiting) IsEq fails the benchmark instead of looking fast
	equal := true
	for i := 0; i < b.N; i++ {
		equal = s1.IsEq(s2)
	}

	if !equal {
		b.Fatal("expected both sets to be equal")
	}
}
// BenchmarkMapIsEqRand100Mil measures a key-by-key equality check of two Go maps
// filled with the same 100 million pseudo-random uint32 keys (seeded with RandSeed).
func BenchmarkMapIsEqRand100Mil(b *testing.B) {

	b.StopTimer()

	// Named rng so the local generator doesn't shadow the rand package
	rng := rand.New(rand.NewSource(RandSeed))

	m1 := map[uint32]struct{}{}
	m2 := map[uint32]struct{}{}
	for i := uint32(0); i < 100_000_000; i++ {
		r := rng.Uint32()
		m1[r] = struct{}{}
		m2[r] = struct{}{}
	}

	// Two maps are equal when they have the same size and every key of the first exists in the second
	mapsAreEq := func(m1, m2 map[uint32]struct{}) bool {

		if len(m1) != len(m2) {
			return false
		}

		for k := range m1 {
			if _, ok := m2[k]; !ok {
				return false
			}
		}

		return true
	}

	b.StartTimer()

	// The result is kept so the call can't be optimized away, and checked so the
	// benchmark is known to have measured the full (equal) comparison
	equal := true
	for i := 0; i < b.N; i++ {
		equal = mapsAreEq(m1, m2)
	}

	if !equal {
		b.Fatal("expected both maps to be equal")
	}
}
var getIntersectionNset *nset.NSet[uint32]
// BenchmarkNSetGetIntersection times GetIntersection on two sets that both hold
// the values [0, maxBenchSize). The result is stored in a package variable.
func BenchmarkNSetGetIntersection(b *testing.B) {

	b.StopTimer()

	left := nset.NewNSet[uint32]()
	right := nset.NewNSet[uint32]()
	for v := uint32(0); v < maxBenchSize; v++ {
		left.Add(v)
		right.Add(v)
	}

	b.StartTimer()

	for iter := 0; iter < b.N; iter++ {
		getIntersectionNset = left.GetIntersection(right)
	}
}
var getIntersectionTempMap map[uint32]struct{}
// BenchmarkMapGetIntersection times a hand written intersection of two Go maps
// that both hold the keys [0, maxBenchSize). The result is stored in a package variable.
func BenchmarkMapGetIntersection(b *testing.B) {

	b.StopTimer()

	left := make(map[uint32]struct{})
	right := make(map[uint32]struct{})
	for v := uint32(0); v < maxBenchSize; v++ {
		left[v] = struct{}{}
		right[v] = struct{}{}
	}

	b.StartTimer()

	// Collects the keys of `first` that also exist in `second`
	intersect := func(first, second map[uint32]struct{}) map[uint32]struct{} {

		common := make(map[uint32]struct{})
		for key := range first {
			if _, found := second[key]; !found {
				continue
			}
			common[key] = struct{}{}
		}

		return common
	}

	for iter := 0; iter < b.N; iter++ {
		getIntersectionTempMap = intersect(left, right)
	}
}
// BenchmarkNSetGetIntersectionRand times GetIntersection on two sets filled with the
// same maxBenchSize pseudo-random uint32 values (seeded with RandSeed).
func BenchmarkNSetGetIntersectionRand(b *testing.B) {

	b.StopTimer()

	rng := rand.New(rand.NewSource(RandSeed))

	left := nset.NewNSet[uint32]()
	right := nset.NewNSet[uint32]()
	for n := uint32(0); n < maxBenchSize; n++ {
		val := rng.Uint32()
		left.Add(val)
		right.Add(val)
	}

	b.StartTimer()

	for iter := 0; iter < b.N; iter++ {
		getIntersectionNset = left.GetIntersection(right)
	}
}
// BenchmarkMapGetIntersectionRand times a hand written intersection of two Go maps filled
// with the same maxBenchSize pseudo-random uint32 keys (seeded with RandSeed).
func BenchmarkMapGetIntersectionRand(b *testing.B) {

	b.StopTimer()

	rng := rand.New(rand.NewSource(RandSeed))

	left := make(map[uint32]struct{})
	right := make(map[uint32]struct{})
	for n := uint32(0); n < maxBenchSize; n++ {
		val := rng.Uint32()
		left[val] = struct{}{}
		right[val] = struct{}{}
	}

	b.StartTimer()

	// Collects the keys of `first` that also exist in `second`
	intersect := func(first, second map[uint32]struct{}) map[uint32]struct{} {

		common := make(map[uint32]struct{})
		for key := range first {
			if _, found := second[key]; !found {
				continue
			}
			common[key] = struct{}{}
		}

		return common
	}

	for iter := 0; iter < b.N; iter++ {
		getIntersectionTempMap = intersect(left, right)
	}
}
var elementCount int
// BenchmarkNSetGetAllElements times GetAllElements on a set holding the values
// [0, maxBenchSize). The length of the last result is stored in a package variable.
func BenchmarkNSetGetAllElements(b *testing.B) {

	b.StopTimer()

	set := nset.NewNSet[uint32]()
	for v := uint32(0); v < maxBenchSize; v++ {
		set.Add(v)
	}

	b.StartTimer()

	var extracted []uint32
	for iter := 0; iter < b.N; iter++ {
		extracted = set.GetAllElements()
	}

	elementCount = len(extracted)
}
// BenchmarkMapGetAllElements times collecting all keys of a Go map holding the keys
// [0, maxBenchSize) into a slice. The length of the last result is stored in a package variable.
func BenchmarkMapGetAllElements(b *testing.B) {

	b.StopTimer()

	source := make(map[uint32]struct{})
	for v := uint32(0); v < maxBenchSize; v++ {
		source[v] = struct{}{}
	}

	b.StartTimer()

	// Copies every key of the map into a slice sized up front
	collectKeys := func(m map[uint32]struct{}) []uint32 {

		keys := make([]uint32, 0, len(m))
		for key := range m {
			keys = append(keys, key)
		}

		return keys
	}

	var extracted []uint32
	for iter := 0; iter < b.N; iter++ {
		extracted = collectKeys(source)
	}

	elementCount = len(extracted)
}
// BenchmarkNSetGetAllElementsRand times GetAllElements on a set filled with
// maxBenchSize pseudo-random uint32 values (seeded with RandSeed).
func BenchmarkNSetGetAllElementsRand(b *testing.B) {

	b.StopTimer()

	rng := rand.New(rand.NewSource(RandSeed))

	set := nset.NewNSet[uint32]()
	for n := uint32(0); n < maxBenchSize; n++ {
		set.Add(rng.Uint32())
	}

	b.StartTimer()

	var extracted []uint32
	for iter := 0; iter < b.N; iter++ {
		extracted = set.GetAllElements()
	}

	elementCount = len(extracted)
}
// BenchmarkMapGetAllElementsRand times collecting all keys of a Go map filled with
// maxBenchSize pseudo-random uint32 keys (seeded with RandSeed) into a slice.
func BenchmarkMapGetAllElementsRand(b *testing.B) {

	b.StopTimer()

	rng := rand.New(rand.NewSource(RandSeed))

	source := make(map[uint32]struct{})
	for n := uint32(0); n < maxBenchSize; n++ {
		source[rng.Uint32()] = struct{}{}
	}

	// Copies every key of the map into a slice sized up front
	collectKeys := func(m map[uint32]struct{}) []uint32 {

		keys := make([]uint32, 0, len(m))
		for key := range m {
			keys = append(keys, key)
		}

		return keys
	}

	b.StartTimer()

	var extracted []uint32
	for iter := 0; iter < b.N; iter++ {
		extracted = collectKeys(source)
	}

	elementCount = len(extracted)
}
var unionSize int
// BenchmarkNSetUnion times nset.UnionSets on two sets that both hold the values
// [0, maxBenchSize). The storage unit count of the last result is stored in a package variable.
func BenchmarkNSetUnion(b *testing.B) {

	b.StopTimer()

	left := nset.NewNSet[uint32]()
	right := nset.NewNSet[uint32]()
	for v := uint32(0); v < maxBenchSize; v++ {
		left.Add(v)
		right.Add(v)
	}

	b.StartTimer()

	var merged *nset.NSet[uint32]
	for iter := 0; iter < b.N; iter++ {
		merged = nset.UnionSets(left, right)
	}

	unionSize = int(merged.StorageUnitCount)
}
// BenchmarkMapUnion times a hand written union of two Go maps that both hold the
// keys [0, maxBenchSize). The size of the last result is stored in a package variable.
func BenchmarkMapUnion(b *testing.B) {

	b.StopTimer()

	left := make(map[uint32]struct{})
	right := make(map[uint32]struct{})
	for v := uint32(0); v < maxBenchSize; v++ {
		left[v] = struct{}{}
		right[v] = struct{}{}
	}

	b.StartTimer()

	// Builds a new map (pre-sized for the first input) holding the keys of both inputs
	merge := func(first, second map[uint32]struct{}) map[uint32]struct{} {

		combined := make(map[uint32]struct{}, len(first))
		for key := range first {
			combined[key] = struct{}{}
		}

		for key := range second {
			combined[key] = struct{}{}
		}

		return combined
	}

	var merged map[uint32]struct{}
	for iter := 0; iter < b.N; iter++ {
		merged = merge(left, right)
	}

	unionSize = len(merged)
}
// BenchmarkNSetUnionRand times nset.UnionSets on two sets filled with the same
// maxBenchSize pseudo-random uint32 values (seeded with RandSeed).
func BenchmarkNSetUnionRand(b *testing.B) {

	b.StopTimer()

	rng := rand.New(rand.NewSource(RandSeed))

	left := nset.NewNSet[uint32]()
	right := nset.NewNSet[uint32]()
	for n := uint32(0); n < maxBenchSize; n++ {
		val := rng.Uint32()
		left.Add(val)
		right.Add(val)
	}

	b.StartTimer()

	var merged *nset.NSet[uint32]
	for iter := 0; iter < b.N; iter++ {
		merged = nset.UnionSets(left, right)
	}

	unionSize = int(merged.StorageUnitCount)
}
// BenchmarkMapUnionRand times a hand written union of two Go maps filled with the
// same maxBenchSize pseudo-random uint32 keys (seeded with RandSeed).
func BenchmarkMapUnionRand(b *testing.B) {

	b.StopTimer()

	rng := rand.New(rand.NewSource(RandSeed))

	left := make(map[uint32]struct{})
	right := make(map[uint32]struct{})
	for n := uint32(0); n < maxBenchSize; n++ {
		val := rng.Uint32()
		left[val] = struct{}{}
		right[val] = struct{}{}
	}

	b.StartTimer()

	// Builds a new map (pre-sized for the first input) holding the keys of both inputs
	merge := func(first, second map[uint32]struct{}) map[uint32]struct{} {

		combined := make(map[uint32]struct{}, len(first))
		for key := range first {
			combined[key] = struct{}{}
		}

		for key := range second {
			combined[key] = struct{}{}
		}

		return combined
	}

	var merged map[uint32]struct{}
	for iter := 0; iter < b.N; iter++ {
		merged = merge(left, right)
	}

	unionSize = len(merged)
}