mirror of
https://github.com/bloeys/nset.git
synced 2025-12-29 06:28:19 +00:00
Intersection benchmarks
This commit is contained in:
BIN
.res/bench-union-10-million.png
Executable file
BIN
.res/bench-union-10-million.png
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 35 KiB |
19
README.md
19
README.md
@ -12,6 +12,10 @@ get intersections.
|
|||||||
- [When to use NSet](#when-to-use-nset)
|
- [When to use NSet](#when-to-use-nset)
|
||||||
- [Usage](#usage)
|
- [Usage](#usage)
|
||||||
- [Benchmarks](#benchmarks)
|
- [Benchmarks](#benchmarks)
|
||||||
|
- [Equality](#equality)
|
||||||
|
- [Extracting elements](#extracting-elements)
|
||||||
|
- [Intersection](#intersection)
|
||||||
|
- [Union](#union)
|
||||||
- [How NSet works](#how-nset-works)
|
- [How NSet works](#how-nset-works)
|
||||||
- [Memory characteristics](#memory-characteristics)
|
- [Memory characteristics](#memory-characteristics)
|
||||||
|
|
||||||
@ -132,6 +136,8 @@ myMap := make(map[uint16], 100)
|
|||||||
|
|
||||||
Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.
|
Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.
|
||||||
|
|
||||||
|
### Equality
|
||||||
|
|
||||||
Another case where NSet really shines is checking if two sets are equal.
|
Another case where NSet really shines is checking if two sets are equal.
|
||||||
Below is a benchmark that checks whether two NSets/maps with 10 million elements each are equal (they are equal, which is the worst case).
|
Below is a benchmark that checks whether two NSets/maps with 10 million elements each are equal (they are equal, which is the worst case).
|
||||||
Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
|
Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
|
||||||
@ -140,6 +146,8 @@ Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
|
|||||||
Next we have `GetAllElements`, which simply returns an array of all the elements of NSet/Map (note this is dangerous in NSet. See [Memory characteristics](#memory-characteristics)).
|
Next we have `GetAllElements`, which simply returns an array of all the elements of NSet/Map (note this is dangerous in NSet. See [Memory characteristics](#memory-characteristics)).
|
||||||

|

|
||||||
|
|
||||||
|
### Extracting elements
|
||||||
|
|
||||||
With `GetAllElements` NSet is faster when its elements are closer together value-wise (or if you have many numbers), but gets a lot slower when
|
With `GetAllElements` NSet is faster when its elements are closer together value-wise (or if you have many numbers), but gets a lot slower when
|
||||||
dealing with a few random numbers with a big difference between them. This is because you might get two numbers like `1` and `1_000_000` which NSet
|
dealing with a few random numbers with a big difference between them. This is because you might get two numbers like `1` and `1_000_000` which NSet
|
||||||
will store in two far away places with a lot of nothing in between. In a map these will be stored close together.
|
will store in two far away places with a lot of nothing in between. In a map these will be stored close together.
|
||||||
@ -149,11 +157,20 @@ while map takes `~95ms`. Map scales with the amount of elements, while NSet is a
|
|||||||
|
|
||||||
Intersection behaves similarly to getting elements:
|
Intersection behaves similarly to getting elements:
|
||||||
|
|
||||||

|
### Intersection
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
Here NSet is always many times faster, but the effect of number distribution on NSet's performance is clear, while map's performance
|
Here NSet is always many times faster, but the effect of number distribution on NSet's performance is clear, while map's performance
|
||||||
only scales with the number of elements.
|
only scales with the number of elements.
|
||||||
|
|
||||||
|
### Union
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
With unions, NSet is the clear winner in all cases: for 10M elements NSet takes between `~0.37ms` and `~180ms`, while
|
||||||
|
map takes `~1959ms`, which is at least around 10x slower.
|
||||||
|
|
||||||
## How NSet works
|
## How NSet works
|
||||||
|
|
||||||
NSet works by using a single bit to indicate whether a number exists or not.
|
NSet works by using a single bit to indicate whether a number exists or not.
|
||||||
|
|||||||
124
nset_test.go
124
nset_test.go
@ -558,7 +558,7 @@ func BenchmarkNSetGetAllElements(b *testing.B) {
|
|||||||
b.StopTimer()
|
b.StopTimer()
|
||||||
|
|
||||||
s1 := nset.NewNSet[uint32]()
|
s1 := nset.NewNSet[uint32]()
|
||||||
for i := uint32(0); i < 10_000_000; i++ {
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
s1.Add(i)
|
s1.Add(i)
|
||||||
}
|
}
|
||||||
b.StartTimer()
|
b.StartTimer()
|
||||||
@ -576,7 +576,7 @@ func BenchmarkMapGetAllElements(b *testing.B) {
|
|||||||
b.StopTimer()
|
b.StopTimer()
|
||||||
|
|
||||||
m1 := map[uint32]struct{}{}
|
m1 := map[uint32]struct{}{}
|
||||||
for i := uint32(0); i < 10_000_000; i++ {
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
m1[i] = struct{}{}
|
m1[i] = struct{}{}
|
||||||
}
|
}
|
||||||
b.StartTimer()
|
b.StartTimer()
|
||||||
@ -605,7 +605,7 @@ func BenchmarkNSetGetAllElementsRand(b *testing.B) {
|
|||||||
|
|
||||||
rand.Seed(RandSeed)
|
rand.Seed(RandSeed)
|
||||||
s1 := nset.NewNSet[uint32]()
|
s1 := nset.NewNSet[uint32]()
|
||||||
for i := uint32(0); i < 10_000_000; i++ {
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
s1.Add(rand.Uint32())
|
s1.Add(rand.Uint32())
|
||||||
}
|
}
|
||||||
b.StartTimer()
|
b.StartTimer()
|
||||||
@ -625,7 +625,7 @@ func BenchmarkMapGetAllElementsRand(b *testing.B) {
|
|||||||
rand.Seed(RandSeed)
|
rand.Seed(RandSeed)
|
||||||
|
|
||||||
m1 := map[uint32]struct{}{}
|
m1 := map[uint32]struct{}{}
|
||||||
for i := uint32(0); i < 10_000_000; i++ {
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
m1[rand.Uint32()] = struct{}{}
|
m1[rand.Uint32()] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -647,3 +647,119 @@ func BenchmarkMapGetAllElementsRand(b *testing.B) {
|
|||||||
|
|
||||||
elementCount = len(elements)
|
elementCount = len(elements)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// unionSize is assigned a value derived from the last union computed by each
// union benchmark in this file (presumably a sink so the compiler cannot
// discard the benchmarked result — confirm).
var unionSize int
|
||||||
|
|
||||||
|
func BenchmarkNSetUnion(b *testing.B) {
|
||||||
|
|
||||||
|
b.StopTimer()
|
||||||
|
|
||||||
|
s1 := nset.NewNSet[uint32]()
|
||||||
|
s2 := nset.NewNSet[uint32]()
|
||||||
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
|
s1.Add(i)
|
||||||
|
s2.Add(i)
|
||||||
|
}
|
||||||
|
b.StartTimer()
|
||||||
|
|
||||||
|
var union *nset.NSet[uint32]
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
union = nset.UnionSets(s1, s2)
|
||||||
|
}
|
||||||
|
|
||||||
|
unionSize = int(union.StorageUnitCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkMapUnion(b *testing.B) {
|
||||||
|
|
||||||
|
b.StopTimer()
|
||||||
|
|
||||||
|
m1 := map[uint32]struct{}{}
|
||||||
|
m2 := map[uint32]struct{}{}
|
||||||
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
|
m1[i] = struct{}{}
|
||||||
|
m2[i] = struct{}{}
|
||||||
|
}
|
||||||
|
b.StartTimer()
|
||||||
|
|
||||||
|
unionFunc := func(m1, m2 map[uint32]struct{}) map[uint32]struct{} {
|
||||||
|
|
||||||
|
u := make(map[uint32]struct{}, len(m1))
|
||||||
|
for k := range m1 {
|
||||||
|
u[k] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
for k := range m2 {
|
||||||
|
u[k] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return u
|
||||||
|
}
|
||||||
|
|
||||||
|
var union map[uint32]struct{}
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
union = unionFunc(m1, m2)
|
||||||
|
}
|
||||||
|
|
||||||
|
unionSize = len(union)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkNSetUnionRand(b *testing.B) {
|
||||||
|
|
||||||
|
b.StopTimer()
|
||||||
|
|
||||||
|
rand.Seed(RandSeed)
|
||||||
|
|
||||||
|
s1 := nset.NewNSet[uint32]()
|
||||||
|
s2 := nset.NewNSet[uint32]()
|
||||||
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
|
r := rand.Uint32()
|
||||||
|
s1.Add(r)
|
||||||
|
s2.Add(r)
|
||||||
|
}
|
||||||
|
b.StartTimer()
|
||||||
|
|
||||||
|
var union *nset.NSet[uint32]
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
union = nset.UnionSets(s1, s2)
|
||||||
|
}
|
||||||
|
|
||||||
|
unionSize = int(union.StorageUnitCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkMapUnionRand(b *testing.B) {
|
||||||
|
|
||||||
|
b.StopTimer()
|
||||||
|
|
||||||
|
rand.Seed(RandSeed)
|
||||||
|
|
||||||
|
m1 := map[uint32]struct{}{}
|
||||||
|
m2 := map[uint32]struct{}{}
|
||||||
|
for i := uint32(0); i < maxBenchSize; i++ {
|
||||||
|
r := rand.Uint32()
|
||||||
|
m1[r] = struct{}{}
|
||||||
|
m2[r] = struct{}{}
|
||||||
|
}
|
||||||
|
b.StartTimer()
|
||||||
|
|
||||||
|
unionFunc := func(m1, m2 map[uint32]struct{}) map[uint32]struct{} {
|
||||||
|
|
||||||
|
u := make(map[uint32]struct{}, len(m1))
|
||||||
|
for k := range m1 {
|
||||||
|
u[k] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
for k := range m2 {
|
||||||
|
u[k] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return u
|
||||||
|
}
|
||||||
|
|
||||||
|
var union map[uint32]struct{}
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
union = unionFunc(m1, m2)
|
||||||
|
}
|
||||||
|
|
||||||
|
unionSize = len(union)
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user