diff --git a/.res/bench-union-10-million.png b/.res/bench-union-10-million.png
new file mode 100755
index 0000000..802cce8
Binary files /dev/null and b/.res/bench-union-10-million.png differ
diff --git a/README.md b/README.md
index 7126f15..18bf6a7 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,10 @@ get intersections.
   - [When to use NSet](#when-to-use-nset)
   - [Usage](#usage)
   - [Benchmarks](#benchmarks)
+    - [Equality](#equality)
+    - [Extracting elements](#extracting-elements)
+    - [Intersection](#intersection)
+    - [Union](#union)
   - [How NSet works](#how-nset-works)
     - [Memory characteristics](#memory-characteristics)
 
@@ -132,6 +136,8 @@ myMap := make(map[uint16], 100)
 
 Map benefits from sizing while NSet isn't affected, but in both cases NSet remains faster.
 
+### Equality
+
 Another case where NSet really shines is checking if two sets are equal.
 Below is a benchmark that checks whether two NSets/maps with 10 Million elements in each are equal (They are equal, which is the worst case).
 Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
@@ -140,6 +146,8 @@ Here NSet finishes in `0.1ms` but Map takes almost a second with `813ms`.
 Next we have `GetAllElements`, which simply returns an array of all the elements of NSet/Map (note this is dangerous in NSet. See [Memory characteristics](#memory-characteristics)).
 ![Benchmarking GetAllElements with 10,000,000 elements](.res/bench-getAllElements-10-million.png)
 
+### Extracting elements
+
 With `GetAllElements` NSet is faster when its elements are closer together value wise (or if you have many numbers), but gets a lot slower when
 dealing with a few random numbers with a big difference between them. This is because you might get two numbers like `1` and `1_000_000` which NSet
 will store in two far away places with a lot of nothing in between. In a map these will be stored close together.
@@ -149,11 +157,20 @@ while map takes `~95ms`. Map scales with the amount of elements, while NSet is a
 
 Similar to getting elements is intersection:
 
-![Benchmarking GetIntersection with 10,000,000 elements](.res/bench-getIntersection-10-million.png)
+### Intersection
+
+![Benchmarking GetIntersection with 10,000,000 elements](./.res/bench-getIntersection-10-million.png)
 
 Here NSet is always many times faster, but the effect of number distribution on NSet's performance is clear, while map's performance
 only scales with number of elements.
 
+### Union
+
+![Benchmarking GetUnion with 10,000,000 elements](./.res/bench-union-10-million.png)
+
+With unions NSet is the clear winner in every case: for 10M elements NSet takes between `~0.37ms` and `~180ms`, while
+map takes `~1959ms`, around 10x slower than even NSet's slowest result.
+
 ## How NSet works
 
 NSet works by using a single bit to indicate whether a number exists or not.
diff --git a/nset_test.go b/nset_test.go
index 2c58c39..e8616a8 100755
--- a/nset_test.go
+++ b/nset_test.go
@@ -558,7 +558,7 @@ func BenchmarkNSetGetAllElements(b *testing.B) {
 	b.StopTimer()
 
 	s1 := nset.NewNSet[uint32]()
-	for i := uint32(0); i < 10_000_000; i++ {
+	for i := uint32(0); i < maxBenchSize; i++ {
 		s1.Add(i)
 	}
 	b.StartTimer()
@@ -576,7 +576,7 @@ func BenchmarkMapGetAllElements(b *testing.B) {
 	b.StopTimer()
 
 	m1 := map[uint32]struct{}{}
-	for i := uint32(0); i < 10_000_000; i++ {
+	for i := uint32(0); i < maxBenchSize; i++ {
 		m1[i] = struct{}{}
 	}
 	b.StartTimer()
@@ -605,7 +605,7 @@ func BenchmarkNSetGetAllElementsRand(b *testing.B) {
 
 	rand.Seed(RandSeed)
 	s1 := nset.NewNSet[uint32]()
-	for i := uint32(0); i < 10_000_000; i++ {
+	for i := uint32(0); i < maxBenchSize; i++ {
 		s1.Add(rand.Uint32())
 	}
 	b.StartTimer()
@@ -625,7 +625,7 @@ func BenchmarkMapGetAllElementsRand(b *testing.B) {
 	rand.Seed(RandSeed)
 
 	m1 := map[uint32]struct{}{}
-	for i := uint32(0); i < 10_000_000; i++ {
+	for i := uint32(0); i < maxBenchSize; i++ {
 		m1[rand.Uint32()] = struct{}{}
 	}
 
@@ -647,3 +647,119 @@ func BenchmarkMapGetAllElementsRand(b *testing.B) {
 
 	elementCount = len(elements)
 }
+
+var unionSize int
+
+func BenchmarkNSetUnion(b *testing.B) {
+	// Measures nset.UnionSets on two NSets that both hold every value in [0, maxBenchSize).
+	b.StopTimer()
+
+	s1 := nset.NewNSet[uint32]()
+	s2 := nset.NewNSet[uint32]()
+	for i := uint32(0); i < maxBenchSize; i++ {
+		s1.Add(i)
+		s2.Add(i)
+	}
+	b.StartTimer()
+	// Only the loop below is timed; the sets above were built while the timer was stopped.
+	var union *nset.NSet[uint32]
+	for i := 0; i < b.N; i++ {
+		union = nset.UnionSets(s1, s2)
+	}
+	// Publish the last result through the package-level unionSize so it is used after the loop.
+	unionSize = int(union.StorageUnitCount)
+}
+
+func BenchmarkMapUnion(b *testing.B) {
+	// Map counterpart of the NSet union benchmark: two maps that both hold every key in [0, maxBenchSize).
+	b.StopTimer()
+
+	m1 := map[uint32]struct{}{}
+	m2 := map[uint32]struct{}{}
+	for i := uint32(0); i < maxBenchSize; i++ {
+		m1[i] = struct{}{}
+		m2[i] = struct{}{}
+	}
+	b.StartTimer()
+	// unionFunc returns a new map holding every key of both arguments (its parameters shadow m1/m2 above).
+	unionFunc := func(m1, m2 map[uint32]struct{}) map[uint32]struct{} {
+		// The capacity hint is len(m1) only; in this benchmark m2 holds the same keys as m1.
+		u := make(map[uint32]struct{}, len(m1))
+		for k := range m1 {
+			u[k] = struct{}{}
+		}
+
+		for k := range m2 {
+			u[k] = struct{}{}
+		}
+
+		return u
+	}
+
+	var union map[uint32]struct{}
+	for i := 0; i < b.N; i++ {
+		union = unionFunc(m1, m2)
+	}
+	// Publish the last result through the package-level unionSize so it is used after the loop.
+	unionSize = len(union)
+}
+
+func BenchmarkNSetUnionRand(b *testing.B) {
+	// Measures nset.UnionSets on two NSets filled with the same maxBenchSize random uint32 draws.
+	b.StopTimer()
+
+	rand.Seed(RandSeed)
+
+	s1 := nset.NewNSet[uint32]()
+	s2 := nset.NewNSet[uint32]()
+	for i := uint32(0); i < maxBenchSize; i++ {
+		r := rand.Uint32()
+		s1.Add(r)
+		s2.Add(r)
+	}
+	b.StartTimer()
+	// Only the loop below is timed; seeding and set construction ran while the timer was stopped.
+	var union *nset.NSet[uint32]
+	for i := 0; i < b.N; i++ {
+		union = nset.UnionSets(s1, s2)
+	}
+	// Publish the last result through the package-level unionSize so it is used after the loop.
+	unionSize = int(union.StorageUnitCount)
+}
+
+func BenchmarkMapUnionRand(b *testing.B) {
+	// Map counterpart of the random NSet union benchmark: both maps receive the same maxBenchSize random draws.
+	b.StopTimer()
+
+	rand.Seed(RandSeed)
+
+	m1 := map[uint32]struct{}{}
+	m2 := map[uint32]struct{}{}
+	for i := uint32(0); i < maxBenchSize; i++ {
+		r := rand.Uint32()
+		m1[r] = struct{}{}
+		m2[r] = struct{}{}
+	}
+	b.StartTimer()
+	// unionFunc returns a new map holding every key of both arguments (its parameters shadow m1/m2 above).
+	unionFunc := func(m1, m2 map[uint32]struct{}) map[uint32]struct{} {
+		// The capacity hint is len(m1) only; in this benchmark m2 holds the same keys as m1.
+		u := make(map[uint32]struct{}, len(m1))
+		for k := range m1 {
+			u[k] = struct{}{}
+		}
+
+		for k := range m2 {
+			u[k] = struct{}{}
+		}
+
+		return u
+	}
+
+	var union map[uint32]struct{}
+	for i := 0; i < b.N; i++ {
+		union = unionFunc(m1, m2)
+	}
+	// Publish the last result through the package-level unionSize so it is used after the loop.
+	unionSize = len(union)
+}