dsa_rust/associative/hash_set.rs
1#![allow(clippy::uninlined_format_args)] // Cant inline all args in print
2
3/*! A set structure based on this library's HashMap
4
5# About
6This structure provides common binary set operations (as `self` and `other`), mostly as an excuse to explore implementing custom iterators.
7- [Union](crate::associative::hash_set::HashSet::union): Returns an iterator over references to all elements that appear in either `self` or `other` as a set-theoretic [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
8- [Extend](crate::associative::hash_set::HashSet::extend): Mutates `self` to contain all unique elements in both `self` and `other`. This is a mutating version of `union()`.
9- [Intersection](crate::associative::hash_set::HashSet::intersection): Returns an iterator over references to elements that appear in both `self` and `other` as a set-theoretic [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
10- [Retain All](crate::associative::hash_set::HashSet::retain_all): Mutates `self` such that self only keeps elements that are also in `other`. This function could also appropriately be called "intersect", but that was too close to the non-mutating version of this function called "intersection".
11- [Difference](crate::associative::hash_set::HashSet::difference): Returns an iterator over references to elements that are present in `self` but not in `other` as a set-theoretic asymmetric difference or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)). This operation is asymmetric: interchanging self and other generally produces different results.
12- [Unique](crate::associative::hash_set::HashSet::unique): Returns an iterator over references to elements that are present in exactly one of the two sets as a set-theoretic [symmetric difference](https://en.wikipedia.org/wiki/Symmetric_difference). This operation is symmetric: interchanging self and other yields the same result.
13
14# Design
15The structure is based on this library's (open-addressing) [hash map](crate::associative::probing_hash_table). Each entry is stored as `(T, ())`, and because of the underlying implementation, `T` must be [Hash], [PartialEq], and [Eq].
16
17# Example
18
19```rust
20use dsa_rust::associative::hash_set::HashSet;
21
22 //Creates a couple new hash sets
23 let mut set1 = HashSet::<u8>::new();
24 // [0, 1, 2, 3]
25 for n in 0..=3 {
26 set1.put(n);
27 }
28
29 let mut set2 = HashSet::<u8>::new();
30 // [2, 3, 4, 5]
31 for n in 2..=5 {
32 set2.put(n);
33 }
34
35 // Creates a list of all elements in both self and other
36 let mut union: Vec<u8> = Vec::new();
37 for e in set1.union(&set2) {
38 union.push(*e);
39 }
40 assert_eq!(union.len(), 6);
41 assert!(union.contains(&0));
42 assert!(union.contains(&1));
43 assert!(union.contains(&2));
44 assert!(union.contains(&3));
45 assert!(union.contains(&4));
46 assert!(union.contains(&5));
47
48 // Creates a list of elements only present in both sets
49 let mut intersection: Vec<u8> = Vec::new();
50 for e in set1.intersection(&set2) {
51 intersection.push(*e);
52 }
53 assert_eq!(intersection.len(), 2);
54 assert!(intersection.contains(&2));
55 assert!(intersection.contains(&3));
56
57 // Creates a list of elements from self not present in other
58 let mut difference: Vec<u8> = Vec::new();
59 for e in set1.difference(&set2) {
60 difference.push(*e);
61 }
62 assert_eq!(difference.len(), 2);
63 assert!(difference.contains(&0));
64 assert!(difference.contains(&1));
65
66 // Proves that difference() is asymmetric
67 let mut difference: Vec<u8> = Vec::new();
68 for e in set2.difference(&set1) {
69 difference.push(*e);
70 }
71 assert_eq!(difference.len(), 2);
72 assert!(difference.contains(&4));
73 assert!(difference.contains(&5));
74
75 // Creates a list of elements present in either the first or
76 // second one set, but not both
77 let mut unique: Vec<u8> = Vec::new();
78 for e in set1.unique(&set2) {
79 unique.push(*e);
80 }
81 assert_eq!(unique.len(), 4);
82 assert!(unique.contains(&0));
83 assert!(unique.contains(&1));
84 assert!(unique.contains(&4));
85 assert!(unique.contains(&5));
86
87 // Illustrates that unique() is symmetric
88 let mut unique: Vec<u8> = Vec::new();
89 for e in set2.unique(&set1) {
90 unique.push(*e);
91 }
92 assert_eq!(unique.len(), 4);
93 assert!(unique.contains(&0));
94 assert!(unique.contains(&1));
95 assert!(unique.contains(&4));
96 assert!(unique.contains(&5));
97```
98
99*/
100
101use crate::associative::probing_hash_table;
102
103use std::fmt::Debug;
104use std::hash::Hash;
105
106pub struct HashSet<T> {
107 map: probing_hash_table::HashMap<T, ()>,
108}
109impl<T> Default for HashSet<T>
110where
111 T: Clone + Default + Debug + Hash + Eq + PartialEq,
112{
113 fn default() -> Self {
114 Self::new()
115 }
116}
117impl<T> HashSet<T>
118where
119 T: Clone + Default + Debug + Hash + Eq + PartialEq,
120{
121 // Basic operations
122 ///////////////////
123
124 /// Constructor for an empty set with a default capacity of 2
125 /// because everyone deserves a friend.
126 pub fn new() -> Self {
127 let new_capacity = 2;
128 Self {
129 map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(new_capacity),
130 }
131 }
132
133 /// Constructor for an empty set with a given capacity.
134 pub fn new_with_capacity(size: usize) -> Self {
135 Self {
136 map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(size),
137 }
138 }
139
140 /// Returns the number of elements in the set.
141 pub fn size(&self) -> usize {
142 self.map.len()
143 }
144
145 /// Returns true if the set is either empty or contains only empty slots and deleted entries.
146 pub fn is_empty(&self) -> bool {
147 self.map.is_empty()
148 }
149
150 /// Adds an item to the set.
151 pub fn put(&mut self, elem: T) {
152 self.map.put(elem, ());
153 }
154
155 /// Removes an item from the set.
156 pub fn remove(&mut self, elem: T) -> Option<T> {
157 if let Some(val) = self.map.remove(&elem) {
158 return Some(val.key().clone());
159 }
160 None
161 }
162
163 /// Returns true if the set contains the referenced element.
164 pub fn contains<Q>(&self, elem: &Q) -> bool
165 where
166 T: std::borrow::Borrow<Q>,
167 Q: Debug + Hash + Eq + ?Sized,
168 {
169 self.map.contains(elem) // Uses the HashMap's contains() function
170 }
171
172 // Union operations
173 ///////////////////
174
175 /// Returns an iterator over references to all elements that appear
176 /// in either `self` or `other` as a set-theoretic
177 /// [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
178 pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, T> {
179 Union::build(&self.map, &other.map)
180 }
181
182 /// A mutating version of `union()` that consumes `other` to add
183 /// all of its elements to `self`.
184 pub fn extend(&mut self, other: Self) {
185 let other_iter = other.map.into_iter();
186 for key in other_iter {
187 if !self.map.contains(&key.0) {
188 self.put(key.0)
189 }
190 }
191 }
192
193 //Intersection operations
194 /////////////////////////
195
196 /// Returns an iterator over references to elements that appear
197 /// in both `self` and `other` as a set-theoretic
198 /// [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
199 pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, T> {
200 let (longer, shorter) = if self.map.len() >= other.map.len() {
201 (&self.map, &other.map)
202 } else {
203 (&other.map, &self.map)
204 };
205 Intersection {
206 lhs: longer,
207 rhs_iter: shorter.keys(),
208 }
209 }
210
211 /// Mutates `self` such that `self` only keeps elements that are also in `other`.
212 /// Different from the function `retain` which takes a closure to only retain elements
213 /// according to the predicate.
214 ///
215 /// NOTE: This function could also appropriately be called "intersect", but that was
216 /// too close to the non-mutating version of this function called "intersection".
217 // O(m*n)
218 pub fn retain_all(&mut self, other: &Self) {
219 // Collect keys to remove
220 let keys_to_remove: Vec<_> = self
221 .map
222 .keys()
223 .filter(|k| !other.map.contains(k))
224 .cloned() // collect owned keys if necessary
225 .collect();
226
227 // Remove them safely
228 for key in keys_to_remove {
229 self.map.remove(&key);
230 }
231 }
232
233 // Subtraction operations
234 /////////////////////////
235
236 /// Returns an iterator over references to elements that are present
237 /// in `self` but not in `other` as a set-theoretic asymmetric difference
238 /// or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)).
239 /// This operation is asymmetric: interchanging self and other
240 /// generally produces different results.
241 pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T> {
242 //let (longer, shorter) = if self.map.len() <= other.map.len() {
243 // (&self.map, &other.map)
244 //} else {
245 // (&other.map, &self.map)
246 //};
247 //Diff::new(longer, shorter)
248 Difference {
249 lhs_iter: self.map.keys(),
250 rhs: &other.map,
251 }
252 }
253
254 /// Returns an iterator over references to elements that are present
255 /// in exactly one of the two sets as a set-theoretic [symmetric
256 /// difference](https://en.wikipedia.org/wiki/Symmetric_difference).
257 /// This operation is symmetric: interchanging `self` and `other`
258 /// yields the same result.
259 pub fn unique<'a>(&'a self, other: &'a Self) -> Unique<'a, T> {
260 Unique::new(&self.map, &other.map)
261 }
262
263 // Filter operations
264 ////////////////////
265
266 // Mutates `S` to only contain values that adhere to the given predicate (lambda)
267 //pub fn retain(&mut self, |predicate|) {}
268}
269
270pub struct Union<'a, K> {
271 // Used "raw" for its "free" operations
272 lhs: &'a probing_hash_table::HashMap<K, ()>,
273 // Create explicit iterators outside of the next() implementation
274 lhs_iter: probing_hash_table::Keys<'a, K, ()>,
275 rhs_iter: probing_hash_table::Keys<'a, K, ()>,
276}
277impl<'a, K> Union<'a, K> {
278 // Constructor that takes map references to create iterators outside of the next()
279 // implementation
280 fn build(
281 lhs: &'a probing_hash_table::HashMap<K, ()>,
282 rhs: &'a probing_hash_table::HashMap<K, ()>,
283 ) -> Union<'a, K>
284 where
285 K: Debug + Eq + Hash + PartialEq,
286 {
287 Union {
288 lhs_iter: lhs.keys(),
289 rhs_iter: rhs.keys(),
290 lhs,
291 }
292 }
293}
294impl<'a, K> Iterator for Union<'a, K>
295where
296 K: Debug + Eq + Hash,
297{
298 type Item = &'a K;
299
300 fn next(&mut self) -> Option<Self::Item> {
301 // Yield elements from lhs iterator
302 if let Some(key) = self.lhs_iter.next() {
303 return Some(key);
304 }
305
306 // Then yield only unique elements from the rhs iterator
307 self.rhs_iter.by_ref().find(|&key| !self.lhs.contains(key))
308 //while let Some(key) = self.rhs_iter.next() {
309 // if !self.lhs.contains(key) {
310 // return Some(key);
311 // }
312 //}
313 //None
314 }
315}
316
317pub struct Intersection<'a, K> {
318 rhs_iter: probing_hash_table::Keys<'a, K, ()>,
319 lhs: &'a probing_hash_table::HashMap<K, ()>, // needed to skip duplicates from rhs
320}
321impl<'a, K> Iterator for Intersection<'a, K>
322where
323 K: Debug + Eq + Hash,
324{
325 type Item = &'a K;
326
327 fn next(&mut self) -> Option<Self::Item> {
328 // Yield only elements from both sets
329 self.rhs_iter.by_ref().find(|&key| self.lhs.contains(key))
330 }
331}
332
333pub struct Difference<'a, K> {
334 lhs_iter: probing_hash_table::Keys<'a, K, ()>,
335 rhs: &'a probing_hash_table::HashMap<K, ()>, // optional if needed
336}
337impl<'a, K> Iterator for Difference<'a, K>
338where
339 K: Debug + Eq + Hash,
340{
341 type Item = &'a K;
342
343 // Yield only elements from self not contained in other
344 fn next(&mut self) -> Option<Self::Item> {
345 self.lhs_iter.find(|&k| !self.rhs.contains(k))
346 }
347}
348
349pub struct Unique<'a, K> {
350 lhs_iter: probing_hash_table::Keys<'a, K, ()>,
351 rhs_iter: probing_hash_table::Keys<'a, K, ()>,
352 lhs: &'a probing_hash_table::HashMap<K, ()>,
353 rhs: &'a probing_hash_table::HashMap<K, ()>,
354}
355impl<'a, K> Unique<'a, K>
356where
357 K: Debug + Eq + Hash,
358{
359 pub fn new(
360 lhs: &'a probing_hash_table::HashMap<K, ()>,
361 rhs: &'a probing_hash_table::HashMap<K, ()>,
362 ) -> Self {
363 Self {
364 lhs,
365 rhs,
366 lhs_iter: lhs.keys(),
367 rhs_iter: rhs.keys(),
368 }
369 }
370}
371impl<'a, K> Iterator for Unique<'a, K>
372where
373 K: Debug + Eq + Hash,
374{
375 type Item = &'a K;
376
377 // Yield only elements not contained across both sets
378 fn next(&mut self) -> Option<Self::Item> {
379 // First yield from rhs elements not in lhs
380 if let Some(key) = self.rhs_iter.find(|&k| !self.lhs.contains(k)) {
381 return Some(key);
382 }
383
384 // Then reverse the operation to yield lhs elements not in rhs
385 self.lhs_iter.find(|&k| !self.rhs.contains(k))
386 }
387}
388
389// Unit tests
390/////////////
391
392#[test]
393// Basic function test
394fn hash_set_test() {
395 //Creates a new hash map
396 let mut set = HashSet::<&str>::new();
397 assert_eq!(set.size(), 0);
398 assert_eq!(set.map.len(), 0);
399
400 set.put("Peter");
401 set.put("Brain");
402 set.put("Bobson");
403 set.put("Peter"); // Oopsiedoodle, a duplicate!
404 set.put("Dichael");
405
406 assert_eq!(set.size(), 4);
407 assert_eq!(set.map.len(), 4);
408 assert!(!set.is_empty());
409 assert!(set.contains(&"Peter"));
410 assert!(set.contains(&"Brain"));
411 assert!(set.contains(&"Bobson"));
412 assert!(set.contains(&"Dichael"));
413}
414
415#[test]
416// Covers union() and extend()
417fn union() {
418 //Creates a new hash map
419 let mut set1 = HashSet::<&str>::new();
420 set1.put("Peter"); // +1
421 set1.put("Brain"); // +1
422 set1.put("Bobson"); // +1
423 set1.put("Dichael"); // +1
424
425 let mut set2 = HashSet::<&str>::new();
426 set2.put("Remus"); // +1
427 set2.put("Romulus"); // +1
428 set2.put("Bobson");
429 set2.put("Peter");
430
431 let mut union: Vec<&str> = Vec::new();
432 for e in set1.union(&set2) {
433 union.push(*e);
434 }
435 assert_eq!(union.len(), 6);
436 eprintln!("{:#?}", union);
437 // contains takes &T, which is &&str for the type
438 assert!(union.contains(&"Peter"));
439 assert!(union.contains(&"Brain"));
440 assert!(union.contains(&"Bobson"));
441 assert!(union.contains(&"Dichael"));
442
443 // Consumes set2 and extends set1
444 assert_eq!(set1.size(), 4);
445 set1.extend(set2);
446 assert_eq!(set1.size(), 6);
447 assert!(set1.contains("Peter"));
448 assert!(set1.contains("Brain"));
449 assert!(set1.contains("Bobson"));
450 assert!(set1.contains("Dichael"));
451 assert!(set1.contains("Remus"));
452 assert!(set1.contains("Romulus"));
453
454 //panic!("MANUAL TEST FAILURE");
455}
456
457#[test]
458// Covers intersection() and retain_all()
459fn intersection() {
460 //Creates a couple new hash sets
461 let mut set1 = HashSet::<&str>::new();
462 set1.put("Peter");
463 set1.put("Brain");
464 set1.put("Bobson");
465 set1.put("Dichael");
466
467 let mut set2 = HashSet::<&str>::new();
468 set2.put("Remus");
469 set2.put("Romulus");
470 set2.put("Bobson");
471 set2.put("Glank");
472 set2.put("Flock");
473 set2.put("Peter");
474
475 // Creates a vec of borrowed keys contained only in both sets
476 let mut intersection: Vec<&str> = Vec::new();
477 for e in set1.intersection(&set2) {
478 intersection.push(*e);
479 }
480 eprintln!("{:#?}", intersection);
481 assert_eq!(intersection.len(), 2);
482 assert!(intersection.contains(&"Peter"));
483 assert!(intersection.contains(&"Bobson"));
484
485 // Mutates self to contain only keys ALSO contained in other
486 eprintln!("{:#?}", set1.map);
487 assert_eq!(set1.size(), 4);
488 assert!(set1.contains(&"Peter"));
489 assert!(set1.contains(&"Brain"));
490 assert!(set1.contains(&"Bobson"));
491 assert!(set1.contains(&"Dichael"));
492 set1.retain_all(&set2); // Removes two entries
493 eprintln!("{:#?}", set1.map);
494 assert_eq!(set1.size(), 2);
495 assert!(set1.contains(&"Peter"));
496 assert!(set1.contains(&"Bobson"));
497
498 //panic!("MANUAL TEST FAILURE");
499}
500
501#[test]
502// Covers difference() and unique()
503fn difference() {
504 //Creates a couple new hash sets
505 let mut set1 = HashSet::<&str>::new();
506 set1.put("Peter");
507 set1.put("Brain"); // +1
508 set1.put("Bobson");
509 set1.put("Dichael"); // +1
510
511 let mut set2 = HashSet::<&str>::new();
512 set2.put("Remus"); // +1
513 set2.put("Romulus"); // +1
514 set2.put("Bobson");
515 set2.put("Flock"); // +1
516 set2.put("Peter");
517
518 // Creates a vec of borrowed keys contained only in both sets
519 let mut difference: Vec<&str> = Vec::new();
520 for e in set1.difference(&set2) {
521 difference.push(*e);
522 }
523 eprintln!("{:#?}", difference);
524 assert_eq!(difference.len(), 2);
525 assert!(difference.contains(&"Brain"));
526 assert!(difference.contains(&"Dichael"));
527
528 // Proves that difference() is positionally dependent
529 let mut difference: Vec<&str> = Vec::new();
530 for e in set2.difference(&set1) {
531 difference.push(*e);
532 }
533 eprintln!("{:#?}", difference);
534 assert_eq!(difference.len(), 3);
535 assert!(difference.contains(&"Remus"));
536 assert!(difference.contains(&"Romulus"));
537 assert!(difference.contains(&"Flock"));
538
539 // Creates a vec of borrowed keys contained only in both sets
540 let mut unique: Vec<&str> = Vec::new();
541 for e in set1.unique(&set2) {
542 unique.push(*e);
543 }
544 eprintln!("{:#?}", unique);
545 assert_eq!(unique.len(), 5);
546 assert!(unique.contains(&"Brain"));
547 assert!(unique.contains(&"Dichael"));
548 assert!(unique.contains(&"Remus"));
549 assert!(unique.contains(&"Romulus"));
550 assert!(unique.contains(&"Flock"));
551
552 //panic!("MANUAL TEST FAILURE");
553}