dsa_rust/associative/hash_set.rs
1#![allow(clippy::uninlined_format_args)] // Cant inline all args in print
2
3/*! A set structure based on this library's HashMap
4
5# About
6This structure provides common binary set operations (as `self` and `other`), mostly as an excuse to explore implementing custom iterators.
7- [Union](crate::associative::hash_set::HashSet::union): Returns an iterator over references to all elements that appear in either `self` or `other` as a set-theoretic [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
8- [Extend](crate::associative::hash_set::HashSet::extend): Mutates `self` to contain all unique elements in both `self` and `other`. This is a mutating version of `union()`.
9- [Intersection](crate::associative::hash_set::HashSet::intersection): Returns an iterator over references to elements that appear in both `self` and `other` as a set-theoretic [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
10- [Retain All](crate::associative::hash_set::HashSet::retain_all): Mutates `self` such that self only keeps elements that are also in `other`. This function could also appropriately be called "intersect", but that was too close to the non-mutating version of this function called "intersection".
11- [Difference](crate::associative::hash_set::HashSet::difference): Returns an iterator over references to elements that are present in `self` but not in `other` as a set-theoretic asymmetric difference or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)). This operation is asymmetric: interchanging self and other generally produces different results.
12- [Unique](crate::associative::hash_set::HashSet::unique): Returns an iterator over references to elements that are present in exactly one of the two sets as a set-theoretic [symmetric difference](https://en.wikipedia.org/wiki/Symmetric_difference). This operation is symmetric: interchanging self and other yields the same result.
13
14# Design
15The structure is based on this library's (open-addressing) [hash map](crate::associative::probing_hash_table). Each entry is stored as `(T, ())`, and because of the underlying implementation, `T` must be [Hash], [PartialEq], and [Eq].
16
17# Example
18
19```rust
20use dsa_rust::associative::hash_set::HashSet;
21
22 //Creates a couple new hash sets
23 let mut set1 = HashSet::<u8>::new();
24 // [0, 1, 2, 3]
25 for n in 0..=3 {
26 set1.put(n);
27 }
28
29 let mut set2 = HashSet::<u8>::new();
30 // [2, 3, 4, 5]
31 for n in 2..=5 {
32 set2.put(n);
33 }
34
35 // Creates a list of all elements in both self and other
36 let mut union: Vec<u8> = Vec::new();
37 for e in set1.union(&set2) {
38 union.push(*e);
39 }
40 assert_eq!(union.len(), 6);
41 assert!(union.contains(&0));
42 assert!(union.contains(&1));
43 assert!(union.contains(&2));
44 assert!(union.contains(&3));
45 assert!(union.contains(&4));
46 assert!(union.contains(&5));
47
48 // Creates a list of elements only present in both sets
49 let mut intersection: Vec<u8> = Vec::new();
50 for e in set1.intersection(&set2) {
51 intersection.push(*e);
52 }
53 assert_eq!(intersection.len(), 2);
54 assert!(intersection.contains(&2));
55 assert!(intersection.contains(&3));
56
57 // Creates a list of elements from self not present in other
58 let mut difference: Vec<u8> = Vec::new();
59 for e in set1.difference(&set2) {
60 difference.push(*e);
61 }
62 assert_eq!(difference.len(), 2);
63 assert!(difference.contains(&0));
64 assert!(difference.contains(&1));
65
66 // Proves that difference() is asymmetric
67 let mut difference: Vec<u8> = Vec::new();
68 for e in set2.difference(&set1) {
69 difference.push(*e);
70 }
71 assert_eq!(difference.len(), 2);
72 assert!(difference.contains(&4));
73 assert!(difference.contains(&5));
74
75 // Creates a list of elements present in either the first or
76 // second one set, but not both
77 let mut unique: Vec<u8> = Vec::new();
78 for e in set1.unique(&set2) {
79 unique.push(*e);
80 }
81 assert_eq!(unique.len(), 4);
82 assert!(unique.contains(&0));
83 assert!(unique.contains(&1));
84 assert!(unique.contains(&4));
85 assert!(unique.contains(&5));
86
87 // Illustrates that unique() is symmetric
88 let mut unique: Vec<u8> = Vec::new();
89 for e in set2.unique(&set1) {
90 unique.push(*e);
91 }
92 assert_eq!(unique.len(), 4);
93 assert!(unique.contains(&0));
94 assert!(unique.contains(&1));
95 assert!(unique.contains(&4));
96 assert!(unique.contains(&5));
97```
98
99*/
100
101use crate::associative::probing_hash_table;
102
103use std::fmt::Debug;
104use std::hash::Hash;
105
106pub struct HashSet<T> {
107 map: probing_hash_table::HashMap<T, ()>
108}
109impl<T> Default for HashSet<T>
110where
111 T: Clone + Default + Debug + Hash + Eq + PartialEq,
112 {
113 fn default() -> Self {
114 Self::new()
115 }
116}
117impl<T> HashSet<T>
118where
119 T: Clone + Default + Debug + Hash + Eq + PartialEq, {
120
121 // Basic operations
122 ///////////////////
123
124 /// Constructor for an empty set with a default capacity of 2
125 /// because everyone deserves a friend.
126 pub fn new() -> Self {
127 let new_capacity = 2;
128 Self {
129 map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(new_capacity),
130 }
131 }
132
133 /// Constructor for an empty set with a given capacity.
134 pub fn new_with_capacity(size: usize) -> Self {
135 Self {
136 map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(size),
137 }
138 }
139
140 /// Returns the number of elements in the set.
141 pub fn size(&self) -> usize {
142 self.map.len()
143 }
144
145 /// Returns true if the set is either empty or contains only empty slots and deleted entries.
146 pub fn is_empty(&self) -> bool {
147 self.map.is_empty()
148 }
149
150 /// Adds an item to the set.
151 pub fn put(&mut self, elem: T) {
152 self.map.put(elem, ());
153 }
154
155 /// Removes an item from the set.
156 pub fn remove(&mut self, elem: T) -> Option<T> {
157 if let Some(val) = self.map.remove(&elem) {
158 return Some(val.key().clone());
159 } None
160 }
161
162 /// Returns true if the set contains the referenced element.
163 pub fn contains<Q>(&self, elem: &Q) -> bool
164 where
165 T: std::borrow::Borrow<Q>,
166 Q: Debug + Hash + Eq + ?Sized,
167 {
168 self.map.contains(elem) // Uses the HashMap's contains() function
169 }
170
171 // Union operations
172 ///////////////////
173
174 /// Returns an iterator over references to all elements that appear
175 /// in either `self` or `other` as a set-theoretic
176 /// [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
177 pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, T> {
178 Union::build(&self.map, &other.map)
179 }
180
181 /// A mutating version of `union()` that consumes `other` to add
182 /// all of its elements to `self`.
183 pub fn extend(&mut self, other: Self) {
184 let other_iter = other.map.into_iter();
185 for key in other_iter {
186 if !self.map.contains(&key.0) {
187 self.put(key.0)
188 }
189 }
190 }
191
192 //Intersection operations
193 /////////////////////////
194
195 /// Returns an iterator over references to elements that appear
196 /// in both `self` and `other` as a set-theoretic
197 /// [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
198 pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, T> {
199 let (longer, shorter) = if self.map.len() >= other.map.len() {
200 (&self.map, &other.map)
201 } else {
202 (&other.map, &self.map)
203 };
204 Intersection {
205 lhs: longer,
206 rhs_iter: shorter.keys(),
207 }
208 }
209
210 /// Mutates `self` such that `self` only keeps elements that are also in `other`.
211 /// Different from the function `retain` which takes a closure to only retain elements
212 /// according to the predicate.
213 ///
214 /// NOTE: This function could also appropriately be called "intersect", but that was
215 /// too close to the non-mutating version of this function called "intersection".
216 // O(m*n)
217 pub fn retain_all(&mut self, other: &Self) {
218 // Collect keys to remove
219 let keys_to_remove: Vec<_> = self.map.keys()
220 .filter(|k| !other.map.contains(k))
221 .cloned() // collect owned keys if necessary
222 .collect();
223
224 // Remove them safely
225 for key in keys_to_remove {
226 self.map.remove(&key);
227 }
228 }
229
230 // Subtraction operations
231 /////////////////////////
232
233 /// Returns an iterator over references to elements that are present
234 /// in `self` but not in `other` as a set-theoretic asymmetric difference
235 /// or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)).
236 /// This operation is asymmetric: interchanging self and other
237 /// generally produces different results.
238 pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T> {
239 //let (longer, shorter) = if self.map.len() <= other.map.len() {
240 // (&self.map, &other.map)
241 //} else {
242 // (&other.map, &self.map)
243 //};
244 //Diff::new(longer, shorter)
245 Difference {
246 lhs_iter: self.map.keys(),
247 rhs: &other.map
248 }
249
250 }
251
252 /// Returns an iterator over references to elements that are present
253 /// in exactly one of the two sets as a set-theoretic [symmetric
254 /// difference](https://en.wikipedia.org/wiki/Symmetric_difference).
255 /// This operation is symmetric: interchanging `self` and `other`
256 /// yields the same result.
257 pub fn unique<'a>(&'a self, other: &'a Self) -> Unique<'a, T> {
258 Unique::new(&self.map, &other.map)
259 }
260
261 // Filter operations
262 ////////////////////
263
264 // Mutates `S` to only contain values that adhere to the given predicate (lambda)
265 //pub fn retain(&mut self, |predicate|) {}
266
267}
268
269pub struct Union<'a, K> {
270 // Used "raw" for its "free" operations
271 lhs: &'a probing_hash_table::HashMap<K, ()>,
272 // Create explicit iterators outside of the next() implementation
273 lhs_iter: probing_hash_table::Keys<'a, K, ()>,
274 rhs_iter: probing_hash_table::Keys<'a, K, ()>,
275}
276impl<'a, K> Union<'a, K> {
277 // Constructor that takes map references to create iterators outside of the next()
278 // implementation
279 fn build(
280 lhs: &'a probing_hash_table::HashMap<K, ()>,
281 rhs: &'a probing_hash_table::HashMap<K, ()>)
282 -> Union<'a, K>
283 where
284 K: Debug + Eq + Hash + PartialEq,
285 {
286 Union {
287 lhs_iter: lhs.keys(),
288 rhs_iter: rhs.keys(),
289 lhs,
290 }
291 }
292}
293impl<'a, K> Iterator for Union<'a, K>
294where
295 K: Debug + Eq + Hash,
296{
297 type Item = &'a K;
298
299 fn next(&mut self) -> Option<Self::Item> {
300 // Yield elements from lhs iterator
301 if let Some(key) = self.lhs_iter.next() {
302 return Some(key);
303 }
304
305 // Then yield only unique elements from the rhs iterator
306 self.rhs_iter.by_ref().find(|&key| !self.lhs.contains(key))
307 //while let Some(key) = self.rhs_iter.next() {
308 // if !self.lhs.contains(key) {
309 // return Some(key);
310 // }
311 //}
312 //None
313 }
314}
315
316pub struct Intersection<'a, K> {
317 rhs_iter: probing_hash_table::Keys<'a, K, ()>,
318 lhs: &'a probing_hash_table::HashMap<K, ()>, // needed to skip duplicates from rhs
319}
320impl<'a, K> Iterator for Intersection<'a, K>
321where
322 K: Debug + Eq + Hash,
323{
324 type Item = &'a K;
325
326 fn next(&mut self) -> Option<Self::Item> {
327 // Yield only elements from both sets
328 self.rhs_iter.by_ref().find(|&key| self.lhs.contains(key))
329 }
330}
331
332pub struct Difference<'a, K> {
333 lhs_iter: probing_hash_table::Keys<'a, K, ()>,
334 rhs: &'a probing_hash_table::HashMap<K, ()>, // optional if needed
335}
336impl<'a, K> Iterator for Difference<'a, K>
337where
338 K: Debug + Eq + Hash,
339{
340 type Item = &'a K;
341
342 // Yield only elements from self not contained in other
343 fn next(&mut self) -> Option<Self::Item> {
344 self.lhs_iter.find(|&k| !self.rhs.contains(k))
345 }
346}
347
348pub struct Unique<'a, K> {
349 lhs_iter: probing_hash_table::Keys<'a, K, ()>,
350 rhs_iter: probing_hash_table::Keys<'a, K, ()>,
351 lhs: &'a probing_hash_table::HashMap<K, ()>,
352 rhs: &'a probing_hash_table::HashMap<K, ()>,
353}
354impl<'a, K> Unique<'a, K>
355where
356 K: Debug + Eq + Hash,
357{
358 pub fn new(lhs: &'a probing_hash_table::HashMap<K, ()>, rhs: &'a probing_hash_table::HashMap<K, ()>) -> Self {
359 Self {
360 lhs,
361 rhs,
362 lhs_iter: lhs.keys(),
363 rhs_iter: rhs.keys(),
364 }
365 }
366}
367impl<'a, K> Iterator for Unique<'a, K>
368where
369 K: Debug + Eq + Hash,
370{
371 type Item = &'a K;
372
373 // Yield only elements not contained across both sets
374 fn next(&mut self) -> Option<Self::Item> {
375 // First yield from rhs elements not in lhs
376 if let Some(key) = self.rhs_iter.find(|&k| !self.lhs.contains(k)) {
377 return Some(key);
378 }
379
380 // Then reverse the operation to yield lhs elements not in rhs
381 self.lhs_iter.find(|&k| !self.rhs.contains(k))
382 }
383}
384
385
386// Unit tests
387/////////////
388
389#[test]
390// Basic function test
391fn hash_set_test() {
392 //Creates a new hash map
393 let mut set = HashSet::<&str>::new();
394 assert_eq!(set.size(), 0);
395 assert_eq!(set.map.len(), 0);
396
397 set.put("Peter");
398 set.put("Brain");
399 set.put("Bobson");
400 set.put("Peter"); // Oopsiedoodle, a duplicate!
401 set.put("Dichael");
402
403 assert_eq!(set.size(), 4);
404 assert_eq!(set.map.len(), 4);
405 assert!(!set.is_empty());
406 assert!(set.contains(&"Peter"));
407 assert!(set.contains(&"Brain"));
408 assert!(set.contains(&"Bobson"));
409 assert!(set.contains(&"Dichael"));
410}
411
412#[test]
413// Covers union() and extend()
414fn union() {
415 //Creates a new hash map
416 let mut set1 = HashSet::<&str>::new();
417 set1.put("Peter"); // +1
418 set1.put("Brain"); // +1
419 set1.put("Bobson"); // +1
420 set1.put("Dichael"); // +1
421
422 let mut set2 = HashSet::<&str>::new();
423 set2.put("Remus"); // +1
424 set2.put("Romulus"); // +1
425 set2.put("Bobson");
426 set2.put("Peter");
427
428 let mut union: Vec<&str> = Vec::new();
429 for e in set1.union(&set2) {
430 union.push(*e);
431 }
432 assert_eq!(union.len(), 6);
433 eprintln!("{:#?}", union);
434 // contains takes &T, which is &&str for the type
435 assert!(union.contains(&"Peter"));
436 assert!(union.contains(&"Brain"));
437 assert!(union.contains(&"Bobson"));
438 assert!(union.contains(&"Dichael"));
439
440 // Consumes set2 and extends set1
441 assert_eq!(set1.size(), 4);
442 set1.extend(set2);
443 assert_eq!(set1.size(), 6);
444 assert!(set1.contains("Peter"));
445 assert!(set1.contains("Brain"));
446 assert!(set1.contains("Bobson"));
447 assert!(set1.contains("Dichael"));
448 assert!(set1.contains("Remus"));
449 assert!(set1.contains("Romulus"));
450
451 //panic!("MANUAL TEST FAILURE");
452}
453
454#[test]
455// Covers intersection() and retain_all()
456fn intersection() {
457 //Creates a couple new hash sets
458 let mut set1 = HashSet::<&str>::new();
459 set1.put("Peter");
460 set1.put("Brain");
461 set1.put("Bobson");
462 set1.put("Dichael");
463
464 let mut set2 = HashSet::<&str>::new();
465 set2.put("Remus");
466 set2.put("Romulus");
467 set2.put("Bobson");
468 set2.put("Glank");
469 set2.put("Flock");
470 set2.put("Peter");
471
472 // Creates a vec of borrowed keys contained only in both sets
473 let mut intersection: Vec<&str> = Vec::new();
474 for e in set1.intersection(&set2) {
475 intersection.push(*e);
476 }
477 eprintln!("{:#?}", intersection);
478 assert_eq!(intersection.len(), 2);
479 assert!(intersection.contains(&"Peter"));
480 assert!(intersection.contains(&"Bobson"));
481
482 // Mutates self to contain only keys ALSO contained in other
483 eprintln!("{:#?}", set1.map);
484 assert_eq!(set1.size(), 4);
485 assert!(set1.contains(&"Peter"));
486 assert!(set1.contains(&"Brain"));
487 assert!(set1.contains(&"Bobson"));
488 assert!(set1.contains(&"Dichael"));
489 set1.retain_all(&set2); // Removes two entries
490 eprintln!("{:#?}", set1.map);
491 assert_eq!(set1.size(), 2);
492 assert!(set1.contains(&"Peter"));
493 assert!(set1.contains(&"Bobson"));
494
495 //panic!("MANUAL TEST FAILURE");
496}
497
498#[test]
499// Covers difference() and unique()
500fn difference() {
501 //Creates a couple new hash sets
502 let mut set1 = HashSet::<&str>::new();
503 set1.put("Peter");
504 set1.put("Brain"); // +1
505 set1.put("Bobson");
506 set1.put("Dichael"); // +1
507
508 let mut set2 = HashSet::<&str>::new();
509 set2.put("Remus"); // +1
510 set2.put("Romulus"); // +1
511 set2.put("Bobson");
512 set2.put("Flock"); // +1
513 set2.put("Peter");
514
515 // Creates a vec of borrowed keys contained only in both sets
516 let mut difference: Vec<&str> = Vec::new();
517 for e in set1.difference(&set2) {
518 difference.push(*e);
519 }
520 eprintln!("{:#?}", difference);
521 assert_eq!(difference.len(), 2);
522 assert!(difference.contains(&"Brain"));
523 assert!(difference.contains(&"Dichael"));
524
525 // Proves that difference() is positionally dependent
526 let mut difference: Vec<&str> = Vec::new();
527 for e in set2.difference(&set1) {
528 difference.push(*e);
529 }
530 eprintln!("{:#?}", difference);
531 assert_eq!(difference.len(), 3);
532 assert!(difference.contains(&"Remus"));
533 assert!(difference.contains(&"Romulus"));
534 assert!(difference.contains(&"Flock"));
535
536 // Creates a vec of borrowed keys contained only in both sets
537 let mut unique: Vec<&str> = Vec::new();
538 for e in set1.unique(&set2) {
539 unique.push(*e);
540 }
541 eprintln!("{:#?}", unique);
542 assert_eq!(unique.len(), 5);
543 assert!(unique.contains(&"Brain"));
544 assert!(unique.contains(&"Dichael"));
545 assert!(unique.contains(&"Remus"));
546 assert!(unique.contains(&"Romulus"));
547 assert!(unique.contains(&"Flock"));
548
549 //panic!("MANUAL TEST FAILURE");
550}