dsa_rust/associative/
hash_set.rs

1#![allow(clippy::uninlined_format_args)] // Cant inline all args in print
2
3/*! A set structure based on this library's HashMap
4
5# About
6This structure provides common binary set operations (as `self` and `other`), mostly as an excuse to explore implementing custom iterators.
7- [Union](crate::associative::hash_set::HashSet::union): Returns an iterator over references to all elements that appear in either `self` or `other` as a set-theoretic [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
8- [Extend](crate::associative::hash_set::HashSet::extend): Mutates `self` to contain all unique elements in both `self` and `other`. This is a mutating version of `union()`.
9- [Intersection](crate::associative::hash_set::HashSet::intersection): Returns an iterator over references to elements that appear in both `self` and `other` as a set-theoretic [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
10- [Retain All](crate::associative::hash_set::HashSet::retain_all): Mutates `self` such that self only keeps elements that are also in `other`. This function could also appropriately be called "intersect", but that was too close to the non-mutating version of this function called "intersection".
11- [Difference](crate::associative::hash_set::HashSet::difference): Returns an iterator over references to elements that are present in `self` but not in `other` as a set-theoretic asymmetric difference or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)). This operation is asymmetric: interchanging self and other generally produces different results.
12- [Unique](crate::associative::hash_set::HashSet::unique): Returns an iterator over references to elements that are present in exactly one of the two sets as a set-theoretic [symmetric difference](https://en.wikipedia.org/wiki/Symmetric_difference). This operation is symmetric: interchanging self and other yields the same result.
13
14# Design
15The structure is based on this library's (open-addressing) [hash map](crate::associative::probing_hash_table). Each entry is stored as `(T, ())`, and because of the underlying implementation, `T` must be [Hash], [PartialEq], and [Eq].
16
17# Example
18
19```rust
20use dsa_rust::associative::hash_set::HashSet;
21
22    //Creates a couple new hash sets
23    let mut set1 = HashSet::<u8>::new();
24    // [0, 1, 2, 3]
25    for n in 0..=3 {
26        set1.put(n);
27    }
28
29    let mut set2 = HashSet::<u8>::new();
30    // [2, 3, 4, 5]
31    for n in 2..=5 {
32        set2.put(n);
33    }
34
35    // Creates a list of all elements in both self and other
36    let mut union: Vec<u8> = Vec::new();
37    for e in set1.union(&set2) {
38        union.push(*e);
39    }
40    assert_eq!(union.len(), 6);
41    assert!(union.contains(&0));
42    assert!(union.contains(&1));
43    assert!(union.contains(&2));
44    assert!(union.contains(&3));
45    assert!(union.contains(&4));
46    assert!(union.contains(&5));
47
48    // Creates a list of elements only present in both sets
49    let mut intersection: Vec<u8> = Vec::new();
50    for e in set1.intersection(&set2) {
51        intersection.push(*e);
52    }
53    assert_eq!(intersection.len(), 2);
54    assert!(intersection.contains(&2));
55    assert!(intersection.contains(&3));
56
57    // Creates a list of elements from self not present in other
58    let mut difference: Vec<u8> = Vec::new();
59    for e in set1.difference(&set2) {
60        difference.push(*e);
61    }
62    assert_eq!(difference.len(), 2);
63    assert!(difference.contains(&0));
64    assert!(difference.contains(&1));
65
66    // Proves that difference() is asymmetric
67    let mut difference: Vec<u8> = Vec::new();
68    for e in set2.difference(&set1) {
69        difference.push(*e);
70    }
71    assert_eq!(difference.len(), 2);
72    assert!(difference.contains(&4));
73    assert!(difference.contains(&5));
74
75    // Creates a list of elements present in either the first or
76    // second one set, but not both
77    let mut unique: Vec<u8> = Vec::new();
78    for e in set1.unique(&set2) {
79        unique.push(*e);
80    }
81    assert_eq!(unique.len(), 4);
82    assert!(unique.contains(&0));
83    assert!(unique.contains(&1));
84    assert!(unique.contains(&4));
85    assert!(unique.contains(&5));
86
87    // Illustrates that unique() is symmetric
88    let mut unique: Vec<u8> = Vec::new();
89    for e in set2.unique(&set1) {
90        unique.push(*e);
91    }
92    assert_eq!(unique.len(), 4);
93    assert!(unique.contains(&0));
94    assert!(unique.contains(&1));
95    assert!(unique.contains(&4));
96    assert!(unique.contains(&5));
97```
98
99*/
100
101use crate::associative::probing_hash_table;
102
103use std::fmt::Debug;
104use std::hash::Hash;
105
106pub struct HashSet<T> {
107    map: probing_hash_table::HashMap<T, ()>,
108}
109impl<T> Default for HashSet<T>
110where
111    T: Clone + Default + Debug + Hash + Eq + PartialEq,
112{
113    fn default() -> Self {
114        Self::new()
115    }
116}
117impl<T> HashSet<T>
118where
119    T: Clone + Default + Debug + Hash + Eq + PartialEq,
120{
121    // Basic operations
122    ///////////////////
123
124    /// Constructor for an empty set with a default capacity of 2
125    /// because everyone deserves a friend.
126    pub fn new() -> Self {
127        let new_capacity = 2;
128        Self {
129            map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(new_capacity),
130        }
131    }
132
133    /// Constructor for an empty set with a given capacity.
134    pub fn new_with_capacity(size: usize) -> Self {
135        Self {
136            map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(size),
137        }
138    }
139
140    /// Returns the number of elements in the set.
141    pub fn size(&self) -> usize {
142        self.map.len()
143    }
144
145    /// Returns true if the set is either empty or contains only empty slots and deleted entries.
146    pub fn is_empty(&self) -> bool {
147        self.map.is_empty()
148    }
149
150    /// Adds an item to the set.
151    pub fn put(&mut self, elem: T) {
152        self.map.put(elem, ());
153    }
154
155    /// Removes an item from the set.
156    pub fn remove(&mut self, elem: T) -> Option<T> {
157        if let Some(val) = self.map.remove(&elem) {
158            return Some(val.key().clone());
159        }
160        None
161    }
162
163    /// Returns true if the set contains the referenced element.
164    pub fn contains<Q>(&self, elem: &Q) -> bool
165    where
166        T: std::borrow::Borrow<Q>,
167        Q: Debug + Hash + Eq + ?Sized,
168    {
169        self.map.contains(elem) // Uses the HashMap's contains() function
170    }
171
172    // Union operations
173    ///////////////////
174
175    /// Returns an iterator over references to all elements that appear
176    /// in either `self` or `other` as a set-theoretic
177    /// [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
178    pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, T> {
179        Union::build(&self.map, &other.map)
180    }
181
182    /// A mutating version of `union()` that consumes `other` to add
183    /// all of its elements to `self`.
184    pub fn extend(&mut self, other: Self) {
185        let other_iter = other.map.into_iter();
186        for key in other_iter {
187            if !self.map.contains(&key.0) {
188                self.put(key.0)
189            }
190        }
191    }
192
193    //Intersection operations
194    /////////////////////////
195
196    /// Returns an iterator over references to elements that appear
197    /// in both `self` and `other` as a set-theoretic
198    /// [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
199    pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, T> {
200        let (longer, shorter) = if self.map.len() >= other.map.len() {
201            (&self.map, &other.map)
202        } else {
203            (&other.map, &self.map)
204        };
205        Intersection {
206            lhs: longer,
207            rhs_iter: shorter.keys(),
208        }
209    }
210
211    /// Mutates `self` such that `self` only keeps elements that are also in `other`.
212    /// Different from the function `retain` which takes a closure to only retain elements
213    /// according to the predicate.
214    ///
215    /// NOTE: This function could also appropriately be called "intersect", but that was
216    /// too close to the non-mutating version of this function called "intersection".
217    // O(m*n)
218    pub fn retain_all(&mut self, other: &Self) {
219        // Collect keys to remove
220        let keys_to_remove: Vec<_> = self
221            .map
222            .keys()
223            .filter(|k| !other.map.contains(k))
224            .cloned() // collect owned keys if necessary
225            .collect();
226
227        // Remove them safely
228        for key in keys_to_remove {
229            self.map.remove(&key);
230        }
231    }
232
233    // Subtraction operations
234    /////////////////////////
235
236    /// Returns an iterator over references to elements that are present
237    /// in `self` but not in `other` as a set-theoretic asymmetric difference
238    /// or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)).
239    /// This operation is asymmetric: interchanging self and other
240    /// generally produces different results.
241    pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T> {
242        //let (longer, shorter) = if self.map.len() <= other.map.len() {
243        //    (&self.map, &other.map)
244        //} else {
245        //    (&other.map, &self.map)
246        //};
247        //Diff::new(longer, shorter)
248        Difference {
249            lhs_iter: self.map.keys(),
250            rhs: &other.map,
251        }
252    }
253
254    /// Returns an iterator over references to elements that are present
255    /// in exactly one of the two sets as a set-theoretic [symmetric
256    /// difference](https://en.wikipedia.org/wiki/Symmetric_difference).
257    /// This operation is symmetric: interchanging `self` and `other`
258    /// yields the same result.
259    pub fn unique<'a>(&'a self, other: &'a Self) -> Unique<'a, T> {
260        Unique::new(&self.map, &other.map)
261    }
262
263    // Filter operations
264    ////////////////////
265
266    // Mutates `S` to only contain values that adhere to the given predicate (lambda)
267    //pub fn retain(&mut self, |predicate|) {}
268}
269
270pub struct Union<'a, K> {
271    // Used "raw" for its "free" operations
272    lhs: &'a probing_hash_table::HashMap<K, ()>,
273    // Create explicit iterators outside of the next() implementation
274    lhs_iter: probing_hash_table::Keys<'a, K, ()>,
275    rhs_iter: probing_hash_table::Keys<'a, K, ()>,
276}
277impl<'a, K> Union<'a, K> {
278    // Constructor that takes map references to create iterators outside of the next()
279    // implementation
280    fn build(
281        lhs: &'a probing_hash_table::HashMap<K, ()>,
282        rhs: &'a probing_hash_table::HashMap<K, ()>,
283    ) -> Union<'a, K>
284    where
285        K: Debug + Eq + Hash + PartialEq,
286    {
287        Union {
288            lhs_iter: lhs.keys(),
289            rhs_iter: rhs.keys(),
290            lhs,
291        }
292    }
293}
294impl<'a, K> Iterator for Union<'a, K>
295where
296    K: Debug + Eq + Hash,
297{
298    type Item = &'a K;
299
300    fn next(&mut self) -> Option<Self::Item> {
301        // Yield elements from lhs iterator
302        if let Some(key) = self.lhs_iter.next() {
303            return Some(key);
304        }
305
306        // Then yield only unique elements from the rhs iterator
307        self.rhs_iter.by_ref().find(|&key| !self.lhs.contains(key))
308        //while let Some(key) = self.rhs_iter.next() {
309        //    if !self.lhs.contains(key) {
310        //        return Some(key);
311        //    }
312        //}
313        //None
314    }
315}
316
317pub struct Intersection<'a, K> {
318    rhs_iter: probing_hash_table::Keys<'a, K, ()>,
319    lhs: &'a probing_hash_table::HashMap<K, ()>, // needed to skip duplicates from rhs
320}
321impl<'a, K> Iterator for Intersection<'a, K>
322where
323    K: Debug + Eq + Hash,
324{
325    type Item = &'a K;
326
327    fn next(&mut self) -> Option<Self::Item> {
328        // Yield only elements from both sets
329        self.rhs_iter.by_ref().find(|&key| self.lhs.contains(key))
330    }
331}
332
333pub struct Difference<'a, K> {
334    lhs_iter: probing_hash_table::Keys<'a, K, ()>,
335    rhs: &'a probing_hash_table::HashMap<K, ()>, // optional if needed
336}
337impl<'a, K> Iterator for Difference<'a, K>
338where
339    K: Debug + Eq + Hash,
340{
341    type Item = &'a K;
342
343    // Yield only elements from self not contained in other
344    fn next(&mut self) -> Option<Self::Item> {
345        self.lhs_iter.find(|&k| !self.rhs.contains(k))
346    }
347}
348
349pub struct Unique<'a, K> {
350    lhs_iter: probing_hash_table::Keys<'a, K, ()>,
351    rhs_iter: probing_hash_table::Keys<'a, K, ()>,
352    lhs: &'a probing_hash_table::HashMap<K, ()>,
353    rhs: &'a probing_hash_table::HashMap<K, ()>,
354}
355impl<'a, K> Unique<'a, K>
356where
357    K: Debug + Eq + Hash,
358{
359    pub fn new(
360        lhs: &'a probing_hash_table::HashMap<K, ()>,
361        rhs: &'a probing_hash_table::HashMap<K, ()>,
362    ) -> Self {
363        Self {
364            lhs,
365            rhs,
366            lhs_iter: lhs.keys(),
367            rhs_iter: rhs.keys(),
368        }
369    }
370}
371impl<'a, K> Iterator for Unique<'a, K>
372where
373    K: Debug + Eq + Hash,
374{
375    type Item = &'a K;
376
377    // Yield only elements not contained across both sets
378    fn next(&mut self) -> Option<Self::Item> {
379        // First yield from rhs elements not in lhs
380        if let Some(key) = self.rhs_iter.find(|&k| !self.lhs.contains(k)) {
381            return Some(key);
382        }
383
384        // Then reverse the operation to yield lhs elements not in rhs
385        self.lhs_iter.find(|&k| !self.rhs.contains(k))
386    }
387}
388
389// Unit tests
390/////////////
391
392#[test]
393// Basic function test
394fn hash_set_test() {
395    //Creates a new hash map
396    let mut set = HashSet::<&str>::new();
397    assert_eq!(set.size(), 0);
398    assert_eq!(set.map.len(), 0);
399
400    set.put("Peter");
401    set.put("Brain");
402    set.put("Bobson");
403    set.put("Peter"); // Oopsiedoodle, a duplicate!
404    set.put("Dichael");
405
406    assert_eq!(set.size(), 4);
407    assert_eq!(set.map.len(), 4);
408    assert!(!set.is_empty());
409    assert!(set.contains(&"Peter"));
410    assert!(set.contains(&"Brain"));
411    assert!(set.contains(&"Bobson"));
412    assert!(set.contains(&"Dichael"));
413}
414
415#[test]
416// Covers union() and extend()
417fn union() {
418    //Creates a new hash map
419    let mut set1 = HashSet::<&str>::new();
420    set1.put("Peter"); // +1
421    set1.put("Brain"); // +1
422    set1.put("Bobson"); // +1
423    set1.put("Dichael"); // +1
424
425    let mut set2 = HashSet::<&str>::new();
426    set2.put("Remus"); // +1
427    set2.put("Romulus"); // +1
428    set2.put("Bobson");
429    set2.put("Peter");
430
431    let mut union: Vec<&str> = Vec::new();
432    for e in set1.union(&set2) {
433        union.push(*e);
434    }
435    assert_eq!(union.len(), 6);
436    eprintln!("{:#?}", union);
437    // contains takes &T, which is &&str for the type
438    assert!(union.contains(&"Peter"));
439    assert!(union.contains(&"Brain"));
440    assert!(union.contains(&"Bobson"));
441    assert!(union.contains(&"Dichael"));
442
443    // Consumes set2 and extends set1
444    assert_eq!(set1.size(), 4);
445    set1.extend(set2);
446    assert_eq!(set1.size(), 6);
447    assert!(set1.contains("Peter"));
448    assert!(set1.contains("Brain"));
449    assert!(set1.contains("Bobson"));
450    assert!(set1.contains("Dichael"));
451    assert!(set1.contains("Remus"));
452    assert!(set1.contains("Romulus"));
453
454    //panic!("MANUAL TEST FAILURE");
455}
456
457#[test]
458// Covers intersection() and retain_all()
459fn intersection() {
460    //Creates a couple new hash sets
461    let mut set1 = HashSet::<&str>::new();
462    set1.put("Peter");
463    set1.put("Brain");
464    set1.put("Bobson");
465    set1.put("Dichael");
466
467    let mut set2 = HashSet::<&str>::new();
468    set2.put("Remus");
469    set2.put("Romulus");
470    set2.put("Bobson");
471    set2.put("Glank");
472    set2.put("Flock");
473    set2.put("Peter");
474
475    // Creates a vec of borrowed keys contained only in both sets
476    let mut intersection: Vec<&str> = Vec::new();
477    for e in set1.intersection(&set2) {
478        intersection.push(*e);
479    }
480    eprintln!("{:#?}", intersection);
481    assert_eq!(intersection.len(), 2);
482    assert!(intersection.contains(&"Peter"));
483    assert!(intersection.contains(&"Bobson"));
484
485    // Mutates self to contain only keys ALSO contained in other
486    eprintln!("{:#?}", set1.map);
487    assert_eq!(set1.size(), 4);
488    assert!(set1.contains(&"Peter"));
489    assert!(set1.contains(&"Brain"));
490    assert!(set1.contains(&"Bobson"));
491    assert!(set1.contains(&"Dichael"));
492    set1.retain_all(&set2); // Removes two entries
493    eprintln!("{:#?}", set1.map);
494    assert_eq!(set1.size(), 2);
495    assert!(set1.contains(&"Peter"));
496    assert!(set1.contains(&"Bobson"));
497
498    //panic!("MANUAL TEST FAILURE");
499}
500
501#[test]
502// Covers difference() and unique()
503fn difference() {
504    //Creates a couple new hash sets
505    let mut set1 = HashSet::<&str>::new();
506    set1.put("Peter");
507    set1.put("Brain"); // +1
508    set1.put("Bobson");
509    set1.put("Dichael"); // +1
510
511    let mut set2 = HashSet::<&str>::new();
512    set2.put("Remus"); // +1
513    set2.put("Romulus"); // +1
514    set2.put("Bobson");
515    set2.put("Flock"); // +1
516    set2.put("Peter");
517
518    // Creates a vec of borrowed keys contained only in both sets
519    let mut difference: Vec<&str> = Vec::new();
520    for e in set1.difference(&set2) {
521        difference.push(*e);
522    }
523    eprintln!("{:#?}", difference);
524    assert_eq!(difference.len(), 2);
525    assert!(difference.contains(&"Brain"));
526    assert!(difference.contains(&"Dichael"));
527
528    // Proves that difference() is positionally dependent
529    let mut difference: Vec<&str> = Vec::new();
530    for e in set2.difference(&set1) {
531        difference.push(*e);
532    }
533    eprintln!("{:#?}", difference);
534    assert_eq!(difference.len(), 3);
535    assert!(difference.contains(&"Remus"));
536    assert!(difference.contains(&"Romulus"));
537    assert!(difference.contains(&"Flock"));
538
539    // Creates a vec of borrowed keys contained only in both sets
540    let mut unique: Vec<&str> = Vec::new();
541    for e in set1.unique(&set2) {
542        unique.push(*e);
543    }
544    eprintln!("{:#?}", unique);
545    assert_eq!(unique.len(), 5);
546    assert!(unique.contains(&"Brain"));
547    assert!(unique.contains(&"Dichael"));
548    assert!(unique.contains(&"Remus"));
549    assert!(unique.contains(&"Romulus"));
550    assert!(unique.contains(&"Flock"));
551
552    //panic!("MANUAL TEST FAILURE");
553}