dsa_rust/associative/
hash_set.rs

1#![allow(clippy::uninlined_format_args)] // Cant inline all args in print
2
3/*! A set structure based on this library's HashMap
4
5# About
6This structure provides common binary set operations (as `self` and `other`), mostly as an excuse to explore implementing custom iterators.
7- [Union](crate::associative::hash_set::HashSet::union): Returns an iterator over references to all elements that appear in either `self` or `other` as a set-theoretic [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
8- [Extend](crate::associative::hash_set::HashSet::extend): Mutates `self` to contain all unique elements in both `self` and `other`. This is a mutating version of `union()`.
9- [Intersection](crate::associative::hash_set::HashSet::intersection): Returns an iterator over references to elements that appear in both `self` and `other` as a set-theoretic [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
10- [Retain All](crate::associative::hash_set::HashSet::retain_all): Mutates `self` such that self only keeps elements that are also in `other`. This function could also appropriately be called "intersect", but that was too close to the non-mutating version of this function called "intersection". 
11- [Difference](crate::associative::hash_set::HashSet::difference): Returns an iterator over references to elements that are present in `self` but not in `other` as a set-theoretic asymmetric difference or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)). This operation is asymmetric: interchanging self and other generally produces different results.
12- [Unique](crate::associative::hash_set::HashSet::unique): Returns an iterator over references to elements that are present in exactly one of the two sets as a set-theoretic [symmetric difference](https://en.wikipedia.org/wiki/Symmetric_difference). This operation is symmetric: interchanging self and other yields the same result.
13
14# Design
15The structure is based on this library's (open-addressing) [hash map](crate::associative::probing_hash_table). Each entry is stored as `(T, ())`, and because of the underlying implementation, `T` must be [Hash], [PartialEq], and [Eq].
16
17# Example
18
19```rust
20use dsa_rust::associative::hash_set::HashSet;
21
22    //Creates a couple new hash sets
23    let mut set1 = HashSet::<u8>::new();
24    // [0, 1, 2, 3]
25    for n in 0..=3 {
26        set1.put(n); 
27    }
28
29    let mut set2 = HashSet::<u8>::new();
30    // [2, 3, 4, 5]
31    for n in 2..=5 {
32        set2.put(n); 
33    }
34
35    // Creates a list of all elements in both self and other
36    let mut union: Vec<u8> = Vec::new();
37    for e in set1.union(&set2) {
38        union.push(*e); 
39    }
40    assert_eq!(union.len(), 6);
41    assert!(union.contains(&0));
42    assert!(union.contains(&1));
43    assert!(union.contains(&2));
44    assert!(union.contains(&3));
45    assert!(union.contains(&4));
46    assert!(union.contains(&5));
47
48    // Creates a list of elements only present in both sets
49    let mut intersection: Vec<u8> = Vec::new();
50    for e in set1.intersection(&set2) {
51        intersection.push(*e); 
52    }
53    assert_eq!(intersection.len(), 2);
54    assert!(intersection.contains(&2));
55    assert!(intersection.contains(&3));
56
57    // Creates a list of elements from self not present in other
58    let mut difference: Vec<u8> = Vec::new();
59    for e in set1.difference(&set2) {
60        difference.push(*e); 
61    }
62    assert_eq!(difference.len(), 2);
63    assert!(difference.contains(&0));
64    assert!(difference.contains(&1));
65
66    // Proves that difference() is asymmetric 
67    let mut difference: Vec<u8> = Vec::new();
68    for e in set2.difference(&set1) {
69        difference.push(*e); 
70    }
71    assert_eq!(difference.len(), 2);
72    assert!(difference.contains(&4));
73    assert!(difference.contains(&5));
74
75    // Creates a list of elements present in either the first or 
76    // second one set, but not both
77    let mut unique: Vec<u8> = Vec::new();
78    for e in set1.unique(&set2) {
79        unique.push(*e); 
80    }
81    assert_eq!(unique.len(), 4);
82    assert!(unique.contains(&0));
83    assert!(unique.contains(&1));
84    assert!(unique.contains(&4));
85    assert!(unique.contains(&5));
86
87    // Illustrates that unique() is symmetric
88    let mut unique: Vec<u8> = Vec::new();
89    for e in set2.unique(&set1) {
90        unique.push(*e); 
91    }
92    assert_eq!(unique.len(), 4);
93    assert!(unique.contains(&0));
94    assert!(unique.contains(&1));
95    assert!(unique.contains(&4));
96    assert!(unique.contains(&5));
97```
98
99*/
100
101use crate::associative::probing_hash_table;
102
103use std::fmt::Debug;
104use std::hash::Hash;
105
106pub struct HashSet<T> {
107    map: probing_hash_table::HashMap<T, ()>
108}
109impl<T> Default for HashSet<T>
110where
111    T: Clone + Default + Debug + Hash + Eq + PartialEq,
112 {
113    fn default() -> Self {
114        Self::new()
115    }
116}
117impl<T> HashSet<T>
118where
119    T: Clone + Default + Debug + Hash + Eq + PartialEq, {
120
121    // Basic operations
122    ///////////////////
123
124    /// Constructor for an empty set with a default capacity of 2 
125    /// because everyone deserves a friend.
126    pub fn new() -> Self {
127        let new_capacity = 2;
128        Self {
129            map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(new_capacity),
130        }
131    }
132
133    /// Constructor for an empty set with a given capacity.
134    pub fn new_with_capacity(size: usize) -> Self {
135        Self {
136            map: probing_hash_table::HashMap::<T, ()>::new_with_capacity(size),
137        }
138    }
139
140    /// Returns the number of elements in the set.
141    pub fn size(&self) -> usize {
142        self.map.len()
143    }
144
145    /// Returns true if the set is either empty or contains only empty slots and deleted entries.
146    pub fn is_empty(&self) -> bool {
147        self.map.is_empty()
148    }
149
150    /// Adds an item to the set.
151    pub fn put(&mut self, elem: T) {
152        self.map.put(elem, ());
153    }
154
155    /// Removes an item from the set.
156    pub fn remove(&mut self, elem: T) -> Option<T> {
157        if let Some(val) = self.map.remove(&elem) {
158            return Some(val.key().clone());
159        } None
160    }
161
162    /// Returns true if the set contains the referenced element.
163    pub fn contains<Q>(&self, elem: &Q) -> bool
164    where
165        T: std::borrow::Borrow<Q>,
166        Q: Debug + Hash + Eq + ?Sized,
167    {
168        self.map.contains(elem) // Uses the HashMap's contains() function
169    }
170
171    // Union operations
172    ///////////////////
173
174    /// Returns an iterator over references to all elements that appear 
175    /// in either `self` or `other` as a set-theoretic 
176    /// [union](https://en.wikipedia.org/wiki/Union_(set_theory)).
177    pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, T> {
178        Union::build(&self.map, &other.map)
179    }
180
181    /// A mutating version of `union()` that consumes `other` to add 
182    /// all of its elements to `self`.
183    pub fn extend(&mut self, other: Self) {
184        let other_iter = other.map.into_iter();
185        for key in other_iter {
186            if !self.map.contains(&key.0) {
187                self.put(key.0)
188            }
189        }
190    } 
191
192    //Intersection operations
193    /////////////////////////
194
195    /// Returns an iterator over references to elements that appear 
196    /// in both `self` and `other` as a set-theoretic 
197    /// [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)).
198    pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, T> {
199        let (longer, shorter) = if self.map.len() >= other.map.len() {
200            (&self.map, &other.map)
201        } else {
202            (&other.map, &self.map)
203        };
204        Intersection {
205            lhs: longer,
206            rhs_iter: shorter.keys(),
207        }
208    }
209
210    /// Mutates `self` such that `self` only keeps elements that are also in `other`.
211    /// Different from the function `retain` which takes a closure to only retain elements
212    /// according to the predicate.
213    ///
214    /// NOTE: This function could also appropriately be called "intersect", but that was
215    /// too close to the non-mutating version of this function called "intersection".
216    // O(m*n)
217    pub fn retain_all(&mut self, other: &Self) {
218        // Collect keys to remove
219        let keys_to_remove: Vec<_> = self.map.keys()
220            .filter(|k| !other.map.contains(k))
221            .cloned() // collect owned keys if necessary
222            .collect();
223    
224        // Remove them safely
225        for key in keys_to_remove {
226            self.map.remove(&key);
227        }
228    }
229
230    // Subtraction operations
231    /////////////////////////
232
233    /// Returns an iterator over references to elements that are present 
234    /// in `self` but not in `other` as a set-theoretic asymmetric difference 
235    /// or [relative complement](https://en.wikipedia.org/wiki/Complement_(set_theory)). 
236    /// This operation is asymmetric: interchanging self and other 
237    /// generally produces different results.
238    pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T> {
239        //let (longer, shorter) = if self.map.len() <= other.map.len() {
240        //    (&self.map, &other.map)
241        //} else {
242        //    (&other.map, &self.map)
243        //};
244        //Diff::new(longer, shorter)
245        Difference {
246            lhs_iter: self.map.keys(), 
247            rhs: &other.map
248        }
249
250    }
251
252    /// Returns an iterator over references to elements that are present 
253    /// in exactly one of the two sets as a set-theoretic [symmetric 
254    /// difference](https://en.wikipedia.org/wiki/Symmetric_difference). 
255    /// This operation is symmetric: interchanging `self` and `other` 
256    /// yields the same result.
257    pub fn unique<'a>(&'a self, other: &'a Self) -> Unique<'a, T> {
258        Unique::new(&self.map, &other.map)
259    }
260
261    // Filter operations
262    ////////////////////
263
264    // Mutates `S` to only contain values that adhere to the given predicate (lambda)
265    //pub fn retain(&mut self, |predicate|) {}
266
267}
268
269pub struct Union<'a, K> {
270    // Used "raw" for its "free" operations
271    lhs: &'a probing_hash_table::HashMap<K, ()>,
272    // Create explicit iterators outside of the next() implementation
273    lhs_iter: probing_hash_table::Keys<'a, K, ()>, 
274    rhs_iter: probing_hash_table::Keys<'a, K, ()>,
275}
276impl<'a, K> Union<'a, K> {
277    // Constructor that takes map references to create iterators outside of the next()
278    // implementation
279    fn build(
280        lhs: &'a probing_hash_table::HashMap<K, ()>, 
281        rhs: &'a probing_hash_table::HashMap<K, ()>) 
282    -> Union<'a, K> 
283    where
284        K: Debug + Eq + Hash + PartialEq,
285    {
286        Union {
287            lhs_iter: lhs.keys(),
288            rhs_iter: rhs.keys(),
289            lhs,
290        }
291    }
292}
293impl<'a, K> Iterator for Union<'a, K>
294where
295    K: Debug + Eq + Hash,
296{
297    type Item = &'a K;
298
299    fn next(&mut self) -> Option<Self::Item> {
300        // Yield elements from lhs iterator
301        if let Some(key) = self.lhs_iter.next() {
302            return Some(key);
303        }
304
305        // Then yield only unique elements from the rhs iterator
306        self.rhs_iter.by_ref().find(|&key| !self.lhs.contains(key))
307        //while let Some(key) = self.rhs_iter.next() {
308        //    if !self.lhs.contains(key) {
309        //        return Some(key);
310        //    }
311        //}
312        //None
313    }
314}
315
316pub struct Intersection<'a, K> {
317    rhs_iter: probing_hash_table::Keys<'a, K, ()>,
318    lhs: &'a probing_hash_table::HashMap<K, ()>, // needed to skip duplicates from rhs
319}
320impl<'a, K> Iterator for Intersection<'a, K>
321where
322    K: Debug + Eq + Hash,
323{
324    type Item = &'a K;
325
326    fn next(&mut self) -> Option<Self::Item> {
327        // Yield only elements from both sets
328        self.rhs_iter.by_ref().find(|&key| self.lhs.contains(key))
329    }
330}
331
332pub struct Difference<'a, K> {
333    lhs_iter: probing_hash_table::Keys<'a, K, ()>,
334    rhs: &'a probing_hash_table::HashMap<K, ()>, // optional if needed
335}
336impl<'a, K> Iterator for Difference<'a, K>
337where
338    K: Debug + Eq + Hash,
339{
340    type Item = &'a K;
341
342    // Yield only elements from self not contained in other
343    fn next(&mut self) -> Option<Self::Item> {
344        self.lhs_iter.find(|&k| !self.rhs.contains(k))
345    }
346}
347
348pub struct Unique<'a, K> {
349    lhs_iter: probing_hash_table::Keys<'a, K, ()>,
350    rhs_iter: probing_hash_table::Keys<'a, K, ()>,
351    lhs: &'a probing_hash_table::HashMap<K, ()>,
352    rhs: &'a probing_hash_table::HashMap<K, ()>,
353}
354impl<'a, K> Unique<'a, K>
355where
356    K: Debug + Eq + Hash,
357{
358    pub fn new(lhs: &'a probing_hash_table::HashMap<K, ()>, rhs: &'a probing_hash_table::HashMap<K, ()>) -> Self {
359        Self {
360            lhs,
361            rhs,
362            lhs_iter: lhs.keys(),
363            rhs_iter: rhs.keys(),
364        }
365    }
366}
367impl<'a, K> Iterator for Unique<'a, K>
368where
369    K: Debug + Eq + Hash,
370{
371    type Item = &'a K;
372
373    // Yield only elements not contained across both sets
374    fn next(&mut self) -> Option<Self::Item> {
375        // First yield from rhs elements not in lhs
376        if let Some(key) = self.rhs_iter.find(|&k| !self.lhs.contains(k)) {
377            return Some(key);
378        }
379
380        // Then reverse the operation to yield lhs elements not in rhs
381        self.lhs_iter.find(|&k| !self.rhs.contains(k))
382    }
383}
384
385
386// Unit tests
387/////////////
388
389#[test]
390// Basic function test
391fn hash_set_test() {
392    //Creates a new hash map
393    let mut set = HashSet::<&str>::new();
394    assert_eq!(set.size(), 0);
395    assert_eq!(set.map.len(), 0);
396
397    set.put("Peter");
398    set.put("Brain");
399    set.put("Bobson");
400    set.put("Peter"); // Oopsiedoodle, a duplicate!
401    set.put("Dichael");
402
403    assert_eq!(set.size(), 4);
404    assert_eq!(set.map.len(), 4);
405    assert!(!set.is_empty());
406    assert!(set.contains(&"Peter"));
407    assert!(set.contains(&"Brain"));
408    assert!(set.contains(&"Bobson"));
409    assert!(set.contains(&"Dichael"));
410}
411
412#[test]
413// Covers union() and extend()
414fn union() {
415    //Creates a new hash map
416    let mut set1 = HashSet::<&str>::new();
417    set1.put("Peter"); // +1
418    set1.put("Brain"); // +1
419    set1.put("Bobson"); // +1
420    set1.put("Dichael"); // +1
421
422    let mut set2 = HashSet::<&str>::new();
423    set2.put("Remus"); // +1
424    set2.put("Romulus"); // +1
425    set2.put("Bobson"); 
426    set2.put("Peter"); 
427
428    let mut union: Vec<&str> = Vec::new();
429    for e in set1.union(&set2) {
430        union.push(*e); 
431    }
432    assert_eq!(union.len(), 6);
433    eprintln!("{:#?}", union);
434    // contains takes &T, which is &&str for the type
435    assert!(union.contains(&"Peter"));
436    assert!(union.contains(&"Brain"));
437    assert!(union.contains(&"Bobson"));
438    assert!(union.contains(&"Dichael"));
439
440    // Consumes set2 and extends set1
441    assert_eq!(set1.size(), 4);
442    set1.extend(set2);
443    assert_eq!(set1.size(), 6);
444    assert!(set1.contains("Peter"));
445    assert!(set1.contains("Brain"));
446    assert!(set1.contains("Bobson"));
447    assert!(set1.contains("Dichael"));
448    assert!(set1.contains("Remus"));
449    assert!(set1.contains("Romulus"));
450    
451    //panic!("MANUAL TEST FAILURE");
452}
453
454#[test]
455// Covers intersection() and retain_all()
456fn intersection() {
457    //Creates a couple new hash sets
458    let mut set1 = HashSet::<&str>::new();
459    set1.put("Peter");
460    set1.put("Brain");
461    set1.put("Bobson");
462    set1.put("Dichael");
463
464    let mut set2 = HashSet::<&str>::new();
465    set2.put("Remus");
466    set2.put("Romulus");
467    set2.put("Bobson"); 
468    set2.put("Glank"); 
469    set2.put("Flock"); 
470    set2.put("Peter"); 
471
472    // Creates a vec of borrowed keys contained only in both sets
473    let mut intersection: Vec<&str> = Vec::new();
474    for e in set1.intersection(&set2) {
475        intersection.push(*e); 
476    }
477    eprintln!("{:#?}", intersection);
478    assert_eq!(intersection.len(), 2);
479    assert!(intersection.contains(&"Peter"));
480    assert!(intersection.contains(&"Bobson"));
481
482    // Mutates self to contain only keys ALSO contained in other
483    eprintln!("{:#?}", set1.map);
484    assert_eq!(set1.size(), 4);
485    assert!(set1.contains(&"Peter"));
486    assert!(set1.contains(&"Brain"));
487    assert!(set1.contains(&"Bobson"));
488    assert!(set1.contains(&"Dichael"));
489    set1.retain_all(&set2); // Removes two entries
490    eprintln!("{:#?}", set1.map);
491    assert_eq!(set1.size(), 2);
492    assert!(set1.contains(&"Peter"));
493    assert!(set1.contains(&"Bobson"));
494
495    //panic!("MANUAL TEST FAILURE");
496}
497
498#[test]
499// Covers difference() and unique()
500fn difference() {
501    //Creates a couple new hash sets
502    let mut set1 = HashSet::<&str>::new();
503    set1.put("Peter"); 
504    set1.put("Brain"); // +1
505    set1.put("Bobson"); 
506    set1.put("Dichael"); // +1 
507
508    let mut set2 = HashSet::<&str>::new();
509    set2.put("Remus"); // +1
510    set2.put("Romulus"); // +1
511    set2.put("Bobson"); 
512    set2.put("Flock");  // +1
513    set2.put("Peter"); 
514
515    // Creates a vec of borrowed keys contained only in both sets
516    let mut difference: Vec<&str> = Vec::new();
517    for e in set1.difference(&set2) {
518        difference.push(*e); 
519    }
520    eprintln!("{:#?}", difference);
521    assert_eq!(difference.len(), 2);
522    assert!(difference.contains(&"Brain"));
523    assert!(difference.contains(&"Dichael"));
524
525    // Proves that difference() is positionally dependent
526    let mut difference: Vec<&str> = Vec::new();
527    for e in set2.difference(&set1) {
528        difference.push(*e); 
529    }
530    eprintln!("{:#?}", difference);
531    assert_eq!(difference.len(), 3);
532    assert!(difference.contains(&"Remus"));
533    assert!(difference.contains(&"Romulus"));
534    assert!(difference.contains(&"Flock"));
535
536    // Creates a vec of borrowed keys contained only in both sets
537    let mut unique: Vec<&str> = Vec::new();
538    for e in set1.unique(&set2) {
539        unique.push(*e); 
540    }
541    eprintln!("{:#?}", unique);
542    assert_eq!(unique.len(), 5);
543    assert!(unique.contains(&"Brain"));
544    assert!(unique.contains(&"Dichael"));
545    assert!(unique.contains(&"Remus"));
546    assert!(unique.contains(&"Romulus"));
547    assert!(unique.contains(&"Flock"));
548
549    //panic!("MANUAL TEST FAILURE");
550}