dsa_rust/associative/
chaining_hash_table.rs

1/*! Safe chaining hash table with division compression
2
3# About
4This structure represents a relatively simple way to implement a hash map. The structure relies on chaining for collision handling which allocates separate `Vec` structures for each node.
5
6# Design
7The structure's primary backing structure [ChainingHashTable<K, V>](crate::associative::ChainingHashTable::ChainingHashTable) contains a `data` field of type `Vec<Option<Vec<Entry<K, V>>>>`. The nested `Vec`s provide hash collision provention, and in practice might look something like the following:
8
9```text
10+----------------------------------------+
11| Index | Contents                       |
12|-------+--------------------------------|
13|   1   | None                           |
14|   2   | None                           |
15|   3   | Some((Peter, 45), (Bryan, 67)) |
16|   5   | None                           |
17|   6   | None                           |
18|   7   | Some((Cork, 103))              |
19|   8   | Some((Bobson, 12))             |
20|   9   | None                           |
21|   10  | Some((Flank, 23))              |
22|   11  | None                           |
23+----------------------------------------+
24```
25
26# Example
27
28```rust
29```
30*/
31
32use crate::associative::hash_lib;
33use std::fmt::Debug;
34use std::hash::Hash;
35
36#[derive(Debug)]
37pub struct Entry<K, V> {
38    key: K,
39    value: V,
40}
41impl<K, V> Entry<K, V> {
42    fn new(key: K, value: V) -> Entry<K, V> {
43        Entry { key, value }
44    }
45}
46
47#[derive(Debug)]
48pub struct ChainingHashTable<K, V> {
49    data: Vec<Option<Vec<Entry<K, V>>>>,
50    size: usize,
51}
52impl<K: Hash + Debug + PartialEq, V: PartialEq + Clone> Default for ChainingHashTable<K, V> {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57impl<K: Hash + Debug + PartialEq, V: PartialEq + Clone> ChainingHashTable<K, V> {
58    /** Creates a new HashTable */
59    pub fn new() -> ChainingHashTable<K, V> {
60        ChainingHashTable {
61            data: Vec::with_capacity(2),
62            size: 0,
63        }
64    }
65
66    /** Returns the number of elements in the HashTable */
67    pub fn size(&self) -> usize {
68        self.size
69    }
70
71    /** Returns a Boolean indicating whether the HashTable is empty */
72    pub fn is_empty(&self) -> bool {
73        self.size == 0
74    }
75
76    /** Returns the value `v` associated with key `k` */
77    pub fn get(&self, key: K) -> Option<&V> {
78        let hashed = hash_lib::hash(&key);
79        let location: usize = hash_lib::division_compression(hashed, self.data.len());
80        if let Some(bucket) = &self.data[location] {
81            for e in bucket {
82                let chain_key_hash = hash_lib::hash(&e.key);
83                if hashed == chain_key_hash {
84                    return Some(&e.value);
85                }
86            }
87            // Required because the for loop contains early return statement
88            // but does not return a value when no match is found
89            None
90        } else {
91            // Required because function returns and if let Some is inexhaustive
92            None
93        }
94    }
95
96    /** Adds entry `(k, v)`, overwriting any value `v` associated with an
97    existing key `k`, returns old value. Resizes the map which the
98    table encounters a load factor >.75. */
99    pub fn put(&mut self, key: K, value: V) {
100        // Checks if the addition will bring the load factor above threshold
101        if self.size == 0 || (self.size + 1) as f64 / self.data.len() as f64 > 0.5 {
102            self.grow()
103        }
104
105        // Finds the correct insertion location
106        let location: usize = hash_lib::division_compression(hash_lib::hash(&key), self.data.len());
107
108        // Creates a new Entry
109        let entry = Entry::new(key, value);
110
111        // Inserts the Entry
112        match &mut self.data[location] {
113            Some(v) => v.push(entry),
114            None => {
115                self.data[location] = Some(vec![entry]);
116            }
117        }
118        self.size += 1;
119    }
120
121    /** Internal function that grows the base vector to the next prime larger than
122    double the length of the original vector, rehashes and compresses hashes
123    for new distribution */
124    fn grow(&mut self) {
125        // Create a new base vector with_capacity and resize_with to ensure all
126        // indexes exist, otherwise you could push to an index that doesn't
127        // exist causing a panic
128        let new_capacity = hash_lib::next_prime(self.data.len() * 2);
129        let mut new_base: Vec<Option<Vec<Entry<K, V>>>> = Vec::with_capacity(new_capacity);
130        new_base.resize_with(new_capacity, || None);
131
132        // Move entries from self.data into new_base
133        for mut bucket in self.data.drain(..).flatten() {
134            // Vec::drain transfers ownership with no need to clone
135            for entry in bucket.drain(..) {
136                let rehash =
137                    hash_lib::division_compression(hash_lib::hash(&entry.key), new_capacity);
138                match &mut new_base[rehash] {
139                    Some(existing) => existing.push(entry),
140                    None => new_base[rehash] = Some(vec![entry]),
141                }
142            }
143        }
144
145        // Update the struct instance
146        self.data = new_base;
147    }
148
149    /**  Removes the entry `(k, v)` associated with key `k` */
150    pub fn remove(&mut self, key: K) {
151        let hashed = hash_lib::hash(&key);
152        let location: usize = hash_lib::division_compression(hashed, self.data.len());
153        if let Some(bucket) = &mut self.data[location] {
154            bucket.retain(|e| e.key != key);
155            if bucket.is_empty() {
156                self.data[location] = None; // Replace Some with None for empty buckets
157            }
158        }
159        self.size -= 1;
160    }
161
162    /**  Returns an iterable collection of all keys in the map */
163    pub fn key_set(&self) -> Vec<&K> {
164        let mut v = Vec::new();
165        for k in self.iter().keys() {
166            v.push(k)
167        }
168        v
169    }
170
171    /**  Returns an iterable collection of all values in the map, including
172    repeats for multiple key-value associations */
173    pub fn values(&self) -> Vec<&V> {
174        let mut vec = Vec::new();
175        for v in self.iter().values() {
176            vec.push(v)
177        }
178        vec
179    }
180
181    /**  Returns an iterable collection of all `(k, v)` entries in the map */
182    pub fn entry_set(&self) -> Vec<&Entry<K, V>> {
183        let mut vec = Vec::new();
184        for e in self.iter() {
185            vec.push(e)
186        }
187        vec
188    }
189
190    /**  Returns a Boolean if the map contains _key_ `k`; Used to disambiguate
191    the presence of a key with a null/None value */
192    pub fn contains(&self, key: K) -> bool {
193        let hashed = hash_lib::hash(&key);
194        let location: usize = hash_lib::division_compression(hashed, self.data.len());
195        if let Some(bucket) = &self.data[location] {
196            for e in bucket.iter() {
197                if e.key == key {
198                    return true;
199                }
200            }
201            // Required because for loop contains an early return statement
202            // but does not return a value if no match is found
203            false
204        } else {
205            // Required because function returns and if let Some is inexhaustive
206            false
207        }
208    }
209
210    /** A function that follows the naming convention for iterable collections;
211    Returns struct Iter that tracks iteration state */
212    pub fn iter(&self) -> Iter<'_, K, V> {
213        Iter {
214            outer: self.data.iter(),
215            inner: None,
216        }
217    }
218} // End of ChainingHashTable implementation
219
220// Create an iterator as a struct to track iteration state
221pub struct Iter<'a, K, V> {
222    outer: std::slice::Iter<'a, Option<Vec<Entry<K, V>>>>,
223    inner: Option<std::slice::Iter<'a, Entry<K, V>>>,
224}
225// Direct implementation for adapter methods
226impl<'a, K, V> Iter<'a, K, V> {
227    /// Adapter method for iterating over the keys of a map;
228    /// Example:
229    /// ```example
230    ///     for k in map.iter().keys() { ... }
231    /// ```
232    pub fn keys(self) -> Keys<'a, K, V> {
233        Keys { iter: self }
234    }
235    /// Adapter method for iterating over the values of a map;
236    /// Example:
237    /// ```example
238    ///     for v in map.iter().values() { ... }
239    /// ```
240    pub fn values(self) -> Values<'a, K, V> {
241        Values { iter: self }
242    }
243}
244// Implement Iterator for the struct
245impl<'a, K, V> Iterator for Iter<'a, K, V> {
246    type Item = &'a Entry<K, V>;
247
248    fn next(&mut self) -> Option<Self::Item> {
249        loop {
250            if let Some(ref mut inner) = self.inner {
251                if let Some(entry) = inner.next() {
252                    return Some(entry);
253                }
254            }
255            // Move to the next bucket if available
256            self.inner = self.outer.next()?.as_ref().map(|vec| vec.iter());
257        }
258    }
259}
260pub struct Keys<'a, K, V> {
261    iter: Iter<'a, K, V>,
262}
263impl<'a, K, V> Iterator for Keys<'a, K, V> {
264    type Item = &'a K;
265
266    fn next(&mut self) -> Option<Self::Item> {
267        self.iter.next().map(|entry| &entry.key)
268    }
269}
270pub struct Values<'a, K, V> {
271    iter: Iter<'a, K, V>,
272}
273impl<'a, K, V> Iterator for Values<'a, K, V> {
274    type Item = &'a V;
275
276    fn next(&mut self) -> Option<Self::Item> {
277        self.iter.next().map(|entry| &entry.value)
278    }
279}
280
281#[test]
282fn chaining_hash_table_test() {
283    //Creates a new hash map
284    let mut map = ChainingHashTable::<&str, u8>::new();
285
286    // Illustrates that put() and get() work
287    map.put("Peter", 41);
288    assert_eq!(map.size, 1);
289    assert_eq!(map.data.len(), 2);
290    let fetch = map.get("Peter").unwrap();
291    assert_eq!(*fetch, 41_u8);
292
293    // Illustrates that the map grows correctly
294    map.put("Brain", 39); // Grows the map
295    assert_eq!(map.data.len(), 5);
296    map.put("Remus", 22);
297    map.put("Bobson", 36); // Grows the map
298    assert_eq!(map.data.len(), 11);
299    map.put("Dingus", 18);
300    map.put("Dangus", 27); // Grows the map
301    assert_eq!(map.size, 6);
302    assert_eq!(map.data.len(), 23);
303
304    // Illustrates that remove() works as intended
305    assert!(map.contains("Dingus"));
306    map.remove("Dingus");
307    assert!(!map.contains("Dingus"));
308}
309
310pub fn example() {
311    //Creates a new hash map
312    let mut map = ChainingHashTable::<&str, u8>::new();
313    //Creates several entries
314    map.put("Peter", 41);
315    map.put("Brain", 39);
316    map.put("Remus", 22);
317    map.put("Bobson", 36);
318    map.put("Dingus", 18);
319    map.put("Dangus", 27);
320
321    // Prints a debug version of the map
322    println!("Debug print of the whole shebang:\n");
323    for e in map.iter() {
324        println!("{e:?}")
325    }
326
327    let value = map.get("Peter").unwrap();
328    println!("\nmap.get(\"Peter\"): {value}");
329
330    // Its now iterable!!!
331    println!("Iterating over all entries:");
332    for e in map.iter() {
333        println!("\t{e:?}")
334    }
335    println!("\nNow just the keys:");
336    for k in map.iter().keys() {
337        println!("\t{k}")
338    }
339    println!("\nNow just the values:");
340    for v in map.iter().values() {
341        println!("\t{v}")
342    }
343
344    // Does the same thing with public methods
345    let _entries: Vec<&Entry<&str, u8>> = map.entry_set();
346    let _keys: Vec<&&str> = map.key_set();
347    let _values: Vec<&u8> = map.values();
348
349    map.remove("Brain");
350    map.remove("Remus");
351    map.remove("Bobson");
352    map.remove("Dingus");
353    map.remove("Dangus");
354
355    println!("\nIts all over now:");
356    for e in map.iter() {
357        println!("{e:?}")
358    }
359}