dsa_rust/associative/
hash_lib.rs

1/*! Hashing & compression functions
2
3The utility functions in this module can be used to implement various hashing and compression algorithms
4
5*/
6
7// HOME-ROLLED HASHING
8//////////////////////
9
10/// Illustrates the concept of bit shifting;
11/// Takes a string a prints the bit and integer value of each character before
12/// performing a bit shift operation and printing the result as bit and integer values
13#[allow(clippy::manual_rotate)]
14pub fn bit_shift_visualization(value: &str) {
15    for mut v in value.bytes() {
16        print!("{v:08b} ({v}) -> ");
17        v = (v << 5) | (v >> 3);
18        // clippy wants a function
19        //v = v.rotate_right(3);
20        println!("{v:08b} ({v})");
21    }
22}
23
24/// Calculates a bit-shifted hash code;
25/// The function initializes a 32-bit hash code integer to 0,
26/// then loops over each character in the input string;
27/// Each loop adds the next character in the string to the hash code
28/// as an integer value with wrapping; This ensures consistency
29/// across architectures; The next operation in each loop performs
30/// a cyclic bit shift on the hash code, and the process repeats
31#[allow(clippy::manual_rotate)]
32pub fn simple_hash_function(key: &str) -> u32 {
33    let mut hash: u32 = 0;
34    for v in key.bytes() {
35        print!("{v:08b} -> "); // Prints the character's bit value
36                               //hash += v as u32; // Adds the character to the hash
37        hash = hash.wrapping_add(v as u32); // Adds the character to the hash
38        hash = (hash << 5) | (hash >> 27); // Shifts the hash left by 5 places
39                                           // clippy wants a function
40                                           //hash = hash.rotate_left(5);
41        println!("{hash:032b}"); // Prints the resultant hash value
42    }
43    hash
44}
45#[test]
46fn hash_code_test() {
47    let v = simple_hash_function("Peter");
48    assert_eq!(v, 2794168896);
49
50    //let v = visual_hash_code("This block overflows the value");
51    //assert_eq!(v, 3862340559);
52
53    let v = simple_hash_function("Simple hash digest");
54    assert_eq!(v, 1741143911);
55}
56
57// STANDARD LIBRARY HASHING
58///////////////////////////
59
60// Explores Rust's default hashing functionality
61use std::collections::hash_map::DefaultHasher;
62use std::fmt::Debug;
63use std::hash::{Hash, Hasher};
64
65/// Takes a reference to a type `T` and uses Rust's default hasher to return a
66/// 64-bit digest.
67pub fn hash<T: Hash + Debug + ?Sized>(key: &T) -> usize {
68    let mut hasher = DefaultHasher::new();
69    key.hash(&mut hasher); // Hash::hash()
70    let digest = hasher.finish(); // Hasher::finish()
71    digest as usize
72}
73
74/** Does the same thing as hash() but feeds individual bytes which produces a
75slightly less efficient (and different) digest */
76pub fn hash_1(key: &str) -> u64 {
77    let mut hasher = DefaultHasher::new();
78    for e in key.bytes() {
79        hasher.write_u8(e)
80    }
81    hasher.finish()
82}
83
84// COMPRESSION
85//////////////
86
87/// Super simple division compression as `i mod N`.
88/// Produces a deterministic output, works best when `n` (len) is prime.
89pub fn division_compression(key: usize, len: usize) -> usize {
90    key % len
91}
92
93/// Efficient primality test using the 6k ± 1 rule for numbers >3 by trial.
94fn is_prime(n: usize) -> bool {
95    // Only positive numbers >1 need apply
96    if n < 2 {
97        return false;
98    }
99    // The lil guys get some love
100    if n == 2 || n == 3 {
101        return true;
102    }
103    // Skips evens and numbers divisible by 3
104    if n % 2 == 0 || n % 3 == 0 {
105        return false;
106    }
107    // 6k ± 1 rule check
108    let mut i = 5;
109    while i * i <= n {
110        if n % i == 0 || n % (i + 2) == 0 {
111            return false;
112        }
113        i += 6;
114    }
115    true
116}
117#[test]
118fn prime_test() {
119    // Prime
120    assert!(is_prime(7));
121    assert!(is_prime(23));
122    assert!(is_prime(1103));
123    assert!(is_prime(50411));
124    assert!(is_prime(120569));
125    // Not prime
126    assert!(!is_prime(21));
127    assert!(!is_prime(39));
128    assert!(!is_prime(98));
129    assert!(!is_prime(1208));
130    assert!(!is_prime(445));
131}
132
133// Finds the next prime by brute force in O(n) time
134//fn next_prime(n: u64) -> u64 {
135//    let mut candidate = n + 1;
136//    while !is_prime(candidate) {e
137//        candidate += 1;
138//    }
139//    candidate
140//}
141
142/// Finds the next prime in O(n/2) time by skipping evens
143pub fn next_prime(n: usize) -> usize {
144    if n < 2 {
145        return 2;
146    }
147    if n == 2 {
148        return 3;
149    }
150    let mut candidate = if n % 2 == 0 { n + 1 } else { n + 2 }; // Ensure candidate is odd
151    while !is_prime(candidate) {
152        candidate += 2; // Skip even numbers
153    }
154    candidate
155}
156#[test]
157fn next_prime_test() {
158    assert_eq!(next_prime(3), 5);
159    assert_eq!(next_prime(19), 23);
160    assert_eq!(next_prime(8868), 8887);
161    assert_eq!(next_prime(117760), 117763);
162    assert_eq!(next_prime(1899498), 1899503);
163}
164
165use rand::Rng;
166
167/// Illustrates MAD compression as `[(ai + b) mod p] mod N`
168/// Relies on `is_prime` and `next_prime` functions
169/// `c(h(k)) = ((a * h(k) + b) mod p) mod N`
170pub fn mad_compression(key: usize, len: usize) -> usize {
171    // Finds a prime >len, starting much larger to ensure even spread
172    let p = next_prime(len.pow(3));
173
174    let mut rng = rand::rng(); // Thread-local RNG
175    let a = rng.random_range(1..=p - 1);
176    let b = rng.random_range(0..=p - 1);
177
178    // Raw-dogging the algorithm may cause overflow
179    //(((key * a) + b) % p) % len
180
181    // Apply wrapping * and + to prevent overflow
182    (key.wrapping_mul(a))
183        .wrapping_add(b)
184        .wrapping_rem(p)
185        .wrapping_rem(len)
186}
187pub fn mad_compression_1(
188    hash: usize,
189    prime: usize,
190    scale: usize,
191    shift: usize,
192    capacity: usize,
193) -> usize {
194    (hash.wrapping_mul(scale)).wrapping_add(shift) % (prime) % (capacity)
195}