dsa_rust/associative/hash_lib.rs
1/*! Hashing & compression functions
2
3The utility functions in this module can be used to implement various hashing and compression algorithms
4
5*/
6
7// HOME-ROLLED HASHING
8//////////////////////
9
10/// Illustrates the concept of bit shifting;
11/// Takes a string a prints the bit and integer value of each character before
12/// performing a bit shift operation and printing the result as bit and integer values
13#[allow(clippy::manual_rotate)]
14pub fn bit_shift_visualization(value: &str) {
15 for mut v in value.bytes() {
16 print!("{v:08b} ({v}) -> ");
17 v = (v << 5) | (v >> 3);
18 // clippy wants a function
19 //v = v.rotate_right(3);
20 println!("{v:08b} ({v})");
21 }
22}
23
24/// Calculates a bit-shifted hash code;
25/// The function initializes a 32-bit hash code integer to 0,
26/// then loops over each character in the input string;
27/// Each loop adds the next character in the string to the hash code
28/// as an integer value with wrapping; This ensures consistency
29/// across architectures; The next operation in each loop performs
30/// a cyclic bit shift on the hash code, and the process repeats
31#[allow(clippy::manual_rotate)]
32pub fn simple_hash_function(key: &str) -> u32 {
33 let mut hash: u32 = 0;
34 for v in key.bytes() {
35 print!("{v:08b} -> "); // Prints the character's bit value
36 //hash += v as u32; // Adds the character to the hash
37 hash = hash.wrapping_add(v as u32); // Adds the character to the hash
38 hash = (hash << 5) | (hash >> 27); // Shifts the hash left by 5 places
39 // clippy wants a function
40 //hash = hash.rotate_left(5);
41 println!("{hash:032b}"); // Prints the resultant hash value
42 }
43 hash
44}
45#[test]
46fn hash_code_test() {
47 let v = simple_hash_function("Peter");
48 assert_eq!(v, 2794168896);
49
50 //let v = visual_hash_code("This block overflows the value");
51 //assert_eq!(v, 3862340559);
52
53 let v = simple_hash_function("Simple hash digest");
54 assert_eq!(v, 1741143911);
55}
56
57// STANDARD LIBRARY HASHING
58///////////////////////////
59
60// Explores Rust's default hashing functionality
61use std::collections::hash_map::DefaultHasher;
62use std::fmt::Debug;
63use std::hash::{Hash, Hasher};
64
65/// Takes a reference to a type `T` and uses Rust's default hasher to return a
66/// 64-bit digest.
67pub fn hash<T: Hash + Debug + ?Sized>(key: &T) -> usize {
68 let mut hasher = DefaultHasher::new();
69 key.hash(&mut hasher); // Hash::hash()
70 let digest = hasher.finish(); // Hasher::finish()
71 digest as usize
72}
73
74/** Does the same thing as hash() but feeds individual bytes which produces a
75slightly less efficient (and different) digest */
76pub fn hash_1(key: &str) -> u64 {
77 let mut hasher = DefaultHasher::new();
78 for e in key.bytes() {
79 hasher.write_u8(e)
80 }
81 hasher.finish()
82}
83
84// COMPRESSION
85//////////////
86
87/// Super simple division compression as `i mod N`.
88/// Produces a deterministic output, works best when `n` (len) is prime.
89pub fn division_compression(key: usize, len: usize) -> usize {
90 key % len
91}
92
93/// Efficient primality test using the 6k ± 1 rule for numbers >3 by trial.
94fn is_prime(n: usize) -> bool {
95 // Only positive numbers >1 need apply
96 if n < 2 {
97 return false;
98 }
99 // The lil guys get some love
100 if n == 2 || n == 3 {
101 return true;
102 }
103 // Skips evens and numbers divisible by 3
104 if n % 2 == 0 || n % 3 == 0 {
105 return false;
106 }
107 // 6k ± 1 rule check
108 let mut i = 5;
109 while i * i <= n {
110 if n % i == 0 || n % (i + 2) == 0 {
111 return false;
112 }
113 i += 6;
114 }
115 true
116}
117#[test]
118fn prime_test() {
119 // Prime
120 assert!(is_prime(7));
121 assert!(is_prime(23));
122 assert!(is_prime(1103));
123 assert!(is_prime(50411));
124 assert!(is_prime(120569));
125 // Not prime
126 assert!(!is_prime(21));
127 assert!(!is_prime(39));
128 assert!(!is_prime(98));
129 assert!(!is_prime(1208));
130 assert!(!is_prime(445));
131}
132
133// Finds the next prime by brute force in O(n) time
134//fn next_prime(n: u64) -> u64 {
135// let mut candidate = n + 1;
136// while !is_prime(candidate) {e
137// candidate += 1;
138// }
139// candidate
140//}
141
142/// Finds the next prime in O(n/2) time by skipping evens
143pub fn next_prime(n: usize) -> usize {
144 if n < 2 {
145 return 2;
146 }
147 if n == 2 {
148 return 3;
149 }
150 let mut candidate = if n % 2 == 0 { n + 1 } else { n + 2 }; // Ensure candidate is odd
151 while !is_prime(candidate) {
152 candidate += 2; // Skip even numbers
153 }
154 candidate
155}
156#[test]
157fn next_prime_test() {
158 assert_eq!(next_prime(3), 5);
159 assert_eq!(next_prime(19), 23);
160 assert_eq!(next_prime(8868), 8887);
161 assert_eq!(next_prime(117760), 117763);
162 assert_eq!(next_prime(1899498), 1899503);
163}
164
165use rand::Rng;
166
167/// Illustrates MAD compression as `[(ai + b) mod p] mod N`
168/// Relies on `is_prime` and `next_prime` functions
169/// `c(h(k)) = ((a * h(k) + b) mod p) mod N`
170pub fn mad_compression(key: usize, len: usize) -> usize {
171 // Finds a prime >len, starting much larger to ensure even spread
172 let p = next_prime(len.pow(3));
173
174 let mut rng = rand::rng(); // Thread-local RNG
175 let a = rng.random_range(1..=p - 1);
176 let b = rng.random_range(0..=p - 1);
177
178 // Raw-dogging the algorithm may cause overflow
179 //(((key * a) + b) % p) % len
180
181 // Apply wrapping * and + to prevent overflow
182 (key.wrapping_mul(a))
183 .wrapping_add(b)
184 .wrapping_rem(p)
185 .wrapping_rem(len)
186}
187pub fn mad_compression_1(
188 hash: usize,
189 prime: usize,
190 scale: usize,
191 shift: usize,
192 capacity: usize,
193) -> usize {
194 (hash.wrapping_mul(scale)).wrapping_add(shift) % (prime) % (capacity)
195}