跳到主要内容

HashSet 和其他集合类型

除了 Vec 和 HashMap,Rust 标准库还提供了其他有用的集合类型,包括 HashSet、BTreeMap、BTreeSet 等。

HashSet

HashSet 是一个集合,用于存储唯一的值。它基于 HashMap 实现,所以具有类似的性能特征。

创建和基本操作

use std::collections::HashSet;

fn main() {
let mut books = HashSet::new();

// 添加元素
books.insert("The Rust Programming Language");
books.insert("Programming Rust");
books.insert("Rust in Action");
books.insert("The Rust Programming Language"); // 重复元素不会被添加

println!("书籍数量: {}", books.len()); // 输出: 3

// 检查是否包含某个元素
if books.contains("Rust in Action") {
println!("找到了 'Rust in Action'");
}

// 遍历集合
for book in &books {
println!("书籍: {}", book);
}
}

从向量创建 HashSet

use std::collections::HashSet;

fn main() {
let numbers = vec![1, 2, 3, 2, 1, 4, 5, 4];
let unique_numbers: HashSet<_> = numbers.into_iter().collect();

println!("唯一数字: {:?}", unique_numbers);
}

集合操作

use std::collections::HashSet;

fn main() {
let set1: HashSet<i32> = [1, 2, 3, 4, 5].iter().cloned().collect();
let set2: HashSet<i32> = [4, 5, 6, 7, 8].iter().cloned().collect();

// 并集
let union: HashSet<_> = set1.union(&set2).collect();
println!("并集: {:?}", union);

// 交集
let intersection: HashSet<_> = set1.intersection(&set2).collect();
println!("交集: {:?}", intersection);

// 差集
let difference: HashSet<_> = set1.difference(&set2).collect();
println!("差集 (set1 - set2): {:?}", difference);

// 对称差集
let symmetric_difference: HashSet<_> = set1.symmetric_difference(&set2).collect();
println!("对称差集: {:?}", symmetric_difference);
}

子集和超集检查

use std::collections::HashSet;

fn main() {
let set1: HashSet<i32> = [1, 2, 3].iter().cloned().collect();
let set2: HashSet<i32> = [1, 2, 3, 4, 5].iter().cloned().collect();

println!("set1 是 set2 的子集: {}", set1.is_subset(&set2));
println!("set2 是 set1 的超集: {}", set2.is_superset(&set1));
println!("set1 和 set2 不相交: {}", set1.is_disjoint(&set2));
}

BTreeMap 和 BTreeSet

BTreeMap 和 BTreeSet 是基于 B 树实现的有序集合,它们保持键的排序顺序。

BTreeMap

use std::collections::BTreeMap;

fn main() {
let mut scores = BTreeMap::new();

scores.insert("Alice", 95);
scores.insert("Bob", 87);
scores.insert("Charlie", 92);
scores.insert("David", 89);

// BTreeMap 会按键的顺序遍历
for (name, score) in &scores {
println!("{}: {}", name, score);
}

// 获取范围内的元素
println!("\n分数在 'Bob' 到 'David' 之间的学生:");
for (name, score) in scores.range("Bob".."David") {
println!("{}: {}", name, score);
}
}

BTreeSet

use std::collections::BTreeSet;

fn main() {
let mut numbers = BTreeSet::new();

numbers.insert(5);
numbers.insert(2);
numbers.insert(8);
numbers.insert(1);
numbers.insert(9);

// BTreeSet 会按排序顺序遍历
println!("排序后的数字:");
for number in &numbers {
println!("{}", number);
}

// 获取范围内的元素
println!("\n3 到 7 之间的数字:");
for number in numbers.range(3..=7) {
println!("{}", number);
}
}

VecDeque (双端队列)

VecDeque 是一个双端队列,支持在两端高效地添加和删除元素。

use std::collections::VecDeque;

fn main() {
let mut deque = VecDeque::new();

// 在后端添加元素
deque.push_back(1);
deque.push_back(2);
deque.push_back(3);

// 在前端添加元素
deque.push_front(0);

println!("双端队列: {:?}", deque); // [0, 1, 2, 3]

// 从两端删除元素
if let Some(front) = deque.pop_front() {
println!("从前端删除: {}", front);
}

if let Some(back) = deque.pop_back() {
println!("从后端删除: {}", back);
}

println!("剩余元素: {:?}", deque); // [1, 2]
}

BinaryHeap (二叉堆)

BinaryHeap 是一个优先队列,总是返回最大的元素。

use std::collections::BinaryHeap;

fn main() {
let mut heap = BinaryHeap::new();

// 添加元素
heap.push(5);
heap.push(2);
heap.push(8);
heap.push(1);
heap.push(9);

println!("堆的大小: {}", heap.len());

// 查看最大元素(不删除)
if let Some(&max) = heap.peek() {
println!("最大元素: {}", max);
}

// 依次取出最大元素
while let Some(max) = heap.pop() {
println!("取出: {}", max);
}
}

自定义优先级

use std::collections::BinaryHeap;
use std::cmp::Reverse;

#[derive(Debug, Eq, PartialEq)]
struct Task {
priority: u32,
description: String,
}

impl Ord for Task {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.priority.cmp(&other.priority)
}
}

impl PartialOrd for Task {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}

fn main() {
let mut tasks = BinaryHeap::new();

tasks.push(Task {
priority: 3,
description: "中等优先级任务".to_string(),
});

tasks.push(Task {
priority: 1,
description: "低优先级任务".to_string(),
});

tasks.push(Task {
priority: 5,
description: "高优先级任务".to_string(),
});

// 按优先级顺序处理任务
while let Some(task) = tasks.pop() {
println!("处理任务 (优先级 {}): {}", task.priority, task.description);
}

// 如果想要最小堆,可以使用 Reverse
let mut min_heap = BinaryHeap::new();
min_heap.push(Reverse(5));
min_heap.push(Reverse(2));
min_heap.push(Reverse(8));

while let Some(Reverse(value)) = min_heap.pop() {
println!("最小值: {}", value);
}
}

实际应用示例

文本分析工具

use std::collections::{HashMap, HashSet};

struct TextAnalyzer {
word_count: HashMap<String, usize>,
unique_words: HashSet<String>,
total_words: usize,
}

impl TextAnalyzer {
fn new() -> Self {
TextAnalyzer {
word_count: HashMap::new(),
unique_words: HashSet::new(),
total_words: 0,
}
}

fn analyze(&mut self, text: &str) {
for word in text.split_whitespace() {
let word = word.to_lowercase()
.trim_matches(|c: char| !c.is_alphabetic())
.to_string();

if !word.is_empty() {
*self.word_count.entry(word.clone()).or_insert(0) += 1;
self.unique_words.insert(word);
self.total_words += 1;
}
}
}

fn most_common_words(&self, n: usize) -> Vec<(&String, &usize)> {
let mut words: Vec<_> = self.word_count.iter().collect();
words.sort_by(|a, b| b.1.cmp(a.1));
words.into_iter().take(n).collect()
}

fn statistics(&self) {
println!("文本统计:");
println!(" 总词数: {}", self.total_words);
println!(" 唯一词数: {}", self.unique_words.len());
println!(" 平均词频: {:.2}",
self.total_words as f64 / self.unique_words.len() as f64);
}
}

fn main() {
let mut analyzer = TextAnalyzer::new();

let text = "The quick brown fox jumps over the lazy dog. \
The dog was really lazy, and the fox was very quick.";

analyzer.analyze(text);
analyzer.statistics();

println!("\n最常见的 5 个词:");
for (word, count) in analyzer.most_common_words(5) {
println!(" {}: {}", word, count);
}
}

图的邻接表表示

use std::collections::{HashMap, HashSet};

struct Graph {
adjacency_list: HashMap<String, HashSet<String>>,
}

impl Graph {
fn new() -> Self {
Graph {
adjacency_list: HashMap::new(),
}
}

fn add_vertex(&mut self, vertex: String) {
self.adjacency_list.entry(vertex).or_insert_with(HashSet::new);
}

fn add_edge(&mut self, from: String, to: String) {
self.add_vertex(from.clone());
self.add_vertex(to.clone());

self.adjacency_list.get_mut(&from).unwrap().insert(to.clone());
self.adjacency_list.get_mut(&to).unwrap().insert(from);
}

fn neighbors(&self, vertex: &str) -> Option<&HashSet<String>> {
self.adjacency_list.get(vertex)
}

fn has_edge(&self, from: &str, to: &str) -> bool {
self.adjacency_list
.get(from)
.map_or(false, |neighbors| neighbors.contains(to))
}

fn vertex_count(&self) -> usize {
self.adjacency_list.len()
}

fn edge_count(&self) -> usize {
self.adjacency_list
.values()
.map(|neighbors| neighbors.len())
.sum::<usize>() / 2 // 无向图,每条边被计算两次
}

fn display(&self) {
println!("图的邻接表:");
for (vertex, neighbors) in &self.adjacency_list {
println!(" {}: {:?}", vertex, neighbors);
}
}
}

fn main() {
let mut graph = Graph::new();

// 添加边(自动添加顶点)
graph.add_edge("A".to_string(), "B".to_string());
graph.add_edge("A".to_string(), "C".to_string());
graph.add_edge("B".to_string(), "D".to_string());
graph.add_edge("C".to_string(), "D".to_string());

graph.display();

println!("顶点数: {}", graph.vertex_count());
println!("边数: {}", graph.edge_count());

if let Some(neighbors) = graph.neighbors("A") {
println!("A 的邻居: {:?}", neighbors);
}

println!("A 和 D 之间有边: {}", graph.has_edge("A", "D"));
println!("A 和 B 之间有边: {}", graph.has_edge("A", "B"));
}

缓存系统与 LRU 策略

use std::collections::{HashMap, VecDeque};

struct LRUCache<K, V> {
capacity: usize,
cache: HashMap<K, V>,
order: VecDeque<K>,
}

impl<K, V> LRUCache<K, V>
where
K: Clone + Eq + std::hash::Hash,
{
fn new(capacity: usize) -> Self {
LRUCache {
capacity,
cache: HashMap::with_capacity(capacity),
order: VecDeque::with_capacity(capacity),
}
}

fn get(&mut self, key: &K) -> Option<&V> {
if self.cache.contains_key(key) {
// 移动到最前面(最近使用)
self.move_to_front(key);
self.cache.get(key)
} else {
None
}
}

fn put(&mut self, key: K, value: V) {
if self.cache.contains_key(&key) {
// 更新现有键
self.cache.insert(key.clone(), value);
self.move_to_front(&key);
} else {
// 添加新键
if self.cache.len() >= self.capacity {
// 移除最久未使用的项
if let Some(lru_key) = self.order.pop_back() {
self.cache.remove(&lru_key);
}
}

self.cache.insert(key.clone(), value);
self.order.push_front(key);
}
}

fn move_to_front(&mut self, key: &K) {
// 从当前位置移除
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
// 添加到前面
self.order.push_front(key.clone());
}

fn len(&self) -> usize {
self.cache.len()
}

fn display_order(&self) {
println!("缓存顺序 (最新到最旧): {:?}", self.order);
}
}

fn main() {
let mut cache = LRUCache::new(3);

cache.put("a", 1);
cache.put("b", 2);
cache.put("c", 3);
cache.display_order(); // [c, b, a]

// 访问 'a',它会移到最前面
cache.get(&"a");
cache.display_order(); // [a, c, b]

// 添加新项,'b' 会被移除(最久未使用)
cache.put("d", 4);
cache.display_order(); // [d, a, c]

println!("缓存大小: {}", cache.len());

// 尝试获取已被移除的项
match cache.get(&"b") {
Some(value) => println!("找到 b: {}", value),
None => println!("b 已被移除"),
}
}

选择合适的集合类型

集合类型用途时间复杂度特点
Vec<T>动态数组O(1) 访问, O(n) 插入/删除连续内存,索引访问
VecDeque<T>双端队列O(1) 两端操作环形缓冲区
HashMap<K,V>键值映射O(1) 平均无序,快速查找
BTreeMap<K,V>有序映射O(log n)有序,范围查询
HashSet<T>唯一值集合O(1) 平均无序,快速查找
BTreeSet<T>有序集合O(log n)有序,范围查询
BinaryHeap<T>优先队列O(log n) 插入/删除最大堆

最佳实践

  1. 选择合适的集合类型:根据使用模式选择
  2. 预分配容量:当知道大概大小时
  3. 使用引用避免克隆:特别是在键中
  4. 考虑内存布局:Vec 有更好的缓存局部性
  5. 使用迭代器:更高效且表达力强
use std::collections::{HashMap, HashSet};

// 好的实践示例
fn analyze_data(data: &[String]) -> (HashMap<&str, usize>, HashSet<&str>) {
let mut counts = HashMap::with_capacity(data.len());
let mut unique = HashSet::with_capacity(data.len());

for item in data {
let item_ref = item.as_str();
*counts.entry(item_ref).or_insert(0) += 1;
unique.insert(item_ref);
}

(counts, unique)
}

fn main() {
let data = vec![
"apple".to_string(),
"banana".to_string(),
"apple".to_string(),
"cherry".to_string(),
];

let (counts, unique) = analyze_data(&data);

println!("计数: {:?}", counts);
println!("唯一项: {:?}", unique);
}