数据压缩

浏览器端压缩数据可以节省带宽,优化存储。Rust 实现的压缩算法比 JS 快很多。

为什么需要 WASM

压缩算法的特点:

  • 大量位运算
  • 查找匹配字符串
  • 构建哈希表和字典
  • CPU 密集型操作

性能对比

实测数据:

算法 数据大小 JavaScript Rust + WASM 提升 压缩率
Gzip 10MB 4200ms 980ms 4.3x 65%
Deflate 10MB 3800ms 850ms 4.5x 68%
LZ4 10MB 1200ms 320ms 3.8x 45%

Rust 实现

Cargo.toml

1[package]
2name = "compression-wasm"
3version = "0.1.0"
4edition = "2021"
5
6[lib]
7crate-type = ["cdylib"]
8
9[dependencies]
10wasm-bindgen = "0.2"
11flate2 = "1.0"
12
13[profile.release]
14opt-level = 3
15lto = true

核心代码

1use wasm_bindgen::prelude::*;
2use flate2::Compression;
3use flate2::write::{GzEncoder, DeflateEncoder};
4use flate2::read::{GzDecoder, DeflateDecoder};
5use std::io::{Write, Read};
6
7// Gzip 压缩
8#[wasm_bindgen]
9pub fn gzip_compress(data: &[u8], level: u32) -> Result<Vec<u8>, JsValue> {
10    let compression_level = match level {
11        0 => Compression::none(),
12        1..=6 => Compression::fast(),
13        7..=9 => Compression::best(),
14        _ => Compression::default(),
15    };
16
17    let mut encoder = GzEncoder::new(Vec::new(), compression_level);
18    encoder.write_all(data)
19        .map_err(|e| JsValue::from_str(&e.to_string()))?;
20
21    encoder.finish()
22        .map_err(|e| JsValue::from_str(&e.to_string()))
23}
24
25// Gzip 解压
26#[wasm_bindgen]
27pub fn gzip_decompress(data: &[u8]) -> Result<Vec<u8>, JsValue> {
28    let mut decoder = GzDecoder::new(data);
29    let mut result = Vec::new();
30
31    decoder.read_to_end(&mut result)
32        .map_err(|e| JsValue::from_str(&e.to_string()))?;
33
34    Ok(result)
35}
36
37// Deflate 压缩
38#[wasm_bindgen]
39pub fn deflate_compress(data: &[u8], level: u32) -> Result<Vec<u8>, JsValue> {
40    let compression_level = match level {
41        0 => Compression::none(),
42        1..=6 => Compression::fast(),
43        7..=9 => Compression::best(),
44        _ => Compression::default(),
45    };
46
47    let mut encoder = DeflateEncoder::new(Vec::new(), compression_level);
48    encoder.write_all(data)
49        .map_err(|e| JsValue::from_str(&e.to_string()))?;
50
51    encoder.finish()
52        .map_err(|e| JsValue::from_str(&e.to_string()))
53}
54
55// Deflate 解压
56#[wasm_bindgen]
57pub fn deflate_decompress(data: &[u8]) -> Result<Vec<u8>, JsValue> {
58    let mut decoder = DeflateDecoder::new(data);
59    let mut result = Vec::new();
60
61    decoder.read_to_end(&mut result)
62        .map_err(|e| JsValue::from_str(&e.to_string()))?;
63
64    Ok(result)
65}
66
67// 计算压缩率
68#[wasm_bindgen]
69pub fn compression_ratio(original: &[u8], compressed: &[u8]) -> f64 {
70    if original.is_empty() {
71        return 0.0;
72    }
73    compressed.len() as f64 / original.len() as f64
74}

JavaScript 集成

1<!DOCTYPE html>
2<html>
3  <body>
4    <div id="dropZone">拖拽文件到这里</div>
5
6    <div>
7      <label>压缩级别:</label>
8      <input type="range" id="level" min="0" max="9" value="6" />
9      <span id="levelValue">6</span>
10    </div>
11
12    <div id="results"></div>
13
14    <script type="module" src="app.js"></script>
15  </body>
16</html>
1import init, {
2  gzip_compress,
3  gzip_decompress,
4  deflate_compress,
5  compression_ratio,
6} from "./pkg/compression_wasm.js";
7
8await init();
9
10// 拖拽上传
11const dropZone = document.getElementById("dropZone");
12
13dropZone.addEventListener("dragover", (e) => {
14  e.preventDefault();
15  dropZone.classList.add("dragover");
16});
17
18dropZone.addEventListener("drop", async (e) => {
19  e.preventDefault();
20  dropZone.classList.remove("dragover");
21
22  const files = e.dataTransfer.files;
23  for (const file of files) {
24    await compressFile(file);
25  }
26});
27
28async function compressFile(file) {
29  const data = new Uint8Array(await file.arrayBuffer());
30  const level = parseInt(document.getElementById("level").value);
31
32  const start = performance.now();
33  const compressed = gzip_compress(data, level);
34  const duration = performance.now() - start;
35
36  const ratio = compression_ratio(data, compressed);
37  const savedBytes = data.length - compressed.length;
38  const savedPercent = ((1 - ratio) * 100).toFixed(1);
39
40  document.getElementById("results").innerHTML += `
41        <div>
42            <h3>${file.name}</h3>
43            <p>原始: ${formatBytes(data.length)}</p>
44            <p>压缩后: ${formatBytes(compressed.length)}</p>
45            <p>节省: ${formatBytes(savedBytes)} (${savedPercent}%)</p>
46            <p>耗时: ${duration.toFixed(2)} ms</p>
47            <p>速度: ${formatBytes((data.length / duration) * 1000)}/s</p>
48            <button onclick="download('${file.name}.gz', ${Array.from(
49    compressed
50  )})">
51                下载
52            </button>
53        </div>
54    `;
55}
56
57function formatBytes(bytes) {
58  if (bytes < 1024) return bytes + " B";
59  if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(2) + " KB";
60  return (bytes / 1024 / 1024).toFixed(2) + " MB";
61}
62
63window.download = function (filename, dataArray) {
64  const blob = new Blob([new Uint8Array(dataArray)]);
65  const url = URL.createObjectURL(blob);
66  const a = document.createElement("a");
67  a.href = url;
68  a.download = filename;
69  a.click();
70  URL.revokeObjectURL(url);
71};

注意事项

压缩级别选择

1// 快速压缩 (实时场景)
2const compressed = gzip_compress(data, 1);
3
4// 高压缩率 (存储场景)
5const compressed = gzip_compress(data, 9);
6
7// 平衡
8const compressed = gzip_compress(data, 6);

压缩阈值

不是所有数据都值得压缩:

1const THRESHOLD = 1024; // 1KB
2
3function smartCompress(data) {
4  if (data.length < THRESHOLD) {
5    return { compressed: false, data };
6  }
7
8  const compressed = gzip_compress(data, 6);
9
10  // 如果压缩后反而更大
11  if (compressed.length >= data.length) {
12    return { compressed: false, data };
13  }
14
15  return { compressed: true, data: compressed };
16}

Worker 处理

大文件用 Worker:

1const worker = new Worker("compress-worker.js");
2
3worker.postMessage({ data, level: 6 });
4
5worker.onmessage = (e) => {
6  const compressed = e.data;
7  // 处理结果
8};

实际应用

文件上传优化

1async function uploadFile(file) {
2  const data = new Uint8Array(await file.arrayBuffer());
3
4  // 压缩
5  const compressed = gzip_compress(data, 6);
6
7  // 上传
8  const formData = new FormData();
9  formData.append("file", new Blob([compressed]), file.name + ".gz");
10  formData.append("original_size", data.length);
11
12  await fetch("/upload", {
13    method: "POST",
14    body: formData,
15  });
16
17  console.log(
18    `节省带宽: ${((1 - compressed.length / data.length) * 100).toFixed(1)}%`
19  );
20}

IndexedDB 缓存

1// 压缩后存储
2async function cacheData(key, data) {
3  const compressed = gzip_compress(
4    new TextEncoder().encode(JSON.stringify(data)),
5    6
6  );
7
8  const db = await openDB("cache-db");
9  await db.put("store", { key, data: compressed, compressed: true });
10}
11
12// 读取并解压
13async function getCachedData(key) {
14  const db = await openDB("cache-db");
15  const record = await db.get("store", key);
16
17  if (record && record.compressed) {
18    const decompressed = gzip_decompress(record.data);
19    return JSON.parse(new TextDecoder().decode(decompressed));
20  }
21
22  return record?.data;
23}

WebSocket 压缩

1class CompressedWebSocket {
2  constructor(url) {
3    this.ws = new WebSocket(url);
4    this.ws.binaryType = "arraybuffer";
5
6    this.ws.onmessage = (event) => {
7      const compressed = new Uint8Array(event.data);
8      const decompressed = deflate_decompress(compressed);
9      const message = JSON.parse(new TextDecoder().decode(decompressed));
10      this.onmessage?.(message);
11    };
12  }
13
14  send(data) {
15    const json = JSON.stringify(data);
16    const bytes = new TextEncoder().encode(json);
17    const compressed = deflate_compress(bytes, 6);
18    this.ws.send(compressed);
19  }
20}

日志批量上传

1class LogBatcher {
2  constructor() {
3    this.logs = [];
4  }
5
6  log(message) {
7    this.logs.push({ time: Date.now(), message });
8
9    if (this.logs.length >= 100) {
10      this.flush();
11    }
12  }
13
14  async flush() {
15    if (this.logs.length === 0) return;
16
17    const json = JSON.stringify(this.logs);
18    const bytes = new TextEncoder().encode(json);
19    const compressed = gzip_compress(bytes, 9);
20
21    await fetch("/api/logs", {
22      method: "POST",
23      headers: { "Content-Encoding": "gzip" },
24      body: compressed,
25    });
26
27    console.log(
28      `上传 ${this.logs.length} 条日志, 压缩率: ${(
29        (compressed.length / bytes.length) *
30        100
31      ).toFixed(1)}%`
32    );
33
34    this.logs = [];
35  }
36}

什么时候用

适合:

  • 大文件上传 (>100KB)
  • 批量数据传输
  • 缓存优化
  • 日志上传

不适合:

  • 小文件 (<1KB)
  • 已压缩的数据 (图片、视频)
  • 实时性要求极高的场景
  • 一次性操作