数据处理与计算

大量数据的排序、统计、矩阵运算等操作,用 JavaScript 比较慢。Rust 在这类场景下有明显优势。

为什么需要 WASM

数值计算的特点:

  • 大量循环嵌套
  • 频繁的数组访问
  • 浮点运算密集
  • JavaScript 的动态类型带来额外开销

性能对比

实测数据:

操作 数据规模 JavaScript Rust + WASM 提升
快速排序 100 万数字 340ms 28ms 12x
数组求和 1000 万数字 145ms 12ms 12x
矩阵乘法 500×500 890ms 68ms 13x
移动平均 100 万点 280ms 35ms 8x

Rust 实现

Cargo.toml

1[package]
2name = "data-processor"
3version = "0.1.0"
4edition = "2021"
5
6[lib]
7crate-type = ["cdylib"]
8
9[dependencies]
10wasm-bindgen = "0.2"
11
12[profile.release]
13opt-level = 3
14lto = true
15codegen-units = 1

核心代码

1use wasm_bindgen::prelude::*;
2
3// 快速排序
4#[wasm_bindgen]
/// Sorts `arr` in ascending order, in place.
///
/// The work is delegated to the standard library's unstable sort, which is
/// in-place, allocation-free and O(n log n) in the worst case. A textbook
/// quicksort that always picks the last element as pivot degrades to O(n²)
/// time and O(n) recursion depth on already-sorted or all-equal input, which
/// overflows the stack for large arrays.
///
/// Values are ordered with [`f64::total_cmp`], so the result is well defined
/// for every input: `-0.0` sorts before `+0.0`, and positive NaNs sort after
/// `+∞` instead of ending up at unspecified positions.
pub fn quick_sort(arr: &mut [f64]) {
    arr.sort_unstable_by(|a, b| a.total_cmp(b));
}
35
36// 统计分析
37#[wasm_bindgen]
/// Summary statistics of a numeric data set.
///
/// Fields are private; each one is read through the getter of the same name.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Statistics {
    /// Arithmetic mean of the data.
    mean: f64,
    /// Median (middle value of the sorted data).
    median: f64,
    /// Standard deviation of the data.
    std_dev: f64,
    /// Smallest value in the data.
    min: f64,
    /// Largest value in the data.
    max: f64,
}
45
46#[wasm_bindgen]
47impl Statistics {
48    #[wasm_bindgen(getter)]
49    pub fn mean(&self) -> f64 { self.mean }
50
51    #[wasm_bindgen(getter)]
52    pub fn median(&self) -> f64 { self.median }
53
54    #[wasm_bindgen(getter)]
55    pub fn std_dev(&self) -> f64 { self.std_dev }
56
57    #[wasm_bindgen(getter)]
58    pub fn min(&self) -> f64 { self.min }
59
60    #[wasm_bindgen(getter)]
61    pub fn max(&self) -> f64 { self.max }
62}
63
64#[wasm_bindgen]
65pub fn calculate_statistics(data: &[f64]) -> Statistics {
66    let n = data.len() as f64;
67
68    // 均值
69    let mean = data.iter().sum::<f64>() / n;
70
71    // 方差和标准差
72    let variance = data.iter()
73        .map(|x| (x - mean).powi(2))
74        .sum::<f64>() / n;
75    let std_dev = variance.sqrt();
76
77    // 最小值最大值
78    let min = data.iter().cloned().fold(f64::INFINITY, f64::min);
79    let max = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
80
81    // 中位数
82    let mut sorted = data.to_vec();
83    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
84    let median = if sorted.len() % 2 == 0 {
85        let mid = sorted.len() / 2;
86        (sorted[mid - 1] + sorted[mid]) / 2.0
87    } else {
88        sorted[sorted.len() / 2]
89    };
90
91    Statistics { mean, median, std_dev, min, max }
92}
93
94// 移动平均
95#[wasm_bindgen]
/// Computes the simple moving average of `data` over a sliding window of
/// `window` consecutive values.
///
/// Returns one average per full window, i.e. `data.len() - window + 1` values.
/// Returns an empty vector when `window` is zero (an average over zero values
/// is undefined and would divide by zero) or when `data` holds fewer than
/// `window` values.
///
/// The window sum is maintained incrementally, so the whole computation is a
/// single pass over `data` regardless of the window size.
pub fn moving_average(data: &[f64], window: usize) -> Vec<f64> {
    if window == 0 || data.len() < window {
        return Vec::new();
    }

    let width = window as f64;
    let mut result = Vec::with_capacity(data.len() - window + 1);

    // Sum of the first full window.
    let mut sum: f64 = data[..window].iter().sum();
    result.push(sum / width);

    // Slide by one: drop the value leaving on the left, add the one entering
    // on the right. Zipping `data` with `data[window..]` pairs exactly those two.
    for (&leaving, &entering) in data.iter().zip(&data[window..]) {
        sum = sum - leaving + entering;
        result.push(sum / width);
    }

    result
}
115
116// 矩阵乘法
117// C = A × B
118// A: m×n, B: n×p -> C: m×p
119#[wasm_bindgen]
/// Multiplies two row-major matrices: `C = A × B`.
///
/// `a` is an `a_rows × a_cols` matrix and `b` a `b_rows × b_cols` matrix, both
/// stored row by row in a flat slice. The result is the `a_rows × b_cols`
/// product in the same layout.
///
/// Returns `None` when:
/// * the inner dimensions differ (`a_cols != b_rows`),
/// * a dimension product overflows `usize`, or
/// * `a` or `b` holds fewer elements than its dimensions require
///   (which would otherwise be an out-of-bounds panic).
///
/// Elements beyond the declared dimensions are ignored.
pub fn matrix_multiply(
    a: &[f64],
    a_rows: usize,
    a_cols: usize,
    b: &[f64],
    b_rows: usize,
    b_cols: usize,
) -> Option<Vec<f64>> {
    if a_cols != b_rows {
        return None;
    }

    let a_len = a_rows.checked_mul(a_cols)?;
    let b_len = b_rows.checked_mul(b_cols)?;
    let out_len = a_rows.checked_mul(b_cols)?;
    if a.len() < a_len || b.len() < b_len {
        return None;
    }

    let mut result = vec![0.0; out_len];

    // A zero-sized inner or output dimension leaves nothing to accumulate
    // (and `chunks_exact(0)` would panic).
    if a_cols == 0 || b_cols == 0 {
        return Some(result);
    }

    let a = &a[..a_len];
    let b = &b[..b_len];

    // i-k-j loop order: the innermost loop walks one row of B and one row of
    // the output contiguously instead of striding down a column of B. Each
    // output cell still accumulates its products in ascending k order.
    for (a_row, out_row) in a.chunks_exact(a_cols).zip(result.chunks_exact_mut(b_cols)) {
        for (&a_ik, b_row) in a_row.iter().zip(b.chunks_exact(b_cols)) {
            for (out, &b_kj) in out_row.iter_mut().zip(b_row) {
                *out += a_ik * b_kj;
            }
        }
    }

    Some(result)
}
146
147// 线性回归
148#[wasm_bindgen]
/// Result of a simple (single-variable) linear fit `y = slope * x + intercept`.
///
/// Fields are private; each one is read through the getter of the same name.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LinearRegression {
    /// Slope of the fitted line.
    slope: f64,
    /// Value of the fitted line at `x = 0`.
    intercept: f64,
    /// Coefficient of determination (R²) of the fit.
    r_squared: f64,
}
154
155#[wasm_bindgen]
156impl LinearRegression {
157    #[wasm_bindgen(getter)]
158    pub fn slope(&self) -> f64 { self.slope }
159
160    #[wasm_bindgen(getter)]
161    pub fn intercept(&self) -> f64 { self.intercept }
162
163    #[wasm_bindgen(getter)]
164    pub fn r_squared(&self) -> f64 { self.r_squared }
165
166    pub fn predict(&self, x: f64) -> f64 {
167        self.slope * x + self.intercept
168    }
169}
170
171#[wasm_bindgen]
172pub fn linear_regression(x_data: &[f64], y_data: &[f64]) -> Option<LinearRegression> {
173    if x_data.len() != y_data.len() || x_data.is_empty() {
174        return None;
175    }
176
177    let n = x_data.len() as f64;
178    let x_mean = x_data.iter().sum::<f64>() / n;
179    let y_mean = y_data.iter().sum::<f64>() / n;
180
181    let mut numerator = 0.0;
182    let mut denominator = 0.0;
183
184    for i in 0..x_data.len() {
185        let x_diff = x_data[i] - x_mean;
186        let y_diff = y_data[i] - y_mean;
187        numerator += x_diff * y_diff;
188        denominator += x_diff * x_diff;
189    }
190
191    let slope = numerator / denominator;
192    let intercept = y_mean - slope * x_mean;
193
194    // 计算 R²
195    let mut ss_tot = 0.0;
196    let mut ss_res = 0.0;
197
198    for i in 0..y_data.len() {
199        let y_pred = slope * x_data[i] + intercept;
200        ss_res += (y_data[i] - y_pred).powi(2);
201        ss_tot += (y_data[i] - y_mean).powi(2);
202    }
203
204    let r_squared = 1.0 - (ss_res / ss_tot);
205
206    Some(LinearRegression { slope, intercept, r_squared })
207}

JavaScript 集成

1import init, {
2  quick_sort,
3  calculate_statistics,
4  moving_average,
5  matrix_multiply,
6  linear_regression,
7} from "./pkg/data_processor.js";
8
9await init();
10
// Sort benchmark: times the WASM quick_sort on one million random numbers.
function testSort() {
  const size = 1000000;

  // Build the input directly as the Float64Array that is handed to quick_sort.
  const values = Float64Array.from({ length: size }, () => Math.random() * 100);

  // Only the WASM call is timed; generating the input is excluded.
  const t0 = performance.now();
  quick_sort(values);
  const elapsed = performance.now() - t0;

  console.log(`排序 ${size} 个数字: ${elapsed.toFixed(2)} ms`);
}
24
// Statistics demo: summarizes one million random values in [0, 100).
function analyzeData() {
  const data = new Float64Array(1000000);
  for (let i = 0; i < data.length; i++) {
    data[i] = Math.random() * 100;
  }

  const stats = calculate_statistics(data);
  try {
    console.log("统计结果:");
    console.log(`均值: ${stats.mean.toFixed(2)}`);
    console.log(`中位数: ${stats.median.toFixed(2)}`);
    console.log(`标准差: ${stats.std_dev.toFixed(2)}`);
    console.log(`范围: ${stats.min.toFixed(2)} - ${stats.max.toFixed(2)}`);
  } finally {
    // `stats` wraps a Rust struct living in WASM memory. Release it explicitly
    // instead of depending on garbage-collector finalizers being available.
    stats.free();
  }
}
40
// Moving-average demo: 20- and 50-point averages over a synthetic price series.
function calcMA() {
  // Synthetic prices: a sine wave around 100 plus random noise.
  const prices = Float64Array.from(
    { length: 10000 },
    (_, i) => 100 + Math.sin(i / 100) * 10 + Math.random() * 5
  );

  // Formats the last element of a series with two decimals.
  const latest = (series) => series[series.length - 1].toFixed(2);

  const ma20 = moving_average(prices, 20);
  const ma50 = moving_average(prices, 50);

  console.log(`MA20 最新值: ${latest(ma20)}`);
  console.log(`MA50 最新值: ${latest(ma50)}`);
}
54
// Matrix benchmark: times the WASM product of two random 500×500 matrices.
function testMatrix() {
  const size = 500;
  const cellCount = size * size;
  const a = new Float64Array(cellCount);
  const b = new Float64Array(cellCount);

  // Fill both matrices with random values.
  for (let idx = 0; idx < cellCount; idx += 1) {
    a[idx] = Math.random();
    b[idx] = Math.random();
  }

  // Only the WASM call is timed.
  const t0 = performance.now();
  const product = matrix_multiply(a, size, size, b, size, size);
  const elapsed = performance.now() - t0;

  console.log(`${size}×${size} 矩阵乘法: ${elapsed.toFixed(2)} ms`);
}
73
// Linear-regression demo: fits noisy samples of y = 2x + 5 and prints the result.
function testRegression() {
  const size = 10000;
  const x = new Float64Array(size);
  const y = new Float64Array(size);

  // y = 2x + 5 plus uniform noise in [-5, 5).
  for (let i = 0; i < size; i++) {
    x[i] = Math.random() * 100;
    y[i] = 2 * x[i] + 5 + (Math.random() - 0.5) * 10;
  }

  // The Rust function returns Option<LinearRegression>; `None` arrives here
  // as `undefined`, so check before touching any property.
  const reg = linear_regression(x, y);
  if (!reg) {
    console.error("线性回归失败: 输入数据无效");
    return;
  }

  try {
    console.log(
      `拟合结果: y = ${reg.slope.toFixed(2)}x + ${reg.intercept.toFixed(2)}`
    );
    console.log(`R² = ${reg.r_squared.toFixed(4)}`);
    console.log(`预测 x=50: y = ${reg.predict(50).toFixed(2)}`);
  } finally {
    // `reg` wraps a Rust struct living in WASM memory. Release it explicitly
    // instead of depending on garbage-collector finalizers being available.
    reg.free();
  }
}

注意事项

数据传递

小数据 (< 1000 元素):

1const result = sum([1, 2, 3, 4, 5]);

大数据 (> 1000 元素):

1// 使用 TypedArray:数据以整块内存拷贝进出 WASM,避免逐元素转换
2const data = new Float64Array(1000000);
3quick_sort(data); // 原地修改

内存优化

原地操作 vs 返回新数组:

1// 原地修改:不分配返回数组(wasm-bindgen 仍会把数据拷入 WASM 内存,调用结束后写回)
2pub fn quick_sort(arr: &mut [f64]) {
3    // ...
4}
5
6// 返回新数组,有内存分配
7pub fn moving_average(data: &[f64], window: usize) -> Vec<f64> {
8    // ...
9}

算法优化

使用迭代器:

1// 好 - 编译器优化更好
2let sum: f64 = data.iter().sum();
3
4// 差 - 手动循环
5let mut sum = 0.0;
6for &x in data {
7    sum += x;
8}

缓存友好的访问:

1// 行优先访问 (缓存友好)
2for i in 0..rows {
3    for j in 0..cols {
4        result[i * cols + j] = ...;
5    }
6}
7
8// 列优先访问 (缓存不友好)
9for j in 0..cols {
10    for i in 0..rows {
11        result[i * cols + j] = ...;  // 跳跃访问
12    }
13}

实际应用

金融数据分析

// Stock technical analysis: prints the latest price, the 20- and 50-point
// moving averages and the volatility (standard deviation) for `symbol`.
// NOTE(review): assumes fetchPrices resolves to an array of numbers — confirm.
async function analyzeStock(symbol) {
  const prices = await fetchPrices(symbol, 365);
  if (prices.length === 0) {
    console.log(`${symbol} 分析: 无价格数据`);
    return;
  }
  const pricesArray = new Float64Array(prices);

  // Moving averages. moving_average returns an empty array when there are
  // fewer prices than the window, so the latest value may not exist.
  const ma20 = moving_average(pricesArray, 20);
  const ma50 = moving_average(pricesArray, 50);
  const latest = (series) =>
    series.length > 0 ? series[series.length - 1].toFixed(2) : "N/A";

  // Summary statistics.
  const stats = calculate_statistics(pricesArray);
  try {
    console.log(`${symbol} 分析:`);
    console.log(`当前价: ${prices[prices.length - 1]}`);
    console.log(`MA20: ${latest(ma20)}`);
    console.log(`MA50: ${latest(ma50)}`);
    console.log(`波动率: ${stats.std_dev.toFixed(2)}`);
  } finally {
    // `stats` wraps a Rust struct living in WASM memory. Release it explicitly
    // instead of depending on garbage-collector finalizers being available.
    stats.free();
  }
}

大数据排序

// Sorts a very large typed array one chunk at a time, yielding to the event
// loop between chunks so the page stays responsive.
async function sortLargeData(data) {
  const CHUNK_SIZE = 1000000;

  let offset = 0;
  while (offset < data.length) {
    // Sort the current chunk through a view onto `data`.
    const end = Math.min(offset + CHUNK_SIZE, data.length);
    quick_sort(data.subarray(offset, end));

    // Give the browser a chance to handle pending events.
    await new Promise((resolve) => setTimeout(resolve, 0));

    offset += CHUNK_SIZE;
  }

  // Merge the sorted chunks (not implemented here).
  // ...
}

什么时候用

适合:

  • 大数据量 (>1 万元素)
  • 复杂算法 (排序、统计、矩阵)
  • 实时计算
  • 金融分析、科学计算

不适合:

  • 简单操作 (几个数字的加减)
  • 小数据量
  • 一次性计算