feat: 字符串分段 + 比特流编码

This commit is contained in:
2026-06-16 23:43:04 +08:00
parent db9c930359
commit f6a349882d
2 changed files with 230 additions and 2 deletions
+99 -1
View File
@@ -1 +1,99 @@
// Bit stream assembly (Task 6): turns segmented text into padded data codewords.
use crate::encoder::mode::{
encode_alphanumeric, encode_byte, encode_kanji, encode_numeric, Mode,
};
use crate::encoder::segment::segment_text;
use crate::version::{get_data_capacity, EcLevel, Version};
/// Encodes `text` into the sequence of data codewords for one symbol.
///
/// The text is split with `segment_text`; every segment contributes a 4-bit
/// mode indicator, a character-count field whose width comes from
/// `Mode::count_bits` for this version, and the mode-specific data bits.
/// The stream is then finished with a terminator of up to four zero bits,
/// zero-filled to the next byte boundary, and topped up with the alternating
/// pad codewords `0xEC` / `0x11` until it reaches the capacity reported by
/// `get_data_capacity(version, level)` (used here as a codeword count).
///
/// No truncation is performed: when the encoded segments already exceed that
/// capacity, the returned vector is simply longer than the capacity.
pub fn build_codewords(text: &str, version: Version, level: EcLevel) -> Vec<u8> {
    let segments = segment_text(text);
    let mut bits: Vec<bool> = Vec::new();

    // Step 1: header and payload of every segment, most significant bit first.
    for seg in &segments {
        // 4-bit mode indicator.
        bits.extend(
            (0..4)
                .rev()
                .map(|shift| (seg.mode.indicator() >> shift) & 1 == 1),
        );

        // Character-count field; its width depends on mode and version.
        let count_bits = seg.mode.count_bits(version.0);
        bits.extend(
            (0..count_bits)
                .rev()
                .map(|shift| (seg.char_count >> shift) & 1 == 1),
        );

        // Mode-specific payload bits.
        bits.extend(match seg.mode {
            Mode::Numeric => encode_numeric(&seg.data),
            Mode::Alphanumeric => encode_alphanumeric(&seg.data),
            Mode::Byte => encode_byte(&seg.data),
            Mode::Kanji => encode_kanji(&seg.data),
        });
    }

    // Step 2: terminator, at most four zero bits and never past the capacity.
    let total_capacity = get_data_capacity(version, level) as usize * 8;
    let terminator_len = 4usize.min(total_capacity.saturating_sub(bits.len()));
    bits.resize(bits.len() + terminator_len, false);

    // Step 3: zero-fill up to the next byte boundary.
    let tail = bits.len() % 8;
    if tail != 0 {
        bits.resize(bits.len() + (8 - tail), false);
    }

    // Step 4: pad codewords, 0xEC first, then 0x11, repeating.
    for pad_byte in [0xECu8, 0x11u8].iter().copied().cycle() {
        if bits.len() >= total_capacity {
            break;
        }
        bits.extend((0..8).rev().map(|shift| (pad_byte >> shift) & 1 == 1));
    }

    // Step 5: pack the bits into bytes.
    bits_to_bytes(&bits)
}
/// Packs a most-significant-bit-first bit sequence into bytes.
///
/// Every group of eight bits becomes one byte, with the first bit of the
/// group landing in bit 7. When the input length is not a multiple of eight,
/// the final byte is left-aligned and its unused low bits are zero, i.e. the
/// stream is treated as if it were zero-padded to a byte boundary.
///
/// Returns an empty vector for an empty slice.
fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    bits.chunks(8)
        .map(|chunk| {
            let packed = chunk
                .iter()
                .fold(0u8, |acc, &bit| (acc << 1) | u8::from(bit));
            // `chunk.len()` is in 1..=8, so the shift is in 0..=7. It is zero
            // for full chunks and left-aligns a short trailing chunk.
            packed << (8 - chunk.len())
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A short numeric payload is padded out to the full version 1-L capacity.
    #[test]
    fn test_build_codewords_numeric() {
        let data = build_codewords("123", Version(1), EcLevel::L);
        // Version 1-L: 19 data codewords.
        assert_eq!(data.len(), 19);
    }

    /// An alphanumeric payload is padded out to the version 1-M capacity.
    #[test]
    fn test_build_codewords_alphanumeric() {
        let data = build_codewords("HELLO", Version(1), EcLevel::M);
        assert_eq!(data.len(), 16);
    }

    /// A single character still produces a full-length codeword sequence.
    #[test]
    fn test_build_codewords_short_with_padding() {
        let data = build_codewords("A", Version(1), EcLevel::L);
        assert_eq!(data.len(), 19);
    }

    /// Text that splits into more than one segment fits version 1-Q.
    #[test]
    fn test_build_codewords_mixed() {
        let data = build_codewords("HELLO WORLD 123", Version(1), EcLevel::Q);
        // Version 1-Q: 13 data codewords.
        assert_eq!(data.len(), 13);
    }

    /// The pad codewords after a short payload alternate 0xEC, 0x11, starting
    /// with 0xEC.
    #[test]
    fn test_padding_pattern() {
        let data = build_codewords("A", Version(1), EcLevel::L);
        assert_eq!(data.len(), 19);

        // Assuming the mode module follows the QR specification, "A" encodes as
        // one alphanumeric segment: 4 (mode) + 9 (count) + 6 (data) = 19 bits.
        // The 4-bit terminator and 1 alignment bit bring that to 24 bits, so
        // the payload occupies 3 codewords and padding starts at index 3.
        for (offset, &byte) in data[3..].iter().enumerate() {
            let expected: u8 = if offset % 2 == 0 { 0xEC } else { 0x11 };
            assert_eq!(
                byte, expected,
                "pad codeword at offset {} should be {:#04X}",
                offset, expected
            );
        }
    }
}
+131 -1
View File
@@ -1 +1,131 @@
// Data segmentation (Task 6): splits input text into runs that share one encoding mode.
use crate::encoder::mode::{is_alphanumeric, is_kanji, is_numeric, Mode};
/// A data segment: one contiguous piece of the input that is encoded with a
/// single mode.
#[derive(Debug, Clone)]
pub struct Segment {
/// Encoding mode used for the whole of `data`.
pub mode: Mode,
/// Length of the segment. `segment_text` sets this to the number of `char`s
/// in `data` (Unicode scalar values, not UTF-8 bytes).
pub char_count: u16,
/// The text covered by this segment.
pub data: String,
}
/// Splits `text` into consecutive segments whose characters all share the
/// same encoding mode.
///
/// The split is greedy: a new segment starts whenever the per-character mode
/// picked by `char_mode` changes. Header overhead is not weighed, so the
/// result is not necessarily the shortest possible encoding.
///
/// `char_count` is the number of `char`s in the segment, not the number of
/// UTF-8 bytes. A run longer than `u16::MAX` characters is cut into several
/// segments of the same mode so that `char_count` always matches `data`
/// instead of silently wrapping.
///
/// NOTE(review): for `Mode::Byte` segments holding non-ASCII text the `char`
/// count differs from the UTF-8 byte length of `data` — confirm this is what
/// `encode_byte` and the character-count field expect.
///
/// Returns an empty vector for empty input.
pub fn segment_text(text: &str) -> Vec<Segment> {
    // Longest run one segment may hold so that its length fits in `char_count`.
    const MAX_RUN: usize = u16::MAX as usize;

    let chars: Vec<char> = text.chars().collect();
    let mut segments = Vec::new();
    let mut start = 0;
    while start < chars.len() {
        // `find_best_run` returns an index past `start` here, so the loop
        // always advances even after clamping.
        let end = find_best_run(&chars, start).min(start.saturating_add(MAX_RUN));
        segments.push(Segment {
            mode: char_mode(chars[start]),
            // Lossless cast: `end - start` is at most `MAX_RUN`.
            char_count: (end - start) as u16,
            data: chars[start..end].iter().collect(),
        });
        start = end;
    }
    segments
}
/// Returns the exclusive end index of the longest run that starts at `pos`
/// and whose characters all map to the same mode as `chars[pos]`.
///
/// When `pos` is at or beyond the end of `chars`, `pos` itself is returned.
fn find_best_run(chars: &[char], pos: usize) -> usize {
    match chars.get(pos) {
        None => pos,
        Some(&first) => {
            let run_mode = char_mode(first);
            // Count how many of the following characters keep the same mode.
            let following = chars[pos + 1..]
                .iter()
                .take_while(|&&c| char_mode(c) == run_mode)
                .count();
            pos + 1 + following
        }
    }
}
/// Picks the encoding mode for a single character.
///
/// The predicates are tried in priority order — numeric, alphanumeric,
/// kanji — and the first match wins; anything else is encoded in byte mode.
fn char_mode(c: char) -> Mode {
    if is_numeric(c) {
        return Mode::Numeric;
    }
    if is_alphanumeric(c) {
        return Mode::Alphanumeric;
    }
    if is_kanji(c) {
        return Mode::Kanji;
    }
    Mode::Byte
}
/// Computes the encoded length of a segment in bits: the 4-bit mode
/// indicator, the character-count field for `version`, and the data bits.
///
/// Data bits per mode, derived from `seg.char_count`:
/// - numeric: 10 bits per group of three, plus 7 for two leftover characters
///   or 4 for one;
/// - alphanumeric: 11 bits per pair, plus 6 for a leftover character;
/// - byte: 8 bits per counted unit;
/// - kanji: 13 bits per counted unit.
///
/// The sum is computed in 32-bit arithmetic and saturates at `u16::MAX`, so a
/// very large `char_count` reports "too long" instead of overflowing (a panic
/// in debug builds, a wrapped and misleadingly small value in release builds).
pub fn segment_bit_length(seg: &Segment, version: u8) -> u16 {
    const MODE_INDICATOR_BITS: u32 = 4;

    let count_bits = seg.mode.count_bits(version) as u32;
    let count = u32::from(seg.char_count);
    let data_bits = match seg.mode {
        Mode::Numeric => {
            let leftover_bits = match count % 3 {
                2 => 7,
                1 => 4,
                _ => 0,
            };
            (count / 3) * 10 + leftover_bits
        }
        Mode::Alphanumeric => (count / 2) * 11 + if count % 2 == 1 { 6 } else { 0 },
        Mode::Byte => count * 8,
        Mode::Kanji => count * 13,
    };

    let total = MODE_INDICATOR_BITS + count_bits + data_bits;
    // Lossless cast: the value has just been clamped to the `u16` range.
    total.min(u32::from(u16::MAX)) as u16
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A digits-only string becomes a single numeric segment covering all digits.
    #[test]
    fn test_segment_numeric_only() {
        let segments = segment_text("12345");
        assert_eq!(segments.len(), 1);

        let only = &segments[0];
        assert_eq!(only.mode, Mode::Numeric);
        assert_eq!(only.char_count, 5);
    }

    /// "ABC123" splits into "ABC" (alphanumeric) followed by "123" (numeric).
    #[test]
    fn test_segment_mixed() {
        let segments = segment_text("ABC123");
        assert_eq!(segments.len(), 2);

        let (letters, digits) = (&segments[0], &segments[1]);
        assert_eq!(letters.mode, Mode::Alphanumeric);
        assert_eq!(digits.mode, Mode::Numeric);
    }

    /// Empty input produces no segments.
    #[test]
    fn test_segment_empty() {
        assert!(segment_text("").is_empty());
    }

    /// Three digits at version 1: 4 (mode) + 10 (count) + 10 (data) = 24 bits.
    #[test]
    fn test_segment_bit_length() {
        let digits = Segment {
            mode: Mode::Numeric,
            char_count: 3,
            data: String::from("123"),
        };
        let bits = segment_bit_length(&digits, 1);
        assert_eq!(bits, 24);
    }
}