feat: 字符串分段 + 比特流编码
This commit is contained in:
@@ -1 +1,99 @@
|
||||
// FIXME: 比特流 — Task 6
|
||||
use crate::encoder::mode::{
|
||||
encode_alphanumeric, encode_byte, encode_kanji, encode_numeric, Mode,
|
||||
};
|
||||
use crate::encoder::segment::segment_text;
|
||||
use crate::version::{get_data_capacity, EcLevel, Version};
|
||||
|
||||
/// 将文本编码为数据码字序列
|
||||
pub fn build_codewords(text: &str, version: Version, level: EcLevel) -> Vec<u8> {
|
||||
let segments = segment_text(text);
|
||||
let mut bits: Vec<bool> = Vec::new();
|
||||
|
||||
// 1. 各段编码:模式指示符 + 字符计数 + 数据
|
||||
for seg in &segments {
|
||||
// 模式指示符 4 bit
|
||||
for i in (0..4).rev() {
|
||||
bits.push((seg.mode.indicator() >> i) & 1 == 1);
|
||||
}
|
||||
// 字符计数
|
||||
let count_bits = seg.mode.count_bits(version.0);
|
||||
for i in (0..count_bits).rev() {
|
||||
bits.push((seg.char_count >> i) & 1 == 1);
|
||||
}
|
||||
// 编码数据
|
||||
let data_bits = match seg.mode {
|
||||
Mode::Numeric => encode_numeric(&seg.data),
|
||||
Mode::Alphanumeric => encode_alphanumeric(&seg.data),
|
||||
Mode::Byte => encode_byte(&seg.data),
|
||||
Mode::Kanji => encode_kanji(&seg.data),
|
||||
};
|
||||
bits.extend(data_bits);
|
||||
}
|
||||
|
||||
// 2. 终止符(最多 4 bit 0)
|
||||
let total_capacity = get_data_capacity(version, level) as usize * 8;
|
||||
let terminator_len = 4usize.min(total_capacity.saturating_sub(bits.len()));
|
||||
bits.extend(std::iter::repeat(false).take(terminator_len));
|
||||
|
||||
// 3. 补零到 8-bit 边界
|
||||
while bits.len() % 8 != 0 {
|
||||
bits.push(false);
|
||||
}
|
||||
|
||||
// 4. 填充码字 0xEC/0x11 交替
|
||||
let mut pad_byte = 0xECu8;
|
||||
while bits.len() < total_capacity {
|
||||
for i in (0..8).rev() {
|
||||
bits.push((pad_byte >> i) & 1 == 1);
|
||||
}
|
||||
pad_byte ^= 0xEC ^ 0x11; // 交替 0xEC ↔ 0x11
|
||||
}
|
||||
|
||||
// 5. 比特 → 字节
|
||||
bits_to_bytes(&bits)
|
||||
}
|
||||
|
||||
/// Packs a sequence of bits into bytes, most-significant bit first.
///
/// Every group of eight bits becomes one byte, with the first bit of the group
/// stored in bit 7. A trailing group shorter than eight bits is zero-padded on
/// the right, so its bits keep their MSB-first positions instead of sliding
/// down to the low end of the byte. An empty slice yields an empty vector.
fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    bits.chunks(8)
        .map(|chunk| {
            let packed = chunk
                .iter()
                .fold(0u8, |acc, &bit| (acc << 1) | u8::from(bit));
            // `chunk.len()` is in 1..=8, so the shift is 0 for a full chunk and
            // left-aligns a short final chunk otherwise.
            packed << (8 - chunk.len())
        })
        .collect()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Version 1-L: 19 data codewords.
    #[test]
    fn test_build_codewords_numeric() {
        let data = build_codewords("123", Version(1), EcLevel::L);
        assert_eq!(data.len(), 19);
    }

    /// Version 1-M is expected to yield 16 data codewords.
    #[test]
    fn test_build_codewords_alphanumeric() {
        let data = build_codewords("HELLO", Version(1), EcLevel::M);
        assert_eq!(data.len(), 16);
    }

    /// A single character still fills all 19 codewords of version 1-L.
    #[test]
    fn test_build_codewords_short_with_padding() {
        let data = build_codewords("A", Version(1), EcLevel::L);
        assert_eq!(data.len(), 19);
    }

    /// Version 1-Q: 13 data codewords.
    #[test]
    fn test_build_codewords_mixed() {
        let data = build_codewords("HELLO WORLD 123", Version(1), EcLevel::Q);
        assert_eq!(data.len(), 13);
    }

    /// Short input leaves most codewords as padding, which must alternate
    /// 0xEC, 0x11, ... starting with 0xEC and run to the end of the output.
    #[test]
    fn test_padding_pattern() {
        let data = build_codewords("A", Version(1), EcLevel::L);
        assert_eq!(data.len(), 19);

        // NOTE(review): assumes the encoded payload for "A" contains no 0xEC
        // byte of its own, so the first 0xEC marks the start of the padding.
        let first_pad = data
            .iter()
            .position(|&byte| byte == 0xEC)
            .expect("padding should start with 0xEC");
        let padding = &data[first_pad..];
        assert!(padding.len() >= 2, "expected several pad codewords");

        let expected = [0xECu8, 0x11].iter().cycle();
        assert!(
            padding.iter().zip(expected).all(|(got, want)| got == want),
            "pad codewords must alternate 0xEC / 0x11, got {:02X?}",
            padding
        );
    }
}
|
||||
|
||||
+131
-1
@@ -1 +1,131 @@
|
||||
// FIXME: 数据分段 — Task 6
|
||||
use crate::encoder::mode::{is_alphanumeric, is_kanji, is_numeric, Mode};
|
||||
|
||||
/// 数据段:一段连续使用同一种编码模式的数据
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Segment {
|
||||
pub mode: Mode,
|
||||
pub char_count: u16,
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
/// 分析字符串,生成最优分段
|
||||
pub fn segment_text(text: &str) -> Vec<Segment> {
|
||||
if text.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let chars: Vec<char> = text.chars().collect();
|
||||
let mut segments = Vec::new();
|
||||
let mut i = 0;
|
||||
|
||||
while i < chars.len() {
|
||||
let range = find_best_run(&chars, i);
|
||||
let chunk: String = chars[i..range].iter().collect();
|
||||
let mode = char_mode(chars[i]);
|
||||
|
||||
segments.push(Segment {
|
||||
mode,
|
||||
char_count: (range - i) as u16,
|
||||
data: chunk,
|
||||
});
|
||||
|
||||
i = range;
|
||||
}
|
||||
|
||||
segments
|
||||
}
|
||||
|
||||
/// 找到从 pos 开始的最长同模式字符序列
|
||||
fn find_best_run(chars: &[char], pos: usize) -> usize {
|
||||
if pos >= chars.len() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
let current_mode = char_mode(chars[pos]);
|
||||
let mut end = pos + 1;
|
||||
|
||||
while end < chars.len() && char_mode(chars[end]) == current_mode {
|
||||
end += 1;
|
||||
}
|
||||
|
||||
end
|
||||
}
|
||||
|
||||
/// 判断单个字符的最佳编码模式(按优先级:数字 > 字母 > 汉字 > 字节)
|
||||
fn char_mode(c: char) -> Mode {
|
||||
if is_numeric(c) {
|
||||
Mode::Numeric
|
||||
} else if is_alphanumeric(c) {
|
||||
Mode::Alphanumeric
|
||||
} else if is_kanji(c) {
|
||||
Mode::Kanji
|
||||
} else {
|
||||
Mode::Byte
|
||||
}
|
||||
}
|
||||
|
||||
/// 计算段的比特长度(模式指示符 + 字符计数 + 数据)
|
||||
pub fn segment_bit_length(seg: &Segment, version: u8) -> u16 {
|
||||
let mode_bits = 4u16;
|
||||
let count_bits = seg.mode.count_bits(version) as u16;
|
||||
let data_bits = match seg.mode {
|
||||
Mode::Numeric => {
|
||||
let groups_of_3 = seg.char_count / 3;
|
||||
let remainder = seg.char_count % 3;
|
||||
groups_of_3 * 10
|
||||
+ if remainder == 2 {
|
||||
7
|
||||
} else if remainder == 1 {
|
||||
4
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
Mode::Alphanumeric => {
|
||||
let groups_of_2 = seg.char_count / 2;
|
||||
groups_of_2 * 11 + if seg.char_count % 2 == 1 { 6 } else { 0 }
|
||||
}
|
||||
Mode::Byte => seg.char_count * 8,
|
||||
Mode::Kanji => seg.char_count * 13,
|
||||
};
|
||||
mode_bits + count_bits + data_bits
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A digits-only string becomes a single numeric segment.
    #[test]
    fn test_segment_numeric_only() {
        let segs = segment_text("12345");
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].mode, Mode::Numeric);
        assert_eq!(segs[0].char_count, 5);
        assert_eq!(segs[0].data, "12345");
    }

    /// "ABC123" -> "ABC" (alphanumeric) + "123" (numeric).
    #[test]
    fn test_segment_mixed() {
        let segs = segment_text("ABC123");
        assert_eq!(segs.len(), 2);

        assert_eq!(segs[0].mode, Mode::Alphanumeric);
        assert_eq!(segs[0].char_count, 3);
        assert_eq!(segs[0].data, "ABC");

        assert_eq!(segs[1].mode, Mode::Numeric);
        assert_eq!(segs[1].char_count, 3);
        assert_eq!(segs[1].data, "123");
    }

    /// Empty input produces no segments.
    #[test]
    fn test_segment_empty() {
        let segs = segment_text("");
        assert!(segs.is_empty());
    }

    /// 4 (mode) + 10 (count field for version 1) + 10 (data) = 24.
    #[test]
    fn test_segment_bit_length() {
        let seg = Segment {
            mode: Mode::Numeric,
            char_count: 3,
            data: "123".into(),
        };
        assert_eq!(segment_bit_length(&seg, 1), 24);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user