diff --git a/core/src/encoder/bitstream.rs b/core/src/encoder/bitstream.rs
index 62eb58a..396bae2 100644
--- a/core/src/encoder/bitstream.rs
+++ b/core/src/encoder/bitstream.rs
@@ -1 +1,124 @@
-// FIXME: 比特流 — Task 6
+use crate::encoder::mode::{
+    encode_alphanumeric, encode_byte, encode_kanji, encode_numeric, Mode,
+};
+use crate::encoder::segment::segment_text;
+use crate::version::{get_data_capacity, EcLevel, Version};
+
+/// Encodes `text` into the data codewords of a QR symbol.
+///
+/// The bit stream is assembled in five steps:
+/// 1. each segment from `segment_text` contributes a 4-bit mode indicator,
+///    its character-count field and its encoded payload;
+/// 2. a terminator of up to four zero bits is appended;
+/// 3. zero bits pad the stream to a byte boundary;
+/// 4. the pad codewords `0xEC` / `0x11` alternate until the stream is
+///    `get_data_capacity(version, level)` bytes long;
+/// 5. the bits are packed into bytes, most significant bit first.
+///
+/// No capacity check is performed: if the encoded segments already exceed
+/// the capacity, no terminator or pad codewords are added and the result is
+/// longer than the capacity. Callers must pick a `version` / `level` that
+/// fits the data.
+pub fn build_codewords(text: &str, version: Version, level: EcLevel) -> Vec<u8> {
+    let segments = segment_text(text);
+    let mut bits: Vec<bool> = Vec::new();
+
+    // 1. Per-segment encoding: mode indicator + character count + data.
+    for seg in &segments {
+        // 4-bit mode indicator, most significant bit first.
+        for i in (0..4).rev() {
+            bits.push((seg.mode.indicator() >> i) & 1 == 1);
+        }
+        // Character count; the field width depends on mode and version.
+        let count_bits = seg.mode.count_bits(version.0);
+        for i in (0..count_bits).rev() {
+            bits.push((seg.char_count >> i) & 1 == 1);
+        }
+        // Mode-specific payload.
+        let data_bits = match seg.mode {
+            Mode::Numeric => encode_numeric(&seg.data),
+            Mode::Alphanumeric => encode_alphanumeric(&seg.data),
+            Mode::Byte => encode_byte(&seg.data),
+            Mode::Kanji => encode_kanji(&seg.data),
+        };
+        bits.extend(data_bits);
+    }
+
+    // 2. Terminator: up to four zero bits, fewer when the capacity is almost
+    //    exhausted.
+    let total_capacity = get_data_capacity(version, level) as usize * 8;
+    let terminator_len = 4usize.min(total_capacity.saturating_sub(bits.len()));
+    bits.resize(bits.len() + terminator_len, false);
+
+    // 3. Zero-pad to the next byte boundary.
+    while bits.len() % 8 != 0 {
+        bits.push(false);
+    }
+
+    // 4. Fill the remaining capacity with the alternating pad codewords.
+    let mut pad_byte = 0xECu8;
+    while bits.len() < total_capacity {
+        for i in (0..8).rev() {
+            bits.push((pad_byte >> i) & 1 == 1);
+        }
+        pad_byte ^= 0xEC ^ 0x11; // toggles between 0xEC and 0x11
+    }
+
+    // 5. Pack the bits into bytes.
+    bits_to_bytes(&bits)
+}
+
+/// Packs a bit slice into bytes, most significant bit first.
+///
+/// A trailing chunk shorter than eight bits is not left-aligned: its bits land
+/// in the low-order positions of the last byte. `build_codewords` always
+/// passes a length that is a multiple of eight.
+fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
+    bits.chunks(8)
+        .map(|chunk| chunk.iter().fold(0u8, |acc, &b| (acc << 1) | (b as u8)))
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_build_codewords_numeric() {
+        let data = build_codewords("123", Version(1), EcLevel::L);
+        // Version 1-L holds 19 data codewords.
+        assert_eq!(data.len(), 19);
+    }
+
+    #[test]
+    fn test_build_codewords_alphanumeric() {
+        let data = build_codewords("HELLO", Version(1), EcLevel::M);
+        assert_eq!(data.len(), 16);
+    }
+
+    #[test]
+    fn test_build_codewords_short_with_padding() {
+        let data = build_codewords("A", Version(1), EcLevel::L);
+        assert_eq!(data.len(), 19);
+    }
+
+    #[test]
+    fn test_build_codewords_mixed() {
+        let data = build_codewords("HELLO WORLD 123", Version(1), EcLevel::Q);
+        // Version 1-Q holds 13 data codewords.
+        assert_eq!(data.len(), 13);
+    }
+
+    #[test]
+    fn test_padding_pattern() {
+        let data = build_codewords("A", Version(1), EcLevel::L);
+        // Per the QR spec "A" takes 4 (mode) + 9 (count) + 6 (data) = 19 bits,
+        // i.e. 3 codewords after the terminator and byte alignment. Everything
+        // after that is padding and must alternate 0xEC / 0x11.
+        assert_eq!(data.len(), 19);
+        for (i, &byte) in data[3..].iter().enumerate() {
+            let expected = if i % 2 == 0 { 0xEC } else { 0x11 };
+            assert_eq!(byte, expected, "pad codeword {}", i);
+        }
+    }
+}
diff --git a/core/src/encoder/segment.rs b/core/src/encoder/segment.rs
index 50e7101..962f95b 100644
--- a/core/src/encoder/segment.rs
+++ b/core/src/encoder/segment.rs
@@ -1 +1,155 @@
-// FIXME: 数据分段 — Task 6
+use crate::encoder::mode::{is_alphanumeric, is_kanji, is_numeric, Mode};
+
+/// A run of consecutive input characters encoded with a single mode.
+#[derive(Debug, Clone)]
+pub struct Segment {
+    /// Encoding mode used for all of `data`.
+    pub mode: Mode,
+    /// Value for the character-count field: UTF-8 bytes for `Mode::Byte`,
+    /// characters for every other mode.
+    pub char_count: u16,
+    /// The text covered by this segment.
+    pub data: String,
+}
+
+/// Splits `text` into segments of consecutive characters that share the same
+/// preferred mode (see `char_mode`).
+///
+/// The split is greedy: a new segment starts whenever the preferred mode
+/// changes; short runs are not merged to save header bits. Empty input
+/// yields an empty vector.
+pub fn segment_text(text: &str) -> Vec<Segment> {
+    let chars: Vec<char> = text.chars().collect();
+    let mut segments = Vec::new();
+    let mut start = 0;
+
+    while start < chars.len() {
+        let mode = char_mode(chars[start]);
+        let end = find_best_run(&chars, start);
+        let data: String = chars[start..end].iter().collect();
+
+        // Byte mode counts encoded bytes, not characters, so a multi-byte
+        // UTF-8 character must contribute its full byte length.
+        // NOTE(review): assumes `encode_byte` emits the UTF-8 bytes of `data`.
+        let count = if mode == Mode::Byte {
+            data.len()
+        } else {
+            end - start
+        };
+
+        segments.push(Segment {
+            mode,
+            // NOTE: silently truncates counts above `u16::MAX`.
+            char_count: count as u16,
+            data,
+        });
+
+        start = end;
+    }
+
+    segments
+}
+
+/// Returns the exclusive end index of the run of same-mode characters that
+/// starts at `pos`, or `pos` itself when it is out of range.
+fn find_best_run(chars: &[char], pos: usize) -> usize {
+    let current_mode = match chars.get(pos) {
+        Some(&c) => char_mode(c),
+        None => return pos,
+    };
+
+    let run_len = chars[pos..]
+        .iter()
+        .take_while(|&&c| char_mode(c) == current_mode)
+        .count();
+    pos + run_len
+}
+
+/// Picks the preferred mode for one character, trying numeric, alphanumeric
+/// and kanji in that order and falling back to byte mode.
+fn char_mode(c: char) -> Mode {
+    if is_numeric(c) {
+        Mode::Numeric
+    } else if is_alphanumeric(c) {
+        Mode::Alphanumeric
+    } else if is_kanji(c) {
+        Mode::Kanji
+    } else {
+        Mode::Byte
+    }
+}
+
+/// Computes the encoded size of `seg` in bits: mode indicator, character-count
+/// field and payload.
+pub fn segment_bit_length(seg: &Segment, version: u8) -> u16 {
+    let mode_bits = 4u16;
+    let count_bits = seg.mode.count_bits(version) as u16;
+    let data_bits = match seg.mode {
+        // 10 bits per group of three digits; a trailing pair takes 7 bits and
+        // a trailing single digit 4.
+        Mode::Numeric => {
+            let tail_bits = match seg.char_count % 3 {
+                2 => 7,
+                1 => 4,
+                _ => 0,
+            };
+            seg.char_count / 3 * 10 + tail_bits
+        }
+        // 11 bits per pair of characters, 6 bits for an odd trailing one.
+        Mode::Alphanumeric => {
+            let tail_bits = if seg.char_count % 2 == 1 { 6 } else { 0 };
+            seg.char_count / 2 * 11 + tail_bits
+        }
+        Mode::Byte => seg.char_count * 8,
+        Mode::Kanji => seg.char_count * 13,
+    };
+    mode_bits + count_bits + data_bits
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_segment_numeric_only() {
+        let segs = segment_text("12345");
+        assert_eq!(segs.len(), 1);
+        assert_eq!(segs[0].mode, Mode::Numeric);
+        assert_eq!(segs[0].char_count, 5);
+    }
+
+    #[test]
+    fn test_segment_mixed() {
+        // "ABC123" -> "ABC" (alphanumeric) + "123" (numeric)
+        let segs = segment_text("ABC123");
+        assert_eq!(segs.len(), 2);
+        assert_eq!(segs[0].mode, Mode::Alphanumeric);
+        assert_eq!(segs[1].mode, Mode::Numeric);
+    }
+
+    #[test]
+    fn test_segment_empty() {
+        let segs = segment_text("");
+        assert!(segs.is_empty());
+    }
+
+    #[test]
+    fn test_segment_byte_counts_utf8_bytes() {
+        // The euro sign is a single character but three UTF-8 bytes.
+        let segs = segment_text("€");
+        assert_eq!(segs.len(), 1);
+        assert_eq!(segs[0].mode, Mode::Byte);
+        assert_eq!(segs[0].char_count, 3);
+    }
+
+    #[test]
+    fn test_segment_bit_length() {
+        let seg = Segment {
+            mode: Mode::Numeric,
+            char_count: 3,
+            data: "123".into(),
+        };
+        // 4 (mode) + 10 (count for v1) + 10 (data) = 24
+        assert_eq!(segment_bit_length(&seg, 1), 24);
+    }
+}