// Encoding modes for the encoder: mode indicators, character-count field
// widths and the per-mode bit-stream encoders.
//
// Reconstructed from the patch that creates `core/src/encoder/mode.rs`
// (commit db9c930, "feat: four encoding modes (numeric / alphanumeric /
// byte / kanji)"). The indicator values and count-field widths below match
// the QR Code (ISO/IEC 18004) tables.

/// Data encoding mode of a segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    Numeric,
    Alphanumeric,
    Byte,
    Kanji,
}

impl Mode {
    /// Returns the mode indicator; only the low 4 bits are significant.
    pub fn indicator(self) -> u8 {
        match self {
            Mode::Numeric => 0b0001,
            Mode::Alphanumeric => 0b0010,
            Mode::Byte => 0b0100,
            Mode::Kanji => 0b1000,
        }
    }

    /// Returns the width, in bits, of the character-count indicator for this
    /// mode at the given symbol `version`.
    ///
    /// Versions are grouped as `..=9`, `10..=26` and everything above 26.
    /// The version number itself is not validated.
    pub fn count_bits(self, version: u8) -> u8 {
        match self {
            Mode::Numeric => match version {
                0..=9 => 10,
                10..=26 => 12,
                _ => 14,
            },
            Mode::Alphanumeric => match version {
                0..=9 => 9,
                10..=26 => 11,
                _ => 13,
            },
            Mode::Byte => match version {
                0..=9 => 8,
                _ => 16,
            },
            Mode::Kanji => match version {
                0..=9 => 8,
                10..=26 => 10,
                _ => 12,
            },
        }
    }
}

/// Appends the `width` low-order bits of `value` to `bits`, most significant
/// bit first. `width` must be at most 16.
fn push_bits(bits: &mut Vec<bool>, value: u16, width: u32) {
    bits.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
}

/// Numeric mode: every group of 3 digits becomes 10 bits; a trailing group of
/// 2 digits becomes 7 bits and a trailing single digit 4 bits.
///
/// Characters that are not ASCII decimal digits are skipped.
pub fn encode_numeric(input: &str) -> Vec<bool> {
    let digits: Vec<u16> = input
        .chars()
        .filter_map(|c| c.to_digit(10))
        .filter_map(|d| u16::try_from(d).ok())
        .collect();

    let mut bits = Vec::with_capacity(digits.len() / 3 * 10 + 7);
    for group in digits.chunks(3) {
        // At most three digits, so the value is at most 999 and fits in 10 bits.
        let value = group.iter().fold(0u16, |acc, &d| acc * 10 + d);
        let width = match group.len() {
            3 => 10,
            2 => 7,
            _ => 4,
        };
        push_bits(&mut bits, value, width);
    }
    bits
}

/// Alphanumeric mode character set: `0-9`, `A-Z`, space and `$%*+-./:`.
/// A character's value is its index in this table.
const ALPHANUMERIC_CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:";

/// Returns the alphanumeric-mode value (0..=44) of `c`, or `None` when `c` is
/// not part of the character set.
fn alphanumeric_value(c: char) -> Option<u8> {
    // Guard before narrowing: a plain `c as u8` keeps only the low byte, which
    // would make e.g. U+0141 look like 'A' (0x41).
    if !c.is_ascii() {
        return None;
    }
    let byte = c as u8; // lossless, `c` is ASCII
    ALPHANUMERIC_CHARS
        .iter()
        .position(|&entry| entry == byte)
        .and_then(|index| u8::try_from(index).ok())
}

/// Alphanumeric mode: every pair of characters becomes 11 bits
/// (`first * 45 + second`); a trailing single character becomes 6 bits.
///
/// Characters outside the alphanumeric set (including lowercase letters and
/// all non-ASCII characters) are skipped.
pub fn encode_alphanumeric(input: &str) -> Vec<bool> {
    let values: Vec<u8> = input.chars().filter_map(alphanumeric_value).collect();

    let mut bits = Vec::with_capacity(values.len() / 2 * 11 + 6);
    for pair in values.chunks(2) {
        match *pair {
            [first, second] => {
                push_bits(&mut bits, u16::from(first) * 45 + u16::from(second), 11);
            }
            [single] => push_bits(&mut bits, u16::from(single), 6),
            // `chunks(2)` never yields empty or longer slices.
            _ => {}
        }
    }
    bits
}

/// Byte mode: every byte of the input becomes 8 bits, most significant bit
/// first.
///
/// The bytes are those of the `&str`, i.e. its UTF-8 encoding. This equals
/// ISO 8859-1 only for pure-ASCII input; no Latin-1 transcoding is performed.
pub fn encode_byte(input: &str) -> Vec<bool> {
    let mut bits = Vec::with_capacity(input.len() * 8);
    for byte in input.bytes() {
        push_bits(&mut bits, u16::from(byte), 8);
    }
    bits
}

/// Kanji mode: every character that `unicode_to_shift_jis` can map becomes a
/// 13-bit code word.
///
/// Characters without a mapping fall back to their UTF-8 bytes, 8 bits each.
pub fn encode_kanji(input: &str) -> Vec<bool> {
    let mut bits = Vec::new();
    for c in input.chars() {
        match unicode_to_shift_jis(c) {
            Some(codeword) => push_bits(&mut bits, codeword, 13),
            None => {
                // Fallback: raw UTF-8 bytes of the character.
                let mut buf = [0u8; 4];
                for byte in c.encode_utf8(&mut buf).bytes() {
                    push_bits(&mut bits, u16::from(byte), 8);
                }
            }
        }
    }
    bits
}

/// Simplified Unicode → 13-bit kanji code word conversion.
///
/// Only the CJK Unified Ideographs basic block (U+4E00..=U+9FFF) is handled.
/// This is an approximation based on the code-point offset, NOT a real
/// Shift JIS conversion (that requires a full mapping table).
///
/// Returns `None` when the character is outside the block, when the derived
/// pseudo lead byte falls in the gap 0xA0..=0xC0 (the subtraction used to
/// underflow there), or when the result would not fit in 13 bits (it used to
/// be silently truncated by the caller).
///
/// NOTE(review): the two accepted lead-byte ranges produce overlapping code
/// words (e.g. U+4E00 and U+7D00 both map to 0). Replace with a table-driven
/// conversion before relying on the output being decodable.
fn unicode_to_shift_jis(c: char) -> Option<u16> {
    const BLOCK_START: u32 = 0x4E00;
    const BLOCK_END: u32 = 0x9FFF;
    const ROW_LEN: u32 = 0xBC;
    const MAX_CODEWORD: u32 = (1 << 13) - 1;

    let code = u32::from(c);
    if !(BLOCK_START..=BLOCK_END).contains(&code) {
        return None;
    }

    let offset = code - BLOCK_START;
    let lead = 0x81 + offset / ROW_LEN;
    let column = offset % ROW_LEN;

    let row = match lead {
        0x81..=0x9F => lead - 0x81,
        0xC1..=0xFF => lead - 0xC1,
        _ => return None,
    };

    let codeword = row * ROW_LEN + column;
    if codeword > MAX_CODEWORD {
        return None;
    }
    u16::try_from(codeword).ok()
}

/// Returns `true` when `c` can be encoded in numeric mode.
pub fn is_numeric(c: char) -> bool {
    c.is_ascii_digit()
}

/// Returns `true` when `c` can be encoded in alphanumeric mode.
pub fn is_alphanumeric(c: char) -> bool {
    alphanumeric_value(c).is_some()
}

/// Returns `true` when `c` may be a kanji/hanzi character: CJK Unified
/// Ideographs, CJK Extension A, or CJK Symbols and Punctuation.
pub fn is_kanji(c: char) -> bool {
    matches!(
        c,
        '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{3000}'..='\u{303F}'
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_numeric_encode_three_digits() {
        let bits = encode_numeric("123");
        // 3 digits = 10 bits, value 123
        assert_eq!(bits.len(), 10);
        assert_eq!(bits_to_u16(&bits), 123);
    }

    #[test]
    fn test_numeric_encode_single_digit() {
        let bits = encode_numeric("5");
        assert_eq!(bits.len(), 4);
        assert_eq!(bits_to_u16(&bits), 5);
    }

    #[test]
    fn test_numeric_encode_two_digits() {
        let bits = encode_numeric("45");
        assert_eq!(bits.len(), 7);
        assert_eq!(bits_to_u16(&bits), 45);
    }

    #[test]
    fn test_alphanumeric_encode_pair() {
        let bits = encode_alphanumeric("AB");
        // A=10, B=11, val = 10*45+11 = 461 -> 11 bits
        assert_eq!(bits.len(), 11);
        assert_eq!(bits_to_u16(&bits), 461);
    }

    #[test]
    fn test_alphanumeric_single() {
        let bits = encode_alphanumeric("A");
        assert_eq!(bits.len(), 6);
        assert_eq!(bits_to_u16(&bits), 10);
    }

    #[test]
    fn test_alphanumeric_ignores_non_ascii_lookalikes() {
        // U+0141 has low byte 0x41 ('A'); it must not be accepted.
        assert!(encode_alphanumeric("\u{0141}").is_empty());
        assert!(!is_alphanumeric('\u{0141}'));
    }

    #[test]
    fn test_byte_encode() {
        let bits = encode_byte("Hi");
        assert_eq!(bits.len(), 16);
        // 'H' = 72 = 01001000
        assert!(!bits[0]);
        assert!(bits[1]);
    }

    #[test]
    fn test_kanji_codeword_fits_13_bits() {
        for code in 0x4E00u32..=0x9FFF {
            let c = char::from_u32(code).expect("no surrogates in this block");
            if let Some(codeword) = unicode_to_shift_jis(c) {
                assert!(codeword < (1 << 13));
            }
        }
    }

    #[test]
    fn test_mode_indicator_values() {
        assert_eq!(Mode::Numeric.indicator(), 0b0001);
        assert_eq!(Mode::Alphanumeric.indicator(), 0b0010);
        assert_eq!(Mode::Byte.indicator(), 0b0100);
        assert_eq!(Mode::Kanji.indicator(), 0b1000);
    }

    #[test]
    fn test_count_bits() {
        // Version 1-9
        assert_eq!(Mode::Numeric.count_bits(1), 10);
        assert_eq!(Mode::Alphanumeric.count_bits(5), 9);
        assert_eq!(Mode::Byte.count_bits(9), 8);
        // Version 10-26
        assert_eq!(Mode::Numeric.count_bits(10), 12);
        assert_eq!(Mode::Byte.count_bits(10), 16);
        // Version 27-40
        assert_eq!(Mode::Numeric.count_bits(27), 14);
        assert_eq!(Mode::Kanji.count_bits(30), 12);
    }

    #[test]
    fn test_is_functions() {
        assert!(is_numeric('5'));
        assert!(!is_numeric('A'));
        assert!(is_alphanumeric('A'));
        assert!(is_alphanumeric(' '));
        assert!(!is_alphanumeric('!'));
        assert!(is_kanji('你'));
        assert!(!is_kanji('A'));
    }

    fn bits_to_u16(bits: &[bool]) -> u16 {
        bits.iter().fold(0, |acc, &b| (acc << 1) | u16::from(b))
    }
}