/// Encoding mode of a QR code data segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    Numeric,
    Alphanumeric,
    Byte,
    Kanji,
}

impl Mode {
    /// Returns the 4-bit mode indicator that prefixes a segment of this mode.
    pub fn indicator(self) -> u8 {
        match self {
            Mode::Numeric => 0b0001,
            Mode::Alphanumeric => 0b0010,
            Mode::Byte => 0b0100,
            Mode::Kanji => 0b1000,
        }
    }

    /// Returns the width, in bits, of the character count indicator.
    ///
    /// The width depends on the version tier: up to 9, 10 through 26, and
    /// everything above 26. `version` is not validated here, so 0 is treated
    /// like the first tier and anything above 26 like the last one.
    pub fn count_bits(self, version: u8) -> u8 {
        match self {
            Mode::Numeric => match version {
                0..=9 => 10,
                10..=26 => 12,
                _ => 14,
            },
            Mode::Alphanumeric => match version {
                0..=9 => 9,
                10..=26 => 11,
                _ => 13,
            },
            Mode::Byte => match version {
                0..=9 => 8,
                _ => 16,
            },
            Mode::Kanji => match version {
                0..=9 => 8,
                10..=26 => 10,
                _ => 12,
            },
        }
    }
}

/// Appends the lowest `width` bits of `value` to `bits`, most significant first.
///
/// Callers in this file only use widths of at most 13, so the shift never
/// reaches the 16-bit width of `value`.
fn append_bits(bits: &mut Vec<bool>, value: u16, width: u32) {
    bits.extend((0..width).rev().map(|i| (value >> i) & 1 == 1));
}

/// Numeric mode encoding: every group of 3 digits becomes 10 bits.
///
/// A trailing group of 2 digits becomes 7 bits and a single trailing digit
/// becomes 4 bits. Characters that are not ASCII digits are skipped.
pub fn encode_numeric(input: &str) -> Vec<bool> {
    // `to_digit(10)` yields 0..=9, so narrowing to u8 is lossless.
    let digits: Vec<u8> = input
        .chars()
        .filter_map(|c| c.to_digit(10))
        .map(|d| d as u8)
        .collect();

    let mut bits = Vec::new();
    for group in digits.chunks(3) {
        // At most three decimal digits, so the value is at most 999.
        let value = group
            .iter()
            .fold(0u16, |acc, &d| acc * 10 + u16::from(d));
        // `chunks(3)` only yields groups of length 1, 2 or 3.
        let width = match group.len() {
            1 => 4,
            2 => 7,
            _ => 10,
        };
        append_bits(&mut bits, value, width);
    }
    bits
}

/// Alphanumeric mode character set: 0-9, A-Z, space, $%*+-./:
///
/// The index of a byte in this table is its alphanumeric code value.
const ALPHANUMERIC_CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:";

/// Looks up the alphanumeric code value (table index) of `c`.
///
/// Returns `None` for characters outside the table. Non-ASCII characters are
/// rejected up front: casting them to `u8` would keep only the low byte and
/// could alias a table entry (for example U+0141 would be read as 'A').
fn alphanumeric_value(c: char) -> Option<u8> {
    if !c.is_ascii() {
        return None;
    }
    let byte = c as u8;
    ALPHANUMERIC_CHARS
        .iter()
        .position(|&entry| entry == byte)
        // The table has 45 entries, so the index always fits in u8.
        .map(|index| index as u8)
}

/// Alphanumeric mode encoding: every pair of characters becomes 11 bits.
///
/// A pair is encoded as `first * 45 + second`; a single trailing character is
/// encoded in 6 bits. Characters outside the alphanumeric set are skipped.
pub fn encode_alphanumeric(input: &str) -> Vec<bool> {
    let values: Vec<u8> = input.chars().filter_map(alphanumeric_value).collect();

    let mut bits = Vec::new();
    for pair in values.chunks(2) {
        match pair {
            &[first, second] => {
                append_bits(&mut bits, u16::from(first) * 45 + u16::from(second), 11);
            }
            // Single trailing character -> 6 bits.
            &[single] => append_bits(&mut bits, u16::from(single), 6),
            // `chunks(2)` never yields an empty or longer slice.
            _ => {}
        }
    }
    bits
}

/// Byte mode encoding: every byte becomes 8 bits, most significant bit first.
///
/// The bytes are taken from the string's in-memory representation, i.e. its
/// UTF-8 encoding. No conversion to ISO 8859-1 / Latin-1 is performed, so a
/// non-ASCII character contributes more than one byte.
pub fn encode_byte(input: &str) -> Vec<bool> {
    let mut bits = Vec::with_capacity(input.len() * 8);
    for &byte in input.as_bytes() {
        append_bits(&mut bits, u16::from(byte), 8);
    }
    bits
}

/// Kanji mode encoding: each mappable character becomes a 13-bit code word.
///
/// Characters for which `unicode_to_shift_jis` returns `None` fall back to
/// their UTF-8 bytes, 8 bits per byte, emitted in place.
pub fn encode_kanji(input: &str) -> Vec<bool> {
    let mut bits = Vec::new();
    for c in input.chars() {
        match unicode_to_shift_jis(c) {
            Some(code_word) => append_bits(&mut bits, code_word, 13),
            None => {
                // Fall back to the character's UTF-8 bytes.
                let mut buf = [0u8; 4];
                for &byte in c.encode_utf8(&mut buf).as_bytes() {
                    append_bits(&mut bits, u16::from(byte), 8);
                }
            }
        }
    }
    bits
}

/// Simplified Unicode -> Shift JIS -> 13-bit code word conversion.
///
/// Only CJK Unified Ideographs (U+4E00 ~ U+9FFF) are considered. The Shift JIS
/// code is NOT looked up in a real mapping table: it is approximated from the
/// offset of the code point within the block, so the result does not identify
/// the same character to a standards-conforming decoder.
///
/// Returns `None` when the approximated lead byte falls outside the two
/// ranges handled here (0x81..=0x9F and 0xE0..=0xEB). Outside those ranges the
/// compaction would either underflow, collide with values of the first range,
/// or no longer fit in 13 bits.
fn unicode_to_shift_jis(c: char) -> Option<u16> {
    let code = c as u32;
    if !(0x4E00..=0x9FFF).contains(&code) {
        return None;
    }

    // Approximation: spread the block offset over rows of 0xBC trail bytes.
    let base = code - 0x4E00;
    let lead = 0x81 + base / 0xBC; // 0x81..=0xF0 for this block
    let trail = 0x40 + base % 0xBC; // 0x40..=0xFB

    // Pick the lead-byte offset of the range the approximated code falls in.
    let lead_offset = match lead {
        0x81..=0x9F => 0x81,
        0xE0..=0xEB => 0xC1,
        _ => return None,
    };

    // At most 42 * 0xBC + 0xBB = 8083, which fits in 13 bits (and in u16).
    let code_word = (lead - lead_offset) * 0xBC + (trail - 0x40);
    Some(code_word as u16)
}

/// Returns whether `c` can be encoded in numeric mode (an ASCII digit).
pub fn is_numeric(c: char) -> bool {
    c.is_ascii_digit()
}

/// Returns whether `c` belongs to the alphanumeric mode character set.
pub fn is_alphanumeric(c: char) -> bool {
    alphanumeric_value(c).is_some()
}

/// Returns whether `c` may be a kanji character.
///
/// Accepts U+4E00..=U+9FFF, U+3400..=U+4DBF and U+3000..=U+303F. This is a
/// wider set than `unicode_to_shift_jis` can map, so a `true` result does not
/// guarantee a 13-bit encoding.
pub fn is_kanji(c: char) -> bool {
    matches!(
        c,
        '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{3000}'..='\u{303F}'
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_numeric_encode_three_digits() {
        let bits = encode_numeric("123");
        // 3 digits = 10 bits, value 123
        assert_eq!(bits.len(), 10);
        assert_eq!(bits_to_u16(&bits), 123);
    }

    #[test]
    fn test_numeric_encode_single_digit() {
        let bits = encode_numeric("5");
        assert_eq!(bits.len(), 4);
        assert_eq!(bits_to_u16(&bits), 5);
    }

    #[test]
    fn test_numeric_encode_two_digits() {
        let bits = encode_numeric("45");
        assert_eq!(bits.len(), 7);
    }

    #[test]
    fn test_alphanumeric_encode_pair() {
        let bits = encode_alphanumeric("AB");
        // A=10, B=11, val = 10*45+11 = 461 -> 11 bits
        assert_eq!(bits.len(), 11);
        assert_eq!(bits_to_u16(&bits), 461);
    }

    #[test]
    fn test_alphanumeric_single() {
        let bits = encode_alphanumeric("A");
        assert_eq!(bits.len(), 6);
        assert_eq!(bits_to_u16(&bits), 10);
    }

    #[test]
    fn test_alphanumeric_rejects_non_ascii() {
        // U+0141 has low byte 0x41 ('A'); it must not alias the table entry.
        assert!(!is_alphanumeric('\u{141}'));
        assert!(encode_alphanumeric("\u{141}").is_empty());
    }

    #[test]
    fn test_byte_encode() {
        let bits = encode_byte("Hi");
        assert_eq!(bits.len(), 16);
        // 'H' = 72 = 01001000
        assert!(!bits[0]);
        assert!(bits[1]);
    }

    #[test]
    fn test_kanji_unmappable_falls_back_to_bytes() {
        // U+64C4 lands on approximated lead byte 0xA0, outside both ranges.
        assert_eq!(unicode_to_shift_jis('\u{64C4}'), None);
        // Three UTF-8 bytes -> 24 bits.
        assert_eq!(encode_kanji("\u{64C4}").len(), 24);
    }

    #[test]
    fn test_mode_indicator_values() {
        assert_eq!(Mode::Numeric.indicator(), 0b0001);
        assert_eq!(Mode::Alphanumeric.indicator(), 0b0010);
        assert_eq!(Mode::Byte.indicator(), 0b0100);
        assert_eq!(Mode::Kanji.indicator(), 0b1000);
    }

    #[test]
    fn test_count_bits() {
        // Version 1-9
        assert_eq!(Mode::Numeric.count_bits(1), 10);
        assert_eq!(Mode::Alphanumeric.count_bits(5), 9);
        assert_eq!(Mode::Byte.count_bits(9), 8);
        // Version 10-26
        assert_eq!(Mode::Numeric.count_bits(10), 12);
        assert_eq!(Mode::Byte.count_bits(10), 16);
        // Version 27-40
        assert_eq!(Mode::Numeric.count_bits(27), 14);
        assert_eq!(Mode::Kanji.count_bits(30), 12);
    }

    #[test]
    fn test_is_functions() {
        assert!(is_numeric('5'));
        assert!(!is_numeric('A'));
        assert!(is_alphanumeric('A'));
        assert!(is_alphanumeric(' '));
        assert!(!is_alphanumeric('!'));
        assert!(is_kanji('你'));
        assert!(!is_kanji('A'));
    }

    /// Folds a most-significant-first bit sequence back into an integer.
    fn bits_to_u16(bits: &[bool]) -> u16 {
        bits.iter().fold(0, |acc, &b| (acc << 1) | (b as u16))
    }
}