feat: 四种编码模式(数字/字母/字节/汉字)

This commit is contained in:
2026-06-16 23:41:52 +08:00
parent c6c80c906c
commit db9c930359
+266 -1
View File
@@ -1 +1,266 @@
/// Encoding mode of a data segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Decimal digits.
    Numeric,
    /// Digits, upper-case letters, space and `$%*+-./:`.
    Alphanumeric,
    /// Raw bytes, 8 bits each.
    Byte,
    /// Double-byte characters, 13 bits each.
    Kanji,
}

impl Mode {
    /// Returns the 4-bit mode indicator that prefixes a segment of this mode.
    pub fn indicator(self) -> u8 {
        match self {
            Self::Numeric => 0b0001,
            Self::Alphanumeric => 0b0010,
            Self::Byte => 0b0100,
            Self::Kanji => 0b1000,
        }
    }

    /// Returns the width, in bits, of the character-count indicator for this
    /// mode at the given symbol `version`.
    ///
    /// Versions are grouped into three bands: 1–9, 10–26 and 27–40. The
    /// version is not validated: `0` is treated like the first band and every
    /// value above 26 (including values past 40) like the last one.
    pub fn count_bits(self, version: u8) -> u8 {
        // Widths for the three version bands, in ascending band order.
        let widths: [u8; 3] = match self {
            Self::Numeric => [10, 12, 14],
            Self::Alphanumeric => [9, 11, 13],
            Self::Byte => [8, 16, 16],
            Self::Kanji => [8, 10, 12],
        };
        let band = match version {
            0..=9 => 0,
            10..=26 => 1,
            _ => 2,
        };
        widths[band]
    }
}
/// Numeric-mode encoding.
///
/// Digits are taken in groups of three and each group is written as a
/// big-endian binary number: 10 bits for a full group, 7 bits for a trailing
/// pair and 4 bits for a trailing single digit.
///
/// Characters for which `char::to_digit(10)` returns `None` are skipped
/// silently; they neither produce bits nor break up a group.
pub fn encode_numeric(input: &str) -> Vec<bool> {
    let digits: Vec<u16> = input
        .chars()
        .filter_map(|ch| ch.to_digit(10))
        .map(|d| d as u16)
        .collect();

    let mut out = Vec::new();
    for group in digits.chunks(3) {
        // Decimal value of the group; at most 999, so it fits in u16.
        let value = group.iter().fold(0u16, |acc, &d| acc * 10 + d);
        let width = match group.len() {
            3 => 10,
            2 => 7,
            1 => 4,
            // `chunks(3)` only yields 1..=3 items; kept for exhaustiveness.
            _ => 0,
        };
        out.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
    }
    out
}
/// Alphanumeric-mode character set: `0-9`, `A-Z`, space, `$%*+-./:`.
///
/// A character's index in this table is its numeric value in the encoding.
const ALPHANUMERIC_CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:";

/// Alphanumeric-mode encoding.
///
/// Characters are mapped to their index in [`ALPHANUMERIC_CHARS`] and taken
/// in pairs: each pair is written as `first * 45 + second` in 11 bits, and a
/// trailing single character is written in 6 bits. All values are emitted
/// most-significant bit first.
///
/// Characters outside the table (lower-case letters, other punctuation, any
/// non-ASCII character) are skipped silently.
pub fn encode_alphanumeric(input: &str) -> Vec<bool> {
    let values: Vec<u16> = input
        .chars()
        .filter_map(|c| {
            // Guard before narrowing: `c as u8` keeps only the low 8 bits, so
            // without this check U+0141 ('Ł') would be looked up as 0x41 ('A').
            if !c.is_ascii() {
                return None;
            }
            ALPHANUMERIC_CHARS
                .iter()
                .position(|&entry| entry == c as u8)
                .map(|index| index as u16)
        })
        .collect();

    let mut bits = Vec::with_capacity(values.len() / 2 * 11 + values.len() % 2 * 6);
    for group in values.chunks(2) {
        let (value, width) = match *group {
            [first, second] => (first * 45 + second, 11),
            [single] => (single, 6),
            // `chunks(2)` only yields one or two items.
            _ => continue,
        };
        bits.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
    }
    bits
}
/// Byte-mode encoding: every byte of `input` becomes 8 bits, most-significant
/// bit first.
///
/// The bytes are taken from the string's in-memory representation, i.e. its
/// UTF-8 encoding. ASCII text therefore yields one byte per character, while
/// any other character yields its multi-byte UTF-8 sequence (this is not an
/// ISO 8859-1 / Latin-1 conversion).
pub fn encode_byte(input: &str) -> Vec<bool> {
    input
        .bytes()
        .flat_map(|octet| (0..8).rev().map(move |shift| (octet >> shift) & 1 == 1))
        .collect()
}
/// Kanji-mode encoding.
///
/// Each character that `unicode_to_shift_jis` can convert is written as the
/// low 13 bits of the returned code, most-significant bit first. Any other
/// character is written as its UTF-8 bytes, 8 bits per byte, in the same
/// output stream.
///
/// NOTE(review): the 8-bit fallback is interleaved with 13-bit code words
/// without any marker, so a reader consuming fixed 13-bit groups cannot tell
/// the two apart. Confirm that callers only pass characters for which the
/// conversion succeeds, or split such input into separate segments upstream.
pub fn encode_kanji(input: &str) -> Vec<bool> {
    let mut out = Vec::new();
    let mut utf8 = [0u8; 4];
    for ch in input.chars() {
        match unicode_to_shift_jis(ch) {
            Some(code) => {
                out.extend((0..13).rev().map(move |shift| (code >> shift) & 1 == 1));
            }
            None => {
                // Fallback: emit the character's UTF-8 bytes.
                for &octet in ch.encode_utf8(&mut utf8).as_bytes() {
                    out.extend((0..8).rev().map(move |shift| (octet >> shift) & 1 == 1));
                }
            }
        }
    }
    out
}
/// Approximate Unicode → 13-bit kanji code word conversion.
///
/// This is a simplified stand-in, not a real Shift JIS conversion (that needs
/// a complete mapping table). Code points in the CJK Unified Ideographs block
/// (U+4E00..=U+9FFF) are laid out sequentially into synthetic double-byte
/// values, 0xBC trail positions per lead byte starting at lead byte 0x81, and
/// then packed the way kanji mode packs a double-byte value:
///
/// * lead byte 0x81..=0x9F → `(lead - 0x81) * 0xBC + column`
/// * lead byte 0xE0..=0xEB → `(lead - 0xC1) * 0xBC + column`
///
/// Returns `None` for characters outside U+4E00..=U+9FFF and for characters
/// whose synthetic lead byte falls outside the two ranges above. Those have no
/// code word: subtracting 0xC1 from a lead byte in 0xA0..=0xC0 underflows, and
/// lead bytes in 0xC1..=0xDF or above 0xEB either collide with the first
/// range or exceed 13 bits. Every `Some` value is unique and at most 8083, so
/// it always fits in 13 bits.
///
/// NOTE(review): ISO/IEC 18004 multiplies by 0xC0 on real Shift JIS bytes; the
/// 0xBC factor here belongs to the approximation and its output should not be
/// assumed decodable by a standard reader — confirm before relying on it.
fn unicode_to_shift_jis(c: char) -> Option<u16> {
    /// Trail-byte positions per lead byte assumed by the approximation.
    const ROW_LEN: u32 = 0xBC;

    let code = c as u32;
    if !(0x4E00..=0x9FFF).contains(&code) {
        return None;
    }

    let offset = code - 0x4E00;
    let lead = 0x81 + offset / ROW_LEN;
    let column = offset % ROW_LEN;

    let row = match lead {
        0x81..=0x9F => lead - 0x81,
        0xE0..=0xEB => lead - 0xC1,
        _ => return None,
    };
    // row <= 42 and column <= 187, so the result is at most 8083.
    Some((row * ROW_LEN + column) as u16)
}
/// Returns `true` if `c` can be encoded in numeric mode, i.e. it is one of
/// the ASCII digits `0`–`9`.
pub fn is_numeric(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Returns `true` if `c` belongs to the alphanumeric-mode character set
/// listed in `ALPHANUMERIC_CHARS`.
///
/// Non-ASCII characters are rejected before the table lookup: `c as u8` keeps
/// only the low 8 bits of the code point, so without the guard a character
/// such as U+0141 ('Ł') would be mistaken for 0x41 ('A').
pub fn is_alphanumeric(c: char) -> bool {
    c.is_ascii() && ALPHANUMERIC_CHARS.contains(&(c as u8))
}
/// Returns `true` if `c` may be a kanji-mode character.
///
/// This is a coarse code-point range test covering three Unicode blocks:
/// CJK Symbols and Punctuation (U+3000..=U+303F), CJK Unified Ideographs
/// Extension A (U+3400..=U+4DBF) and CJK Unified Ideographs
/// (U+4E00..=U+9FFF). It does not check whether the character is actually
/// convertible to a kanji code word.
pub fn is_kanji(c: char) -> bool {
    // Inclusive (first, last) code-point bounds of the accepted blocks.
    const BLOCKS: [(u32, u32); 3] = [
        (0x3000, 0x303F),
        (0x3400, 0x4DBF),
        (0x4E00, 0x9FFF),
    ];
    let cp = c as u32;
    BLOCKS.iter().any(|&(first, last)| first <= cp && cp <= last)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Interprets `bits` as a big-endian unsigned integer (first element is
    /// the most significant bit).
    fn bits_to_u16(bits: &[bool]) -> u16 {
        let mut value = 0;
        for &bit in bits {
            value = (value << 1) | (bit as u16);
        }
        value
    }

    #[test]
    fn test_numeric_encode_three_digits() {
        // A full group of three digits takes 10 bits and carries the value 123.
        let encoded = encode_numeric("123");
        assert_eq!(encoded.len(), 10);
        assert_eq!(bits_to_u16(&encoded), 123);
    }

    #[test]
    fn test_numeric_encode_single_digit() {
        // A lone digit takes 4 bits.
        let encoded = encode_numeric("5");
        assert_eq!(encoded.len(), 4);
        assert_eq!(bits_to_u16(&encoded), 5);
    }

    #[test]
    fn test_numeric_encode_two_digits() {
        // A pair of digits takes 7 bits.
        let encoded = encode_numeric("45");
        assert_eq!(encoded.len(), 7);
    }

    #[test]
    fn test_alphanumeric_encode_pair() {
        // A = 10, B = 11, so the pair value is 10 * 45 + 11 = 461 in 11 bits.
        let encoded = encode_alphanumeric("AB");
        assert_eq!(encoded.len(), 11);
        assert_eq!(bits_to_u16(&encoded), 461);
    }

    #[test]
    fn test_alphanumeric_single() {
        // A single character takes 6 bits and carries its table index.
        let encoded = encode_alphanumeric("A");
        assert_eq!(encoded.len(), 6);
        assert_eq!(bits_to_u16(&encoded), 10);
    }

    #[test]
    fn test_byte_encode() {
        let encoded = encode_byte("Hi");
        assert_eq!(encoded.len(), 16);
        // 'H' = 72 = 0b0100_1000, so the two leading bits are 0 then 1.
        assert!(!encoded[0]);
        assert!(encoded[1]);
    }

    #[test]
    fn test_mode_indicator_values() {
        let expected = [
            (Mode::Numeric, 0b0001),
            (Mode::Alphanumeric, 0b0010),
            (Mode::Byte, 0b0100),
            (Mode::Kanji, 0b1000),
        ];
        for &(mode, indicator) in expected.iter() {
            assert_eq!(mode.indicator(), indicator);
        }
    }

    #[test]
    fn test_count_bits() {
        // (mode, version, expected width)
        let cases = [
            // Versions 1-9
            (Mode::Numeric, 1, 10),
            (Mode::Alphanumeric, 5, 9),
            (Mode::Byte, 9, 8),
            // Versions 10-26
            (Mode::Numeric, 10, 12),
            (Mode::Byte, 10, 16),
            // Versions 27-40
            (Mode::Numeric, 27, 14),
            (Mode::Kanji, 30, 12),
        ];
        for &(mode, version, width) in cases.iter() {
            assert_eq!(mode.count_bits(version), width);
        }
    }

    #[test]
    fn test_is_functions() {
        // Numeric predicate.
        assert!(is_numeric('5'));
        assert!(!is_numeric('A'));

        // Alphanumeric predicate.
        assert!(is_alphanumeric('A'));
        assert!(is_alphanumeric(' '));
        assert!(!is_alphanumeric('!'));

        // Kanji predicate.
        assert!(is_kanji('你'));
        assert!(!is_kanji('A'));
    }
}