feb5ae709f
- C1: placement.rs 删除列偏移特殊处理 (col==6→5),place_bit 已自动跳过保留区 - C2: version.rs V5-H 纠错表 h_g1: 4→2 (总码字数 200→134) - C3: mode.rs Kanji 编码删除冗余 if/else 重复分支 - C4: galois.rs div() 返回 Option<u8> 替代 panic!
284 lines
7.6 KiB
Rust
284 lines
7.6 KiB
Rust
/// Data encoding mode of a segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Numeric mode.
    Numeric,
    /// Alphanumeric mode.
    Alphanumeric,
    /// Byte mode.
    Byte,
    /// Kanji mode.
    Kanji,
}

impl Mode {
    /// Returns the mode indicator; only the low 4 bits are used.
    pub fn indicator(self) -> u8 {
        match self {
            Mode::Numeric => 0x1,
            Mode::Alphanumeric => 0x2,
            Mode::Byte => 0x4,
            Mode::Kanji => 0x8,
        }
    }

    /// Returns the width, in bits, of the character-count indicator.
    ///
    /// The width depends on the mode and on which band `version` falls into:
    /// `..=9`, `10..=26`, or anything larger. `version` is not validated;
    /// `0` is treated like the first band and values above 26 like the last.
    pub fn count_bits(self, version: u8) -> u8 {
        match (self, version) {
            (Mode::Numeric, 0..=9) => 10,
            (Mode::Numeric, 10..=26) => 12,
            (Mode::Numeric, _) => 14,

            (Mode::Alphanumeric, 0..=9) => 9,
            (Mode::Alphanumeric, 10..=26) => 11,
            (Mode::Alphanumeric, _) => 13,

            // Byte mode has only two distinct widths.
            (Mode::Byte, 0..=9) => 8,
            (Mode::Byte, _) => 16,

            (Mode::Kanji, 0..=9) => 8,
            (Mode::Kanji, 10..=26) => 10,
            (Mode::Kanji, _) => 12,
        }
    }
}
|
||
|
||
/// Numeric mode encoding: every group of 3 digits becomes 10 bits.
///
/// A trailing group of 2 digits is emitted as 7 bits and a trailing single
/// digit as 4 bits. Bits are pushed most-significant first.
///
/// Characters that are not ASCII decimal digits are silently dropped before
/// grouping, so callers that need strict validation must check the input
/// themselves.
pub fn encode_numeric(input: &str) -> Vec<bool> {
    let digits: Vec<u32> = input.chars().filter_map(|c| c.to_digit(10)).collect();

    // Upper bound: a lone digit costs 4 bits, every other grouping costs less per digit.
    let mut bits = Vec::with_capacity(digits.len() * 4);

    for group in digits.chunks(3) {
        // Combine the digits arithmetically; the maximum is 999, far below u32::MAX.
        let value = group.iter().fold(0u32, |acc, &d| acc * 10 + d);

        // `chunks(3)` only ever yields groups of 1, 2 or 3 items.
        let width: u32 = match group.len() {
            3 => 10,
            2 => 7,
            _ => 4,
        };

        bits.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
    }

    bits
}
|
||
|
||
/// Alphanumeric mode character set: 0-9, A-Z, space, `$%*+-./:`.
///
/// A character's value is its index in this table.
const ALPHANUMERIC_CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:";

/// Alphanumeric mode encoding: every pair of characters becomes 11 bits.
///
/// A pair is encoded as `first * 45 + second`; a trailing single character
/// is emitted as 6 bits. Bits are pushed most-significant first.
///
/// Characters outside [`ALPHANUMERIC_CHARS`] are silently dropped before
/// pairing. Non-ASCII characters are rejected explicitly: a bare `c as u8`
/// keeps only the low 8 bits of the code point, which would make e.g.
/// U+0141 look like `'A'`.
pub fn encode_alphanumeric(input: &str) -> Vec<bool> {
    let values: Vec<u16> = input
        .chars()
        .filter(char::is_ascii)
        // Lossless cast: the character is known to be ASCII here.
        .filter_map(|c| ALPHANUMERIC_CHARS.iter().position(|&x| x == c as u8))
        // Lossless cast: the table has 45 entries.
        .map(|index| index as u16)
        .collect();

    // Upper bound: 6 bits for a lone character, 11 bits for a pair.
    let mut bits = Vec::with_capacity(values.len() * 6);

    for pair in values.chunks(2) {
        let (value, width): (u16, u32) = match *pair {
            [first, second] => (first * 45 + second, 11),
            [single] => (single, 6),
            _ => unreachable!("chunks(2) yields one or two items"),
        };
        bits.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
    }

    bits
}
|
||
|
||
/// Byte mode encoding: every byte becomes 8 bits, most-significant bit first.
///
/// The bytes emitted are the UTF-8 bytes of `input` (`str::as_bytes`), not an
/// ISO 8859-1 / Latin-1 transcoding. The two agree for ASCII text only; a
/// non-ASCII character such as `é` produces two or more bytes here.
// NOTE(review): if Latin-1 output is actually required, the transcoding and
// the character count must be handled together with the caller — confirm.
pub fn encode_byte(input: &str) -> Vec<bool> {
    let mut bits = Vec::with_capacity(input.len() * 8);
    for &byte in input.as_bytes() {
        bits.extend((0..8u32).rev().map(|shift| (byte >> shift) & 1 == 1));
    }
    bits
}
|
||
|
||
/// 汉字模式编码 (Shift JIS → 13 bit)
|
||
/// 对于无法转换为 Shift JIS 的字符,降级为 UTF-8 字节编码
|
||
pub fn encode_kanji(input: &str) -> Vec<bool> {
|
||
let mut bits = Vec::new();
|
||
for c in input.chars() {
|
||
if let Some(sjis_val) = unicode_to_shift_jis(c) {
|
||
for i in (0..13).rev() {
|
||
bits.push((sjis_val >> i) & 1 == 1);
|
||
}
|
||
} else {
|
||
// 回退到字节模式
|
||
let mut buf = [0u8; 4];
|
||
let s = c.encode_utf8(&mut buf);
|
||
for &byte in s.as_bytes() {
|
||
for i in (0..8).rev() {
|
||
bits.push((byte >> i) & 1 == 1);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
bits
|
||
}
|
||
|
||
/// Simplified Unicode → 13-bit Kanji-mode value conversion.
///
/// Only the basic CJK Unified Ideographs block (U+4E00..=U+9FFF) is
/// considered. This is an approximation, not a real Shift JIS lookup: a
/// pseudo lead byte and trail offset are derived from the code point's offset
/// inside the block (188 code points per lead byte), and then packed the way
/// the two double-byte lead ranges are packed:
///
/// * lead `0x81..=0x9F` → `(lead - 0x81) * 0xBC + trail_offset`
/// * lead `0xE0..=0xEB` → `(lead - 0xC1) * 0xBC + trail_offset`
///
/// Returns `None` for characters outside the block and for characters whose
/// pseudo lead byte falls outside both ranges. Subtracting `0xC1` from a lead
/// byte in `0xA0..=0xC0` underflows, and the other excluded lead bytes give
/// values that either collide with the first range or do not fit in 13 bits.
/// Every returned value is unique and below `0x2000`.
// NOTE(review): a faithful encoder needs a full Unicode → JIS X 0208 mapping
// table; the values produced here will not decode to the original characters
// in a standard reader.
fn unicode_to_shift_jis(c: char) -> Option<u16> {
    let code = c as u32;
    if !(0x4E00..=0x9FFF).contains(&code) {
        return None;
    }

    let base = code - 0x4E00;
    let lead = 0x81 + base / 0xBC;
    let trail_offset = base % 0xBC;

    let row = match lead {
        0x81..=0x9F => lead - 0x81,
        0xE0..=0xEB => lead - 0xC1,
        _ => return None,
    };

    // Lossless cast: row <= 42 and trail_offset <= 187, so the value is <= 8083.
    Some((row * 0xBC + trail_offset) as u16)
}
|
||
|
||
/// Returns `true` when `c` can be represented in numeric mode, i.e. it is an
/// ASCII decimal digit.
pub fn is_numeric(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
||
|
||
/// 判断字符是否属于字母数字模式
|
||
pub fn is_alphanumeric(c: char) -> bool {
|
||
ALPHANUMERIC_CHARS.contains(&(c as u8))
|
||
}
|
||
|
||
/// Returns `true` when `c` may be a CJK character.
///
/// The check is purely by code point range and accepts U+3000..=U+303F,
/// U+3400..=U+4DBF and U+4E00..=U+9FFF.
pub fn is_kanji(c: char) -> bool {
    let cp = c as u32;
    (0x3000..=0x303F).contains(&cp)
        || (0x3400..=0x4DBF).contains(&cp)
        || (0x4E00..=0x9FFF).contains(&cp)
}
|
||
|
||
/// Unit tests for the mode tables, the per-mode encoders and the character
/// classification helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Folds a most-significant-first bit slice into an integer.
    fn bits_to_u16(bits: &[bool]) -> u16 {
        bits.iter().fold(0, |acc, &b| (acc << 1) | (b as u16))
    }

    #[test]
    fn test_numeric_encode_three_digits() {
        let bits = encode_numeric("123");
        // 3 digits = 10 bits, value 123
        assert_eq!(bits.len(), 10);
        assert_eq!(bits_to_u16(&bits), 123);
    }

    #[test]
    fn test_numeric_encode_single_digit() {
        let bits = encode_numeric("5");
        assert_eq!(bits.len(), 4);
        assert_eq!(bits_to_u16(&bits), 5);
    }

    #[test]
    fn test_numeric_encode_two_digits() {
        let bits = encode_numeric("45");
        // 2 digits = 7 bits, value 45
        assert_eq!(bits.len(), 7);
        assert_eq!(bits_to_u16(&bits), 45);
    }

    #[test]
    fn test_alphanumeric_encode_pair() {
        let bits = encode_alphanumeric("AB");
        // A=10, B=11, val = 10*45+11 = 461 → 11 bits
        assert_eq!(bits.len(), 11);
        assert_eq!(bits_to_u16(&bits), 461);
    }

    #[test]
    fn test_alphanumeric_single() {
        let bits = encode_alphanumeric("A");
        assert_eq!(bits.len(), 6);
        assert_eq!(bits_to_u16(&bits), 10);
    }

    #[test]
    fn test_byte_encode() {
        let bits = encode_byte("Hi");
        assert_eq!(bits.len(), 16);
        // 'H' = 72 = 01001000
        assert!(!bits[0]);
        assert!(bits[1]);
        // 'H' = 0x48, 'i' = 0x69
        assert_eq!(bits_to_u16(&bits), 0x4869);
    }

    #[test]
    fn test_mode_indicator_values() {
        assert_eq!(Mode::Numeric.indicator(), 0b0001);
        assert_eq!(Mode::Alphanumeric.indicator(), 0b0010);
        assert_eq!(Mode::Byte.indicator(), 0b0100);
        assert_eq!(Mode::Kanji.indicator(), 0b1000);
    }

    #[test]
    fn test_count_bits() {
        // Version 1-9
        assert_eq!(Mode::Numeric.count_bits(1), 10);
        assert_eq!(Mode::Alphanumeric.count_bits(5), 9);
        assert_eq!(Mode::Byte.count_bits(9), 8);
        assert_eq!(Mode::Kanji.count_bits(9), 8);
        // Version 10-26
        assert_eq!(Mode::Numeric.count_bits(10), 12);
        assert_eq!(Mode::Alphanumeric.count_bits(26), 11);
        assert_eq!(Mode::Byte.count_bits(10), 16);
        assert_eq!(Mode::Kanji.count_bits(26), 10);
        // Version 27-40
        assert_eq!(Mode::Numeric.count_bits(27), 14);
        assert_eq!(Mode::Alphanumeric.count_bits(40), 13);
        assert_eq!(Mode::Byte.count_bits(40), 16);
        assert_eq!(Mode::Kanji.count_bits(30), 12);
    }

    #[test]
    fn test_is_functions() {
        assert!(is_numeric('5'));
        assert!(!is_numeric('A'));
        assert!(is_alphanumeric('A'));
        assert!(is_alphanumeric(' '));
        assert!(!is_alphanumeric('!'));
        assert!(is_kanji('你'));
        assert!(!is_kanji('A'));
    }
}
|