Files
QRGen/core/src/encoder/mode.rs
T
Serendipity feb5ae709f fix: 4 个 CRITICAL bug 修复
- C1: placement.rs 删除列偏移特殊处理 (col==6→5),place_bit 已自动跳过保留区
- C2: version.rs V5-H 纠错表 h_g1: 4→2 (总码字数 200→134)
- C3: mode.rs Kanji 编码删除冗余 if/else 重复分支
- C4: galois.rs div() 返回 Option<u8> 替代 panic!
2026-06-17 08:58:29 +08:00

284 lines
7.6 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/// Data-encoding mode of a QR code segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Decimal digits only.
    Numeric,
    /// Digits, upper-case letters and a few symbols.
    Alphanumeric,
    /// Raw 8-bit bytes.
    Byte,
    /// Double-byte characters packed into 13 bits each.
    Kanji,
}

impl Mode {
    /// Returns the mode indicator (4 bits) that prefixes a segment.
    pub fn indicator(self) -> u8 {
        match self {
            Mode::Numeric => 0x1,
            Mode::Alphanumeric => 0x2,
            Mode::Byte => 0x4,
            Mode::Kanji => 0x8,
        }
    }

    /// Returns the width, in bits, of the character count indicator for this
    /// mode at the given symbol `version`.
    ///
    /// Versions are grouped into three tiers: up to 9, 10 through 26, and
    /// everything above 26. The version is not validated: `0` is treated like
    /// the first tier and any value above 26 like the last one.
    pub fn count_bits(self, version: u8) -> u8 {
        // One width per version tier, in ascending tier order.
        let [small, medium, large] = match self {
            Mode::Numeric => [10, 12, 14],
            Mode::Alphanumeric => [9, 11, 13],
            Mode::Byte => [8, 16, 16],
            Mode::Kanji => [8, 10, 12],
        };
        match version {
            0..=9 => small,
            10..=26 => medium,
            _ => large,
        }
    }
}
/// Numeric-mode encoding: every group of three digits becomes 10 bits.
///
/// A trailing group of two digits is written as 7 bits and a single trailing
/// digit as 4 bits. Each group is emitted most-significant bit first.
/// Characters that are not ASCII decimal digits are skipped.
pub fn encode_numeric(input: &str) -> Vec<bool> {
    let digits: Vec<u16> = input
        .chars()
        .filter_map(|ch| ch.to_digit(10))
        .map(|d| d as u16)
        .collect();

    let mut out = Vec::new();
    for group in digits.chunks(3) {
        // Decimal value of the group; at most 999, so it fits in u16.
        let value = group.iter().fold(0u16, |acc, &d| acc * 10 + d);
        // `chunks(3)` only ever yields groups of one, two or three digits.
        let width = match group.len() {
            1 => 4,
            2 => 7,
            _ => 10,
        };
        out.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
    }
    out
}
/// Alphanumeric-mode character set: `0-9`, `A-Z`, space and `$%*+-./:`.
///
/// The position of a byte in this table is its alphanumeric code value.
const ALPHANUMERIC_CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:";

/// Alphanumeric-mode encoding: every pair of characters becomes 11 bits.
///
/// A pair is encoded as `first * 45 + second`; a single trailing character is
/// written as 6 bits. Each group is emitted most-significant bit first.
/// Characters outside [`ALPHANUMERIC_CHARS`] are skipped.
pub fn encode_alphanumeric(input: &str) -> Vec<bool> {
    let values: Vec<u16> = input
        .chars()
        // Reject non-ASCII before narrowing: `c as u8` keeps only the low
        // byte, so U+0141 would otherwise be looked up as b'A'.
        .filter(char::is_ascii)
        .filter_map(|c| ALPHANUMERIC_CHARS.iter().position(|&x| x == c as u8))
        // Table positions are at most 44, so the narrowing is lossless.
        .map(|index| index as u16)
        .collect();

    let mut bits = Vec::with_capacity(values.len() / 2 * 11 + values.len() % 2 * 6);
    for chunk in values.chunks(2) {
        let (value, width) = match *chunk {
            [first, second] => (first * 45 + second, 11),
            [single] => (single, 6),
            _ => unreachable!("chunks(2) yields one or two items"),
        };
        bits.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
    }
    bits
}
/// Byte-mode encoding: every byte becomes 8 bits, most-significant bit first.
///
/// The bytes are those of the `&str` itself, i.e. its UTF-8 representation,
/// so a non-ASCII character contributes more than one byte.
pub fn encode_byte(input: &str) -> Vec<bool> {
    input
        .bytes()
        .flat_map(|octet| (0..8).rev().map(move |shift| (octet >> shift) & 1 == 1))
        .collect()
}
/// Kanji-mode encoding: each convertible character becomes a 13-bit codeword.
///
/// Characters for which `unicode_to_shift_jis` returns `None` are written as
/// their UTF-8 bytes instead, 8 bits per byte. All values are emitted
/// most-significant bit first.
///
/// NOTE(review): the fallback mixes 8-bit groups into a stream of 13-bit
/// codewords; whether callers guard against that is not visible here — confirm.
pub fn encode_kanji(input: &str) -> Vec<bool> {
    let mut out = Vec::new();
    for ch in input.chars() {
        match unicode_to_shift_jis(ch) {
            Some(codeword) => {
                out.extend((0..13).rev().map(|shift| (codeword >> shift) & 1 == 1));
            }
            None => {
                // Byte fallback: a char needs at most four UTF-8 bytes.
                let mut scratch = [0u8; 4];
                for octet in ch.encode_utf8(&mut scratch).bytes() {
                    out.extend((0..8).rev().map(|shift| (octet >> shift) & 1 == 1));
                }
            }
        }
    }
    out
}
/// Approximate Unicode → Shift JIS conversion, returning the 13-bit QR Kanji
/// codeword for `c`.
///
/// Only code points in U+4E00..=U+9FFF are considered. Each one is spread over
/// pseudo Shift JIS lead/trail bytes by plain arithmetic on its offset from
/// U+4E00 and then compacted by `sjis_to_kanji_codeword`.
///
/// Returns `None` when `c` is outside that range, or when the pseudo Shift JIS
/// value falls outside the two double-byte ranges Kanji mode can represent.
/// Every `Some` value fits in 13 bits.
///
/// NOTE(review): the lead/trail computation is a placeholder, not a real
/// Shift JIS mapping. A correct conversion needs a full JIS X 0208 lookup
/// table; until then a decoder will not recover the original character.
fn unicode_to_shift_jis(c: char) -> Option<u16> {
    // Number of trail-byte slots the approximation assigns to each lead byte.
    const SLOTS_PER_LEAD: u32 = 0xBC;

    let code = u32::from(c);
    if !(0x4E00..=0x9FFF).contains(&code) {
        return None;
    }

    let offset = code - 0x4E00;
    let lead = 0x81 + offset / SLOTS_PER_LEAD;
    let trail = 0x40 + offset % SLOTS_PER_LEAD;
    // offset <= 0x51FF, hence lead <= 0xF0 and trail <= 0xFB: the combined
    // value always fits in 16 bits and the cast cannot truncate.
    let sjis = ((lead << 8) | trail) as u16;

    sjis_to_kanji_codeword(sjis)
}

/// Compacts a double-byte Shift JIS value into a 13-bit QR Kanji codeword.
///
/// Values in 0x8140..=0x9FFC are rebased by 0x8140 and values in
/// 0xE040..=0xEBBF by 0xC140; the result is `high_byte * 0xC0 + low_byte`.
/// Returns `None` for values outside both ranges or with a trail byte outside
/// 0x40..=0xFC. It does not check that the value is an assigned character.
fn sjis_to_kanji_codeword(sjis: u16) -> Option<u16> {
    let rebase = match sjis {
        0x8140..=0x9FFC => 0x8140,
        0xE040..=0xEBBF => 0xC140,
        _ => return None,
    };

    // A trail byte below 0x40 would borrow from the lead byte in the
    // subtraction below and corrupt the result.
    let trail = sjis & 0xFF;
    if !(0x40..=0xFC).contains(&trail) {
        return None;
    }

    let reduced = sjis - rebase;
    Some((reduced >> 8) * 0xC0 + (reduced & 0xFF))
}
/// Returns `true` when `c` can be encoded in numeric mode, i.e. it is one of
/// the ASCII digits `0` through `9`.
pub fn is_numeric(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Returns `true` when `c` belongs to the alphanumeric-mode character set
/// (`ALPHANUMERIC_CHARS`).
pub fn is_alphanumeric(c: char) -> bool {
    // Check for ASCII before narrowing: `c as u8` keeps only the low byte, so
    // U+0141 would otherwise be tested as b'A' and wrongly accepted.
    c.is_ascii() && ALPHANUMERIC_CHARS.contains(&(c as u8))
}
/// Returns `true` when `c` may be a Kanji/CJK character.
///
/// Three code point ranges are accepted: U+3000..=U+303F, U+3400..=U+4DBF and
/// U+4E00..=U+9FFF.
pub fn is_kanji(c: char) -> bool {
    let code = u32::from(c);
    [(0x3000, 0x303F), (0x3400, 0x4DBF), (0x4E00, 0x9FFF)]
        .iter()
        .any(|&(first, last)| (first..=last).contains(&code))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Folds a most-significant-bit-first bit sequence into an integer.
    fn bits_to_u16(bits: &[bool]) -> u16 {
        bits.iter().fold(0, |acc, &b| (acc << 1) | (b as u16))
    }

    #[test]
    fn test_numeric_encode_three_digits() {
        let bits = encode_numeric("123");
        // 3 digits = 10 bits, value 123
        assert_eq!(bits.len(), 10);
        assert_eq!(bits_to_u16(&bits), 123);
    }

    #[test]
    fn test_numeric_encode_single_digit() {
        let bits = encode_numeric("5");
        assert_eq!(bits.len(), 4);
        assert_eq!(bits_to_u16(&bits), 5);
    }

    #[test]
    fn test_numeric_encode_two_digits() {
        let bits = encode_numeric("45");
        // 2 digits = 7 bits, value 45
        assert_eq!(bits.len(), 7);
        assert_eq!(bits_to_u16(&bits), 45);
    }

    #[test]
    fn test_numeric_encode_multiple_groups() {
        let bits = encode_numeric("1234");
        // "123" → 10 bits, then "4" → 4 bits
        assert_eq!(bits.len(), 14);
        assert_eq!(bits_to_u16(&bits), (123 << 4) | 4);
    }

    #[test]
    fn test_alphanumeric_encode_pair() {
        let bits = encode_alphanumeric("AB");
        // A=10, B=11, val = 10*45+11 = 461 → 11 bits
        assert_eq!(bits.len(), 11);
        assert_eq!(bits_to_u16(&bits), 461);
    }

    #[test]
    fn test_alphanumeric_single() {
        let bits = encode_alphanumeric("A");
        assert_eq!(bits.len(), 6);
        assert_eq!(bits_to_u16(&bits), 10);
    }

    #[test]
    fn test_byte_encode() {
        let bits = encode_byte("Hi");
        assert_eq!(bits.len(), 16);
        // 'H' = 0x48, 'i' = 0x69
        assert_eq!(bits_to_u16(&bits), 0x4869);
    }

    #[test]
    fn test_mode_indicator_values() {
        assert_eq!(Mode::Numeric.indicator(), 0b0001);
        assert_eq!(Mode::Alphanumeric.indicator(), 0b0010);
        assert_eq!(Mode::Byte.indicator(), 0b0100);
        assert_eq!(Mode::Kanji.indicator(), 0b1000);
    }

    #[test]
    fn test_count_bits() {
        // Version 1-9
        assert_eq!(Mode::Numeric.count_bits(1), 10);
        assert_eq!(Mode::Numeric.count_bits(9), 10);
        assert_eq!(Mode::Alphanumeric.count_bits(5), 9);
        assert_eq!(Mode::Byte.count_bits(9), 8);
        assert_eq!(Mode::Kanji.count_bits(1), 8);
        // Version 10-26
        assert_eq!(Mode::Numeric.count_bits(10), 12);
        assert_eq!(Mode::Numeric.count_bits(26), 12);
        assert_eq!(Mode::Alphanumeric.count_bits(10), 11);
        assert_eq!(Mode::Byte.count_bits(10), 16);
        assert_eq!(Mode::Kanji.count_bits(10), 10);
        // Version 27-40
        assert_eq!(Mode::Numeric.count_bits(27), 14);
        assert_eq!(Mode::Numeric.count_bits(40), 14);
        assert_eq!(Mode::Alphanumeric.count_bits(27), 13);
        assert_eq!(Mode::Byte.count_bits(40), 16);
        assert_eq!(Mode::Kanji.count_bits(30), 12);
    }

    #[test]
    fn test_is_functions() {
        assert!(is_numeric('5'));
        assert!(!is_numeric('A'));
        assert!(is_alphanumeric('A'));
        assert!(is_alphanumeric(' '));
        assert!(!is_alphanumeric('!'));
        assert!(is_kanji('你'));
        assert!(!is_kanji('A'));
    }
}