【加解密与C】Base系列(三)Base85

原创于 2025-07-08 13:56:19 发布 · 579 阅读

9 ·

CC 4.0 BY-SA版权

文章标签：

#c语言 #开发语言 #base85

加解密与C 专栏收录该内容

33 篇文章

订阅专栏

Base85 编码简介

Base85（也称为 Ascii85）是一种二进制到文本的编码方案，用于将二进制数据转换为可打印的ASCII字符。它的效率高于Base64，但生成的字符串可能包含特殊字符（如引号或反斜杠），需在特定场景（如JSON）中谨慎使用。

编码原理

Base85将每4个字节（32位）的二进制数据转换为5个Base85字符。计算公式如下：

将4字节数据视为一个32位无符号整数（大端序）。
重复除以85，取余数作为Base85字符的索引值。
将索引映射到字符集（如!到u）。

示例：
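假设4字节数据为0x4A3B2C1D，转换步骤如下：

数值为1,245,391,901。
依次除以85（注意：先得到的余数对应最后一个字符，输出时需要倒序排列）：
- 1245391901 ÷ 85 = 14651669，余数36 → 第5个字符E（索引36，即'!' + 36）。
- 14651669 ÷ 85 = 172372，余数49 → 第4个字符R（索引49）。
- 172372 ÷ 85 = 2027，余数77 → 第3个字符n（索引77）。
- 2027 ÷ 85 = 23，余数72 → 第2个字符i（索引72）。
- 23 ÷ 85 = 0，余数23 → 第1个字符8（索引23）。

最终编码结果为8inRE。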

常用字符集

不同实现可能使用不同字符集，常见两种：

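RFC 1924版本：

0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~

Adobe Ascii85：
- 字符范围：!（33）到u（117），即：
  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstu
- 特殊标记：z表示4字节全零；数据流以~>作为结束符（部分实现还以<~作为起始标记）。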

注意事项

数据对齐：输入数据长度若非4字节倍数，最后一组需先补零凑满4字节再编码，然后丢弃与补零字节数相同的末尾字符（即n字节输出n+1个字符）；解码时用字符u把最后一组补足5个字符，再丢弃多出的字节。
特殊字符：避免在XML/JSON中直接使用，需额外转义。
效率权衡：Base85的体积膨胀约25%（4字节→5字符），Base64约33%（3字节→4字符），因此Base85的输出比Base64小约6%，但编解码需要做乘除法运算，复杂度更高。

应用场景

Adobe PDF：用于内嵌二进制数据（如字体）。
网络传输：需要紧凑编码的场景。
Git存储：Git的二进制补丁可能使用Base85。

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define BLOCK_SIZE 4
#define ENCODED_BLOCK_SIZE 5

// Encode a byte buffer as Ascii85 text using the alphabet '!'..'u'.
//
// Every 4 input bytes are read as one big-endian 32-bit value and written as
// 5 characters. A complete all-zero group is abbreviated to a single 'z'.
// A final group of n < 4 bytes is zero-padded and written as only its first
// n + 1 characters. No "<~" / "~>" delimiters are emitted.
//
// data: bytes to encode.
// len:  number of bytes in data.
//
// Returns a newly malloc'd NUL-terminated string that the caller must free(),
// or NULL if data is NULL, len is 0, the output size would not fit in size_t,
// or the allocation fails.
char *base85_encode(const uint8_t *data, size_t len) {
    if (data == NULL || len == 0) return NULL;

    // Number of 4-byte groups, rounded up. Written this way because
    // (len + BLOCK_SIZE - 1) can wrap around for very large len.
    size_t blocks = len / BLOCK_SIZE + ((len % BLOCK_SIZE != 0) ? 1 : 0);

    // Reject sizes whose encoded length (5 chars per group + NUL) would wrap
    // size_t; a wrapped size would under-allocate and overflow the buffer.
    if (blocks > (SIZE_MAX - 1) / ENCODED_BLOCK_SIZE) return NULL;

    size_t max_out_len = blocks * ENCODED_BLOCK_SIZE + 1;
    char *output = malloc(max_out_len);
    if (!output) return NULL;

    size_t out_index = 0;

    for (size_t i = 0; i < len; i += BLOCK_SIZE) {
        size_t remaining = len - i;
        size_t block_len = (remaining < BLOCK_SIZE) ? remaining : BLOCK_SIZE;

        // Assemble the group as a big-endian 32-bit value; bytes missing from
        // a short final group stay zero.
        uint32_t block = 0;
        for (size_t j = 0; j < block_len; j++) {
            block |= (uint32_t)data[i + j] << (24 - 8 * j);
        }

        // Only a complete all-zero group may use the 'z' shorthand.
        if (block_len == BLOCK_SIZE && block == 0) {
            output[out_index++] = 'z';
            continue;
        }

        // Peel off base-85 digits from least to most significant, filling the
        // array back to front so encoded[0] is the most significant digit.
        char encoded[ENCODED_BLOCK_SIZE];
        for (int j = ENCODED_BLOCK_SIZE - 1; j >= 0; j--) {
            encoded[j] = (char)(block % 85 + '!');
            block /= 85;
        }

        // n input bytes produce n + 1 output characters.
        size_t chars_to_copy = block_len + 1;
        memcpy(output + out_index, encoded, chars_to_copy);
        out_index += chars_to_copy;
    }

    output[out_index] = '\0';
    return output;
}

// Decode Ascii85 text (alphabet '!'..'u', with 'z' shorthand) into bytes.
//
// Every 5 characters form one big-endian 32-bit value that yields 4 bytes.
// A 'z' at a group boundary yields 4 zero bytes. A final group of n < 5
// characters is padded with 'u' and yields n - 1 bytes (a lone trailing
// character therefore yields nothing). Characters outside '!'..'u' --
// whitespace, and a 'z' in the middle of a group -- are ignored and do not
// count towards a group.
//
// input:   NUL-terminated text to decode.
// out_len: receives the number of decoded bytes on success; left untouched
//          on failure.
//
// Returns a newly malloc'd buffer that the caller must free(), or NULL if
// input or out_len is NULL, input is empty, a group encodes a value larger
// than 0xFFFFFFFF, or the allocation fails.
uint8_t *base85_decode(const char *input, size_t *out_len) {
    if (input == NULL || out_len == NULL) return NULL;

    size_t in_len = strlen(input);
    if (in_len == 0) return NULL;

    // Keeps the size arithmetic below (at most 4 * in_len + 4) inside size_t.
    if (in_len > SIZE_MAX / BLOCK_SIZE - 1) return NULL;

    // Upper bound on the output: each 'z' can expand to 4 bytes, every 5
    // other characters give 4 bytes, and a trailing partial group gives at
    // most 3 more. Sizing by in_len * 4 / 5 alone is too small whenever the
    // input contains 'z' or ends in a partial group.
    size_t z_count = 0;
    for (size_t i = 0; i < in_len; i++) {
        if (input[i] == 'z') z_count++;
    }
    size_t max_out_len = z_count * BLOCK_SIZE
                       + ((in_len - z_count) / ENCODED_BLOCK_SIZE) * BLOCK_SIZE
                       + BLOCK_SIZE;
    uint8_t *output = malloc(max_out_len);
    if (!output) return NULL;

    size_t out_index = 0;
    size_t chars_in_block = 0;  // valid digits collected for the current group
    uint64_t block = 0;         // 64-bit so a value above UINT32_MAX is detectable

    for (size_t in_index = 0; in_index < in_len; in_index++) {
        char c = input[in_index];

        // The all-zero shorthand is only meaningful between groups.
        if (c == 'z' && chars_in_block == 0) {
            memset(output + out_index, 0, BLOCK_SIZE);
            out_index += BLOCK_SIZE;
            continue;
        }

        // Ignore anything outside the alphabet without consuming a slot.
        if (c < '!' || c > 'u') continue;

        block = block * 85 + (uint64_t)(c - '!');
        chars_in_block++;
        if (chars_in_block < ENCODED_BLOCK_SIZE) continue;

        // A complete group must fit in 32 bits.
        if (block > UINT32_MAX) {
            free(output);
            return NULL;
        }
        for (size_t j = 0; j < BLOCK_SIZE; j++) {
            output[out_index++] = (uint8_t)((block >> (24 - 8 * j)) & 0xFF);
        }
        block = 0;
        chars_in_block = 0;
    }

    // Trailing partial group: remember how many real characters there were
    // BEFORE padding, because n characters carry only n - 1 bytes.
    if (chars_in_block > 0) {
        size_t bytes_to_write = chars_in_block - 1;

        // Pad with the largest digit ('u') so the truncated low-order part
        // rounds up and the leading bytes come out unchanged.
        for (size_t j = chars_in_block; j < ENCODED_BLOCK_SIZE; j++) {
            block = block * 85 + (uint64_t)('u' - '!');
        }
        if (block > UINT32_MAX) {
            free(output);
            return NULL;
        }
        for (size_t j = 0; j < bytes_to_write; j++) {
            output[out_index++] = (uint8_t)((block >> (24 - 8 * j)) & 0xFF);
        }
    }

    *out_len = out_index;
    return output;
}

// Round-trip demo: encode a sample buffer, print the text, decode it again
// and print the recovered bytes as hex. Returns non-zero if either step fails.
int main(void) {
    // Encoding test
    const uint8_t data[] = {0x86, 0x4F, 0xD2, 0x6F, 0xB5, 0x59, 0x00, 0x00};
    char *encoded = base85_encode(data, sizeof(data));
    if (encoded == NULL) {
        // Passing NULL to "%s" is undefined behaviour, so bail out first.
        fprintf(stderr, "base85_encode failed\n");
        return EXIT_FAILURE;
    }
    // The second group is B5 59 00 00 (not all zero), so no 'z' appears.
    printf("Encoded: %s\n", encoded);  // prints: L/669[93J0

    // Decoding test
    size_t decoded_len = 0;
    uint8_t *decoded = base85_decode(encoded, &decoded_len);
    if (decoded == NULL) {
        fprintf(stderr, "base85_decode failed\n");
        free(encoded);
        return EXIT_FAILURE;
    }

    printf("Decoded: ");
    for (size_t i = 0; i < decoded_len; i++) {
        printf("%02X ", decoded[i]);  // prints: 86 4F D2 6F B5 59 00 00
    }
    printf("\n");

    // Release both heap buffers returned by the codec.
    free(encoded);
    free(decoded);
    return 0;
}

base85的实现规则相对比较多（字母表、z缩写、起止标记等在各实现之间并不一致），如果不替换原始字母表，建议优先使用经过充分测试的现成库实现；需要注意的是，OpenSSL只提供Base64相关接口（如EVP_EncodeBlock），并不提供Base85接口。