分析內核對gzip壓縮文件進行解壓的方法

火星人 @ 2014-03-12 , reply:0
←手機掃碼閱讀

  概述
----
1) Linux的初始內核映象以gzip壓縮文件的格式存放在zImage或bzImage之中, 內核的自舉
代碼將它解壓到1M內存開始處. 在內核初始化時, 如果載入了壓縮的initrd映象, 內核會將
它解壓到內存盤(ramdisk)中, 這兩處解壓過程都使用了lib/inflate.c文件.

2) inflate.c是從gzip源程序中分離出來的, 包含了一些對全局數據的直接引用, 在使用時
需要直接嵌入到代碼中. gzip壓縮文件時總是在前32K位元組的範圍內尋找重複的字元串進行
編碼, 在解壓時需要一個至少為32K位元組的解壓緩衝區, 它定義為window[WSIZE].
inflate.c使用get_byte()讀取輸入文件, 它被定義成宏來提高效率. 輸入緩衝區指針必須
定義為inptr, inflate.c中對之有減量操作. inflate.c調用flush_window()來輸出window
緩衝區中的解壓出的位元組串, 每次輸出長度用outcnt變數表示. 在flush_window()中, 還必
須對輸出位元組串計算CRC並且刷新crc變數. 在調用gunzip()開始解壓之前, 調用makecrc()
初始化CRC計算表. 最後gunzip()返回0表示解壓成功.

3) zImage或bzImage由16位引導代碼和32位內核自解壓映象兩個部分組成. 對於zImage, 內
核自解壓映象被載入到物理地址0x1000, 內核被解壓到1M的部位. 對於bzImage, 內核自解
壓映象被載入到1M開始的地方, 內核被解壓為兩個片段, 一個位於物理地址0x2000-0x90000
的範圍內, 另一個位於高端解壓映象之後, 離1M開始處不小於低端片段最大長度的區域.
解壓完成後, 這兩個片段被合併到1M的起始位置.

解壓根內存盤(ramdisk)映象文件的代碼
--------------------------

; drivers/block/rd.c
#ifdef BUILD_CRAMDISK

/*
 * gzip declarations
 *
 * Glue that lib/inflate.c expects its includer to provide: integer
 * typedefs, the input/output buffers and their indices, get_byte(),
 * the (disabled) diagnostic macros and prototypes for the hooks that
 * are defined after the #include.
 */

#define OF(args) args	/* wraps the argument list of a function prototype */

#ifndef memzero
#define memzero(s, n) memset ((s), 0, (n))
#endif

/* The three integer types used by inflate.c. */
typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;

#define INBUFSIZ 4096	/* size of the input buffer */
#define WSIZE 0x8000	/* window size--must be a power of two, and */
			/* at least 32K for zip's deflate method */

static uch *inbuf;	/* input buffer, read through get_byte() */
static uch *window;	/* decompression window */

static unsigned insize;	/* valid bytes in inbuf */
static unsigned inptr;	/* index of next byte to be processed in inbuf */
static unsigned outcnt;	/* bytes in output buffer */
static int exit_code;	/* set to 1 by error(); makes fill_inbuf() fail */
static long bytes_out;	/* total number of bytes flushed to the output */
static struct file *crd_infp, *crd_outfp;	/* compressed input / decompressed output */

/* Fetch one input byte, refilling the buffer when it is exhausted. */
#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())

/* Diagnostic functions (stubbed out) */
#define Assert(cond,msg)
#define Trace(x)
#define Tracev(x)
#define Tracevv(x)
#define Tracec(c,x)
#define Tracecv(c,x)

#define STATIC static

static int fill_inbuf(void);
static void flush_window(void);
static void *malloc(int size);
static void free(void *where);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);

#include "../../lib/inflate.c"

/* Allocation hook for inflate.c: forwards the request to kmalloc(). */
static void __init *malloc(int size)
{
	void *mem = kmalloc(size, GFP_KERNEL);

	return mem;
}

/* Release hook for inflate.c: hands the buffer back through kfree(). */
static void __init free(void *where)
{
	kfree(where);
}

/*
 * Heap "mark" hook declared for inflate.c. Deliberately empty in this
 * file: allocations here go through kmalloc()/kfree(), so there is no
 * private heap position to record (presumably why nothing is stored
 * through ptr -- confirm against inflate.c).
 */
static void __init gzip_mark(void **ptr)
{
	/* nothing to record */
}

/*
 * Heap "release" hook declared for inflate.c, the counterpart of
 * gzip_mark(). Deliberately empty in this file: there is no private
 * heap position to restore.
 */
static void __init gzip_release(void **ptr)
{
	/* nothing to give back */
}


/* ===========================================================================
 * Fill the input buffer. This is called only when the buffer is empty
 * and at least one byte is really needed.
 *
 * Reads up to INBUFSIZ bytes from crd_infp into inbuf.
 * Returns the first byte read, or -1 if an error was already flagged
 * (exit_code set), the read failed, or the input is exhausted.
 */
static int __init fill_inbuf(void)
{
	long nread;

	if (exit_code)
		return -1;

	/*
	 * Keep the result in a signed variable first: storing a negative
	 * error code straight into the unsigned insize would make it look
	 * like a huge number of valid bytes to get_byte().
	 */
	nread = crd_infp->f_op->read(crd_infp, inbuf, INBUFSIZ,
				     &crd_infp->f_pos);
	if (nread <= 0) {
		insize = 0;
		return -1;
	}

	insize = nread;
	inptr = 1;	/* byte 0 is returned directly below */

	return inbuf[0];
}

/* ===========================================================================
 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
 * (Used for the decompressed data only.)
 *
 * The window is written to crd_outfp; a short or failed write is
 * reported once through error(), which sets exit_code so that
 * fill_inbuf() stops supplying input.
 */
static void __init flush_window(void)
{
	ulg c = crc;	/* temporary variable */
	unsigned n;
	uch *in, ch;
	long written;

	written = crd_outfp->f_op->write(crd_outfp, window, outcnt,
					 &crd_outfp->f_pos);
	if (written != (long)outcnt && !exit_code)
		error("RAMDISK: incomplete write\n");

	/* Fold every output byte into the running CRC. */
	in = window;
	for (n = 0; n < outcnt; n++) {
		ch = *in++;
		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
	}
	crc = c;
	bytes_out += (ulg)outcnt;	/* update the running output total */
	outcnt = 0;
}

static void __init error(char *x) 解壓出錯調用的函數
{
printk(KERN_ERR "%s", x);
exit_code = 1;
}

/*
 * Decompress the gzip stream read from fp into outfp.
 *
 * Resets the decompressor state, allocates the input buffer and the
 * window, runs gunzip() and frees both buffers again.
 * Returns gunzip()'s result, or -1 if a buffer could not be allocated.
 */
static int __init
crd_load(struct file * fp, struct file *outfp)
{
	int result = -1;

	/* Start from a clean decompressor state. */
	insize = 0;		/* valid bytes in inbuf */
	inptr = 0;		/* index of next byte to be processed in inbuf */
	outcnt = 0;		/* bytes in output buffer */
	exit_code = 0;
	bytes_out = 0;
	crc = (ulg)0xffffffffL;	/* shift register contents */

	crd_infp = fp;
	crd_outfp = outfp;

	inbuf = kmalloc(INBUFSIZ, GFP_KERNEL);
	if (!inbuf) {
		printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n");
		goto out;
	}

	window = kmalloc(WSIZE, GFP_KERNEL);
	if (!window) {
		printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n");
		goto out_free_inbuf;
	}

	makecrc();
	result = gunzip();

	kfree(window);
out_free_inbuf:
	kfree(inbuf);
out:
	return result;
}

#endif /* BUILD_CRAMDISK */


32位內核自解壓代碼
------------------

; arch/i386/boot/compressed/head.S
.text

#include <linux/linkage.h>
#include <asm/segment.h>

/*
 * 32-bit entry point of the self-decompressing image.
 * Article note: entered at 0x1000 for zImage and at 0x101000 for bzImage
 * (TODO confirm the bzImage value against the linker settings).
 */
.globl startup_32

startup_32:
cld
cli
movl $(__KERNEL_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
movl %eax,%gs

lss SYMBOL_NAME(stack_start),%esp # stack is the user_stack array defined in misc.c
xorl %eax,%eax
1: incl %eax # check that A20 really IS enabled
movl %eax,0x000000 # loop forever if it isn't
cmpl %eax,0x100000
je 1b

/*
* Initialize eflags. Some BIOS's leave bits like NT set. This would
* confuse the debugger if this code is traced.
* XXX - best to initialize before switching to protected mode.
*/
pushl $0
popfl
/*
* Clear BSS (the BSS of the decompressor itself, _edata up to _end)
*/
xorl %eax,%eax
movl $ SYMBOL_NAME(_edata),%edi
movl $ SYMBOL_NAME(_end),%ecx
subl %edi,%ecx
cld
rep
stosb
/*
* Do the decompression, and jump to the new kernel..
*/
subl $16,%esp # place for structure on the stack
movl %esp,%eax
pushl %esi # real mode pointer as second arg
pushl %eax # address of structure as first arg
call SYMBOL_NAME(decompress_kernel)
orl %eax,%eax # non-zero return: kernel was decompressed into a low and a high piece
jnz 3f
popl %esi # discard address
popl %esi # real mode pointer
xorl %ebx,%ebx
ljmp $(__KERNEL_CS), $0x100000 # enter the decompressed kernel at 1M

/*
* We come here, if we were loaded high.
* We need to move the move-in-place routine down to 0x1000
* and then start it with the buffer addresses in registers,
* which we got from the stack.
*/
3:
movl $move_routine_start,%esi
movl $0x1000,%edi
movl $move_routine_end,%ecx
subl %esi,%ecx
addl $3,%ecx
shrl $2,%ecx # round the byte count up to whole dwords
cld
rep
movsl # copy the move routine to 0x1000; the low kernel piece starts at 0x2000

popl %esi # discard the address
popl %ebx # real mode pointer
popl %esi # low_buffer_start: start address of the low kernel piece
popl %ecx # lcount: byte count of the low kernel piece
popl %edx # high_buffer_start: start address of the high kernel piece
popl %eax # hcount: byte count of the high kernel piece
movl $0x100000,%edi # destination where the two pieces are merged (1M)
cli # make sure we don't get interrupted
ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine

/*
* Routine (template) for moving the decompressed kernel in place,
* if we were high loaded. This _must_ PIC-code !
*
* It is copied to 0x1000 and entered with the registers set up by the
* code at label 3: %esi/%ecx = start/length of the low piece,
* %edx/%eax = start/length of the high piece, %edi = 0x100000,
* %ebx = real mode pointer.
*/
move_routine_start:
movl %ecx,%ebp
shrl $2,%ecx
rep
movsl # copy the first (low) piece dword by dword
movl %ebp,%ecx
andl $3,%ecx
rep
movsb # copy the remaining bytes of the first piece
movl %edx,%esi
movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
addl $3,%ecx
shrl $2,%ecx # round the byte count up to whole dwords
rep
movsl # copy the second (high) piece dword by dword
movl %ebx,%esi # Restore setup pointer
xorl %ebx,%ebx
ljmp $(__KERNEL_CS), $0x100000 # enter the kernel now assembled at 1M
move_routine_end:

; arch/i386/boot/compressed/misc.c

/*
* gzip declarations
*
* Types, buffers, macros and prototypes that the included inflate.c
* relies on being defined by this file.
*/

#define OF(args) args
#define STATIC static

/*
* memset() and memcpy() are defined as plain functions further down in
* this file; presumably the #undefs remove macro versions inherited from
* headers -- confirm against the include list.
*/
#undef memset
#undef memcpy
#define memzero(s, n) memset ((s), 0, (n))

/* Integer types used by the decompressor. */
typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;

#define WSIZE 0x8000 /* Window size must be at least 32k, */
/* and a power of two */

static uch *inbuf; /* input buffer */
static uch window[WSIZE]; /* Sliding window buffer */

static unsigned insize = 0; /* valid bytes in inbuf */
static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
static unsigned outcnt = 0; /* bytes in output buffer */

/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
#define COMMENT 0x10 /* bit 4 set: file comment present */
#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
#define RESERVED 0xC0 /* bit 6,7: reserved */

/* Fetch one input byte; fill_inbuf() is called once the buffer is used up. */
#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())

/* Diagnostic functions: active only with DEBUG, otherwise compiled out. */
#ifdef DEBUG
# define Assert(cond,msg) {if(!(cond)) error(msg);}
# define Trace(x) fprintf x
# define Tracev(x) {if (verbose) fprintf x ;}
# define Tracevv(x) {if (verbose>1) fprintf x ;}
# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
#else
# define Assert(cond,msg)
# define Trace(x)
# define Tracev(x)
# define Tracevv(x)
# define Tracec(c,x)
# define Tracecv(c,x)
#endif

/* Hooks defined later in this file. */
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);

/*
* This is set up by the setup-routine at boot-time
*/
static unsigned char *real_mode; /* Pointer to real-mode data */

/*
* Fields read out of the real-mode data block. EXT_MEM_K and ALT_MEM_K
* are compared against KB thresholds in the "Less than 2MB/4MB" checks
* below; SCREEN_INFO overlays a struct screen_info at offset 0.
*/
#define EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
#ifndef STANDARD_MEMORY_BIOS_CALL
#define ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
#endif
#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))

/* Compressed kernel image and its length, defined outside this file. */
extern char input_data[];
extern int input_len;

static long bytes_out = 0; /* total bytes decompressed so far */
static uch *output_data; /* where decompressed data is written */
static unsigned long output_ptr = 0; /* offset into output_data used by flush_window_low() */


static void *malloc(int size);
static void free(void *where);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);

static void puts(const char *);

/*
* Bump-allocator state for malloc(): the heap begins at the address of
* the external symbol `end` and must stay below free_mem_end_ptr.
*/
extern int end;
static long free_mem_ptr = (long)&end;
static long free_mem_end_ptr;

/* Memory layout used when the decompressor is loaded high. */
#define INPLACE_MOVE_ROUTINE 0x1000	/* address the in-place move routine runs at */
#define LOW_BUFFER_START 0x2000	/* start of the low decompression buffer */
#define LOW_BUFFER_MAX 0x90000	/* upper limit of the low decompression buffer */
#define HEAP_SIZE 0x3000	/* heap reserved for the decompressor, starting at &end */
static unsigned int low_buffer_end, low_buffer_size;
static int high_loaded =0;	/* set to 1 by setup_output_buffer_if_we_run_high() */
static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;

/* Text-mode console state used by puts() and scroll(). */
static char *vidmem = (char *)0xb8000;
static int vidport;
static int lines, cols;

#include "../../../../lib/inflate.c"

static void *malloc(int size)
{
void *p;

if (size <0) error("Malloc error\n");
if (free_mem_ptr <= 0) error("Memory error\n");

free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */

p = (void *)free_mem_ptr;
free_mem_ptr += size;

if (free_mem_ptr >= free_mem_end_ptr)
error("\nOut of memory\n");

return p;
}

/*
 * Individual blocks are never returned to the bump allocator; memory is
 * reclaimed only through gzip_release().
 */
static void free(void *where)
{
	/* Don't care */
}

/* Save the current heap position in *ptr so gzip_release() can restore it. */
static void gzip_mark(void **ptr)
{
	void *heap_top = (void *) free_mem_ptr;

	*ptr = heap_top;
}

/* Rewind the heap to the position previously saved by gzip_mark(). */
static void gzip_release(void **ptr)
{
	void *saved_top = *ptr;

	free_mem_ptr = (long) saved_top;
}

/*
 * Scroll the text screen up by one line: move rows 1..lines-1 up one
 * row, then write spaces into the character cells of the last row.
 * Each cell occupies two bytes in vidmem; only the character byte of
 * the last row is rewritten.
 */
static void scroll(void)
{
	int row_bytes = cols * 2;
	int last_row = ( lines - 1 ) * row_bytes;
	int off;

	/* Source and destination overlap; this relies on the ascending
	 * byte-by-byte memcpy() defined in this file. */
	memcpy ( vidmem, vidmem + row_bytes, last_row );

	for ( off = last_row; off < lines * row_bytes; off += 2 )
		vidmem[ off ] = ' ';
}

/*
 * Print a string on the text console.
 *
 * Starts at the cursor position stored in SCREEN_INFO, wraps at `cols`,
 * scrolls when the last line is passed, then stores the new position
 * back into SCREEN_INFO and programs it into the video controller.
 */
static void puts(const char *s)
{
	int x,y,pos;
	char c;

	x = SCREEN_INFO.orig_x;
	y = SCREEN_INFO.orig_y;

	while ( ( c = *s++ ) != '\0' ) {
		if ( c != '\n' ) {
			vidmem [ ( x + cols * y ) * 2 ] = c;
			if ( ++x < cols )
				continue;	/* still room on this line */
		}

		/* Newline or end of row: move to the start of the next line. */
		x = 0;
		if ( ++y >= lines ) {
			scroll();
			y--;
		}
	}

	SCREEN_INFO.orig_x = x;
	SCREEN_INFO.orig_y = y;

	/*
	 * Update cursor position: pos is a byte offset (two bytes per cell),
	 * so pos >> 1 is the cell index; its high byte goes to register 14
	 * and its low byte to register 15.
	 */
	pos = (x + cols * y) * 2;
	outb_p(14, vidport);
	outb_p(0xff & (pos >> 9), vidport+1);
	outb_p(15, vidport);
	outb_p(0xff & (pos >> 1), vidport+1);
}

/*
 * Fill the first n bytes of s with the byte value c.
 * Returns s.
 */
void* memset(void* s, int c, size_t n)
{
	size_t i;
	char *ss = (char*)s;

	for (i = 0; i < n; i++)
		ss[i] = c;
	return s;
}

/*
 * Copy __n bytes from __src to __dest, one byte at a time in ascending
 * address order. Returns __dest.
 */
void* memcpy(void* __dest, __const void* __src,
	     size_t __n)
{
	char *out = (char *)__dest;
	const char *in = (const char *)__src;
	size_t left;

	for (left = __n; left != 0; left--)
		*out++ = *in++;
	return __dest;
}

/* ===========================================================================
 * Fill the input buffer. This is called only when the buffer is empty
 * and at least one byte is really needed.
 *
 * The whole compressed image is handed over on the first call, so a
 * second call (insize already non-zero) means the data ran out, and
 * error() halts. Returns the first input byte.
 */
static int fill_inbuf(void)
{
	if (insize != 0)
		error("ran out of input data\n");

	insize = input_len;
	inbuf = input_data;
	inptr = 1;	/* byte 0 is returned directly */

	return inbuf[0];
}

/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
* (Used for the decompressed data only.)
*/
static void flush_window_low(void)
{
ulg c = crc; /* temporary variable */
unsigned n;
uch *in, *out, ch;

in = window;
out = &output_data[output_ptr];
for (n = 0; n < outcnt; n++) {
ch = *out++ = *in++;
c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
}
crc = c;
bytes_out += (ulg)outcnt;
output_ptr += (ulg)outcnt;
outcnt = 0;
}

/*
 * Flush the window when the output is split in two pieces: bytes go to
 * output_data, and as soon as the write pointer reaches low_buffer_end
 * it is redirected to high_buffer_start. Updates crc and bytes_out.
 */
static void flush_window_high(void)
{
	ulg sum = crc;	/* running CRC kept in a local */
	uch *src = window;
	unsigned left;

	for (left = outcnt; left > 0; left--) {
		uch b = *src++;

		*output_data++ = b;
		/* Low buffer full: continue in the high buffer. */
		if ((ulg)output_data == low_buffer_end)
			output_data = high_buffer_start;
		sum = crc_32_tab[((int)sum ^ b) & 0xff] ^ (sum >> 8);
	}

	crc = sum;
	bytes_out += (ulg)outcnt;
	outcnt = 0;
}

/* Output hook for inflate.c: pick the flush routine matching the buffer layout. */
static void flush_window(void)
{
	if (!high_loaded) {
		flush_window_low();
		return;
	}
	flush_window_high();
}

/* Fatal error hook: print the message on the console and never return. */
static void error(char *x)
{
	puts("\n\n");
	puts(x);
	puts("\n\n -- System halted");

	for (;;)
		;	/* Halt */
}

/* Number of longs in the decompressor's stack. */
#define STACK_SIZE (4096)

/* Stack used while the decompressor runs. */
long user_stack [STACK_SIZE];

/*
* Initial stack pointer and stack segment, loaded as a pair by the
* `lss stack_start,%esp` instruction in head.S: `a` points one element
* past the end of user_stack, `b` holds the __KERNEL_DS selector.
*/
struct {
long * a;
short b;
} stack_start = { & user_stack [STACK_SIZE] , __KERNEL_DS };

void setup_normal_output_buffer(void) 對於zImage, 直接解壓到1M
{
#ifdef STANDARD_MEMORY_BIOS_CALL
if (EXT_MEM_K < 1024) error("Less than 2MB of memory.\n");
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory.\n");
#endif
output_data = (char *)0x100000; /* Points to 1M */
free_mem_end_ptr = (long)real_mode;
}

/*
* Result block filled in for head.S when the kernel was decompressed
* into two pieces. head.S pops the four fields in exactly this order,
* so the layout must not change.
*/
struct moveparams {
uch *low_buffer_start; int lcount; /* start and byte count of the low piece */
uch *high_buffer_start; int hcount; /* start and byte count of the high piece */
};

void setup_output_buffer_if_we_run_high(struct moveparams *mv)
{
high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); 內核高端片段的最小起始地址
#ifdef STANDARD_MEMORY_BIOS_CALL
if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory.\n");
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory.\n");
#endif
mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
high_loaded = 1;
free_mem_end_ptr = (long)high_buffer_start;
if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
; 如果高端片段的最小起始地址小於它實際應載入的地址,則將它置為實際地址,
; 這樣高端片段就無需再次移動了,否則它要向前移動
high_buffer_start = (uch *)(0x100000 + low_buffer_size);
mv->hcount = 0; /* say: we need not to move high_buffer */
}
else mv->hcount = -1; 待定
mv->high_buffer_start = high_buffer_start;
}

/*
 * After decompression, record the final byte counts of the low and high
 * pieces in *mv. A hcount preset to 0 (high piece already in place) is
 * left untouched.
 */
void close_output_buffer_if_we_run_high(struct moveparams *mv)
{
	if (bytes_out > low_buffer_size) {
		mv->lcount = low_buffer_size;
		if (mv->hcount)
			mv->hcount = bytes_out - low_buffer_size;	/* bytes in the high piece */
	} else {
		/* The whole kernel fitted into the low piece. */
		mv->lcount = bytes_out;
		mv->hcount = 0;
	}
}

/*
 * Entry point called from head.S.
 *
 * mv:    receives the low/high buffer description when the kernel is
 *        decompressed in two pieces.
 * rmode: pointer to the real-mode data block.
 *
 * Sets up console output and the output buffers, decompresses the
 * kernel and returns high_loaded (non-zero when the two pieces still
 * have to be moved into place by head.S).
 */
int decompress_kernel(struct moveparams *mv, void *rmode)
{
	int mode7;

	real_mode = rmode;

	/* Video mode 7 uses the 0xb0000 buffer and port 0x3b4. */
	mode7 = (SCREEN_INFO.orig_video_mode == 7);
	vidmem = (char *) (mode7 ? 0xb0000 : 0xb8000);
	vidport = mode7 ? 0x3b4 : 0x3d4;

	lines = SCREEN_INFO.orig_video_lines;
	cols = SCREEN_INFO.orig_video_cols;

	/* The heap starts at &end, so its address tells where we were loaded. */
	if (free_mem_ptr >= 0x100000)
		setup_output_buffer_if_we_run_high(mv);
	else
		setup_normal_output_buffer();

	makecrc();
	puts("Uncompressing Linux... ");
	gunzip();
	puts("Ok, booting the kernel.\n");

	if (high_loaded)
		close_output_buffer_if_we_run_high(mv);
	return high_loaded;
}

Edited by lucian_yao on 04/28/01 01:36 PM.







[火星人 via ] 分析內核對gzip壓縮文件進行解壓的方法已經有314次圍觀

http://www.coctec.com/docs/program/show-post-72243.html