C/C++实现ELF结构解析工具--688IT编程网

C/C++实现ELF结构解析工具

ELF文件格式是一种开放的可执行文件和链接文件格式，主要用于Linux系统，可用于二进制文件、可执行文件、目标代码、共享库和核心转储文件。ELF文件格式类似于Windows下的PE格式，但与PE结构相比，ELF结构显得更加简单，也就是说Linux的文件结构相比于Windows的文件结构要简单一些.

读取ELF头：首先需要编写一个简单的C程序，然后将其编译并链接成一个ELF文件.

[root@localhost ~]# cat lyshark.c

#include <stdio.h>

/* Sample program used as the ELF parsing target: writes a fixed greeting
 * (without a trailing newline) to standard output and exits successfully. */
int main()
{
    fputs("hello lyshark", stdout);
    return 0;
}

[root@localhost ~]# gcc -c lyshark.c

[root@localhost ~]# gcc -o lyshark lyshark.o

Linux系统中有一个默认命令readelf -h，可以解析指定文件的头结构.

[root@localhost ~]# readelf -h lyshark

ELF Header:

Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00

Class: ELF64 64位程序

Data: 2's complement, little endian

Version: 1 (current)

OS/ABI: UNIX - System V 调⽤约定

ABI Version: 0

Type: EXEC (Executable file) 可执⾏⽂件

Machine: Advanced Micro Devices X86-64

Version: 0x1

Entry point address: 0x400430 #程序的⼊⼝地址

Start of program headers: 64 (bytes into file)

Start of section headers: 6464 (bytes into file)

Flags: 0x0 #标志

Size of this header: 64 (bytes) #本头⼤⼩

Size of program headers: 56 (bytes) #程序头⼤⼩

Number of program headers: 9

Size of section headers: 64 (bytes) #节头⼤⼩

Number of section headers: 31 #节表数量

Section header string table index: 30 #字符串表索引节头

通过hexdump工具查看文件头的16进制数据：hexdump -s 0 -n 64 -C lyshark

[root@localhost ~]# hexdump -s 0 -n 64 -C lyshark

00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............|

00000010 02 00 3e 00 01 00 00 00 30 04 40 00 00 00 00 00 |..>.....0.@.....|

00000020 40 00 00 00 00 00 00 00 40 19 00 00 00 00 00 00 |@.......@.......|

00000030 00 00 00 00 40 00 38 00 09 00 40 00 1f 00 1e 00 |....@.8...@.....|

Linux系统中的ELF头文件保存在/usr/include/elf.h，通过查阅该文件可以找到ELF64所对应的结构定义

/* Fixed-width aliases used by the ELF64 structures (mirrors <elf.h>). */
typedef uint16_t Elf64_Half;    /* 16-bit unsigned value */
typedef uint32_t Elf64_Word;    /* 32-bit unsigned value */
typedef uint64_t Elf64_Addr;    /* 64-bit virtual address */
typedef uint64_t Elf64_Off;     /* 64-bit file offset */

#define EI_NIDENT (16)          /* number of bytes in e_ident[] */

/* ELF64 file header; it sits at offset 0 of the file and is 64 bytes long. */
typedef struct
{
    unsigned char e_ident[EI_NIDENT]; /* identification bytes used to confirm the file is an ELF file */
    Elf64_Half e_type;      /* object file type: 2 = executable (ET_EXEC), 3 = shared object (ET_DYN) */
    Elf64_Half e_machine;   /* target machine architecture */
    Elf64_Word e_version;   /* ELF format version */
    Elf64_Addr e_entry;     /* virtual address of the entry point */
    Elf64_Off e_phoff;      /* file offset of the program header table */
    Elf64_Off e_shoff;      /* file offset of the section header table */
    Elf64_Word e_flags;     /* ELF file flags */
    Elf64_Half e_ehsize;    /* size of this ELF header in bytes */
    Elf64_Half e_phentsize; /* size of one program header table entry */
    Elf64_Half e_phnum;     /* number of program header table entries */
    Elf64_Half e_shentsize; /* size of one section header table entry */
    Elf64_Half e_shnum;     /* number of section header table entries */
    Elf64_Half e_shstrndx;  /* index of the section header that holds the section name string table */
} Elf64_Ehdr;

通过编程实现Magic的读取，或者说实现的是文件头中e_ident[16]数组的读取，通过定义可得知该数组的大小是16字节

#include <stdio.h>

#include <stdlib.h>

#include <elf.h>

/*
 * Print the 16 identification bytes (e_ident) of the ELF file named by
 * argv[1].
 *
 * Returns EXIT_SUCCESS when the header was read and carries the ELF magic;
 * returns EXIT_FAILURE (with a message on stderr) when no file was given,
 * the file cannot be opened, the header cannot be read in full, or the
 * magic bytes do not match.
 */
int main(int argc,char* argv[])
{
    Elf64_Ehdr elf_header;
    FILE *fp;
    int status = EXIT_FAILURE;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <elf-file>\n", argc > 0 ? argv[0] : "elf");
        return EXIT_FAILURE;
    }

    /* ELF files are binary data, so open in binary mode. */
    fp = fopen(argv[1], "rb");
    if (fp == NULL)
    {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    if (fread(&elf_header, sizeof elf_header, 1, fp) != 1)
    {
        fprintf(stderr, "%s: cannot read a complete ELF64 header\n", argv[1]);
    }
    /* All four magic bytes (0x7f 'E' 'L' 'F') must match, not just one of them. */
    else if (elf_header.e_ident[EI_MAG0] != ELFMAG0 ||
             elf_header.e_ident[EI_MAG1] != ELFMAG1 ||
             elf_header.e_ident[EI_MAG2] != ELFMAG2 ||
             elf_header.e_ident[EI_MAG3] != ELFMAG3)
    {
        fprintf(stderr, "%s: not an ELF file\n", argv[1]);
    }
    else
    {
        printf("头标志: ");
        for (int x = 0; x < EI_NIDENT; x++)
        {
            printf("%x ", elf_header.e_ident[x]);
        }
        printf("\n");
        status = EXIT_SUCCESS;
    }

    fclose(fp);
    return status;
}

编译并运行即可读取出文件头部的前16个字节的字节数组，我们最需要关注的就是开头前4个字节，其标志着ELF文件的开始.

[root@localhost ~]# gcc -std=c99 -o elf elf.c

[root@localhost ~]# ./elf lyshark

头标志: 7f 45 4c 46 2 1 1 0 0 0 0 0 0 0 0 0

除此之外，读取其他头结构数据，代码与上方类似，只需要稍微改动一下就好.

/*
 * Print the remaining ELF header fields.  Expects `elf_header` (an
 * Elf64_Ehdr already read from the file) to be in scope.
 * All four magic bytes must match before the fields are trusted.
 */
if (elf_header.e_ident[EI_MAG0] == ELFMAG0 &&
    elf_header.e_ident[EI_MAG1] == ELFMAG1 &&
    elf_header.e_ident[EI_MAG2] == ELFMAG2 &&
    elf_header.e_ident[EI_MAG3] == ELFMAG3)
{
    printf("⽂件类型: %hx\n", elf_header.e_type);
    printf("运⾏平台: %hx\n", elf_header.e_machine);

    /* e_entry, e_phoff and e_shoff are 64-bit; cast them and use the
     * matching long long conversions instead of %x / %d. */
    printf("⼊⼝虚拟RVA: 0x%llx\n", (unsigned long long)elf_header.e_entry);
    printf("程序头⽂件偏移: %llu(bytes)\n", (unsigned long long)elf_header.e_phoff);
    printf("节头表⽂件偏移: %llu(bytes)\n", (unsigned long long)elf_header.e_shoff);

    /* The remaining fields are 16-bit and promote to int, so %d is correct. */
    printf("ELF⽂件头⼤⼩: %d\n", elf_header.e_ehsize);
    printf("ELF程序头⼤⼩: %d\n", elf_header.e_phentsize);
    printf("ELF程序头表计数: %d\n", elf_header.e_phnum);
    printf("ELF节头表⼤⼩: %d\n", elf_header.e_shentsize);
    printf("ELF节头表计数: %d\n", elf_header.e_shnum);
    printf("字符串表索引节头: %d\n", elf_header.e_shstrndx);
}

运行后，就可以读取到ELF文件头中的所有字段数据.

[root@localhost ~]# gcc -std=c99 -o elf elf.c && ./elf lyshark

⽂件类型: 2

运⾏平台: 3e

⼊⼝虚拟RVA: 0x400430

程序头⽂件偏移: 64(bytes)

节头表⽂件偏移: 6464(bytes)

ELF⽂件头⼤⼩: 64

ELF程序头⼤⼩: 56

ELF程序头表计数: 9

ELF节头表⼤⼩: 64

ELF节头表计数: 31

字符串表索引节头: 30

读取ELF节表：首先打开elf.h头文件，找到Elf64_Shdr这个声明处.

/* Fixed-width aliases used by the ELF64 section header (mirrors <elf.h>). */
typedef uint32_t Elf64_Word;    /* 32-bit unsigned value */
typedef uint64_t Elf64_Addr;    /* 64-bit virtual address */
typedef uint64_t Elf64_Off;     /* 64-bit file offset */
typedef uint64_t Elf64_Xword;   /* 64-bit unsigned value */

/* ELF64 section header; one 64-byte entry of the section header table. */
typedef struct
{
    Elf64_Word sh_name;       /* section name, as an offset into the section name string table */
    Elf64_Word sh_type;       /* section type */
    Elf64_Xword sh_flags;     /* section flags */
    Elf64_Addr sh_addr;       /* memory address of the section's data when it is loaded at run time */
    Elf64_Off sh_offset;      /* offset of the section's data from the start of the file */
    Elf64_Xword sh_size;      /* section size in bytes */
    Elf64_Word sh_link;       /* section header table index link; meaning depends on the section type */
    Elf64_Word sh_info;       /* extra information */
    Elf64_Xword sh_addralign; /* address alignment constraint of the section */
    Elf64_Xword sh_entsize;   /* entry size, for sections that hold a table of fixed-size entries */
} Elf64_Shdr;

通过使用hexdump -s 144从偏移为144的位置开始读取，向后读取100个字节.需要注意的是，节头表的真实起始偏移由文件头中的e_shoff给出(本例为6464)，而偏移144处的数据仍然位于程序头表的范围内(本例程序头表占用64~568字节).

[root@localhost ~]# hexdump -s 144 -n 100 -C lyshark

00000090 38 02 40 00 00 00 00 00 1c 00 00 00 00 00 00 00 |8.@.............|

000000a0 1c 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 |................|

000000b0 01 00 00 00 05 00 00 00 00 00 00 00 00 00 00 00 |................|

000000c0 00 00 40 00 00 00 00 00 00 00 40 00 00 00 00 00 |..@.......@.....|

000000d0 0c 07 00 00 00 00 00 00 0c 07 00 00 00 00 00 00 |................|

000000e0 00 00 20 00 00 00 00 00 01 00 00 00 06 00 00 00 |.. .............|

000000f0 10 0e 00 00 |....|

编程实现简单的节表读取，只需要在上方代码基础上进行修改即可.

/*
 * Dump the section header table: type, address, file offset, size and name
 * of every section.  Expects `fp` (the open input file) and `elf_header`
 * (the Elf64_Ehdr already read from it) to be in scope.
 *
 * Every value taken from the file is validated before use; if the table or
 * the name string table cannot be read, a message is written to stderr and
 * nothing is dumped.
 */
if (elf_header.e_ident[EI_MAG0] == ELFMAG0 &&
    elf_header.e_ident[EI_MAG1] == ELFMAG1 &&
    elf_header.e_ident[EI_MAG2] == ELFMAG2 &&
    elf_header.e_ident[EI_MAG3] == ELFMAG3)
{
    size_t shnum = elf_header.e_shnum;
    size_t strndx = elf_header.e_shstrndx;
    Elf64_Shdr *shdr = NULL;
    char *shstrtab = NULL;
    size_t strtab_size = 0;
    int loaded = 0;

    /* The string-table index must refer to an existing section header. */
    if (strndx < shnum)
    {
        shdr = calloc(shnum, sizeof *shdr);
    }

    if (shdr != NULL &&
        fseek(fp, (long)elf_header.e_shoff, SEEK_SET) == 0 &&
        fread(shdr, sizeof *shdr, shnum, fp) == shnum)
    {
        strtab_size = (size_t)shdr[strndx].sh_size;

        /* Heap allocation instead of a VLA: the size comes from the file.
         * The extra zeroed byte keeps every name NUL-terminated even if the
         * table on disk is not. */
        if (strtab_size < (size_t)-1)
        {
            shstrtab = calloc(strtab_size + 1, 1);
        }

        if (shstrtab != NULL &&
            fseek(fp, (long)shdr[strndx].sh_offset, SEEK_SET) == 0 &&
            fread(shstrtab, 1, strtab_size, fp) == strtab_size)
        {
            loaded = 1;
        }
    }

    if (loaded)
    {
        printf("节类型\t节地址\t节偏移\t节⼤⼩\t节名称\n");

        for (size_t i = 0; i < shnum; i++)
        {
            /* A name offset outside the string table is shown as an empty name. */
            const char *name = "";
            if (shdr[i].sh_name < strtab_size)
            {
                name = shstrtab + shdr[i].sh_name;
            }

            /* sh_addr, sh_offset and sh_size are 64-bit, hence the casts and %llx. */
            printf("%x\t%llx\t%llx\t%llx\t%s \n",
                   shdr[i].sh_type,
                   (unsigned long long)shdr[i].sh_addr,
                   (unsigned long long)shdr[i].sh_offset,
                   (unsigned long long)shdr[i].sh_size,
                   name);
        }
    }
    else
    {
        fprintf(stderr, "cannot read the section header table\n");
    }

    free(shstrtab);
    free(shdr);
}

Linux系统中也可以使用objdump命令读取程序的节表信息.

[root@localhost ~]# objdump -h lyshark

lyshark: file format elf64-x86-64

Sections:

Idx Name Size VMA LMA File off Algn

0 .interp 0000001c 0000000000400238 0000000000400238 00000238 2**0