linux环境下tcpdump源代码分析--688IT编程网

linux环境下tcpdump源代码分析

Linux 环境下tcpdump 源代码分析

韩⼤卫@吉林师范⼤学

tcpdump.c 是tcpdump 工具的main.c。本文旨在对tcpdump的框架做简单了解，只展示linux平台使用的一部分核心代码。

Tcpdump 的使⽤⽬的就是打印出指定条件的报⽂，即使有再多的正则表达式作为过滤条件。所以只要懂得tcpdump -nXXi eth0 的实现原理即可。

进⼊main之前，先看⼀些头⽂件

netdissect.h里定义了一个数据结构struct netdissect_options来描述tcpdump支持的所有参数动作，每一个参数有对应的flag。在tcpdump 的main 里面，会根据用户传入的参数来增加相应flag数值，最后根据这些flag数值来实现特定动作。各个参数的定义如下：

struct netdissect_options {

int ndo_aflag; /* translate network and broadcast addresses */

//打印出以太⽹头部

int ndo_eflag; /* print ethernet header */

int ndo_fflag; /* don't translate "foreign" IP address */

int ndo_Kflag; /* don't check TCP checksums */

//不将地址转换为名字

int ndo_nflag; /* leave addresses as numbers */

int ndo_Nflag; /* remove domains from printed host names */

int ndo_qflag; /* quick (shorter) output */

int ndo_Rflag; /* print sequence # field in AH/ESP*/

int ndo_sflag; /* use the libsmi to translate OIDs */

int ndo_Sflag; /* print raw TCP sequence numbers */

// 报⽂到达时间

int ndo_tflag; /* print packet arrival time */

int ndo_Uflag; /* "unbuffered" output of dump files */

int ndo_uflag; /* Print undecoded NFS handles */

//详细信息

int ndo_vflag; /* verbose */

// ⼗六进制打印报⽂

int ndo_xflag; /* print packet in hex */

// ⼗六进制和ASCII码打印报⽂

int ndo_Xflag; /* print packet in hex/ascii */

//以ASCII码显⽰打印报⽂

int ndo_Aflag; /* print packet only in ascii observing TAB,

* LF, CR and SPACE as graphical chars */

...

//默认的打印函数

void (*ndo_default_print)(netdissect_options *, register const u_char *bp, register unsigned int length);

void (*ndo_info)(netdissect_options *, int verbose);

...

}

interface.h 接⼝头⽂件，定义了⼀堆宏就为了⽅便调⽤struct netdissect_options⾥的成员。

#ifndef NETDISSECT_REWORKED

extern netdissect_options *gndo;

...

#define nflag gndo->ndo_nflag

...

#define tflag gndo->ndo_tflag

...

#define vflag gndo->ndo_vflag

#define xflag gndo->ndo_xflag

#define Xflag gndo->ndo_Xflag

...

#endif

tcpdump.c

int

main(int argc, char **argv)

{

pcap_handler callback;

int type;

struct bpf_program fcode;

struct print_info printinfo;

...

//对netdissect_options中⼀些参数初始化

gndo->ndo_Oflag=1;

gndo->ndo_Rflag=1;

gndo->ndo_dlt=-1;

gndo->ndo_default_print=ndo_default_print;

gndo->ndo_printf=tcpdump_printf;

gndo->ndo_error=ndo_error;

gndo->ndo_warning=ndo_warning;

gndo->ndo_snaplen = DEFAULT_SNAPLEN;

...

opterr = 0;

while (

/* 经典的getopt框架。字符串列出了tcpdump 支持的全部参数。可以看到，x、X、t这些参数后面没有“:”或“::”，说明它们不带参数值；在下面的case里每出现一次就对相应的flag加1，所以重复指定（如-xx）会产生叠加的效果。*/

(op = getopt(argc, argv, "aA" B_FLAG "c:C:d" D_FLAG "eE:fF:G:i:" I_FLAG "KlLm:M:nNOpqr:Rs:StT:u" U_FLAG "vw:W:xXy:Yz:Z:")) != -1)

switch (op) {

...

//case ⾥⾯的处理⼤多相似，以下仅⽤-i,-X,-x做例。

//-i 参数⽤来指定⽹⼝

case 'i':

if (optarg[0] == '0' && optarg[1] == 0)

error("Invalid adapter index");

device = optarg;

break;

…

//-x 为以⼗六进制打印报⽂，如使⽤-xx， xflag数值为2，后⾯根据xflag>1来打印出链路层头部

case 'x':

++xflag;

++suppress_default_print;

break;

case 'X':

++Xflag;

++suppress_default_print;

break;

//case 'n', case 'A'等操作类似如上

...

}

...

/* 展开核心代码前先注册信号处理函数。当用户按ctrl+c等发送中止信号时，信号处理函数cleanup会调用info()，打印tcpdump已处理报文的统计信息，例如：

3 packets captured

3 packets received by filter

0 packets dropped by kernel */

(void)setsignal(SIGPIPE, cleanup);

(void)setsignal(SIGTERM, cleanup);

(void)setsignal(SIGINT, cleanup);

(void)setsignal(SIGCHLD, child_cleanup);

...

//从 -r 参数读取指定⽂件，在此忽略

if (RFileName != NULL) {

...

} else {

//如果没有-i 参数来指定网络接口，那么调用 pcap_lookupdev()来寻找可用的网络接口

if (device == NULL) {

device = pcap_lookupdev(ebuf);

if (device == NULL)

error("%s", ebuf);

}

/* pcap_open_live() 定义为：

pcap_t *pcap_open_live(char *device, int snaplen, int promisc, int to_ms, char *ebuf)

device 为要打开的指定设备。

snaplen 为最大报文捕获长度，由-s 指定，默认为65535（与后文输出中的 capture size 65535 bytes 一致）。

promisc 表示是否将网口配置为混杂模式。调用时传入的是 !pflag：默认pflag为0，即开启混杂模式；指定-p 参数后pflag非0，则不开启混杂模式。

to_ms 为读取超时时间（单位毫秒，这里为1000）。ebuf 用来传递错误信息。

函数返回捕获报文的句柄。 */

*ebuf = '\0';

pd = pcap_open_live(device, snaplen, !pflag, 1000, ebuf);

if (pd == NULL)

error("%s", ebuf);

else if (*ebuf)

warning("%s", ebuf);

// -w 参数将结果写入一个文件，在此忽略

if (WFileName) {

...

} else {

//返回数据链路层的枚举值

type = pcap_datalink(pd);

printinfo.printer = lookup_printer(type);

／＊lookup_printer() 作⽤如下：根据该数据链路层类型返回相应的打印函数指针。定义如下：

static if_printer

lookup_printer(int type)

{

struct printer *p;

for (p = printers; p->f; ++p)

if (type == p->type)

return p->f;

return NULL;

}

其中struct printer定义为⼀个打印函数指针，⼀个类型数值

typedef u_int (*if_printer)(const struct pcap_pkthdr *, const u_char *);

struct printer {

if_printer f;

int type;

};

printers 为⼀个struct printer数组，定义如下：

static struct printer printers[] =｛

{ arcnet_if_print, DLT_ARCNET },

{ ether_if_print, DLT_EN10MB },

{ token_if_print, DLT_IEEE802 },

...

｝

由上可以看到，当为以太⽹环境（DLT_EN10MB）时，实现函数为ether_if_print,

当为IEEE802令牌环⽹环境时，实现函数为 token_if_print。

等等。不同数据链路层环境有不同的调⽤函数来实现打印特定格式的报⽂。

for (p = printers; p->f; ++p) ：从数组首个元素开始依次遍历，循环条件是当前元素的f指针非空；数组末尾应有一个f为NULL的结束元素（上面的定义中用...省略了），遍历到它时循环终止，函数返回NULL。

所以当数据链路层的类型为DLT_EN10MB时，对应的打印函数为ether_if_print。

我本人觉得 lookup_printer() 这个函数写得甚是巧妙，非常值得借鉴。每一种类型定义一个数据结构struct printer，包含一个函数指针和一个类型值。将全部的类型放入一个数组中，遍历数组时根据类型值返回对应的函数指针；再有新类型时，仅需在数组中增加一个元素即可，查找函数本身无需修改。＊／

if (printinfo.printer == NULL) {

gndo->ndo_dltname = pcap_datalink_val_to_name(type);

if (gndo->ndo_dltname != NULL)

error("unsupported data link type %s",

gndo->ndo_dltname);

else

error("unsupported data link type %d", type);

}

//函数指针callback指向print_packet

callback = print_packet;

//将printinfo的地址转换为u_char * 赋值给pcap_userdata, 在后面作为pcap_loop()的参数

pcap_userdata = (u_char *)&printinfo;

}

if (RFileName == NULL) {

int dlt;

const char *dlt_name;

...

／＊pcap_datalink() 返回数据链路层类型枚举值，这⾥返回DLT_EN10MB ＊／

dlt = pcap_datalink(pd);

//根据该枚举返回数据链路类型char *name： “EN10MB”

dlt_name = pcap_datalink_val_to_name(dlt);

if (dlt_name == NULL) {

(void)fprintf(stderr, "listening on %s, link-type %u, capture size %u bytes\n",

device, dlt, snaplen);

} else {

(void)fprintf(stderr, "listening on %s, link-type %s (%s), capture size %u bytes\n",

device, dlt_name,

//获取该数据链路层类型的字符串描述

pcap_datalink_val_to_description(dlt), snaplen);

}

／＊

使⽤tcpdump -nXXi eth0

后，打印信息：

listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes

即来源于此。

＊／

/* 调用 pcap_loop(), 循环捕获报文并将报文交给callback处理，直到遇到错误或退出信号。

cnt 由 -c 参数指定，默认为-1（表示不限制捕获数量）。pcap_userdata 作为callback 的第一个参数。

pcap_loop() 是libpcap 提供的API，它完成了与底层驱动的通信：首先创建一个socket，将句柄封装后交给底层驱动，驱动收到数据包后将其写入socket，从内核层发往用户层；用户层的pcap_loop()持续poll 这个socket，发现其有数据后就将数据交给callback处理。 */

status = pcap_loop(pd, cnt, callback, pcap_userdata);

...

pcap_close(pd);

／＊

由上面看到，callback 的实现函数为print_packet()。pcap_loop()调用callback 时传给print_packet()三个参数：第一个为含有特定链路层打印函数的结构体pcap_userdata，第二个为包含报文信息的 struct pcap_pkthdr 常量指针，第三个为数据包内容。其中struct pcap_pkthdr 定义为：

struct pcap_pkthdr{

struct timeval ts; //时间戳数据结构

bpf_u_int32 caplen; //报⽂捕获长度

bpf_u_int32 len; //报⽂实际长度

}

注：如⼀个报⽂实际长度100B，但tcpdump捕获80B时停⽌，那么caplen 为80, len 为 100。

static void

print_packet(u_char *user, const struct pcap_pkthdr *h, const u_char *sp)

{

struct print_info *print_info;

u_int hdrlen;

++packets_captured;

++infodelay;

ts_print(&h->ts);

/＊取得参数user 的数据结构，后⾯(*print_info->printer)即调⽤user提供的打印函数，

这⾥为ether_if_print()

＊／

print_info = (struct print_info *)user;

snapend = sp + h->caplen;

//调⽤ether_if_print()

hdrlen = (*print_info->printer)(h, sp);

if (Xflag) {

//当tcpdump 有多个X参数时，如 tcpdump -XX 时，以⼗六进制和ASCII码打印出链路层头部信息

if (Xflag > 1) {

hex_and_ascii_print("\n\t", sp, h->caplen);

} else {

//只有一个X参数，即tcpdump -X 时，不打印链路层头部

if (h->caplen > hdrlen)

hex_and_ascii_print("\n\t", sp + hdrlen,

h->caplen - hdrlen);

}

} else if (xflag) {

//同-X，当存在多个-x 参数，如tcpdump -xx 时，打印链路层头部, 但只以⼗六进制打印

if (xflag > 1) {

hex_print("\n\t", sp, h->caplen);

} else {

if (h->caplen > hdrlen)

hex_print("\n\t", sp + hdrlen,

h->caplen - hdrlen);

}

} else if (Aflag) {

//-A 参数，以ASCII码打印报文信息

if (Aflag > 1) {

ascii_print(sp, h->caplen);

} else {

if (h->caplen > hdrlen)

ascii_print(sp + hdrlen, h->caplen - hdrlen);

}

putchar('\n');

--infodelay;

if (infoprint)

info(0);

}

＊／

／＊

在print-ether.c⾥，有ether_if_print 的定义，同样的，在print-token.c ⾥有token_if_print的定义， print-arcnet.c⾥有arcnet_if_print的定义。Tcpdump ⽬录⾥⼤量的 “print-” 开头的⽂件均是特定的打印函数。

print-ether.c

u_int

ether_if_print(const struct pcap_pkthdr *h, const u_char *p)

{

//将报文内容，报文实际长度(h->len)，报文捕获长度(h->caplen)传给 ether_print；原函数随后返回以太网头部长度ETHER_HDRLEN，供print_packet()中的hdrlen使用（此处摘录省略了return语句）

ether_print(p, h->len, h->caplen);

}

ether_print定义：

void

ether_print(const u_char *p, u_int length, u_int caplen)

{

struct ether_header *ep;

／＊

以太⽹头部定义

#define ETHER_HDRLEN 14 //头部长14字节

#define ETHER_ADDR_LEN 6

struct ether_header {

u_int8_t ether_dhost[ETHER_ADDR_LEN];

//DMAC, 6字节

u_int8_t ether_shost[ETHER_ADDR_LEN];

//SMAC, 6字节

u_int16_t ether_type;

//type, 2字节

};

＊／

u_short ether_type;

u_short extracted_ether_type;

if (caplen < ETHER_HDRLEN) {

printf("[|ether]");

return;

}

//如果有 -e 参数，调用 ether_hdr_print() 打印链路层头部，定义见下方。

if (eflag)

ether_hdr_print(p, length);

length -= ETHER_HDRLEN;

caplen -= ETHER_HDRLEN;

ep = (struct ether_header *)p;

p += ETHER_HDRLEN;

ether_type = ntohs(ep->ether_type);

//具体的打印细节不做研究了

if (ether_type <= ETHERMTU) {

/* Try to print the LLC-layer header & higher layers */

if (llc_print(p, length, caplen, ESRC(ep), EDST(ep),

&extracted_ether_type) == 0) {

if (!eflag)

ether_hdr_print((u_char *)ep, length + ETHER_HDRLEN);

if (!suppress_default_print)

default_print(p, caplen);

}

} else if (ether_encap_print(ether_type, p, length, caplen,

&extracted_ether_type) == 0) {

if (!eflag)

ether_hdr_print((u_char *)ep, length + ETHER_HDRLEN);

if (!suppress_default_print)

default_print(p, caplen);

}

使⽤ tcpdump -nei eth0 会有如下显⽰：

12:53:12.189132 d0:df:9a:53:f0:07 > 01:00:5e:7f:ff:fa, ethertype IPv4 (0x0800), length 175: 10.10.168.94.60395 > 239.255.255.250.1900: UDP, length 133

ether_hdr_print 定义：

static inline void

ether_hdr_print(register const u_char *bp, u_int length)

{

ep = (const struct ether_header *)bp;

//打印出源MAC > 目的MAC，比如上面的 d0:df:9a:53:f0:07 > 01:00:5e:7f:ff:fa

(void)printf("%s > %s",

etheraddr_string(ESRC(ep)),

etheraddr_string(EDST(ep)));

//如果没有-q 参数，

if (!qflag) {

if (ntohs(ep->ether_type) <= ETHERMTU)

(void)printf(", 802.3");

else

//打印出协议类型，如上⾯的ethertype IPv4 (0x0800)

(void)printf(", ethertype %s (0x%04x)",

tok2str(ethertype_values,"Unknown", ntohs(ep->ether_type)),

ntohs(ep->ether_type));

} else {

if (ntohs(ep->ether_type) <= ETHERMTU)

(void)printf(", 802.3");

else

(void)printf(", %s", tok2str(ethertype_values,"Unknown Ethertype (0x%04x)", ntohs(ep->ether_type)));

}

//打印出报⽂长度，如上⾯的length 175

(void)printf(", length %u: ", length);

}

＊／

总结：

概括地看， tcpdump.c 可分三个部分：

第⼀部分是⽤struct netdissect_options数据结构作为⼀个参数集合，并⽤getopt框架来处理argv的参数逻辑。

第二部分是使用libpcap库函数来搭建与底层通信的IPC通道。其中最重要的API有三个：第一个是pcap_lookupdev()，查找可用网口；第二个是pcap_open_live()，打开指定设备（默认将其配置为混杂模式）并返回句柄；第三个是pcap_loop()，持续获取报文数据。

第三部分是实现callback 函数，tcpdump.c里的callback函数只做了一层封装，最终调用的是参数pcap_userdata里提供的特定数据链路层的打印函数，这个函数指针的查找是由lookup_printer()实现的。

关于pcap_open_live 和pcap_loop 这两个重要的函数源代码分析，后续介绍。

688IT编程网

linux环境下tcpdump源代码分析

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

m函数数字提取

jest断言方法大全

中兴ZXSEC US 管理员手册

keras系列(一):参数设置

Qt从QString中提取出数字

element input 金额千分位格式化

freemaker 参数解析正则

C#正则验证数字

form表单验证正则

scanf正则表达式用法

grafana value的正则表达式

Android平台浮点数运算应用

js-(JS正则表达式验证数字)

判断Python输入是否是整数,字符,或浮点数

c语言 sscanf 正则规则

从文本中提取数值技巧

js将整数转换成两位浮点数的方法

vue正则限制浮点数

8到20的结尾的正则

shell 正则表达式最后一行

最新文章

应用程序的安全检测方法、装置、电子设备和存储介质

VBA之正则表达式(1)--基础篇

代码编辑的辅助方法、装置及电子设备

SHELL查字符串中包含字符的命令

String方法中replace和replaceAll的区别详解(源码分析)

双字节符号正则

标签列表

688IT编程网

linux环境下tcpdump源代码分析

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法 正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

m函数数字提取

jest断言方法大全

中兴ZXSEC US 管理员手册

keras系列(一):参数设置

Qt从QString中提取出数字

element input 金额千分位格式化

freemaker 参数解析正则

C#正则验证数字

form表单验证正则

scanf正则表达式用法

grafana value的正则表达式

Android平台浮点数运算应用

js-(JS正则表达式验证数字)

判断Python输入是否是整数,字符,或浮点数

c语言 sscanf 正则规则

从文本中提取数值技巧

js将整数转换成两位浮点数的方法

vue正则限制浮点数

8到20的结尾的正则

shell 正则表达式 最后一行

最新文章

应用程序的安全检测方法、装置、电子设备和存储介质

VBA之正则表达式(1)--基础篇

代码编辑的辅助方法、装置及电子设备

SHELL查字符串中包含字符的命令

String方法中replace和replaceAll的区别详解(源码分析)

双字节符号正则

标签列表

nginx map用法正则

shell 正则表达式最后一行