C语言第三方库Melon开箱即用之词法分析器使用
之前的文章中,笔者介绍了Linux/UNIX C语言库Melon的基本功能及框架使用。
本文将介绍Melon中的词法分析器组件。
基础使用
我们先来看一个基本例子:
//lexer.c
#include<stdio.h>
#include"mln_lex.h"
/*
 * Declare and define the token machinery for the "mln_test" lexer with the
 * TEST prefix and no custom token types.
 * NOTE(review): these macros presumably generate mln_test_struct_t,
 * mln_test_token() and the TEST_TK_* type constants used below -- confirm
 * against mln_lex.h.
 */
MLN_DEFINE_TOKEN_TYPE_AND_STRUCT(static, mln_test, TEST);
MLN_DEFINE_TOKEN(mln_test, TEST);
/*
 * Tokenize the file named by argv[1] and print every token as
 * "<text> line:<n> type:<id>".
 *
 * Returns 0 after the token loop ends (EOF token or a NULL token), and -1 on
 * a usage error or when the memory pool or the lexer cannot be created.
 */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "Usage: %s file_path\n", argv[0]);
        return -1;
    }

    mln_string_t path;
    mln_lex_t *lex = NULL;
    struct mln_lex_attr lattr;
    mln_test_struct_t *ts;

    mln_string_nSet(&path, argv[1], strlen(argv[1]));

    lattr.pool = mln_alloc_init();
    if (lattr.pool == NULL) {
        fprintf(stderr, "init memory pool failed\n");
        return -1;
    }
    lattr.keywords = NULL;
    lattr.hooks = NULL;
    lattr.preprocess = 0;
    lattr.padding = 0;
    lattr.data = &path;

    /* The macro assigns to `lex`; it is still NULL if initialization failed. */
    mln_lex_initWithHooks(mln_test, lex, &lattr);
    if (lex == NULL) {
        fprintf(stderr, "lexer init failed\n");
        return -1;
    }

    while (1) {
        ts = mln_test_token(lex);
        if (ts == NULL || ts->type == TEST_TK_EOF) {
            break;
        }
        /*
         * Emit the token text through stdio as well: mixing an unbuffered
         * write() with buffered printf() reorders the output whenever stdout
         * is a pipe or a file.
         */
        fwrite(ts->text->data, 1, ts->text->len, stdout);
        printf(" line:%u type:%d\n", ts->line, ts->type);
    }

    mln_lex_destroy(lex);
    mln_alloc_destroy(lattr.pool);
    return 0;
}
如此,即可完成一个词法解析器程序,它读取程序的参数所指定的文件的内容,然后解析成词素,并将其打印出来。
我们执行:
$ ./lexer lexer.c
/ line:1 type:21
/ line:1 type:21
lexer line:1 type:5
. line:1 type:20
c line:1 type:5
# line:3 type:9
include line:3 type:5
< line:3 type:24
stdio line:3 type:5
. line:3 type:20
h line:3 type:5
> line:3 type:26
...
可以看到,这个程序将我们的示例C程序拆解成各种词素,如:/、#、< 等等。
进阶使用
上面的例子可以看到,基础的词法解析器解析出的词素过于细碎,有时我们还希望解析器支持我们自定义的关键字、自定义格式的数据,甚至是一些预处理功能,例如引入其他文件的内容解析词素。
那么,我们就将上面的例子进行一番修改:
//lexer.c
#include<stdio.h>
#include"mln_lex.h"
/*
 * Custom keyword table handed to the lexer through lattr.keywords.
 * The list ends with a mln_string(NULL) entry.
 * NOTE(review): the entry order presumably has to match the order of the
 * extra token types (TEST_TK_ON, TEST_TK_OFF) declared below -- confirm
 * against mln_lex.h.
 */
mln_string_t keywords[]={
mln_string("on"),
mln_string("off"),
mln_string(NULL)
};
/*
 * Declare the "mln_test" token type with three custom token kinds on top of
 * the built-in ones: TEST_TK_ON, TEST_TK_OFF and TEST_TK_STRING.
 * NOTE(review): presumably this also generates mln_test_struct_t and the
 * mln_test_token()/mln_test_new() helpers used below -- confirm in mln_lex.h.
 */
MLN_DEFINE_TOKEN_TYPE_AND_STRUCT(static, mln_test, TEST, TEST_TK_ON, TEST_TK_OFF, TEST_TK_STRING);
/* Pair each custom token kind with its printable name. */
MLN_DEFINE_TOKEN(mln_test, TEST,{TEST_TK_ON,"TEST_TK_ON"},{TEST_TK_OFF,"TEST_TK_OFF"},{TEST_TK_STRING,"TEST_TK_STRING" });
/*
 * Store one character of a double-quoted string via mln_lex_putAChar(),
 * decoding a backslash escape when `c` is a backslash.
 *
 * lex: lexer the character is read from / stored into.
 * c:   character just read by the caller.
 *
 * When `c` is '\\', the next character is fetched with mln_lex_getAChar()
 * and mapped as follows: \" \' \\ stand for themselves, and
 * \n \t \b \a \f \r \v stand for the matching C control character.
 * Any other character after the backslash sets MLN_LEX_EINVCHAR on the lexer.
 *
 * Returns 0 on success; -1 if mln_lex_getAChar()/mln_lex_putAChar() report
 * MLN_ERR or the escape sequence is not recognized.
 */
static inline int
mln_get_char(mln_lex_t *lex, char c)
{
    char out = c;

    if (c == '\\') {
        char n = mln_lex_getAChar(lex);
        if (n == MLN_ERR) {
            return -1;
        }
        switch (n) {
        case '\"':
        case '\'':
        case '\\':
            out = n;
            break;
        case 'n':
            out = '\n';
            break;
        case 't':
            out = '\t';
            break;
        case 'b':
            out = '\b';
            break;
        case 'a':
            out = '\a';
            break;
        case 'f':
            out = '\f';
            break;
        case 'r':
            out = '\r';
            break;
        case 'v':
            out = '\v';
            break;
        default:
            mln_lex_setError(lex, MLN_LEX_EINVCHAR);
            return -1;
        }
    }

    if (mln_lex_putAChar(lex, out) == MLN_ERR) {
        return -1;
    }
    return 0;
}
/*
 * Hook installed as hooks.dblq_handler: builds a TEST_TK_STRING token from
 * the characters up to the next unescaped double quote.
 *
 * lex:  lexer to read from.
 * data: not used by this handler.
 *
 * The closing quote is consumed but not passed to mln_get_char(), so it is
 * not part of the token text. Returns the token created by mln_test_new(),
 * or NULL when reading fails, when the input ends before the closing quote
 * (MLN_LEX_EINVEOF is set), or when mln_get_char() fails.
 */
static mln_test_struct_t *
mln_test_dblq_handler(mln_lex_t *lex, void *data)
{
    char ch;

    /* NOTE(review): presumably discards what the lexer collected so far
     * (the opening quote) -- confirm against mln_lex.h. */
    mln_lex_cleanResult(lex);

    for (;;) {
        ch = mln_lex_getAChar(lex);
        if (ch == MLN_ERR) {
            return NULL;
        }
        if (ch == MLN_EOF) {
            mln_lex_setError(lex, MLN_LEX_EINVEOF);
            return NULL;
        }
        if (ch == '\"') {
            return mln_test_new(lex, TEST_TK_STRING);
        }
        if (mln_get_char(lex, ch) < 0) {
            return NULL;
        }
    }
}
/*
 * Tokenize the file named by argv[1] with custom keywords, a double-quote
 * string hook and preprocessing enabled, printing every token as
 * "<text> line:<n> type:<id>".
 *
 * Returns 0 after the token loop ends (EOF token or a NULL token), and -1 on
 * a usage error or when the memory pool or the lexer cannot be created.
 */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "Usage: %s file_path\n", argv[0]);
        return -1;
    }

    mln_string_t path;
    mln_lex_t *lex = NULL;
    struct mln_lex_attr lattr;
    mln_test_struct_t *ts;
    mln_lex_hooks_t hooks;

    /* Only the double-quote hook is overridden; every other hook stays zeroed. */
    memset(&hooks, 0, sizeof(hooks));
    hooks.dblq_handler = (lex_hook)mln_test_dblq_handler;

    mln_string_nSet(&path, argv[1], strlen(argv[1]));

    lattr.pool = mln_alloc_init();
    if (lattr.pool == NULL) {
        fprintf(stderr, "init pool failed\n");
        return -1;
    }
    lattr.keywords = keywords;
    lattr.hooks = &hooks;
    lattr.preprocess = 1; /* enable preprocessing */
    lattr.padding = 0;
    lattr.data = &path;

    /* The macro assigns to `lex`; it is still NULL if initialization failed. */
    mln_lex_initWithHooks(mln_test, lex, &lattr);
    if (lex == NULL) {
        fprintf(stderr, "lexer init failed\n");
        return -1;
    }

    while (1) {
        ts = mln_test_token(lex);
        if (ts == NULL || ts->type == TEST_TK_EOF) {
            break;
        }
        /*
         * Emit the token text through stdio as well: mixing an unbuffered
         * write() with buffered printf() reorders the output whenever stdout
         * is a pipe or a file.
         */
        fwrite(ts->text->data, 1, ts->text->len, stdout);
        printf(" line:%u type:%d\n", ts->line, ts->type);
    }

    mln_lex_destroy(lex);
    mln_alloc_destroy(lattr.pool);
    return 0;
}
这一次,我们增加如下功能:
支持关键字 on和off
支持识别双引号括住的内容为字符串类型
增加了预处理功能,例如引入其他文件内容
生成可执行程序:
$ cc -o lexer lexer.c -I /usr/local/melon/include/ -L /usr/local/melon/lib/ -lmelon -lpthread
创建两个测试文件:
a.ini
#include "b.ini"
test_mode = on
log_level = 'debug'
proc_num = 10
b.ini
conf_name = "b.ini"
运行我们的程序来看看效果:
$ ./lexer a.ini
conf_name line:1 type:5
= line:1 type:25
b.ini line:1 type:42
test_mode line:2 type:5
= line:2 type:25
on line:2 type:40
log_level line:3 type:5
= line:3 type:25
' line:3 type:13
debug line:3 type:5
' line:3 type:13
proc_num line:4 type:5
= line:4 type:25
10 line:4 type:2
可以看到,在a.ini中写入include的部分,是b.ini文件内容解析后的词素。并且关键字on被正常解析出来了(type:40,off同理)。且双引号括住的字符串也被正常处理出来了(type:42),而单引号括住的内容仍按普通词素解析。
感谢阅读
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论