gcc词法分析

ads

我完全是出于个人兴趣来学习 gcc 的词法分析,趁假期有空,了解一下 gcc 这台庞大的机器是如何运行的。

词法分析的本质就是一个自动机:它逐个字符地读取输入,首先根据读到的字符判断当前符号是标识符还是其他种类的符号(数字、字符串、运算符等),然后进入相应的分支继续识别。

本文偏重代码的讲解,下面涉及的词法分析实现主要都在 lex.c 里面。

首先,c_lex_one_token、_cpp_lex_direct 和 cpp_classify_number 这几个函数一起完成了所有符号的识别。

其中 _cpp_lex_direct 负责符号的识别,cpp_classify_number 实现对数字的分类。

进入起始状态的代码如下所示(只保留了骨架,省略的部分用 ... 表示):

cpp_token *_cpp_lex_direct (cpp_reader *pfile) { ... fresh_line: ... buffer = pfile->buffer; ... skipped_white: ... c = *buffer->cur++; ... }

代码路径

switch (c)    {    case ' ': case '\t': case '\f': case '\v': case '\0':      result->flags |= PREV_WHITE;      skip_whitespace (pfile, c);      goto skipped_white;
case '\n': if (buffer->cur < buffer->rlimit) CPP_INCREMENT_LINE (pfile, 0); buffer->need_line = true; goto fresh_line;

      如果读到的是空格、'\t'、'\f'、'\v'、'\0' 这几个空白字符,就设置 PREV_WHITE 标志、跳过空白并回到 skipped_white;如果读到的是 '\n',就跳转到 fresh_line,开始处理新的一行。

然后确定符号为数字

    case '0': case '1': case '2': case '3': case '4':    case '5': case '6': case '7': case '8': case '9':      {  struct normalize_state nst = INITIAL_NORMALIZE_STATE;  result->type = CPP_NUMBER;  lex_number (pfile, &result->val.str, &nst); /* 单独的处理函数 */  warn_about_normalization (pfile, result, &nst);  break;      }

      然后进入lex_number

    case '.':      result->type = CPP_DOT;      if (ISDIGIT (*buffer->cur))  {    struct normalize_state nst = INITIAL_NORMALIZE_STATE;    result->type = CPP_NUMBER;    lex_number (pfile, &result->val.str, &nst);    warn_about_normalization (pfile, result, &nst);  }      else if (*buffer->cur == '.' && buffer->cur[1] == '.')  buffer->cur += 2, result->type = CPP_ELLIPSIS;      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))  buffer->cur++, result->type = CPP_DOT_STAR;      break;

类似地,符号的第一个字符还可能是 'L'、'U'、'u'、'R'。

确定符号为字符或字符串:符号的第一个字符是单引号 ' 或者双引号 "。

确定符号为标识符:首字符是 'L'、'u'、'U'、'R' 时,如果后面没有紧跟引号,就按标识符处理;其他的情况则放进 lex_identifier 函数进行判别。

分析运算符和分隔符

如果首字符是引号或者 '/':

    case '\'':    case '"':      lex_string (pfile, result, buffer->cur - 1); /* 字符和字符串处理 */      break;
case '/': /* A potential block or line comment. */ comment_start = buffer->cur;//获取后面的 c = *buffer->cur; if (c == '*') { if (_cpp_skip_block_comment (pfile)) cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");  }//遍历我们的东西 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) || cpp_in_system_header (pfile))) { /* Warn about comments only if pedantically GNUC89, and not in system headers. */ if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) && ! buffer->warned_cplusplus_comments) { cpp_error (pfile, CPP_DL_PEDWARN, "C++ style comments are not allowed in ISO C90"); cpp_error (pfile, CPP_DL_PEDWARN, "(this will be reported only once per input file)"); buffer->warned_cplusplus_comments = 1; }
if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); } else if (c == '=') { buffer->cur++; result->type = CPP_DIV_EQ; break; } else { result->type = CPP_DIV; break; }

考虑"<"

    case '<':      if (pfile->state.angled_headers)  {    lex_string (pfile, result, buffer->cur - 1);    if (result->type != CPP_LESS)      break;  }
result->type = CPP_LESS; if (*buffer->cur == '=') buffer->cur++, result->type = CPP_LESS_EQ; else if (*buffer->cur == '<') { buffer->cur++; IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); } else if (CPP_OPTION (pfile, digraphs)) { if (*buffer->cur == ':') { buffer->cur++; result->flags |= DIGRAPH; result->type = CPP_OPEN_SQUARE; } else if (*buffer->cur == '%') { buffer->cur++; result->flags |= DIGRAPH; result->type = CPP_OPEN_BRACE; } }

其他符号的处理方式与此类似,这里就不再赘述。

然后是lex_identifier函数

/* Lex an identifier whose first character is at BASE and return its
   hash node.

   On entry pfile->buffer->cur is the position from which scanning
   continues (presumably just past *BASE, since the hash is seeded
   from *BASE -- confirm against the caller).  On return
   pfile->buffer->cur points past the last character consumed.

   STARTS_UCN selects the slow path unconditionally; the fast scan
   over ISIDNUM characters is skipped in that case.  NST is the
   normalization state, updated on the slow path only.  */
static cpp_hashnode *
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
                struct normalize_state *nst)
{
  cpp_hashnode *result;
  const uchar *cur;
  unsigned int len;
  unsigned int hash = HT_HASHSTEP (0, *base);

  /* Fast path: consume plain identifier characters, folding each one
     into the running hash.  */
  cur = pfile->buffer->cur;
  if (! starts_ucn)
    while (ISIDNUM (*cur))
      {
        hash = HT_HASHSTEP (hash, *cur);
        cur++;
      }
  pfile->buffer->cur = cur;

  if (starts_ucn || forms_identifier_p (pfile, false, nst))
    {
      /* Slower version for identifiers containing UCNs (or $).  The
         hash computed above is not used here; the spelling is handed
         to _cpp_interpret_identifier instead.  */
      do
        {
          while (ISIDNUM (*pfile->buffer->cur))
            {
              pfile->buffer->cur++;
              NORMALIZE_STATE_UPDATE_IDNUM (nst);
            }
        }
      while (forms_identifier_p (pfile, false, nst));

      result = _cpp_interpret_identifier (pfile, base,
                                          pfile->buffer->cur - base);
    }
  else
    {
      len = cur - base;
      hash = HT_HASHFINISH (hash, len);

      /* Look the spelling up with the precomputed hash; HT_ALLOC
         presumably asks the table to create a missing entry.  */
      result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
                                                  base, len, hash, HT_ALLOC));
    }

  /* Rarely, identifiers require diagnostics when lexed.  */
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
                        && !pfile->state.skipping, 0))
    {
      /* It is allowed to poison the same identifier twice.  */
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
                   NODE_NAME (result));

      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
         replacement list of a variadic macro.  */
      if (result == pfile->spec_nodes.n__VA_ARGS__
          && !pfile->state.va_args_ok)
        cpp_error (pfile, CPP_DL_PEDWARN,
                   "__VA_ARGS__ can only appear in the expansion"
                   " of a C99 variadic macro");
    }

  return result;
}

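这里的主要逻辑是:只要后面的字符仍然满足标识符的条件(ISIDNUM),就继续向后读取并更新哈希值;如果标识符以 UCN 开头,或者 forms_identifier_p 判断后面还有 UCN 或 $,就走较慢的分支,交给 _cpp_interpret_identifier 处理;否则用算好的哈希值在哈希表中查找对应的节点并返回。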

然后就是数字的词法分析实现

/* Lex a preprocessing number and store its spelling in NUMBER.

   The first character of the number has already been consumed, so the
   spelling starts one byte before pfile->buffer->cur.  On return
   pfile->buffer->cur points past the last character of the number,
   NUMBER->len is its length and NUMBER->text is a NUL-terminated copy
   obtained from _cpp_unaligned_alloc.  NST is the normalization state,
   updated once per character consumed by the inner loop.  */
static void
lex_number (cpp_reader *pfile, cpp_string *number,
            struct normalize_state *nst)
{
  const uchar *cur;
  const uchar *base;
  uchar *dest;

  base = pfile->buffer->cur - 1;
  do
    {
      cur = pfile->buffer->cur;

      /* N.B. ISIDNUM does not include $.  */
      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
        {
          cur++;
          NORMALIZE_STATE_UPDATE_IDNUM (nst);
        }

      pfile->buffer->cur = cur;
    }
  while (forms_identifier_p (pfile, false, nst));

  /* Copy the spelling out of the input buffer; one extra byte holds
     the terminating NUL.  */
  number->len = cur - base;
  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
  memcpy (dest, base, number->len);
  dest[number->len] = '\0';
  number->text = dest;
}
/* Create a token of type TYPE with a literal spelling.

   Copies the LEN bytes starting at BASE into a buffer of LEN + 1
   bytes obtained from _cpp_unaligned_alloc, NUL-terminates the copy,
   and records the type, length and text in TOKEN.  */
static void
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
                unsigned int len, enum cpp_ttype type)
{
  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);

  memcpy (dest, base, len);
  dest[len] = '\0';
  token->type = type;
  token->val.str.len = len;
  token->val.str.text = dest;
}


最后编辑于:2024/1/19 拔丝英语网

admin-avatar

英语作文代写、国外视频下载

高质量学习资料分享

admin@buzzrecipe.com