-
Notifications
You must be signed in to change notification settings - Fork 622
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
vim: reimplement isMap with regex #134
Conversation
Signed-off-by: Masatake YAMATO <yamato@redhat.com>
This looks a bit overly complex to me, and I don't really see what the regexes really give, but maybe possibly small performance improvement if the pattern compilation doesn't weight down the possible match speedup (which I would only expect on a huge number of runs). For an example of why I think it's hard to read, the Also, as for now regexs are optional, it makes this parser optional. Anyway, I would think that a simpler approach like this would be easier to read: /* checks if a word at the start of `p` matches `word`, and returns the match
* length, or 0 if no match */
static size_t wordMatchLen(const char *p, const char *word)
{
size_t n = 0;
while (*p && *p == *word)
{
p++;
word++;
n++;
}
if (*p == 0 || *p == ' ' || *p == '\t' || *p == '!')
return n;
return 0;
}
static boolean isMap(const char *line)
{
return (wordMatchLen(line, "map") == 3 ||
wordMatchLen(line, "nmap") >= 2 ||
wordMatchLen(line, "vmap") >= 2 ||
wordMatchLen(line, "omap") >= 2 ||
wordMatchLen(line, "imap") >= 2 ||
wordMatchLen(line, "lmap") >= 2 ||
wordMatchLen(line, "cmap") >= 2 ||
wordMatchLen(line, "noremap") >= 2 ||
wordMatchLen(line, "nnoremap") >= 3 ||
wordMatchLen(line, "vnoremap") >= 3 ||
wordMatchLen(line, "onoremap") >= 3 ||
wordMatchLen(line, "inoremap") >= 3 ||
wordMatchLen(line, "lnoremap") >= 3 ||
wordMatchLen(line, "cnoremap") >= 3);
} Or if we really care about performances, go ahead and write the logic down, maybe something like this: /* checks if `line` starts with a map kewyord:
* map requires whole match
* [nvoilc]map requires at least 2 chars prefix (e.g. "nm")
* noremap requires at least 2 chars prefix (e.g. "no")
* [nvoilc]noremap requires at least 3 chars prefix (e.g. "vno")
*/
static boolean isMap(const char *line)
{
int offset = 0;
/* match the prefix */
if (strchr ("nvoilc", line[0]))
{
if (line[1] == 'n')
line++;
else if (line[1] == 'm')
{
/* map only require a 1 char match on "map" when it is prefixed */
offset = 2;
line++;
}
}
if (line[0] == 'n')
return (wordMatchLen(line, "noremap") + offset - 2 /* min len */) >= 0;
else
return (wordMatchLen(line, "map") + offset - 3 /* min len */) >= 0;
} I don't think it's much harder to read (and could probably made easier sacrificing some lines) and it is most likely faster. |
Thank you for comments. I withdraw this pull-request. Your wordMatchLen based code looks elegant. How about adding the condtion/operator for comapring as arguments of wordMatchLen. enum comp { EQ, MORE_OR_EQ }; static boolean isMap(const char *line) ... |
Actually the operator is not useful as it's always static boolean wordMatchLen(const char *p, const char *word, size_t min_len)
{
/* ... */
return n >= min_len;
} If you want I can make a patch with either approach, or you can go ahead and commit it yourself I don't mind. |
Yes, could you make the patch (and merge)? |
I created #141 with the code. I'll merge later if you don't tell me it's not the way you saw it :) BTW, other parts of the code could be improved by using something similar to |
Thank you. I agree with you; it is the reason why I talked about performance. Remeber my original patch. "a bit overly complex" can be rearranged as common utility code. other vim parser related code can use the utility. |
I don't know, because when I look around the vim.c file it does seem to match some words (so something like |
Just to see how it could be (I do not know Vim syntax so this probably isn't strictly right -- for example for matching a word it's probably better to recognize what can be part of one rather than not part of it), here's a diff that makes uses of more diff --git a/vim.c b/vim.c
index 7323520..b54644b 100644
--- a/vim.c
+++ b/vim.c
@@ -138,6 +138,11 @@ static const unsigned char* skipPrefix (const unsigned char* name, int *scope)
return result;
}
+static boolean isWordDelimiter(const unsigned char c)
+{
+ return (c == 0 || c == ' ' || c == '\t' || c == '!');
+}
+
/* checks if a word at the start of `p` matches at least `min_len` first
* characters from `word` */
static boolean wordMatchLen(const unsigned char *p, const char *const word, size_t min_len)
@@ -152,12 +157,20 @@ static boolean wordMatchLen(const unsigned char *p, const char *const word, size
n++;
}
- if (*p == 0 || *p == ' ' || *p == '\t' || *p == '!')
+ if (isWordDelimiter (*p))
return n >= min_len;
return FALSE;
}
+static const unsigned char *skipWord(const unsigned char *p)
+{
+ /* FIXME: maybe rather check what *is* a word? */
+ while (*p && !isWordDelimiter (*p))
+ p++;
+ return p;
+}
+
static boolean isMap (const unsigned char* line)
{
/*
@@ -220,12 +233,8 @@ static void parseFunction (const unsigned char *line)
/* boolean inFunction = FALSE; */
int scope;
- const unsigned char *cp = line + 1;
+ const unsigned char *cp = line;
- if ((int) *++cp == 'n' && (int) *++cp == 'c' &&
- (int) *++cp == 't' && (int) *++cp == 'i' &&
- (int) *++cp == 'o' && (int) *++cp == 'n')
- ++cp;
if ((int) *cp == '!')
++cp;
if (isspace ((int) *cp))
@@ -257,12 +266,7 @@ static void parseFunction (const unsigned char *line)
/* TODO - update struct to indicate inside function */
while ((line = readVimLine ()) != NULL)
{
- /*
- * Vim7 added the for/endfo[r] construct, so we must first
- * check for an "endfo", before a "endf"
- */
- if ( (!strncmp ((const char*) line, "endfo", (size_t) 5) == 0) &&
- (strncmp ((const char*) line, "endf", (size_t) 4) == 0) )
+ if (wordMatchLen (line, "endfunction", 4))
break;
parseVimLine(line, TRUE);
@@ -275,10 +279,7 @@ static void parseAutogroup (const unsigned char *line)
vString *name = vStringNew ();
/* Found Autocommand Group (augroup) */
- const unsigned char *cp = line + 2;
- if ((int) *++cp == 'r' && (int) *++cp == 'o' &&
- (int) *++cp == 'u' && (int) *++cp == 'p')
- ++cp;
+ const unsigned char *cp = line;
if (isspace ((int) *cp))
{
while (*cp && isspace ((int) *cp))
@@ -339,15 +340,9 @@ static boolean parseCommand (const unsigned char *line)
while (*cp && isspace ((int) *cp))
++cp;
}
- else if ( line &&
- (!strncmp ((const char*) line, "comp", (size_t) 4) == 0) &&
- (!strncmp ((const char*) line, "comc", (size_t) 4) == 0) &&
- (strncmp ((const char*) line, "com", (size_t) 3) == 0) )
+ else if ( line && wordMatchLen (cp, "command", 3) )
{
- cp += 2;
- if ((int) *++cp == 'm' && (int) *++cp == 'a' &&
- (int) *++cp == 'n' && (int) *++cp == 'd')
- ++cp;
+ cp = skipWord (cp);
if ((int) *cp == '!')
++cp;
@@ -438,7 +433,7 @@ static void parseLet (const unsigned char *line, int infunction)
{
vString *name = vStringNew ();
- const unsigned char *cp = line + 3;
+ const unsigned char *cp = line;
const unsigned char *np = line;
/* get the name */
if (isspace ((int) *cp))
@@ -578,9 +573,7 @@ static boolean parseMap (const unsigned char *line)
static boolean isCommand (const unsigned char* line)
{
- return ( (!strncmp ((const char*) line, "comp", (size_t) 4) == 0) &&
- (!strncmp ((const char*) line, "comc", (size_t) 4) == 0) &&
- (strncmp ((const char*) line, "com", (size_t) 3) == 0) );
+ return wordMatchLen (line, "command", 3);
}
static boolean parseVimLine (const unsigned char *line, int infunction)
@@ -598,19 +591,19 @@ static boolean parseVimLine (const unsigned char *line, int infunction)
parseMap(line);
}
- else if (strncmp ((const char*) line, "fu", (size_t) 2) == 0)
+ else if (wordMatchLen (line, "function", 2))
{
- parseFunction(line);
+ parseFunction(skipWord(line));
}
- else if (strncmp ((const char*) line, "aug", (size_t) 3) == 0)
+ else if (wordMatchLen (line, "augroup", 3))
{
- parseAutogroup(line);
+ parseAutogroup(skipWord(line));
}
- else if ( strncmp ((const char*) line, "let", (size_t) 3) == 0 )
+ else if (wordMatchLen (line, "let", 3))
{
- parseLet(line, infunction);
+ parseLet(skipWord(line), infunction);
}
return readNextLine; @blueyed (as you seem to be the de facto vim.c maintainer): if you're interested by this kind of changes, I can either make a PR or you can steal this code. |
@b4n
Well, I have only fixed problems with the parser, as they appeared to me. |
PR opened at #146
Hehe ^^ Well, I assumed it from you making most of the recent changes that's all -- but actually all I wanted to find was someone that knows the syntax and would have an opinion, and it looks like I still found you :) |
…r-str Fix SEGV in onig_error_code_to_str() (Fix universal-ctags#132)
isMapPredicateWords is more maintainable than repeated starts_with_cmd calls.
Signed-off-by: Masatake YAMATO yamato@redhat.com