Skip to content

Commit

Permalink
lregex: replace gnu_regex with Onigmo regex engine
Browse files Browse the repository at this point in the history
Close #1861.

In most cases, I expect no impact on existing optlib
code that uses the --regex-... options.

If the `r` regex flag is given, you can use the extended features of Onigmo
with Ruby regex syntax.

A demonstration of one of its most remarkable features:

;; input.mylang

    (define (f1) 1)
    (define ((f2)) #t)
    (define (((f3))) "abc")

    --langdef=mylang
    --map-mylang=.mylang
    --kinddef-mylang=f,fun,function, function returing a function, or function returing a function returing function...
    --_fielddef-mylang=symbol,symbol binding to the function
    --fields-mylang={symbol}
    --regex-mylang=/\(define +(([-a-z0-9]+)|\(\g<1>\))/\1/f/r{_field=symbol:\2}

Note the `r` flag passed to --regex-mylang.

    (((f3)))	input.mylang	/^(define (((f3))) "abc")$/;"	f	symbol:f3
    ((f2))	input.mylang	/^(define ((f2)) #t)$/;"	f	symbol:f2
    (f1)	input.mylang	/^(define (f1) 1)$/;"	f	symbol:f1

Look at the tag names: the pairs of `(' and `)' are properly balanced.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
  • Loading branch information
masatake committed Mar 12, 2020
1 parent f2444e4 commit 97d07b7
Show file tree
Hide file tree
Showing 13 changed files with 91 additions and 22 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ addons:
- gdb
- valgrind
- python3-docutils
- libtool

env:
- TARGET=Unix
Expand Down
14 changes: 5 additions & 9 deletions Makefile.am
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
include $(top_srcdir)/source.mak

SUBDIRS = Onigmo

EXTRA_DIST = README.md autogen.sh \
makefiles/list-optlib2c-input.mak misc/optlib2c \
.ctags.d/exclusion.ctags .dir-locals.el .editorconfig .gdbinit .gitignore \
.indent.pro .uncrustify.cfg \
gnu_regex/README.txt gnu_regex/regcomp.c gnu_regex/regexec.c \
gnu_regex/regex_internal.c gnu_regex/regex_internal.h \
misc/ctags-optlib-mode.el \
misc/mk-interactive-request.sh misc/roundtrip misc/tinst \
misc/packcc/.gitignore misc/packcc/LICENSE.txt \
Expand Down Expand Up @@ -63,11 +63,6 @@ LIB_SRCS += $(FNMATCH_SRCS)
LIB_HEADS += $(FNMATCH_HEADS)
endif

if !HAVE_REGCOMP
LIB_SRCS += $(REGEX_SRCS)
LIB_HEADS += $(REGEX_HEADS)
endif

if HAVE_LIBXML
PARSER_SRCS += $(XML_SRCS)
PARSER_HEADS += $(XML_HEADS)
Expand All @@ -93,7 +88,6 @@ if HAVE_STRNLEN
libctags_a_CPPFLAGS += -DUSE_SYSTEM_STRNLEN
endif
libctags_a_CPPFLAGS+= $(FNMATCH_CPPFLAGS)
libctags_a_CPPFLAGS+= $(REGCOMP_CPPFLAGS)
libctags_a_CPPFLAGS+= -DHAVE_REPOINFO_H

libctags_a_CFLAGS =
Expand Down Expand Up @@ -151,7 +145,8 @@ dist_libctags_a_SOURCES = $(ALL_LIB_HEADS) $(ALL_LIB_SRCS)

ctags_CPPFLAGS = $(libctags_a_CPPFLAGS)
ctags_CFLAGS = $(libctags_a_CFLAGS)
ctags_LDADD =libctags.a
ctags_LDADD = libctags.a
ctags_LDADD += $(top_builddir)/Onigmo/.libs/libonigmo.a
ctags_LDADD += $(LIBXML_LIBS)
ctags_LDADD += $(JANSSON_LIBS)
ctags_LDADD += $(LIBYAML_LIBS)
Expand All @@ -175,6 +170,7 @@ endif
mini_geany_CPPFLAGS = $(libctags_a_CPPFLAGS)
mini_geany_CFLAGS = $(libctags_a_CFLAGS)
mini_geany_LDADD = libctags.a
mini_geany_LDADD += $(top_builddir)/Onigmo/.libs/libonigmo.a
mini_geany_LDADD += $(LIBXML_LIBS)
mini_geany_LDADD += $(JANSSON_LIBS)
mini_geany_LDADD += $(LIBYAML_LIBS)
Expand Down
7 changes: 7 additions & 0 deletions Units/onigmo-ruby-syntax.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Defines the throwaway language "mylang" used to exercise the `r` (ruby)
# regex flag, which enables Onigmo's ruby syntax for a --regex-<LANG> pattern.
--langdef=mylang
--map-mylang=.mylang
--kinddef-mylang=f,fun,function, function returning a function, or function returning a function returning function...
--_fielddef-mylang=symbol,symbol binding to the function
--fields-mylang={symbol}
# Group 1 is either a bare symbol (captured as group 2) or a parenthesized
# recursive call to group 1 itself via \g<1>, so the tag name (\1) keeps its
# balanced parentheses while the symbol field (\2) holds only the bare name.
--regex-mylang=/\(define +(([-a-z0-9]+)|\(\g<1>\))/\1/f/r{_field=symbol:\2}

3 changes: 3 additions & 0 deletions Units/onigmo-ruby-syntax.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(((f3))) input.mylang /^(define (((f3))) "abc")$/;" f symbol:f3
((f2)) input.mylang /^(define ((f2)) #t)$/;" f symbol:f2
(f1) input.mylang /^(define (f1) 1)$/;" f symbol:f1
5 changes: 5 additions & 0 deletions Units/onigmo-ruby-syntax.d/input.mylang
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
(define (f1) 1)
(define ((f2)) #t)
(define (((f3))) "abc")


3 changes: 2 additions & 1 deletion autogen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set -xe

type autoreconf > /dev/null || exit 1
type pkg-config > /dev/null || exit 1
type libtoolize > /dev/null || exit 1

if [ -z "${MAKE}" ]; then
if type make > /dev/null; then
Expand Down Expand Up @@ -36,6 +37,6 @@ if autoreconf -vfi; then
else
echo "failed in running autoreconf" 1>&2
exit 1
fi
fi && (cd Onigmo; ./autogen.sh)

exit $?
2 changes: 1 addition & 1 deletion circle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- run:
name: Install build tools
command: |
dnf -y install gcc automake autoconf pkgconfig make libseccomp-devel libxml2-devel jansson-devel libyaml-devel findutils diffutils
dnf -y install gcc automake autoconf pkgconfig make libseccomp-devel libxml2-devel jansson-devel libyaml-devel findutils diffutils libtool
dnf -y install jq puppet
- run:
name: Build
Expand Down
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,8 @@ AC_CONFIG_FILES([Makefile
man/ctags-optlib.7.rst
])

AC_CONFIG_SUBDIRS([Onigmo])

AC_OUTPUT

# vim:ts=4:sw=4:
63 changes: 58 additions & 5 deletions main/lregex.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
#endif
#include <regex.h>
#include "Onigmo/onigmoposix.h"

#include <inttypes.h>

Expand Down Expand Up @@ -1293,6 +1293,16 @@ static void regex_flag_icase_long (const char* s CTAGS_ATTR_UNUSED, const char*
regex_flag_icase_short ('i', data);
}

/*
 * Handler for the short `r` (ruby) regex flag.
 *
 * `data` points to the int holding the cflags value that is later handed to
 * regcomp().  The request for Onigmo's ruby syntax is recorded by making that
 * value negative; compileRegex() checks the sign, restores the positive value
 * and selects ONIG_SYNTAX_RUBY.
 *
 * The value is negated only while it is still positive, so specifying the
 * flag more than once (e.g. both `r` and `{ruby}`) keeps the request instead
 * of toggling it off again.
 *
 * NOTE(review): a cflags value of 0 cannot carry the sign marker, and flag
 * handlers that run after this one would operate on the negated value;
 * confirm whether either situation can occur with the other flag handlers.
 */
static void regex_flag_ruby_short (char c CTAGS_ATTR_UNUSED, void* data)
{
	int* cflags = data;

	if (*cflags > 0)
		*cflags = -*cflags;
}

/*
 * Handler for the long-form ruby regex flag.
 *
 * Neither the flag name `s` nor its value argument is used; the work is
 * delegated to the short-flag handler so both spellings behave the same.
 */
static void regex_flag_ruby_long (const char* s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	const char short_letter = 'r';

	regex_flag_ruby_short (short_letter, data);
}

static flagDefinition regexFlagDefs[] = {
{ 'b', "basic", regex_flag_basic_short, regex_flag_basic_long,
Expand All @@ -1301,6 +1311,8 @@ static flagDefinition regexFlagDefs[] = {
NULL, "interpreted as a Posix extended regular expression (default)"},
{ 'i', "icase", regex_flag_icase_short, regex_flag_icase_long,
NULL, "applied in a case-insensitive manner"},
{ 'r', "ruby", regex_flag_ruby_short, regex_flag_ruby_long,
NULL, "used ruby syntax of Onigmo regex engine"},
};

static regex_t* compileRegex (enum regexParserType regptype,
Expand All @@ -1320,6 +1332,18 @@ static regex_t* compileRegex (enum regexParserType regptype,
&cflags);

result = xMalloc (1, regex_t);

if (cflags < 0)
{
cflags *= -1;
onig_set_default_syntax (ONIG_SYNTAX_RUBY);

}
else
onig_set_default_syntax (cflags & REG_EXTENDED
? ONIG_SYNTAX_POSIX_EXTENDED
: ONIG_SYNTAX_POSIX_BASIC);

errcode = regcomp (result, regexp, cflags);
if (errcode != 0)
{
Expand Down Expand Up @@ -1452,10 +1476,14 @@ static void matchTagPattern (struct lregexControlBlock *lcb,
const regmatch_t* const pmatch,
off_t offset)
{
int num_match = patbuf->pattern->re_nsub + 1;
if (num_match > BACK_REFERENCE_COUNT)
num_match = BACK_REFERENCE_COUNT;

vString *const name =
(patbuf->u.tag.name_pattern[0] != '\0') ? substitute (line,
patbuf->u.tag.name_pattern,
BACK_REFERENCE_COUNT, pmatch):
num_match, pmatch):
(patbuf->anonymous_tag_prefix) ? anonGenerateNew (patbuf->anonymous_tag_prefix,
patbuf->u.tag.kindIndex):
vStringNewInit ("");
Expand Down Expand Up @@ -1536,8 +1564,12 @@ static void matchTagPattern (struct lregexControlBlock *lcb,
struct fieldPattern *fp = ptrArrayItem(patbuf->fieldPatterns, i);
if (isFieldEnabled (fp->ftype))
{
int num_match = patbuf->pattern->re_nsub + 1;
if (num_match > BACK_REFERENCE_COUNT)
num_match = BACK_REFERENCE_COUNT;

vString * const value = substitute (line, fp->template,
BACK_REFERENCE_COUNT, pmatch);
num_match, pmatch);
attachParserField (&e, false, fp->ftype, vStringValue (value));
trashBoxPut (field_trashbox, value,
(TrashBoxDestroyItemProc)vStringDelete);
Expand Down Expand Up @@ -1607,7 +1639,11 @@ static void printMessage(const langType language,
Assert (ptrn->message.selection > 0);
Assert (ptrn->message.message_string);

msg = substitute (line, ptrn->message.message_string, BACK_REFERENCE_COUNT, pmatch);
int num_match = ptrn->pattern->re_nsub + 1;
if (num_match > BACK_REFERENCE_COUNT)
num_match = BACK_REFERENCE_COUNT;

msg = substitute (line, ptrn->message.message_string, num_match, pmatch);

error (ptrn->message.selection, "%sMessage from regex<%s>: %s (%s:%lu)",
(ptrn->message.selection == FATAL ? "Fatal: " : ""),
Expand Down Expand Up @@ -1825,6 +1861,15 @@ extern bool matchRegex (struct lregexControlBlock *lcb, const vString* const lin
{
bool result = false;
unsigned int i;

bool trimed = false;
if (vStringLength (line) > 0 &&
vStringLast (line) == '\n')
{
vStringChop ((vString* const)line);
trimed = true;
}

for (i = 0 ; i < ptrArrayCount(lcb->entries[REG_PARSER_SINGLE_LINE]) ; ++i)
{
regexTableEntry *entry = ptrArrayItem(lcb->entries[REG_PARSER_SINGLE_LINE], i);
Expand All @@ -1843,6 +1888,10 @@ extern bool matchRegex (struct lregexControlBlock *lcb, const vString* const lin
break;
}
}

if (trimed)
vStringPut ((vString *const)line, '\n');

return result;
}

Expand Down Expand Up @@ -2362,7 +2411,11 @@ static void printMultitableMessage(const langType language,
Assert (ptrn->message.selection > 0);
Assert (ptrn->message.message_string);

msg = substitute (current, ptrn->message.message_string, BACK_REFERENCE_COUNT, pmatch);
int num_match = ptrn->pattern->re_nsub + 1;
if (num_match > BACK_REFERENCE_COUNT)
num_match = BACK_REFERENCE_COUNT;

msg = substitute (current, ptrn->message.message_string, num_match, pmatch);

error (ptrn->message.selection, "%sMessage from mtable<%s/%s[%2u]>: %s (%s:%lu)",
(ptrn->message.selection == FATAL ? "Fatal: " : ""),
Expand Down
5 changes: 5 additions & 0 deletions main/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ static struct Feature {
/* The test harnesses use this as hints for skipping test cases */
{"packcc", "has peg based parser(s)"},
#endif
{"onigmo", "uses onigmo as regex engine"},
{NULL,}
};

Expand Down Expand Up @@ -2015,6 +2016,10 @@ static void processLicenseOption (
puts ("");
puts (License1);
puts (License2);

extern const char* onig_copyright(void);
puts (onig_copyright());

exit (0);
}

Expand Down
2 changes: 1 addition & 1 deletion optlib/elixir.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ extern parserDefinition* ElixirParser (void)
"p", "{scope=set}", NULL, false},
{"^[ \t]*defmodule[ \t]+([A-Z][a-zA-Z0-9_]*\\.)*([A-Z][a-zA-Z0-9_?!]*)", "\\2",
"m", "{scope=set}", NULL, false},
{"^[ \t]*def((p?)|macro(p?))[ \t]+([a-zA-Z0-9_?!]+)[ \t]+([\\|\\^/&<>~.=!*+-]{1,3}|and|or|in|not|when|not in)[ \t]+[a-zA-Z0-9_?!]", "\\5",
{"^[ \t]*def((p?)|macro(p?))[ \t]+([a-zA-Z0-9_?!]+)[ \t]+([\\|\\^/&<>~.=!*+-]{1,3}|and|or|in|not in|when|not)[ \t]+[a-zA-Z0-9_?!]", "\\5",
"o", "{scope=ref}{exclusive}", NULL, false},
{"^[ \t]*def[ \t]+([a-z_][a-zA-Z0-9_?!]*)", "\\1",
"f", "{scope=ref}{_field=access:public}", NULL, false},
Expand Down
2 changes: 1 addition & 1 deletion optlib/elixir.ctags
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@

--regex-Elixir=/^[ \t]*defprotocol[ \t]+([A-Z][a-zA-Z0-9_]*\.)*([A-Z][a-zA-Z0-9_?!]*)/\2/p/{scope=set}
--regex-Elixir=/^[ \t]*defmodule[ \t]+([A-Z][a-zA-Z0-9_]*\.)*([A-Z][a-zA-Z0-9_?!]*)/\2/m/{scope=set}
--regex-Elixir=/^[ \t]*def((p?)|macro(p?))[ \t]+([a-zA-Z0-9_?!]+)[ \t]+([\|\^\/&<>~.=!*+-]{1,3}|and|or|in|not|when|not in)[ \t]+[a-zA-Z0-9_?!]/\5/o/{scope=ref}{exclusive}
--regex-Elixir=/^[ \t]*def((p?)|macro(p?))[ \t]+([a-zA-Z0-9_?!]+)[ \t]+([\|\^\/&<>~.=!*+-]{1,3}|and|or|in|not in|when|not)[ \t]+[a-zA-Z0-9_?!]/\5/o/{scope=ref}{exclusive}
--regex-Elixir=/^[ \t]*def[ \t]+([a-z_][a-zA-Z0-9_?!]*)/\1/f/{scope=ref}{_field=access:public}
--regex-Elixir=/^[ \t]*defp[ \t]+([a-z_][a-zA-Z0-9_?!]*)/\1/f/{scope=ref}{_field=access:private}
--regex-Elixir=/^[ \t]*(@|def)callback[ \t]+([a-z_][a-zA-Z0-9_?!]*)/\2/c/{scope=ref}
Expand Down
4 changes: 0 additions & 4 deletions source.mak
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,6 @@ ALL_SRCS = $(ALL_LIB_SRCS) $(CMDLINE_SRCS)
ENVIRONMENT_HEADS =
ENVIRONMENT_SRCS =

REGEX_HEADS = gnu_regex/regex.h
REGEX_SRCS = gnu_regex/regex.c
REGEX_OBJS = $(REGEX_SRCS:.c=.$(OBJEXT))

FNMATCH_HEADS = fnmatch/fnmatch.h
FNMATCH_SRCS = fnmatch/fnmatch.c
FNMATCH_OBJS = $(FNMATCH_SRCS:.c=.$(OBJEXT))
Expand Down

0 comments on commit 97d07b7

Please sign in to comment.