diff --git a/lexer.go b/lexer.go
index 4a1bd0fd1..060c3f0d3 100644
--- a/lexer.go
+++ b/lexer.go
@@ -57,6 +57,18 @@ type Config struct {
 	//
 	// If this is 0 it will be treated as a default of 1.
 	Priority float32 `xml:"priority,omitempty"`
+
+	// Analyse is a list of regexes to match against the input.
+	//
+	// The sum of the scores of all matching patterns will be
+	// used as the final score.
+	Analyse []AnalyseConfig `xml:"analyse,omitempty"`
+}
+
+// AnalyseConfig defines a single regex analyser pattern.
+type AnalyseConfig struct {
+	Regex string  `xml:"regex,attr"`
+	Score float32 `xml:"score,attr"`
 }
 
 // Token output to formatter.
diff --git a/lexers/c.go b/lexers/c.go
deleted file mode 100644
index 4ed2dd546..000000000
--- a/lexers/c.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package lexers
-
-import (
-	"regexp"
-
-	. "github.com/alecthomas/chroma/v2" // nolint
-)
-
-var (
-	cAnalyserIncludeRe = regexp.MustCompile(`(?m)^\s*#include [<"]`)
-	cAnalyserIfdefRe   = regexp.MustCompile(`(?m)^\s*#ifn?def `)
-)
-
-// C lexer.
-var C = Register(MustNewXMLLexer(
-	embedded,
-	"embedded/c.xml",
-).SetConfig(
-	&Config{
-		Name:      "C",
-		Aliases:   []string{"c"},
-		Filenames: []string{"*.c", "*.h", "*.idc", "*.x[bp]m"},
-		MimeTypes: []string{"text/x-chdr", "text/x-csrc", "image/x-xbitmap", "image/x-xpixmap"},
-		EnsureNL:  true,
-		Priority:  0.1,
-	},
-).SetAnalyser(func(text string) float32 {
-	if cAnalyserIncludeRe.MatchString(text) {
-		return 0.1
-	}
-
-	if cAnalyserIfdefRe.MatchString(text) {
-		return 0.1
-	}
-
-	return 0
-}))
diff --git a/lexers/c_test.go b/lexers/c_test.go
deleted file mode 100644
index 4ef5b5d18..000000000
--- a/lexers/c_test.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package lexers_test
-
-import (
-	"io/ioutil"
-	"testing"
-
-	"github.com/alecthomas/chroma/v2"
-	"github.com/alecthomas/chroma/v2/lexers"
-
-	"github.com/alecthomas/assert/v2"
-)
-
-func TestC_AnalyseText(t *testing.T) {
-	tests := map[string]struct {
-		Filepath string
-		Expected float32
-	}{
-		"include": {
-			Filepath: "testdata/c_include.c",
-			Expected: 0.1,
-		},
-		"ifdef": {
-			Filepath: "testdata/c_ifdef.c",
-			Expected: 0.1,
-		},
-		"ifndef": {
-			Filepath: "testdata/c_ifndef.c",
-			Expected: 0.1,
-		},
-	}
-
-	for name, test := range tests {
-		test := test
-		t.Run(name, func(t *testing.T) {
-			data, err := ioutil.ReadFile(test.Filepath)
-			assert.NoError(t, err)
-
-			analyser, ok := lexers.C.(chroma.Analyser)
-			assert.True(t, ok)
-
-			assert.Equal(t, test.Expected, analyser.AnalyseText(string(data)))
-		})
-	}
-}
diff --git a/lexers/cpp.go b/lexers/cpp.go
deleted file mode 100644
index 322481df9..000000000
--- a/lexers/cpp.go
+++ /dev/null
@@ -1,36 +0,0 @@
-package lexers
-
-import (
-	"regexp"
-
-	. "github.com/alecthomas/chroma/v2" // nolint
-)
-
-var (
-	cppAnalyserIncludeRe   = regexp.MustCompile(`#include <[a-z_]+>`)
-	cppAnalyserNamespaceRe = regexp.MustCompile(`using namespace `)
-)
-
-var CPP = Register(MustNewXMLLexer(
-	embedded,
-	"embedded/c++.xml",
-).SetConfig(
-	&Config{
-		Name:      "C++",
-		Aliases:   []string{"cpp", "c++"},
-		Filenames: []string{"*.cpp", "*.hpp", "*.c++", "*.h++", "*.cc", "*.hh", "*.cxx", "*.hxx", "*.C", "*.H", "*.cp", "*.CPP", "*.cppm", "*.ixx", "*.tpp"},
-		MimeTypes: []string{"text/x-c++hdr", "text/x-c++src"},
-		Priority:  0.1,
-		EnsureNL:  true,
-	},
-)).SetAnalyser(func(text string) float32 {
-	if cppAnalyserIncludeRe.MatchString(text) {
-		return 0.2
-	}
-
-	if cppAnalyserNamespaceRe.MatchString(text) {
-		return 0.4
-	}
-
-	return 0
-})
diff --git a/lexers/cpp_test.go b/lexers/cpp_test.go
deleted file mode 100644
index ade2743ee..000000000
--- a/lexers/cpp_test.go
+++ /dev/null
@@ -1,57 +0,0 @@
-package lexers_test
-
-import (
-	"os"
-	"testing"
-
-	"github.com/alecthomas/chroma/v2"
-	"github.com/alecthomas/chroma/v2/lexers"
-
-	"github.com/alecthomas/assert/v2"
-)
-
-func TestCpp_AnalyseText(t *testing.T) {
-	tests := map[string]struct {
-		Filepath string
-		Expected float32
-	}{
-		"include": {
-			Filepath: "testdata/cpp_include.cpp",
-			Expected: 0.2,
-		},
-		"namespace": {
-			Filepath: "testdata/cpp_namespace.cpp",
-			Expected: 0.4,
-		},
-	}
-
-	for name, test := range tests {
-		test := test
-		t.Run(name, func(t *testing.T) {
-			data, err := os.ReadFile(test.Filepath)
-			assert.NoError(t, err)
-
-			analyser, ok := lexers.CPP.(chroma.Analyser)
-			assert.True(t, ok)
-
-			assert.Equal(t, test.Expected, analyser.AnalyseText(string(data)))
-		})
-	}
-}
-
-func TestIssue290(t *testing.T) {
-	input := `// 64-bit floats have 53 digits of precision, including the whole-number-part.
-double a = 0011111110111001100110011001100110011001100110011001100110011010; // imperfect representation of 0.1
-double b = 0011111111001001100110011001100110011001100110011001100110011010; // imperfect representation of 0.2
-double c = 0011111111010011001100110011001100110011001100110011001100110011; // imperfect representation of 0.3
-double a + b = 0011111111010011001100110011001100110011001100110011001100110100; // Note that this is not quite equal to the "canonical" 0.3!a
-`
-	it, err := lexers.GlobalLexerRegistry.Get("C++").Tokenise(nil, input)
-	assert.NoError(t, err)
-	for {
-		token := it()
-		if token == chroma.EOF {
-			break
-		}
-	}
-}
diff --git a/lexers/embedded/c++.xml b/lexers/embedded/c++.xml
index e8702c58d..455c03368 100644
--- a/lexers/embedded/c++.xml
+++ b/lexers/embedded/c++.xml
@@ -15,9 +15,12 @@
     *.H
     *.cp
     *.CPP
+    *.tpp
     text/x-c++hdr
     text/x-c++src
     true
+
+
diff --git a/lexers/embedded/c.xml b/lexers/embedded/c.xml
index f941802e1..744732902 100644
--- a/lexers/embedded/c.xml
+++ b/lexers/embedded/c.xml
@@ -11,6 +11,8 @@
     image/x-xbitmap
     image/x-xpixmap
     true
+
+
diff --git a/lexers/testdata/c_ifdef.c b/lexers/testdata/analysis/c.ifdef.actual
similarity index 100%
rename from lexers/testdata/c_ifdef.c
rename to lexers/testdata/analysis/c.ifdef.actual
diff --git a/lexers/testdata/analysis/c.ifdef.expected b/lexers/testdata/analysis/c.ifdef.expected
new file mode 100644
index 000000000..49d59571f
--- /dev/null
+++ b/lexers/testdata/analysis/c.ifdef.expected
@@ -0,0 +1 @@
+0.1
diff --git a/lexers/testdata/c_ifndef.c b/lexers/testdata/analysis/c.ifndef.actual
similarity index 100%
rename from lexers/testdata/c_ifndef.c
rename to lexers/testdata/analysis/c.ifndef.actual
diff --git a/lexers/testdata/analysis/c.ifndef.expected b/lexers/testdata/analysis/c.ifndef.expected
new file mode 100644
index 000000000..49d59571f
--- /dev/null
+++ b/lexers/testdata/analysis/c.ifndef.expected
@@ -0,0 +1 @@
+0.1
diff --git a/lexers/testdata/c_include.c b/lexers/testdata/analysis/c.include.actual
similarity index 100%
rename from lexers/testdata/c_include.c
rename to lexers/testdata/analysis/c.include.actual
diff --git a/lexers/testdata/analysis/c.include.expected b/lexers/testdata/analysis/c.include.expected
new file mode 100644
index 000000000..49d59571f
--- /dev/null
+++ b/lexers/testdata/analysis/c.include.expected
@@ -0,0 +1 @@
+0.1
diff --git a/lexers/testdata/cpp_include.cpp b/lexers/testdata/analysis/cpp.include.actual
similarity index 100%
rename from lexers/testdata/cpp_include.cpp
rename to lexers/testdata/analysis/cpp.include.actual
diff --git a/lexers/testdata/analysis/cpp.include.expected b/lexers/testdata/analysis/cpp.include.expected
new file mode 100644
index 000000000..3b04cfb60
--- /dev/null
+++ b/lexers/testdata/analysis/cpp.include.expected
@@ -0,0 +1 @@
+0.2
diff --git a/lexers/testdata/cpp_namespace.cpp b/lexers/testdata/analysis/cpp.namespace.actual
similarity index 100%
rename from lexers/testdata/cpp_namespace.cpp
rename to lexers/testdata/analysis/cpp.namespace.actual
diff --git a/lexers/testdata/analysis/cpp.namespace.expected b/lexers/testdata/analysis/cpp.namespace.expected
new file mode 100644
index 000000000..bd73f4707
--- /dev/null
+++ b/lexers/testdata/analysis/cpp.namespace.expected
@@ -0,0 +1 @@
+0.4
diff --git a/serialise.go b/serialise.go
index 2b727db8a..f6ad589a7 100644
--- a/serialise.go
+++ b/serialise.go
@@ -11,6 +11,8 @@ import (
 	"reflect"
 	"regexp"
 	"strings"
+
+	"github.com/dlclark/regexp2"
 )
 
 // Serialisation of Chroma rules to XML. The format is:
@@ -107,7 +109,7 @@ func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) {
 			var config Config
 			err = dec.DecodeElement(&config, &se)
 			if err != nil {
-				panic(err)
+				return nil, fmt.Errorf("%s: %w", path, err)
 			}
 			return &config, nil
 		}
@@ -135,8 +137,29 @@ func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) {
 			return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
 		}
 	}
+	type regexAnalyse struct {
+		re    *regexp2.Regexp
+		score float32
+	}
+	regexAnalysers := make([]regexAnalyse, 0, len(config.Analyse))
+	for _, ra := range config.Analyse {
+		re, err := regexp2.Compile(ra.Regex, regexp2.None)
+		if err != nil {
+			return nil, fmt.Errorf("%s: %q is not a valid analyser regex: %w", config.Name, ra.Regex, err)
+		}
+		regexAnalysers = append(regexAnalysers, regexAnalyse{re, ra.Score})
+	}
 	return &RegexLexer{
 		config: config,
+		analyser: func(text string) float32 {
+			var score float32
+			for _, ra := range regexAnalysers {
+				if ok, _ := ra.re.MatchString(text); ok {
+					score += ra.score
+				}
+			}
+			return score
+		},
 		fetchRulesFunc: func() (Rules, error) {
 			var lexer struct {
 				Config