diff --git a/lexer.go b/lexer.go
index 4a1bd0fd1..060c3f0d3 100644
--- a/lexer.go
+++ b/lexer.go
@@ -57,6 +57,18 @@ type Config struct {
//
// If this is 0 it will be treated as a default of 1.
Priority float32 `xml:"priority,omitempty"`
+
+ // Analyse is a list of regexes to match against the input.
+ //
+	// The sum of the scores of all matching patterns will be
+	// used as the final score.
+ Analyse []AnalyseConfig `xml:"analyse,omitempty"`
+}
+
+// AnalyseConfig defines a single regex analyser pattern.
+type AnalyseConfig struct {
+ Regex string `xml:"regex,attr"`
+ Score float32 `xml:"score,attr"`
}
// Token output to formatter.
diff --git a/lexers/c.go b/lexers/c.go
deleted file mode 100644
index 4ed2dd546..000000000
--- a/lexers/c.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package lexers
-
-import (
- "regexp"
-
- . "github.com/alecthomas/chroma/v2" // nolint
-)
-
-var (
- cAnalyserIncludeRe = regexp.MustCompile(`(?m)^\s*#include [<"]`)
- cAnalyserIfdefRe = regexp.MustCompile(`(?m)^\s*#ifn?def `)
-)
-
-// C lexer.
-var C = Register(MustNewXMLLexer(
- embedded,
- "embedded/c.xml",
-).SetConfig(
- &Config{
- Name: "C",
- Aliases: []string{"c"},
- Filenames: []string{"*.c", "*.h", "*.idc", "*.x[bp]m"},
- MimeTypes: []string{"text/x-chdr", "text/x-csrc", "image/x-xbitmap", "image/x-xpixmap"},
- EnsureNL: true,
- Priority: 0.1,
- },
-).SetAnalyser(func(text string) float32 {
- if cAnalyserIncludeRe.MatchString(text) {
- return 0.1
- }
-
- if cAnalyserIfdefRe.MatchString(text) {
- return 0.1
- }
-
- return 0
-}))
diff --git a/lexers/c_test.go b/lexers/c_test.go
deleted file mode 100644
index 4ef5b5d18..000000000
--- a/lexers/c_test.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package lexers_test
-
-import (
- "io/ioutil"
- "testing"
-
- "github.com/alecthomas/chroma/v2"
- "github.com/alecthomas/chroma/v2/lexers"
-
- "github.com/alecthomas/assert/v2"
-)
-
-func TestC_AnalyseText(t *testing.T) {
- tests := map[string]struct {
- Filepath string
- Expected float32
- }{
- "include": {
- Filepath: "testdata/c_include.c",
- Expected: 0.1,
- },
- "ifdef": {
- Filepath: "testdata/c_ifdef.c",
- Expected: 0.1,
- },
- "ifndef": {
- Filepath: "testdata/c_ifndef.c",
- Expected: 0.1,
- },
- }
-
- for name, test := range tests {
- test := test
- t.Run(name, func(t *testing.T) {
- data, err := ioutil.ReadFile(test.Filepath)
- assert.NoError(t, err)
-
- analyser, ok := lexers.C.(chroma.Analyser)
- assert.True(t, ok)
-
- assert.Equal(t, test.Expected, analyser.AnalyseText(string(data)))
- })
- }
-}
diff --git a/lexers/cpp.go b/lexers/cpp.go
deleted file mode 100644
index 322481df9..000000000
--- a/lexers/cpp.go
+++ /dev/null
@@ -1,36 +0,0 @@
-package lexers
-
-import (
- "regexp"
-
- . "github.com/alecthomas/chroma/v2" // nolint
-)
-
-var (
- cppAnalyserIncludeRe = regexp.MustCompile(`#include <[a-z_]+>`)
- cppAnalyserNamespaceRe = regexp.MustCompile(`using namespace `)
-)
-
-var CPP = Register(MustNewXMLLexer(
- embedded,
- "embedded/c++.xml",
-).SetConfig(
- &Config{
- Name: "C++",
- Aliases: []string{"cpp", "c++"},
- Filenames: []string{"*.cpp", "*.hpp", "*.c++", "*.h++", "*.cc", "*.hh", "*.cxx", "*.hxx", "*.C", "*.H", "*.cp", "*.CPP", "*.cppm", "*.ixx", "*.tpp"},
- MimeTypes: []string{"text/x-c++hdr", "text/x-c++src"},
- Priority: 0.1,
- EnsureNL: true,
- },
-)).SetAnalyser(func(text string) float32 {
- if cppAnalyserIncludeRe.MatchString(text) {
- return 0.2
- }
-
- if cppAnalyserNamespaceRe.MatchString(text) {
- return 0.4
- }
-
- return 0
-})
diff --git a/lexers/cpp_test.go b/lexers/cpp_test.go
deleted file mode 100644
index ade2743ee..000000000
--- a/lexers/cpp_test.go
+++ /dev/null
@@ -1,57 +0,0 @@
-package lexers_test
-
-import (
- "os"
- "testing"
-
- "github.com/alecthomas/chroma/v2"
- "github.com/alecthomas/chroma/v2/lexers"
-
- "github.com/alecthomas/assert/v2"
-)
-
-func TestCpp_AnalyseText(t *testing.T) {
- tests := map[string]struct {
- Filepath string
- Expected float32
- }{
- "include": {
- Filepath: "testdata/cpp_include.cpp",
- Expected: 0.2,
- },
- "namespace": {
- Filepath: "testdata/cpp_namespace.cpp",
- Expected: 0.4,
- },
- }
-
- for name, test := range tests {
- test := test
- t.Run(name, func(t *testing.T) {
- data, err := os.ReadFile(test.Filepath)
- assert.NoError(t, err)
-
- analyser, ok := lexers.CPP.(chroma.Analyser)
- assert.True(t, ok)
-
- assert.Equal(t, test.Expected, analyser.AnalyseText(string(data)))
- })
- }
-}
-
-func TestIssue290(t *testing.T) {
- input := `// 64-bit floats have 53 digits of precision, including the whole-number-part.
-double a = 0011111110111001100110011001100110011001100110011001100110011010; // imperfect representation of 0.1
-double b = 0011111111001001100110011001100110011001100110011001100110011010; // imperfect representation of 0.2
-double c = 0011111111010011001100110011001100110011001100110011001100110011; // imperfect representation of 0.3
-double a + b = 0011111111010011001100110011001100110011001100110011001100110100; // Note that this is not quite equal to the "canonical" 0.3!a
-`
- it, err := lexers.GlobalLexerRegistry.Get("C++").Tokenise(nil, input)
- assert.NoError(t, err)
- for {
- token := it()
- if token == chroma.EOF {
- break
- }
- }
-}
diff --git a/lexers/embedded/c++.xml b/lexers/embedded/c++.xml
index e8702c58d..455c03368 100644
--- a/lexers/embedded/c++.xml
+++ b/lexers/embedded/c++.xml
@@ -15,9 +15,12 @@
*.H
*.cp
*.CPP
+ *.tpp
text/x-c++hdr
text/x-c++src
true
+
+
diff --git a/lexers/embedded/c.xml b/lexers/embedded/c.xml
index f941802e1..744732902 100644
--- a/lexers/embedded/c.xml
+++ b/lexers/embedded/c.xml
@@ -11,6 +11,8 @@
image/x-xbitmap
image/x-xpixmap
true
+
+
diff --git a/lexers/testdata/c_ifdef.c b/lexers/testdata/analysis/c.ifdef.actual
similarity index 100%
rename from lexers/testdata/c_ifdef.c
rename to lexers/testdata/analysis/c.ifdef.actual
diff --git a/lexers/testdata/analysis/c.ifdef.expected b/lexers/testdata/analysis/c.ifdef.expected
new file mode 100644
index 000000000..49d59571f
--- /dev/null
+++ b/lexers/testdata/analysis/c.ifdef.expected
@@ -0,0 +1 @@
+0.1
diff --git a/lexers/testdata/c_ifndef.c b/lexers/testdata/analysis/c.ifndef.actual
similarity index 100%
rename from lexers/testdata/c_ifndef.c
rename to lexers/testdata/analysis/c.ifndef.actual
diff --git a/lexers/testdata/analysis/c.ifndef.expected b/lexers/testdata/analysis/c.ifndef.expected
new file mode 100644
index 000000000..49d59571f
--- /dev/null
+++ b/lexers/testdata/analysis/c.ifndef.expected
@@ -0,0 +1 @@
+0.1
diff --git a/lexers/testdata/c_include.c b/lexers/testdata/analysis/c.include.actual
similarity index 100%
rename from lexers/testdata/c_include.c
rename to lexers/testdata/analysis/c.include.actual
diff --git a/lexers/testdata/analysis/c.include.expected b/lexers/testdata/analysis/c.include.expected
new file mode 100644
index 000000000..49d59571f
--- /dev/null
+++ b/lexers/testdata/analysis/c.include.expected
@@ -0,0 +1 @@
+0.1
diff --git a/lexers/testdata/cpp_include.cpp b/lexers/testdata/analysis/cpp.include.actual
similarity index 100%
rename from lexers/testdata/cpp_include.cpp
rename to lexers/testdata/analysis/cpp.include.actual
diff --git a/lexers/testdata/analysis/cpp.include.expected b/lexers/testdata/analysis/cpp.include.expected
new file mode 100644
index 000000000..3b04cfb60
--- /dev/null
+++ b/lexers/testdata/analysis/cpp.include.expected
@@ -0,0 +1 @@
+0.2
diff --git a/lexers/testdata/cpp_namespace.cpp b/lexers/testdata/analysis/cpp.namespace.actual
similarity index 100%
rename from lexers/testdata/cpp_namespace.cpp
rename to lexers/testdata/analysis/cpp.namespace.actual
diff --git a/lexers/testdata/analysis/cpp.namespace.expected b/lexers/testdata/analysis/cpp.namespace.expected
new file mode 100644
index 000000000..bd73f4707
--- /dev/null
+++ b/lexers/testdata/analysis/cpp.namespace.expected
@@ -0,0 +1 @@
+0.4
diff --git a/serialise.go b/serialise.go
index 2b727db8a..f6ad589a7 100644
--- a/serialise.go
+++ b/serialise.go
@@ -11,6 +11,8 @@ import (
"reflect"
"regexp"
"strings"
+
+ "github.com/dlclark/regexp2"
)
// Serialisation of Chroma rules to XML. The format is:
@@ -107,7 +109,7 @@ func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) {
var config Config
err = dec.DecodeElement(&config, &se)
if err != nil {
- panic(err)
+ return nil, fmt.Errorf("%s: %w", path, err)
}
return &config, nil
}
@@ -135,8 +137,29 @@ func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) {
return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
}
}
+ type regexAnalyse struct {
+ re *regexp2.Regexp
+ score float32
+ }
+ regexAnalysers := make([]regexAnalyse, 0, len(config.Analyse))
+ for _, ra := range config.Analyse {
+ re, err := regexp2.Compile(ra.Regex, regexp2.None)
+ if err != nil {
+ return nil, fmt.Errorf("%s: %q is not a valid analyser regex: %w", config.Name, ra.Regex, err)
+ }
+ regexAnalysers = append(regexAnalysers, regexAnalyse{re, ra.Score})
+ }
return &RegexLexer{
config: config,
+ analyser: func(text string) float32 {
+ var score float32
+ for _, ra := range regexAnalysers {
+ if ok, _ := ra.re.MatchString(text); ok {
+ score += ra.score
+ }
+ }
+ return score
+ },
fetchRulesFunc: func() (Rules, error) {
var lexer struct {
Config