Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: augment C with semgrep constructs #480

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 55 additions & 4 deletions lang/semgrep-grammars/src/semgrep-c/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,73 @@ module.exports = grammar(base_grammar, {
name: 'c',

// Appends the semgrep-specific LR conflicts to the conflict list inherited
// from the base grammar (`previous`).
conflicts: ($, previous) => previous.concat([
// This conflict arises in the parser state
//   'if' parenthesized_expression semgrep_ellipsis_metavar • '(' …
// where it is unclear whether to reduce the whole `if` statement or only
// the metavariable.
// It is not a very realistic input, so it is declared as a conflict here
// instead of being resolved.
[$.if_statement, $._expression]
]),

/*
Support for semgrep ellipsis ('...') and metavariables ('$FOO'),
if they're not already part of the base grammar.
*/
rules: {
/*
semgrep_ellipsis: $ => '...',
semgrep_deep_expression: $ => seq('<...', $._expression, '...>'),
semgrep_typed_metavar: $ => seq('(', $.type_descriptor, $.semgrep_metavariable, ')'),
semgrep_metavariable: $ => /\$[A-Z_][A-Z_0-9]*/,
semgrep_ellipsis_metavar: $ => /\$\.\.\.[a-zA-Z_][a-zA-Z_0-9]*/,

// Alternate "entry point". Allows parsing a standalone expression.
semgrep_expression: ($) => seq("__SEMGREP_EXPRESSION", $._expression),

_expression: ($, previous) => {
return choice(
$.semgrep_ellipsis,
$.semgrep_deep_expression,
$.semgrep_typed_metavar,
$.semgrep_ellipsis_metavar,
...previous.members
);
}
*/
)
},

_statement: ($, previous) => {
return choice(
...previous.members,
// This needs to have a little more precedence, so that we can parse
// { ... <stmts> }
// properly, such that `...` is for the semgrep ellipsis.
prec(1, $.semgrep_ellipsis),
prec(1, $.semgrep_ellipsis_metavar),
)
},

_top_level_statement: ($, previous) => {
return choice(
...previous.members,
prec(1, $.semgrep_ellipsis),
prec(1, $.semgrep_ellipsis_metavar)
)
},

_for_statement_body: ($, previous) => choice(
previous,
$.semgrep_ellipsis
),

_field_identifier: ($, previous) => choice(
previous,
$.semgrep_ellipsis,
$.semgrep_ellipsis_metavar
),

// No need to extend parameter_declaration: the existing variadic_parameter rule already covers this case.

// Alternative entry point for pattern parsing
translation_unit: ($, previous) => choice(
previous,
$.semgrep_expression
)
}
});
301 changes: 301 additions & 0 deletions lang/semgrep-grammars/src/semgrep-c/test/corpus/semgrep.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
======================================================================
Deep expression
======================================================================

__SEMGREP_EXPRESSION <... foo($X) ...>

---

(translation_unit
(semgrep_expression
(semgrep_deep_expression
(call_expression
(identifier)
(argument_list
(identifier))))))

======================================================================
Typed metavariable
======================================================================

__SEMGREP_EXPRESSION ($T $X)

---

(translation_unit
(semgrep_expression
(semgrep_typed_metavar
(type_descriptor
(type_identifier))
(semgrep_metavariable))))

======================================================================
Sizeof ellipsis
======================================================================

__SEMGREP_EXPRESSION sizeof(...)

---

(translation_unit
(semgrep_expression
(sizeof_expression
(parenthesized_expression
(semgrep_ellipsis)))))

======================================================================
Ellipsis expression
======================================================================

__SEMGREP_EXPRESSION ...

---

(translation_unit
(semgrep_expression
(semgrep_ellipsis)))

======================================================================
Ellipsis argument
======================================================================

foo(..., 1, ..., 2, ...);

---

(translation_unit
(expression_statement
(call_expression
(identifier)
(argument_list
(semgrep_ellipsis)
(number_literal)
(semgrep_ellipsis)
(number_literal)
(semgrep_ellipsis)))))

======================================================================
Ellipsis statement
======================================================================

int x = 2;
...
int y = 3;

---

(translation_unit
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal)))
(semgrep_ellipsis)
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal))))

======================================================================
Ellipsis statement in function
======================================================================

int main() {
int x = 2;
...
int y = 3;
}

---

(translation_unit
(function_definition
(primitive_type)
(function_declarator
(identifier)
(parameter_list))
(compound_statement
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal)))
(semgrep_ellipsis)
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal))))))

======================================================================
Ellipsis in for loop
======================================================================

for (...) {}

---

(translation_unit
(for_statement
(semgrep_ellipsis)
(compound_statement)))

======================================================================
Metavariables
======================================================================

__SEMGREP_EXPRESSION $X

---

(translation_unit
(semgrep_expression
(identifier)))

======================================================================
Metavariables in function definition
======================================================================

$T $F($T2 $ARG) {
...
}

---

(translation_unit
(function_definition
(type_identifier)
(function_declarator
(identifier)
(parameter_list
(parameter_declaration
(type_identifier)
(identifier))))
(compound_statement
(semgrep_ellipsis))))

======================================================================
Ellipsis metavariable
======================================================================

$...X

---

(translation_unit
(semgrep_ellipsis_metavar))

======================================================================
Ellipsis metavariable as expression
======================================================================

__SEMGREP_EXPRESSION $...X

---

(translation_unit
(semgrep_expression
(semgrep_ellipsis_metavar)))

======================================================================
Ellipsis metavariable between statements
======================================================================

int x = 1;
$...STMTS
int y = 2;

---

(translation_unit
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal)))
(semgrep_ellipsis_metavar)
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal))))

======================================================================
Ellipsis metavariable between statements in function
======================================================================

int main(){
int x = 1;
$...STMTS
int y = 2;
}

---

(translation_unit
(function_definition
(primitive_type)
(function_declarator
(identifier)
(parameter_list))
(compound_statement
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal)))
(semgrep_ellipsis_metavar)
(declaration
(primitive_type)
(init_declarator
(identifier)
(number_literal))))))

======================================================================
Ellipsis in dot access
======================================================================

__SEMGREP_EXPRESSION a. ... .c

---

(translation_unit
(semgrep_expression
(field_expression
(field_expression
(identifier)
(semgrep_ellipsis))
(field_identifier))))

======================================================================
Ellipsis in dot access
======================================================================

__SEMGREP_EXPRESSION a. ... .c

---

(translation_unit
(semgrep_expression
(field_expression
(field_expression
(identifier)
(semgrep_ellipsis))
(field_identifier))))

======================================================================
Ellipsis metavariable in dot access
======================================================================

__SEMGREP_EXPRESSION a. $...STUFF .c

---

(translation_unit
(semgrep_expression
(field_expression
(field_expression
(identifier)
(semgrep_ellipsis_metavar))
(field_identifier))))
8 changes: 7 additions & 1 deletion lang/semgrep-grammars/src/semgrep-cpp/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,16 @@ module.exports = grammar(base_grammar, {
prec(1,$.semgrep_ellipsis)
),

_for_statement_body: ($, previous) => choice(
previous,
$.semgrep_ellipsis
),

// For method chaining, like foo. ... .bar()
_field_identifier: ($, previous) => choice(
previous,
$.semgrep_ellipsis
$.semgrep_ellipsis,
$.semgrep_named_ellipsis
),

// So we prefer to parse a unary left fold for
Expand Down
Loading
Loading