Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework C++ Runtime Interval, IntervalSet, ATN and ATNState #1930

Merged
merged 11 commits into from
Oct 29, 2017
1 change: 1 addition & 0 deletions runtime/Cpp/runtime/src/antlr4-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <algorithm>
#include <assert.h>
#include <atomic>
#include <codecvt>
#include <chrono>
#include <fstream>
Expand Down
18 changes: 9 additions & 9 deletions runtime/Cpp/runtime/src/atn/ATN.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,21 @@ misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const {

}

misc::IntervalSet& ATN::nextTokens(ATNState *s) const {
if (!s->nextTokenWithinRule.isReadOnly()) {
misc::IntervalSet const& ATN::nextTokens(ATNState *s) const {
if (!s->_nextTokenUpdated) {
std::unique_lock<std::mutex> lock { _mutex };
if (!s->nextTokenWithinRule.isReadOnly()) {
s->nextTokenWithinRule = nextTokens(s, nullptr);
s->nextTokenWithinRule.setReadOnly(true);
if (!s->_nextTokenUpdated) {
s->_nextTokenWithinRule = nextTokens(s, nullptr);
s->_nextTokenUpdated = true;
}
}
return s->nextTokenWithinRule;
return s->_nextTokenWithinRule;
}

void ATN::addState(ATNState *state) {
if (state != nullptr) {
//state->atn = this;
state->stateNumber = (int)states.size();
state->stateNumber = static_cast<int>(states.size());
}

states.push_back(state);
Expand All @@ -114,7 +114,7 @@ void ATN::removeState(ATNState *state) {

int ATN::defineDecisionState(DecisionState *s) {
decisionToState.push_back(s);
s->decision = (int)decisionToState.size() - 1;
s->decision = static_cast<int>(decisionToState.size() - 1);
return s->decision;
}

Expand Down Expand Up @@ -154,7 +154,7 @@ misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *contex
if (ctx->parent == nullptr) {
break;
}
ctx = (RuleContext *)ctx->parent;
ctx = static_cast<RuleContext *>(ctx->parent);
}

if (following.contains(Token::EPSILON)) {
Expand Down
2 changes: 1 addition & 1 deletion runtime/Cpp/runtime/src/atn/ATN.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ namespace atn {
/// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of
/// rule.
/// </summary>
virtual misc::IntervalSet& nextTokens(ATNState *s) const;
virtual misc::IntervalSet const& nextTokens(ATNState *s) const;

virtual void addState(ATNState *state);

Expand Down
16 changes: 13 additions & 3 deletions runtime/Cpp/runtime/src/atn/ATNState.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,17 @@ namespace atn {
///
/// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/>
/// </summary>
class ANTLR4CPP_PUBLIC ATN;

class ANTLR4CPP_PUBLIC ATNState {
public:
ATNState();
ATNState(ATNState const&) = delete;

virtual ~ATNState();

ATNState& operator=(ATNState const&) = delete;

static const size_t INITIAL_NUM_TRANSITIONS = 4;
static const size_t INVALID_STATE_NUMBER = static_cast<size_t>(-1); // std::numeric_limits<size_t>::max();

Expand All @@ -102,9 +107,6 @@ namespace atn {
bool epsilonOnlyTransitions = false;

public:
/// Used to cache lookahead during parsing, not used during construction.
misc::IntervalSet nextTokenWithinRule;

virtual size_t hashCode();
bool operator == (const ATNState &other);

Expand All @@ -117,6 +119,14 @@ namespace atn {
virtual void addTransition(size_t index, Transition *e);
virtual Transition* removeTransition(size_t index);
virtual size_t getStateType() = 0;

private:
/// Used to cache lookahead during parsing, not used during construction.

misc::IntervalSet _nextTokenWithinRule;
std::atomic<bool> _nextTokenUpdated { false };

friend class ATN;
};

} // namespace atn
Expand Down
24 changes: 11 additions & 13 deletions runtime/Cpp/runtime/src/atn/LexerATNSimulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,23 +181,21 @@ size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) {
}

dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) {
if (s->edges.empty()|| /*t < MIN_DFA_EDGE ||*/ t > MAX_DFA_EDGE) { // MIN_DFA_EDGE is 0, hence code gives a warning, if left in.
return nullptr;
}

dfa::DFAState* retval = nullptr;
_edgeLock.readLock();
auto iterator = s->edges.find(t - MIN_DFA_EDGE);
if (t <= MAX_DFA_EDGE) {
auto iterator = s->edges.find(t - MIN_DFA_EDGE);
#if DEBUG_ATN == 1
if (iterator != s->edges.end()) {
std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << iterator->second->stateNumber << std::endl;
}
if (iterator != s->edges.end()) {
std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << iterator->second->stateNumber << std::endl;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh, that's still code from the automatic conversion. We don't need to create std::string here explicitly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this again -- The string conversion is from the existing code. I added the iterator check to avoid dereferencing a bad iterator in the debug code. That's a bugfix (if only in an error message).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My comment wasn't specifically for your patch. More loudly thinking... Would be nice if things like that get fixed while someone comes across them.

}
#endif
_edgeLock.readUnlock();

if (iterator == s->edges.end())
return nullptr;

return iterator->second;
if (iterator != s->edges.end())
retval = iterator->second;
}
_edgeLock.readUnlock();
return retval;
}

dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) {
Expand Down
8 changes: 3 additions & 5 deletions runtime/Cpp/runtime/src/atn/ParserATNSimulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,14 +257,12 @@ size_t ParserATNSimulator::execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream
}

dfa::DFAState *ParserATNSimulator::getExistingTargetState(dfa::DFAState *previousD, size_t t) {
dfa::DFAState* retval;
_edgeLock.readLock();
auto iterator = previousD->edges.find(t);
retval = (iterator == previousD->edges.end()) ? nullptr : iterator->second;
_edgeLock.readUnlock();
if (iterator == previousD->edges.end()) {
return nullptr;
}

return iterator->second;
return retval;
}

dfa::DFAState *ParserATNSimulator::computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) {
Expand Down
2 changes: 0 additions & 2 deletions runtime/Cpp/runtime/src/misc/Interval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

using namespace antlr4::misc;

Interval::~Interval() = default;

size_t antlr4::misc::numericToSymbol(ssize_t v) {
return static_cast<size_t>(v);
}
Expand Down
31 changes: 14 additions & 17 deletions runtime/Cpp/runtime/src/misc/Interval.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,59 +26,56 @@ namespace misc {
ssize_t b;

Interval();
explicit Interval(size_t a_, size_t b_); // For unsigned -> signed mappings.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed that one in my first review. Why did you remove the explicit keyword? I certainly don't want automatic conversion between size_t and ssize_t! Compilers behave differently here. Please put that back.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to put it back. However: I don't understand what you mean about size_t and ssize_t conversions. My understanding of the explicit keyword is that it will prevent:

IntervalSize s = { 1, 2 };

from compiling, and require:

IntervalSize s(1, 2);

instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IntervalSet of course ...

Copy link
Member

@mike-lischke mike-lischke Jul 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explicit keyword marks a c-tor so that no automatic type conversion is applied. So when the compiler looks up a c-tor with e.g. int values that explicit c-tor is not considered a valid match. This way we can ensure that unsigned values only go with the explicit c-tor, while signed ones use the other c-tor (which is important because of the internal handling). I wish we could avoid using negative values. From the pure semantic of that class there cannot be a negative interval, but such values denote invalid intervals and are used when sorting intervals in an interval set (which is why we cannot use something like (size_t)-1).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explicit keyword's type conversion refers to conversion to the target class through a converting constructor, and whether that conversion can be implicit or must be explicit. I'm not sure how you are using this in the code. For example:

explicit IntervalSet(size_t a, size_t b);

will prevent both:

IntervalSet s = { 1, 2 };
IntervalSet s = { -1, -2 }

and will allow both:

IntervalSet s(1, 2);
IntervalSet s(-1, -2);

What is the form you want to disallow?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's surprising. From my tests I found that the explicit constructor will be used for IntervalSet s(1, 2).

Anyway, the idea is simple: use the c-tor for size_t only with unsigned values and the one with ssize_t for signed ones. That means usually that the one for unsigned values is used (since most values are unsigned) and only in specific cases take the signed version.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems we haven't finished this one. Are you sure we don't need the explicit keyword?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't do what you think, re: signed vs. unsigned comparisons, however, adding it back in because it doesn't hurt.

Interval(size_t a_, size_t b_); // For unsigned -> signed mappings.
Interval(ssize_t a_, ssize_t b_);
Interval(Interval const&) = default;
virtual ~Interval();
Interval& operator=(Interval const&) = default;

/// return number of elements between a and b inclusively. x..x is length 1.
/// if b < a, then length is 0. 9..10 has length 2.
virtual size_t length() const;
size_t length() const;

bool operator == (const Interval &other) const;

virtual size_t hashCode() const;
size_t hashCode() const;

/// <summary>
/// Does this start completely before other? Disjoint </summary>
virtual bool startsBeforeDisjoint(const Interval &other) const;
bool startsBeforeDisjoint(const Interval &other) const;

/// <summary>
/// Does this start at or before other? Nondisjoint </summary>
virtual bool startsBeforeNonDisjoint(const Interval &other) const;
bool startsBeforeNonDisjoint(const Interval &other) const;

/// <summary>
/// Does this.a start after other.b? May or may not be disjoint </summary>
virtual bool startsAfter(const Interval &other) const;
bool startsAfter(const Interval &other) const;

/// <summary>
/// Does this start completely after other? Disjoint </summary>
virtual bool startsAfterDisjoint(const Interval &other) const;
bool startsAfterDisjoint(const Interval &other) const;

/// <summary>
/// Does this start after other? NonDisjoint </summary>
virtual bool startsAfterNonDisjoint(const Interval &other) const;
bool startsAfterNonDisjoint(const Interval &other) const;

/// <summary>
/// Are both ranges disjoint? I.e., no overlap? </summary>
virtual bool disjoint(const Interval &other) const;
bool disjoint(const Interval &other) const;

/// <summary>
/// Are two intervals adjacent such as 0..41 and 42..42? </summary>
virtual bool adjacent(const Interval &other) const;
bool adjacent(const Interval &other) const;

virtual bool properlyContains(const Interval &other) const;
bool properlyContains(const Interval &other) const;

/// <summary>
/// Return the interval computed from combining this and other </summary>
virtual Interval Union(const Interval &other) const;
Interval Union(const Interval &other) const;

/// <summary>
/// Return the interval in common between this and o </summary>
virtual Interval intersection(const Interval &other) const;
Interval intersection(const Interval &other) const;

virtual std::string toString() const;
std::string toString() const;

private:
};
Expand Down
81 changes: 15 additions & 66 deletions runtime/Cpp/runtime/src/misc/IntervalSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,54 +13,31 @@
using namespace antlr4;
using namespace antlr4::misc;

IntervalSet const IntervalSet::COMPLETE_CHAR_SET = []() {
IntervalSet complete = IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
complete.setReadOnly(true);
return complete;
}();
IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);

IntervalSet const IntervalSet::EMPTY_SET = []() {
IntervalSet empty;
empty.setReadOnly(true);
return empty;
}();
IntervalSet const IntervalSet::EMPTY_SET;

IntervalSet::IntervalSet() {
InitializeInstanceFields();
IntervalSet::IntervalSet() : _intervals() {
}

IntervalSet::IntervalSet(const std::vector<Interval> &intervals) : IntervalSet() {
_intervals = intervals;
IntervalSet::IntervalSet(const IntervalSet &set) : _intervals(set._intervals) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like the changes to the c-tors. I took care to use delegating c-tors everywhere and want to stay with them, instead of repeating init of internal values over and over again. Please revert.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you insist. However: This a class with a single member that's being constructed empty and then assigned to in a constructor body. I don't see a repeated init here. What am I missing?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, this one is so simple, but I'd like to keep that pattern throughout the runtime.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you pls change this back also? As I said already, I like to stay with delegating c-tors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

}

IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
addAll(set);
IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
}

IntervalSet::IntervalSet(int n, ...) : IntervalSet() {
va_list vlist;
va_start(vlist, n);

for (int i = 0; i < n; i++) {
add(va_arg(vlist, int));
}

va_end(vlist);
IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mentioned a replacement of the variadic variant with template params, but that's not it. So where is the new code? Also using move here is not a good idea. What if you wanna pass in a constant? Better use const& for the parameter (if we stay with this variant).

Actually, the new code changes the semantic significantly. The variadic variant uses single value intervals (in fact it actually uses single elements, which are later converted to single element intervals). Your new code allows for any interval list (a case we already handled in a different c-tor). This change is not improving anything IMO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The varadic code starts on line 42 of IntervalSet.h, but I think you found it from your later comment.

The constructor that takes a vector rvalue reference is a private constructor used internally. If we want to use a constant here it will be local to this class and can be an explicit decision.

Removing type-unsafe va_args code certainly seems like an improvement to me: The old code is likely to fail outright, especially on machines where sizeof(int) != sizeof(ssize_t). The new code is type-safe, provides compile-time diagnostics, and allows single and multi-value intervals. For the same arguments I don't see a semantic change (excluding failures because of va_args usage problems).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I didn't ask for the variadic code, but the templated one (which I don't see in your patch). However, as you say, the new code is a private c-tor, which is not that obvious from the patch. I wonder, though, why you could remove the variadic code. Is it not used anywhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I replaced the varadic code with a templated version that is type safe and provides the same interface. There are two changes that are being mixed here because they're close by in the diff:

  • The va_arg code was replaced with a templated version in the header file. The old va_arg code was removed. The interface is the same and any code using the old va_args based interface will now use the templated version (including type safety).
  • At the same place in the file an internal constructor was placed to move in a vector of intervals.

They are distinct changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My fault. I looked at the wrong places. The templated code is just fine (even though a bit less readable IMO). But type safety is a good thing.

}

IntervalSet::~IntervalSet()
{
IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
_intervals = other._intervals;
return *this;
}

IntervalSet& IntervalSet::operator=(const IntervalSet& other)
{
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}

_intervals.clear();

return addAll(other);
IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
_intervals = move(other._intervals);
return *this;
}

IntervalSet IntervalSet::of(ssize_t a) {
Expand All @@ -72,16 +49,10 @@ IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
}

void IntervalSet::clear() {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
_intervals.clear();
}

void IntervalSet::add(ssize_t el) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}
add(el, el);
}

Expand All @@ -90,10 +61,6 @@ void IntervalSet::add(ssize_t a, ssize_t b) {
}

void IntervalSet::add(const Interval &addition) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}

if (addition.b < addition.a) {
return;
}
Expand Down Expand Up @@ -152,7 +119,7 @@ IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {

IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
// walk set and add each interval
for (auto &interval : set._intervals) {
for (auto const& interval : set._intervals) {
add(interval);
}
return *this;
Expand Down Expand Up @@ -341,7 +308,7 @@ ssize_t IntervalSet::getMinElement() const {
return _intervals[0].a;
}

std::vector<Interval> IntervalSet::getIntervals() const {
std::vector<Interval> const& IntervalSet::getIntervals() const {
return _intervals;
}

Expand Down Expand Up @@ -518,10 +485,6 @@ void IntervalSet::remove(size_t el) {
}

void IntervalSet::remove(ssize_t el) {
if (_readonly) {
throw IllegalStateException("can't alter read only IntervalSet");
}

for (size_t i = 0; i < _intervals.size(); ++i) {
Interval &interval = _intervals[i];
ssize_t a = interval.a;
Expand Down Expand Up @@ -555,17 +518,3 @@ void IntervalSet::remove(ssize_t el) {
}
}
}

bool IntervalSet::isReadOnly() const {
return _readonly;
}

void IntervalSet::setReadOnly(bool readonly) {
if (_readonly && !readonly)
throw IllegalStateException("Can't alter readonly IntervalSet");
_readonly = readonly;
}

void IntervalSet::InitializeInstanceFields() {
_readonly = false;
}
Loading