Saturday 9 April 2016

ANTLR Dynamic Runtime Tokens and Rules

ANTLR lexer tokens and parser rules are normally hard-coded into the grammar and cannot be modified while the code is executing, but I needed to add lexer rule tokens and enable or disable parser rules at runtime. So here's an example of how you might do that.

First, two classes to hold lexer tokens and the enabled/disabled status of parser rules:

LexerLookup.java

package org.adrianwalker.antlr.dynamicrules;

import static java.lang.String.format;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.logging.Logger;
import static org.adrianwalker.antlr.dynamicrules.DynamicRulesParser.ruleNames;
import org.antlr.v4.runtime.CharStream;

/**
 * Singleton registry of the literal terms that a lexer token id is allowed to match.
 *
 * <p>Terms are registered with {@link #put(int, List)} and tested against the upcoming
 * characters of a {@link CharStream} with {@link #contains(int, CharStream)}.
 *
 * <p>The backing map is a plain {@link HashMap}; no synchronisation is performed here.
 */
public enum LexerLookup {

  INSTANCE;

  private static final Logger LOGGER = Logger.getLogger(LexerLookup.class.getName());

  // Longest terms first, so a longer term is always tried before a shorter one.
  private static final Comparator<String> LONGEST_FIRST
          = Comparator.<String>comparingInt(String::length).reversed();

  private final Map<Integer, Set<String>> tokenIdTermsMap;

  private LexerLookup() {
    tokenIdTermsMap = new HashMap<>();
  }

  /**
   * Replaces the terms registered for a token id.
   *
   * <p>The supplied list is copied and never modified, so fixed-size or immutable lists are
   * accepted. {@code null} and empty terms are dropped, duplicates are collapsed and the
   * remaining terms are stored longest first.
   *
   * @param tokenId the lexer token id the terms belong to
   * @param tokens the terms to register, must not be {@code null}
   * @throws IllegalArgumentException if {@code tokens} is {@code null}
   */
  public void put(final int tokenId, final List<String> tokens) {

    if (null == tokens) {
      throw new IllegalArgumentException("Illegal argument, tokens must be not null");
    }

    // Work on a private copy: the caller's list may be immutable and must not be reordered.
    final List<String> terms = new ArrayList<>(tokens);

    // An empty term would match everywhere and make ahead() seek backwards.
    terms.removeIf(term -> null == term || term.isEmpty());
    terms.sort(LONGEST_FIRST);

    final Set<String> tokenSet = new LinkedHashSet<>(terms);

    LOGGER.info(format("tokens '%s' %s\n", describe(tokenId), tokenSet));

    this.tokenIdTermsMap.put(tokenId, tokenSet);
  }

  /**
   * Tests whether the input starts, at its current position, with one of the terms
   * registered for the token id.
   *
   * <p>On a match the input position is advanced as described in
   * {@link #ahead(String, CharStream)}; without a match the input is left untouched.
   *
   * @param tokenId the lexer token id whose terms are checked
   * @param input the character stream to look ahead in
   * @return {@code true} if a registered term matches, {@code false} if none matches or no
   *     terms are registered for {@code tokenId}
   */
  public boolean contains(final int tokenId, final CharStream input) {

    final Set<String> terms = tokenIdTermsMap.get(tokenId);

    if (null == terms) {
      return false;
    }

    for (final String term : terms) {
      if (ahead(term, input)) {
        LOGGER.info(format("contains '%s' ('%s')\n", term, describe(tokenId)));
        return true;
      }
    }

    return false;
  }

  /**
   * Returns a label for the token id to use in log messages.
   *
   * <p>Falls back to the numeric id when {@code tokenId - 1} is not a valid index into
   * {@code ruleNames}, so that logging can never fail with an index error.
   *
   * <p>NOTE(review): {@code ruleNames} is statically imported from
   * {@code DynamicRulesParser}, so the label is a parser rule name; confirm whether the
   * lexer's names were intended.
   */
  private static String describe(final int tokenId) {

    final int index = tokenId - 1;

    if (index >= 0 && index < ruleNames.length) {
      return ruleNames[index];
    }

    return String.valueOf(tokenId);
  }

  /**
   * Compares {@code word} character by character with the input's lookahead.
   *
   * <p>On a match the input is moved forward by {@code word.length() - 1} characters,
   * leaving the last character of the word unconsumed (presumably for the grammar's
   * trailing {@code .} to consume; confirm against the grammar).
   *
   * @return {@code true} if every character of {@code word} matches the lookahead
   */
  private boolean ahead(final String word, final CharStream input) {

    // Guard against a backwards seek; put() already filters empty terms.
    if (word.isEmpty()) {
      return false;
    }

    for (int i = 0; i < word.length(); i++) {
      // LA is 1-based.
      if (input.LA(i + 1) != word.charAt(i)) {
        return false;
      }
    }

    input.seek(input.index() + word.length() - 1);

    return true;
  }
}

ParserLookup.java

package org.adrianwalker.antlr.dynamicrules;

import static java.lang.String.format;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;

/**
 * Singleton holding an enabled/disabled flag per parser rule id.
 *
 * <p>A rule id that has never been passed to {@link #put(int, boolean)} is reported as enabled.
 */
public enum ParserLookup {

  INSTANCE;

  private static final Logger LOGGER = Logger.getLogger(ParserLookup.class.getName());
  private final Map<Integer, Boolean> ruleIdEnabledMap = new HashMap<>();

  /**
   * Records whether the given parser rule is enabled, replacing any earlier setting.
   *
   * @param ruleId the parser rule id
   * @param enabled {@code true} to enable the rule, {@code false} to disable it
   */
  public void put(final int ruleId, final boolean enabled) {

    final String message = format("ruleId = %s, enabled = %s\n", ruleId, enabled);
    LOGGER.info(message);

    ruleIdEnabledMap.put(ruleId, enabled);
  }

  /**
   * Reports whether the given parser rule is enabled.
   *
   * @param ruleId the parser rule id
   * @return the flag last stored for {@code ruleId}, or {@code true} if none was stored
   */
  public boolean enabled(final int ruleId) {

    final Boolean flag = ruleIdEnabledMap.get(ruleId);

    // No entry means the rule was never configured, which counts as enabled.
    return null == flag || flag;
  }
}
These two classes are used by the grammar to assign values to lexer rules and to enable or disable parser rules like this:

DynamicRules.g4

// Grammar whose WORD token and 'words' rule are gated by semantic predicates that
// consult the LexerLookup and ParserLookup singletons while lexing and parsing.
grammar DynamicRules;

// Makes LexerLookup visible to the generated lexer.
@lexer::header {
  import org.adrianwalker.antlr.dynamicrules.LexerLookup;
}

// Exposes the LexerLookup singleton to lexer predicates as LOOKUP.
@lexer::members {
  public static final LexerLookup LOOKUP = LexerLookup.INSTANCE;
}

// Makes ParserLookup visible to the generated parser.
@parser::header {
  import org.adrianwalker.antlr.dynamicrules.ParserLookup;
}

// Exposes the ParserLookup singleton to parser predicates as LOOKUP.
@parser::members {
  public static final ParserLookup LOOKUP = ParserLookup.INSTANCE;
}

// Parser Rules

// A sentence is 'words' followed by FULL_STOP. The predicate only lets 'words' be
// attempted while LOOKUP.enabled(RULE_words) returns true.
sentence : ({LOOKUP.enabled(RULE_words)}? words) FULL_STOP ;
// Two or more WORD tokens, each separated by WS.
words : WORD (WS WORD)+ ;

// Lexer Rules

// WORD is only viable when LOOKUP.contains(WORD, _input) returns true; the rule
// itself then consumes a single character with '.'.
WORD : {LOOKUP.contains(WORD, _input)}? . ;
FULL_STOP : '.' ;
WS : [ \t\r\n]+ ;
// Matches any single character.
OTHER : . ;

The parser and lexer generated from the ANTLR grammar can be used with the lexer and parser lookup classes to set token values and to enable and disable rules:

SentenceParser.java

package org.adrianwalker.antlr.dynamicrules;

import java.util.List;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;

/**
 * Facade over the generated DynamicRules lexer and parser.
 *
 * <p>Word and rule settings are written to the {@code LexerLookup} and {@code ParserLookup}
 * singletons, so they are shared by every instance of this class.
 */
public final class SentenceParser {

  public SentenceParser() {
  }

  /**
   * Registers the terms accepted for the {@code WORD} token.
   *
   * @param words the terms to register
   */
  public void setWords(final List<String> words) {
    LexerLookup.INSTANCE.put(DynamicRulesLexer.WORD, words);
  }

  /**
   * Enables or disables the {@code words} parser rule.
   *
   * @param enabled {@code true} to enable the rule, {@code false} to disable it
   */
  public void enableWords(final boolean enabled) {
    ParserLookup.INSTANCE.put(DynamicRulesParser.RULE_words, enabled);
  }

  /**
   * Parses the given text with the {@code sentence} rule.
   *
   * @param term the text to parse
   * @return the text of the resulting parse tree and the number of syntax errors reported
   * @throws RecognitionException declared for callers; nothing in this method throws it directly
   */
  public Result parse(final String term) throws RecognitionException {

    final ANTLRInputStream characters = new ANTLRInputStream(term);
    final DynamicRulesLexer lexer = new DynamicRulesLexer(characters);
    final DynamicRulesParser parser = new DynamicRulesParser(new CommonTokenStream(lexer));

    // Run the parse first; the error count is read only after sentence() has returned.
    final String parsedText = parser.sentence().getText();
    final int errorCount = parser.getNumberOfSyntaxErrors();

    return new Result(parsedText, errorCount);
  }

  /**
   * Mutable holder for the outcome of {@link SentenceParser#parse(String)}.
   */
  public static final class Result {

    private String text;
    private int numberOfSyntaxErrors;

    public Result(final String text, final int numberOfSyntaxErrors) {
      this.text = text;
      this.numberOfSyntaxErrors = numberOfSyntaxErrors;
    }

    /** @return the text of the parse tree */
    public String getText() {
      return this.text;
    }

    /** @return the number of syntax errors reported by the parser */
    public int getNumberOfSyntaxErrors() {
      return this.numberOfSyntaxErrors;
    }

    public void setText(final String text) {
      this.text = text;
    }

    public void setNumberOfSyntaxErrors(final int numberOfSyntaxErrors) {
      this.numberOfSyntaxErrors = numberOfSyntaxErrors;
    }
  }
}

Some unit tests for example usage:

SentenceParserTest.java

package org.adrianwalker.antlr.dynamicrules;

import static java.util.Arrays.asList;
import org.adrianwalker.antlr.dynamicrules.SentenceParser.Result;
import org.junit.Assert;
import org.junit.Test;

/**
 * Example usage of {@link SentenceParser}: setting the word list and toggling the
 * {@code words} rule before parsing.
 */
public class SentenceParserTest {

  private static final String SENTENCE = "the cat sat on the mat.";

  /** Returns a fresh array holding every word that occurs in {@link #SENTENCE}. */
  private static String[] allWords() {
    return new String[]{"on", "cat", "mat", "sat", "the"};
  }

  /** Creates a parser, then sets the rule flag and the word list, in that order. */
  private static SentenceParser parserWith(final boolean wordsEnabled, final String... words) {
    final SentenceParser parser = new SentenceParser();
    parser.enableWords(wordsEnabled);
    parser.setWords(asList(words));
    return parser;
  }

  @Test
  public void testValid() {

    final SentenceParser parser = parserWith(true, allWords());

    final Result result = parser.parse(SENTENCE);

    Assert.assertEquals(SENTENCE, result.getText());
    Assert.assertEquals(0, result.getNumberOfSyntaxErrors());
  }

  @Test
  public void testDisabledRule() {

    final SentenceParser parser = parserWith(false, allWords());

    final Result result = parser.parse(SENTENCE);

    Assert.assertEquals(1, result.getNumberOfSyntaxErrors());
  }

  @Test
  public void testInvalidWords() {

    final SentenceParser parser = parserWith(false, allWords());

    final Result result = parser.parse("INVALID");

    Assert.assertEquals(1, result.getNumberOfSyntaxErrors());
  }

  @Test
  public void testUpdateWordsAndDisableRule() {

    // Only "the" is known at first, so the sentence is expected to fail.
    final SentenceParser parser = parserWith(true, "the");
    Assert.assertEquals(1, parser.parse(SENTENCE).getNumberOfSyntaxErrors());

    // With every word registered the same sentence is expected to parse cleanly.
    parser.setWords(asList(allWords()));
    Assert.assertEquals(0, parser.parse(SENTENCE).getNumberOfSyntaxErrors());

    // Disabling the rule is expected to make it fail again.
    parser.enableWords(false);
    Assert.assertEquals(1, parser.parse(SENTENCE).getNumberOfSyntaxErrors());
  }
}

Source Code