Getting started
- Step 1: Get the SEMPRE repository:
git clone https://github.com/percyliang/sempre
- Step 2: Navigate to the SEMPRE repository and follow the steps listed under Easy Setup in
README.md
- Step 3: Follow the steps listed below
You'll need
libsempre/sempre-core.jar
libsempre/sempre-cache.jar
libsempre/sempre-corenlp.jar
if using CoreNLPAnalyzer
You'll also need many of the dependencies under lib
Make sure the jars are included somehow when compiling your project. I like using gradle, so I just threw all the jars in a directory and included the directory as a dependency for my project:
// Add every jar found in ../lib (the directory holding the SEMPRE jars and
// their dependencies) as a dependency of this project.
dependencies {
implementation fileTree(dir: '../lib', include: '*.jar')
}
Write yourself a parser class
The hot thing to do here would be to force you to dig through a bunch of poorly written explanations to find the full code listing. It's usually buried in the middle somewhere, disguised to look like another partial chunk of code. I'm not a fan of that. Here's the full code listing for a sample Parser class. We'll explain things later:
Parser.java
import edu.stanford.nlp.sempre.*; import edu.stanford.nlp.sempre.corenlp.CoreNLPAnalyzer; import fig.basic.Pair; import java.util.*; import java.util.stream.Collectors; public class Parser { private Builder builder; private Dataset dataset; private Grammar grammar; private LanguageAnalyzer analyzer; Parser(LanguageAnalyzer analyzer) { this.builder = new Builder(); this.dataset = new Dataset(); this.grammar = new Grammar(); this.analyzer = analyzer; // Equivalent command line option: -languageAnalyzer corenlp.CoreNLPAnalyzer // if `this.analyzer` is `new CoreNLPAnalyzer()` LanguageAnalyzer.setSingleton(this.analyzer); this.repository = repository; } public Parser() { this(new CoreNLPAnalyzer()); } // Equivalent command line option: -Grammar.inPaths [grammarPath] public void setGrammarPath(String grammarPath) { grammar.read(grammarPath); builder.grammar = grammar; } // Equivalent command line option: -Dataset.inPaths train:[examplePath] public void setExamplePath(String examplePath) { dataset.readFromPathPairs(Collections.singletonList(new Pair<>("train", examplePath))); } public void initialize() { builder.buildUnspecified(); } public void learn() { // Equivalent command line option: -FeatureExtractor.featureDomains rule FeatureExtractor.Options o = new FeatureExtractor.Options(); o.featureDomains = Collections.singleton("rule"); FeatureExtractor.opts = o; FeatureExtractor f = new FeatureExtractor(builder.executor); // Equivalent command line option: -Learner.maxTrainIters 3 Learner.opts.maxTrainIters = 3; Learner learner = new Learner(builder.parser, builder.params, dataset); learner.learn(); } // Parse with SEMPRE public Response parse(String query) { Example.Builder b = new Example.Builder(); b.setId("session:1"); b.setUtterance(query); Example ex = b.createExample(); Response response = new Response(builder); ex.preprocess(); // Parse! builder.parser.parse(builder.params, ex, false); response.ex = ex; response.candidateIndex = 0; return response; } }
If you're using the sample class above, you'll also want to copy the Response class from within edu.stanford.nlp.sempre.Master. It's reproduced here:
Response.java
import edu.stanford.nlp.sempre.Builder; import edu.stanford.nlp.sempre.Derivation; import edu.stanford.nlp.sempre.Example; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; // Copied from edu.stanford.nlp.sempre.Master public class Response { // Example that was parsed, if any. public Example ex; private Builder builder; // Which derivation we're selecting to show int candidateIndex = -1; // Detailed information public Mapstats = new LinkedHashMap<>(); public List lines = new ArrayList<>(); public String getFormulaAnswer() { if (ex.getPredDerivations().size() == 0) return "(no answer)"; else if (candidateIndex == -1) return "(not selected)"; else { Derivation deriv = getDerivation(); return deriv.getFormula() + " => " + deriv.getValue(); } } public String getAnswer() { if (ex.getPredDerivations().size() == 0) return "(no answer)"; else if (candidateIndex == -1) return "(not selected)"; else { Derivation deriv = getDerivation(); deriv.ensureExecuted(builder.executor, ex.context); return deriv.getValue().toString(); } } public List getLines() { return lines; } public Example getExample() { return ex; } public int getCandidateIndex() { return candidateIndex; } public Derivation getDerivation() { return ex.getPredDerivations().get(candidateIndex); } public Response(Builder b) { this.builder = b; } }
Usage
ParserTest.java
import edu.stanford.nlp.sempre.SimpleAnalyzer; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; public class ParserTest { @Test public void testParser() { // We can use SimpleAnalyzer instead of CoreNLPAnalyzer (default when you run // the `run` script in SEMPRE is SimpleAnalyzer; default for the sample class above // is CoreNLPAnalyzer) Parser parser = new Parser(new SimpleAnalyzer()); // Load grammar parser.setGrammarPath("arithmetic-tutorial.grammar"); // Load training examples parser.setExamplePath("arithmetic-tutorial.examples"); // Must call initialize before learning or parsing parser.initialize(); // Learn from training examples parser.learn(); // Unambiguous query (two plus four means 2 + 4, which is 6, and we expect only 1 prediction) Response resp = parser.parse("two plus four"); assertEquals("(number 6)", resp.getAnswer()); assertEquals(1, resp.ex.getPredDerivations().size()); // Ambiguous parse (two and five could mean 2 + 5 or 2 * 5, so we expect 2 predictions) Response resp = parser.parse("two and five"); assertEquals(2, resp.ex.getPredDerivations().size()); } }