Getting started
- Step 1: Get the SEMPRE repository: `git clone https://github.com/percyliang/sempre`
- Step 2: Navigate to the SEMPRE repository and follow the steps listed under Easy Setup in `README.md`
- Step 3: Follow the steps listed below
You'll need
- `libsempre/sempre-core.jar`
- `libsempre/sempre-cache.jar`
- `libsempre/sempre-corenlp.jar` (if using `CoreNLPAnalyzer`)
You'll also need many of the dependencies under lib
Make sure the jars are included somehow when compiling your project. I like using gradle, so I just threw all the jars in a directory and included the directory as a dependency for my project:
// Add every jar found in the lib directory one level up as an implementation dependency.
dependencies {
    implementation(fileTree(dir: '../lib', include: ['*.jar']))
}
Write yourself a parser class
The hot thing to do here would be to force you to dig through a bunch of poorly written explanations to find the full code listing. It's usually buried in the middle somewhere, disguised to look like another partial chunk of code. I'm not a fan of that. Here's the full code listing for a sample Parser class. We'll explain things later:
Parser.java
import edu.stanford.nlp.sempre.*;
import edu.stanford.nlp.sempre.corenlp.CoreNLPAnalyzer;
import fig.basic.Pair;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Small programmatic wrapper around SEMPRE that replaces the command line options
 * normally passed to the {@code run} script with plain method calls.
 *
 * <p>Expected call order: {@link #setGrammarPath(String)}, optionally
 * {@link #setExamplePath(String)}, then {@link #initialize()}, then optionally
 * {@link #learn()}, and finally any number of {@link #parse(String)} calls.
 */
public class Parser {
    private final Builder builder;
    private final Dataset dataset;
    private final Grammar grammar;
    private final LanguageAnalyzer analyzer;

    /**
     * Creates a parser that uses the given language analyzer.
     *
     * <p>The analyzer is also installed as the {@code LanguageAnalyzer} singleton.
     *
     * @param analyzer the analyzer to install
     */
    Parser(LanguageAnalyzer analyzer) {
        this.builder = new Builder();
        this.dataset = new Dataset();
        this.grammar = new Grammar();
        this.analyzer = analyzer;

        // Equivalent command line option: -languageAnalyzer corenlp.CoreNLPAnalyzer
        // if `this.analyzer` is `new CoreNLPAnalyzer()`
        LanguageAnalyzer.setSingleton(this.analyzer);
    }

    /** Creates a parser backed by a {@code CoreNLPAnalyzer}. */
    public Parser() {
        this(new CoreNLPAnalyzer());
    }

    /**
     * Reads the grammar at the given path and hands it to the builder.
     *
     * <p>Equivalent command line option: {@code -Grammar.inPaths [grammarPath]}
     *
     * @param grammarPath path of the grammar file to read
     */
    public void setGrammarPath(String grammarPath) {
        grammar.read(grammarPath);
        builder.grammar = grammar;
    }

    /**
     * Reads the examples at the given path into the dataset under the group name "train".
     *
     * <p>Equivalent command line option: {@code -Dataset.inPaths train:[examplePath]}
     *
     * @param examplePath path of the examples file to read
     */
    public void setExamplePath(String examplePath) {
        dataset.readFromPathPairs(Collections.singletonList(new Pair<>("train", examplePath)));
    }

    /**
     * Asks the builder to construct every component that has not been set explicitly.
     * Call this before {@link #learn()} or {@link #parse(String)}.
     */
    public void initialize() {
        builder.buildUnspecified();
    }

    /** Runs the SEMPRE learner over the loaded dataset with the "rule" feature domain. */
    public void learn() {
        // Equivalent command line option: -FeatureExtractor.featureDomains rule
        FeatureExtractor.Options o = new FeatureExtractor.Options();
        o.featureDomains = Collections.singleton("rule");
        FeatureExtractor.opts = o;
        // NOTE(review): this instance is never used afterwards; it is kept only because
        // the constructor is not visible here — confirm it has no side effects, then remove.
        FeatureExtractor f = new FeatureExtractor(builder.executor);

        // Equivalent command line option: -Learner.maxTrainIters 3
        Learner.opts.maxTrainIters = 3;
        Learner learner = new Learner(builder.parser, builder.params, dataset);
        learner.learn();
    }

    /**
     * Parses a single utterance with SEMPRE.
     *
     * @param query the utterance to parse
     * @return a response holding the parsed example, with the first candidate selected
     */
    public Response parse(String query) {
        Example.Builder b = new Example.Builder();
        b.setId("session:1");
        b.setUtterance(query);
        Example ex = b.createExample();

        Response response = new Response(builder);

        ex.preprocess();

        // Parse!
        builder.parser.parse(builder.params, ex, false);

        response.ex = ex;
        // Select the first candidate; Response reports "(no answer)" when there are none.
        response.candidateIndex = 0;
        return response;
    }
}
If you're using the sample class above, you'll also want to copy the Response class
from within edu.stanford.nlp.sempre.Master. It's reproduced here:
Response.java
import edu.stanford.nlp.sempre.Builder;
import edu.stanford.nlp.sempre.Derivation;
import edu.stanford.nlp.sempre.Example;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
// Copied from edu.stanford.nlp.sempre.Master
/**
 * Result of parsing one utterance: the parsed example, the index of the selected
 * derivation, and free-form detail fields.
 */
public class Response {
    // Example that was parsed, if any.
    public Example ex;

    // Supplies the executor used when a derivation has to be executed in getAnswer().
    private final Builder builder;

    // Which derivation we're selecting to show; -1 means none has been selected.
    int candidateIndex = -1;

    // Detailed information
    public Map<String, Object> stats = new LinkedHashMap<>();
    public List<String> lines = new ArrayList<>();

    /**
     * Creates an empty response.
     *
     * @param b builder whose executor is used by {@link #getAnswer()}
     */
    public Response(Builder b) {
        this.builder = b;
    }

    /**
     * Describes the selected derivation as {@code formula => value}.
     *
     * @return "(no answer)" when the example has no predicted derivations,
     *         "(not selected)" when no candidate is selected, otherwise the
     *         formula and value of the selected derivation
     */
    public String getFormulaAnswer() {
        if (ex.getPredDerivations().size() == 0) {
            return "(no answer)";
        } else if (candidateIndex == -1) {
            return "(not selected)";
        } else {
            Derivation deriv = getDerivation();
            return deriv.getFormula() + " => " + deriv.getValue();
        }
    }

    /**
     * Returns the value of the selected derivation as a string, executing the
     * derivation first if needed.
     *
     * @return "(no answer)" when the example has no predicted derivations,
     *         "(not selected)" when no candidate is selected, otherwise the
     *         string form of the selected derivation's value
     */
    public String getAnswer() {
        if (ex.getPredDerivations().size() == 0) {
            return "(no answer)";
        } else if (candidateIndex == -1) {
            return "(not selected)";
        } else {
            Derivation deriv = getDerivation();
            deriv.ensureExecuted(builder.executor, ex.context);
            return deriv.getValue().toString();
        }
    }

    /** @return the detail lines attached to this response */
    public List<String> getLines() { return lines; }

    /** @return the example that was parsed, if any */
    public Example getExample() { return ex; }

    /** @return the index of the selected derivation, or -1 when none is selected */
    public int getCandidateIndex() { return candidateIndex; }

    /** @return the predicted derivation at the selected candidate index */
    public Derivation getDerivation() {
        return ex.getPredDerivations().get(candidateIndex);
    }
}
Usage
ParserTest.java
import edu.stanford.nlp.sempre.SimpleAnalyzer;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/** End-to-end usage example for the sample Parser class. */
public class ParserTest {
    @Test
    public void testParser() {
        // We can use SimpleAnalyzer instead of CoreNLPAnalyzer (default when you run
        // the `run` script in SEMPRE is SimpleAnalyzer; default for the sample class above
        // is CoreNLPAnalyzer)
        Parser parser = new Parser(new SimpleAnalyzer());

        // Load grammar
        parser.setGrammarPath("arithmetic-tutorial.grammar");

        // Load training examples
        parser.setExamplePath("arithmetic-tutorial.examples");

        // Must call initialize before learning or parsing
        parser.initialize();

        // Learn from training examples
        parser.learn();

        // Unambiguous query (two plus four means 2 + 4, which is 6, and we expect only 1 prediction)
        Response unambiguous = parser.parse("two plus four");
        assertEquals("(number 6)", unambiguous.getAnswer());
        assertEquals(1, unambiguous.ex.getPredDerivations().size());

        // Ambiguous parse (two and five could mean 2 + 5 or 2 * 5, so we expect 2 predictions)
        // A separate local is required: redeclaring the first one in this scope does not compile.
        Response ambiguous = parser.parse("two and five");
        assertEquals(2, ambiguous.ex.getPredDerivations().size());
    }
}