Getting started
- Step 1: Get the SEMPRE repository:
git clone https://github.com/percyliang/sempre
- Step 2: Navigate to the SEMPRE repository and follow the steps listed under Easy Setup in
README.md
- Step 3: Follow the steps listed below
You'll need
libsempre/sempre-core.jar
libsempre/sempre-cache.jar
libsempre/sempre-corenlp.jar (only if using CoreNLPAnalyzer)
These are produced by following the steps under Easy Setup in the README.md file of the SEMPRE repository.
You'll also need many of the dependencies under lib.
Make sure the jars are on the classpath when compiling your project. I like using Gradle, so
I just threw all the jars in a directory and included the directory as a dependency for my project:
dependencies {
// Add every file matching '*.jar' under ../lib as an implementation dependency.
implementation fileTree(dir: '../lib', include: '*.jar')
}
Write yourself a parser class
The hot thing to do here would be to force you to dig through a bunch of poorly written explanations to find
the full code listing. It's usually buried in the middle somewhere, disguised to look like another partial chunk
of code. I'm not a fan of that. Here's the full code
listing for a sample Parser class. We'll explain things later:
Parser.java
import edu.stanford.nlp.sempre.*;
import edu.stanford.nlp.sempre.corenlp.CoreNLPAnalyzer;
import fig.basic.Pair;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Programmatic wrapper around SEMPRE that mirrors the command line options of
 * its {@code run} script.
 *
 * <p>Expected call order: {@link #setGrammarPath(String)}, optionally
 * {@link #setExamplePath(String)}, then {@link #initialize()}, optionally
 * {@link #learn()}, and finally any number of {@link #parse(String)} calls.
 */
public class Parser {
  private final Builder builder;
  private final Dataset dataset;
  private final Grammar grammar;
  private final LanguageAnalyzer analyzer;

  /**
   * Creates a parser that uses the given language analyzer.
   *
   * @param analyzer analyzer to install as the {@link LanguageAnalyzer} singleton
   */
  Parser(LanguageAnalyzer analyzer) {
    this.builder = new Builder();
    this.dataset = new Dataset();
    this.grammar = new Grammar();
    this.analyzer = analyzer;

    // Equivalent command line option: -languageAnalyzer corenlp.CoreNLPAnalyzer
    // if `this.analyzer` is `new CoreNLPAnalyzer()`
    LanguageAnalyzer.setSingleton(this.analyzer);
  }

  /** Creates a parser backed by a {@link CoreNLPAnalyzer}. */
  public Parser() {
    this(new CoreNLPAnalyzer());
  }

  /**
   * Reads the grammar file and hands the grammar to the builder.
   * Equivalent command line option: -Grammar.inPaths [grammarPath]
   *
   * @param grammarPath path of the grammar file to read
   */
  public void setGrammarPath(String grammarPath) {
    grammar.read(grammarPath);
    builder.grammar = grammar;
  }

  /**
   * Reads training examples into the dataset under the "train" group.
   * Equivalent command line option: -Dataset.inPaths train:[examplePath]
   *
   * @param examplePath path of the examples file to read
   */
  public void setExamplePath(String examplePath) {
    dataset.readFromPathPairs(Collections.singletonList(new Pair<>("train", examplePath)));
  }

  /** Asks the builder to build everything that has not been set explicitly. */
  public void initialize() {
    builder.buildUnspecified();
  }

  /** Trains on the loaded dataset using the builder's parser and parameters. */
  public void learn() {
    // Equivalent command line option: -FeatureExtractor.featureDomains rule
    FeatureExtractor.Options featureOptions = new FeatureExtractor.Options();
    featureOptions.featureDomains = Collections.singleton("rule");
    FeatureExtractor.opts = featureOptions;
    // NOTE(review): this extractor is never used afterwards in this class; kept
    // because its constructor's side effects are not visible here -- confirm
    // before removing.
    FeatureExtractor f = new FeatureExtractor(builder.executor);

    // Equivalent command line option: -Learner.maxTrainIters 3
    Learner.opts.maxTrainIters = 3;

    Learner learner = new Learner(builder.parser, builder.params, dataset);
    learner.learn();
  }

  /**
   * Parses a single utterance with SEMPRE.
   *
   * @param query utterance to parse
   * @return a response holding the parsed example, with the first predicted
   *     derivation (index 0) selected
   */
  public Response parse(String query) {
    Example.Builder b = new Example.Builder();
    b.setId("session:1");
    b.setUtterance(query);
    Example ex = b.createExample();

    Response response = new Response(builder);

    ex.preprocess();

    // Parse!
    builder.parser.parse(builder.params, ex, false);

    response.ex = ex;
    response.candidateIndex = 0;
    return response;
  }
}
If you're using the sample class above, you'll also want to copy the Response class from within edu.stanford.nlp.sempre.Master. It's reproduced here:
Response.java
import edu.stanford.nlp.sempre.Builder;
import edu.stanford.nlp.sempre.Derivation;
import edu.stanford.nlp.sempre.Example;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
// Copied from edu.stanford.nlp.sempre.Master
/**
 * Holds the example produced by a parse together with the index of the
 * predicted derivation currently selected for display.
 */
public class Response {
  // Example that was parsed, if any.
  public Example ex;

  // Supplies the executor used to evaluate the selected derivation.
  private final Builder builder;

  // Which derivation we're selecting to show (-1 means none selected)
  int candidateIndex = -1;

  // Detailed information
  public Map<String, Object> stats = new LinkedHashMap<>();
  public List<String> lines = new ArrayList<>();

  /**
   * @param b builder whose executor is used by {@link #getAnswer()}
   */
  public Response(Builder b) {
    this.builder = b;
  }

  /**
   * Describes the selected derivation as {@code formula => value}.
   *
   * <p>Unlike {@link #getAnswer()}, this method does not execute the
   * derivation first.
   *
   * @return "(no answer)" when there are no predicted derivations,
   *     "(not selected)" when no candidate is selected, otherwise the formula
   *     and value of the selected derivation
   */
  public String getFormulaAnswer() {
    if (ex.getPredDerivations().isEmpty()) {
      return "(no answer)";
    }
    if (candidateIndex == -1) {
      return "(not selected)";
    }
    Derivation deriv = getDerivation();
    return deriv.getFormula() + " => " + deriv.getValue();
  }

  /**
   * Executes the selected derivation if necessary and returns its value as text.
   *
   * @return "(no answer)" when there are no predicted derivations,
   *     "(not selected)" when no candidate is selected, otherwise the string
   *     form of the selected derivation's value
   */
  public String getAnswer() {
    if (ex.getPredDerivations().isEmpty()) {
      return "(no answer)";
    }
    if (candidateIndex == -1) {
      return "(not selected)";
    }
    Derivation deriv = getDerivation();
    deriv.ensureExecuted(builder.executor, ex.context);
    return deriv.getValue().toString();
  }

  /** @return the detail lines collected for this response */
  public List<String> getLines() {
    return lines;
  }

  /** @return the example that was parsed, or {@code null} if none was set */
  public Example getExample() {
    return ex;
  }

  /** @return the index of the selected derivation, or -1 if none is selected */
  public int getCandidateIndex() {
    return candidateIndex;
  }

  /** @return the predicted derivation at the selected candidate index */
  public Derivation getDerivation() {
    return ex.getPredDerivations().get(candidateIndex);
  }
}
Usage
ParserTest.java
import edu.stanford.nlp.sempre.SimpleAnalyzer;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/** End-to-end usage example for the sample {@code Parser} class. */
public class ParserTest {
  @Test
  public void testParser() {
    // We can use SimpleAnalyzer instead of CoreNLPAnalyzer (default when you run
    // the `run` script in SEMPRE is SimpleAnalyzer; default for the sample class above
    // is CoreNLPAnalyzer)
    Parser parser = new Parser(new SimpleAnalyzer());

    // Load grammar
    parser.setGrammarPath("arithmetic-tutorial.grammar");

    // Load training examples
    parser.setExamplePath("arithmetic-tutorial.examples");

    // Must call initialize before learning or parsing
    parser.initialize();

    // Learn from training examples
    parser.learn();

    // Unambiguous query (two plus four means 2 + 4, which is 6, and we expect only 1 prediction)
    Response unambiguous = parser.parse("two plus four");
    assertEquals("(number 6)", unambiguous.getAnswer());
    assertEquals(1, unambiguous.getExample().getPredDerivations().size());

    // Ambiguous parse (two and five could mean 2 + 5 or 2 * 5, so we expect 2 predictions)
    // Uses its own local: declaring a second `resp` in this scope does not compile.
    Response ambiguous = parser.parse("two and five");
    assertEquals(2, ambiguous.getExample().getPredDerivations().size());
  }
}