Skip to content

Commit

Permalink
#5230 - Make LLM mention extraction more robust
Browse files Browse the repository at this point in the history
- Use structured output from the LLM when available
- Use chat-based APIs when talking to the LLM
- Simplify the LLM presets and the interactive LLM sidebar
- Remove the option to configure the response format for the LLM (text vs. JSON); the extraction mode now determines it instead
- Introduce paragraph-level prompt context
- Normalize newlines in prompt context
- URLs for ChatGPT recommenders should no longer include the "v1" part - it is now added internally
- Switch default ChatGPT model to o1-mini
  • Loading branch information
reckart committed Feb 2, 2025
1 parent 102c177 commit f549ac7
Show file tree
Hide file tree
Showing 95 changed files with 1,756 additions and 911 deletions.
1 change: 0 additions & 1 deletion inception/inception-assistant/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@
<dependency>
<groupId>com.knuddels</groupId>
<artifactId>jtokkit</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ public interface AssistantService

List<MTextMessage> getChatMessages(String aSessionOwner, Project aProject);

void processUserMessage(String aSessionOwner, Project aProject,
MTextMessage aMessage);
void processUserMessage(String aSessionOwner, Project aProject, MTextMessage aMessage);

void clearConversation(String aSessionOwner, Project aProject);
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ public EmbeddingService EmbeddingService(AssistantProperties aProperties,
@Bean
public DocumentQueryService documentQueryService(RepositoryProperties aRepositoryProperties,
AssistantDocumentIndexProperties aIndexProperties, SchedulingService aSchedulingService,
OllamaClient aOllamaClient, EmbeddingService aEmbeddingService, DocumentService aDocumentService)
OllamaClient aOllamaClient, EmbeddingService aEmbeddingService,
DocumentService aDocumentService)
{
return new DocumentQueryServiceImpl(aRepositoryProperties, aIndexProperties,
aSchedulingService, aEmbeddingService, aDocumentService);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ public interface AssistantChatProperties
double getTemperature();

int getContextLength();

String getEncoding();
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
public interface AssistantProperties
{
String getUrl();

AssistantChatProperties getChat();

AssistantEmbeddingProperties getEmbedding();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public interface DocumentQueryService
final String FIELD_EMBEDDING = "field";
final String FIELD_RANGE = "range";

// Stored fields
// Stored fields
final String FIELD_SOURCE_DOC_COMPLETE = "sourceDocComplete";
final String FIELD_SOURCE_DOC_ID = "sourceDoc";
final String FIELD_SECTION = "section";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
public interface EmbeddingService
{
int getDimension();

Optional<float[]> embed(String aQuery) throws IOException;

List<Pair<String, float[]>> embed(String... aStrings) throws IOException;

<T> List<Pair<T, float[]>> embed(Function<T, String> aExtractor, Iterable<T> aObjects)
throws IOException;
throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ public sealed interface MChatMessage
* the user
*/
boolean internal();

MPerformanceMetrics performance();
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ public sealed interface MCommandMessage
extends MMessage
permits MRemoveConversationCommand
{

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
@JsonSubTypes.Type(value = MTextMessage.class), //
@JsonSubTypes.Type(value = MRemoveConversationCommand.class) //
})
public sealed interface MMessage permits MChatMessage, MCommandMessage
public sealed interface MMessage
permits MChatMessage, MCommandMessage
{
String TYPE_FIELD = "@type";
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
import com.fasterxml.jackson.databind.annotation.JsonSerialize;

/**
* @param duration time it took to produce the messages in milliseconds
* @param duration
* time it took to produce the messages in milliseconds
*/
@JsonSerialize
public record MPerformanceMetrics(long duration, int tokens) {
Expand All @@ -35,7 +36,7 @@ public MPerformanceMetrics merge(MPerformanceMetrics aPerformance)
if (aPerformance == null) {
return this;
}

return MPerformanceMetrics.builder() //
.withDuration((duration() + aPerformance.duration())) //
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ public record MRemoveConversationCommand()
implements MCommandMessage
{
static final String TYPE_CLEAR_CMD = "clearCmd";

@JsonProperty(MMessage.TYPE_FIELD)
public String getType() {
public String getType()
{
return TYPE_CLEAR_CMD;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.inception.support.extensionpoint.ExtensionPoint;

public interface RetrieverExtensionPoint extends ExtensionPoint<Project, Retriever>
public interface RetrieverExtensionPoint
extends ExtensionPoint<Project, Retriever>
{

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,4 @@

@JsonSerialize
@JsonIgnoreProperties(ignoreUnknown = true)
public record UserGuideIndexMetadata(String version, String model, int dimension)
{
}
public record UserGuideIndexMetadata(String version, String model, int dimension) {}
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"server.address=127.0.0.1", //
"spring.main.banner-mode=off", //
"assistant.enabled=true", //
"websocket.enabled=true"})
"websocket.enabled=true" })
@SpringBootApplication( //
exclude = { //
LiquibaseAutoConfiguration.class, //
Expand All @@ -98,8 +98,7 @@
RepositoryAutoConfiguration.class, //
CasDoctorAutoConfiguration.class, //
DocumentImportExportServiceAutoConfiguration.class, //
DocumentServiceAutoConfiguration.class
})
DocumentServiceAutoConfiguration.class })
@EntityScan({ //
"de.tudarmstadt.ukp.clarin.webanno.model", //
"de.tudarmstadt.ukp.clarin.webanno.security.model" })
Expand Down Expand Up @@ -137,7 +136,7 @@ private void setupOnce() throws IOException
if (project != null) {
return;
}

repositoryProperties.setPath(repositoryDir);
MDC.put(Logging.KEY_REPOSITORY_PATH, repositoryProperties.getPath().toString());

Expand Down Expand Up @@ -202,7 +201,7 @@ void thatMessageSentIsRecieved() throws Exception
.withRole("assistant") //
.withMessage("Test message") //
.build();

try (var client = new WebSocketStompTestClient(USER, PASS)) {
client.expectSuccessfulConnection().connect(websocketUrl);
client.subscribe(broadcastChannel);
Expand Down Expand Up @@ -239,7 +238,7 @@ public DaoAuthenticationProvider authenticationProvider(PasswordEncoder aEncoder
authProvider.setPasswordEncoder(aEncoder);
return authProvider;
}

@Order(100)
@Bean
public SecurityFilterChain wsFilterChain(HttpSecurity aHttp) throws Exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class UserGuideQueryServiceImplTest
private EmbeddingServiceImpl embeddingService;

private static @TempDir Path applicationHome;

@BeforeAll
static void checkIfOllamaIsRunning()
{
Expand All @@ -73,7 +73,8 @@ void setup()
assistantProperties = new AssistantPropertiesImpl(assistantDocumentIndexProperties);
ollamaClient = new OllamaClientImpl();
embeddingService = new EmbeddingServiceImpl(assistantProperties, ollamaClient);
sut = new UserGuideQueryServiceImpl(assistantProperties, schedulingService, embeddingService);
sut = new UserGuideQueryServiceImpl(assistantProperties, schedulingService,
embeddingService);
}

@AfterEach
Expand All @@ -87,8 +88,8 @@ void testSimpleIndexAndQuery() throws Exception
{
try (var iw = sut.getIndexWriter()) {
sut.indexBlocks(iw, //
"Waldi is a dog.", //
"Miau is a cat.", //
"Waldi is a dog.", //
"Miau is a cat.", //
"Tweety is a bird.");
}
sut.markIndexUpToDate();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,14 @@ void setup() throws Exception
cas.setDocumentText("text");

constraints = parse("""
import my.Span as Span;
Span {
X = "A" -> Y = "1";
Z = "B" -> Y = "2";
X = "C" & Z = "D" -> Y = "3";
}
""");
import my.Span as Span;
Span {
X = "A" -> Y = "1";
Z = "B" -> Y = "2";
X = "C" & Z = "D" -> Y = "3";
}
""");
featX = AnnotationFeature.builder() //
.withName(spanFeatX.getName()) //
.build();
Expand Down Expand Up @@ -164,8 +164,7 @@ class IsHiddenConditionalFeatureTest
void setup() throws Exception
{
cas = CasFactory.createText("text");
constraints = parse(
"""
constraints = parse("""
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS as POS;
POS {
Expand Down Expand Up @@ -221,8 +220,7 @@ void conditionDoesNotMatch_restrictionDoesNotMatch() throws Exception
@Test
void testSimpleFeature() throws Exception
{
var constraints = parse(
"""
var constraints = parse("""
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma as Lemma;
Lemma {
Expand All @@ -246,12 +244,12 @@ void testSimplePath() throws Exception
{
var constraints = parse(
"""
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency as DEPENDENCY;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency as DEPENDENCY;
DEPENDENCY {
Governor.pos.PosValue = "NN" & Dependent.pos.PosValue = "DET" -> DependencyType = "det";
}
""");
DEPENDENCY {
Governor.pos.PosValue = "NN" & Dependent.pos.PosValue = "DET" -> DependencyType = "det";
}
""");

var jcas = JCasFactory.createJCas();
jcas.setDocumentText("The sun.");
Expand Down Expand Up @@ -291,8 +289,7 @@ void testSimplePath() throws Exception
@Test
void testTwoConditions() throws Exception
{
var constraints = parse(
"""
var constraints = parse("""
import webanno.custom.Relation as RELATIONS;
RELATIONS {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ public void testSimpleSymbolicRules() throws Exception
{
var constraints = parse(
"""
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS as pos;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma as lemma;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency as DEPENDENCY;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS as pos;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma as lemma;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency as DEPENDENCY;
lemma{
@DEPENDENCY.@pos.@pos.PosValue="pronoun" -> value = "good" (!);
}
""");
lemma{
@DEPENDENCY.@pos.@pos.PosValue="pronoun" -> value = "good" (!);
}
""");

loadConll(jcas.get(), "src/test/resources/text/1.conll");

Expand All @@ -85,15 +85,15 @@ public void testSimpleSymbolicRules2() throws Exception
{
var constraints = parse(
"""
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS as pos;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma as lemma;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency as DEPENDENCY;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS as pos;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma as lemma;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency as DEPENDENCY;
lemma{
@DEPENDENCY.@pos.@pos.text()="a" -> value = "good" (!);
}
""");
lemma{
@DEPENDENCY.@pos.@pos.text()="a" -> value = "good" (!);
}
""");

loadConll(jcas.get(), "src/test/resources/text/1.conll");

Expand All @@ -111,8 +111,7 @@ public void testSimpleSymbolicRules2() throws Exception
@Test
void thatFeatureValueIsValid() throws Exception
{
var constraints = parse(
"""
var constraints = parse("""
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma as Lemma;
Expand Down
16 changes: 16 additions & 0 deletions inception/inception-dependencies/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,16 @@
<artifactId>json-schema-validator</artifactId>
<version>${json-schema-validator.version}</version>
</dependency>
<dependency>
<groupId>com.github.victools</groupId>
<artifactId>jsonschema-generator</artifactId>
<version>${json-schema-generator.version}</version>
</dependency>
<dependency>
<groupId>com.github.victools</groupId>
<artifactId>jsonschema-module-jackson</artifactId>
<version>${json-schema-generator.version}</version>
</dependency>

<!-- SPRING SECURITY -->
<dependency>
Expand Down Expand Up @@ -1249,6 +1259,12 @@
<version>2.2</version>
</dependency>

<dependency>
<groupId>com.knuddels</groupId>
<artifactId>jtokkit</artifactId>
<version>1.1.0</version>
</dependency>

<dependency>
<groupId>net.sourceforge.owlapi</groupId>
<artifactId>owlapi-bom</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ public List<LazyDetailGroup> lookupLazyDetails(IRequestParameters request, VID a
.collect(toList());
}

private List<VLazyDetailGroup> lookLazyDetails(VID aVid, SourceDocument aDocument, User aDataOwner,
StringValue aLayerParam, CAS aCas)
private List<VLazyDetailGroup> lookLazyDetails(VID aVid, SourceDocument aDocument,
User aDataOwner, StringValue aLayerParam, CAS aCas)
throws AnnotationException, IOException
{
if (isSentence(aCas, aVid)) {
Expand Down
Loading

0 comments on commit f549ac7

Please sign in to comment.