diff --git a/src/main/java/org/codelibs/fess/MultiModalConstants.java b/src/main/java/org/codelibs/fess/multimodal/MultiModalConstants.java similarity index 95% rename from src/main/java/org/codelibs/fess/MultiModalConstants.java rename to src/main/java/org/codelibs/fess/multimodal/MultiModalConstants.java index d0ff788..60893ff 100644 --- a/src/main/java/org/codelibs/fess/MultiModalConstants.java +++ b/src/main/java/org/codelibs/fess/multimodal/MultiModalConstants.java @@ -13,7 +13,7 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess; +package org.codelibs.fess.multimodal; public class MultiModalConstants { public static final String X_FESS_EMBEDDING = "X-FESS-Embedding"; diff --git a/src/main/java/org/codelibs/fess/client/CasClient.java b/src/main/java/org/codelibs/fess/multimodal/client/CasClient.java similarity index 98% rename from src/main/java/org/codelibs/fess/client/CasClient.java rename to src/main/java/org/codelibs/fess/multimodal/client/CasClient.java index af3c669..bdfa7f2 100644 --- a/src/main/java/org/codelibs/fess/client/CasClient.java +++ b/src/main/java/org/codelibs/fess/multimodal/client/CasClient.java @@ -13,7 +13,7 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.client; +package org.codelibs.fess.multimodal.client; import java.awt.Image; import java.awt.Rectangle; @@ -39,7 +39,7 @@ import org.codelibs.curl.Curl; import org.codelibs.curl.CurlException; import org.codelibs.curl.CurlResponse; -import org.codelibs.fess.exception.CasAccessException; +import org.codelibs.fess.multimodal.exception.CasAccessException; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; diff --git a/src/main/java/org/codelibs/fess/crawler/extractor/CasExtractor.java b/src/main/java/org/codelibs/fess/multimodal/crawler/extractor/CasExtractor.java similarity index 86% rename from src/main/java/org/codelibs/fess/crawler/extractor/CasExtractor.java rename to src/main/java/org/codelibs/fess/multimodal/crawler/extractor/CasExtractor.java index 0f9c445..d0dcb50 100644 --- a/src/main/java/org/codelibs/fess/crawler/extractor/CasExtractor.java +++ b/src/main/java/org/codelibs/fess/multimodal/crawler/extractor/CasExtractor.java @@ -13,7 +13,7 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.crawler.extractor; +package org.codelibs.fess.multimodal.crawler.extractor; import java.io.InputStream; import java.util.Map; @@ -22,12 +22,12 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.codelibs.fess.MultiModalConstants; -import org.codelibs.fess.client.CasClient; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; -import org.codelibs.fess.ingest.EmbeddingIngester; -import org.codelibs.fess.util.EmbeddingUtil; +import org.codelibs.fess.multimodal.MultiModalConstants; +import org.codelibs.fess.multimodal.client.CasClient; +import org.codelibs.fess.multimodal.ingest.EmbeddingIngester; +import org.codelibs.fess.multimodal.util.EmbeddingUtil; public class CasExtractor extends TikaExtractor { diff --git a/src/main/java/org/codelibs/fess/exception/CasAccessException.java b/src/main/java/org/codelibs/fess/multimodal/exception/CasAccessException.java similarity index 95% rename from src/main/java/org/codelibs/fess/exception/CasAccessException.java rename to src/main/java/org/codelibs/fess/multimodal/exception/CasAccessException.java index 897d33a..89168a8 100644 --- a/src/main/java/org/codelibs/fess/exception/CasAccessException.java +++ b/src/main/java/org/codelibs/fess/multimodal/exception/CasAccessException.java @@ -13,7 +13,7 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.exception; +package org.codelibs.fess.multimodal.exception; import org.codelibs.fess.crawler.exception.CrawlerSystemException; diff --git a/src/main/java/org/codelibs/fess/ingest/EmbeddingIngester.java b/src/main/java/org/codelibs/fess/multimodal/ingest/EmbeddingIngester.java similarity index 91% rename from src/main/java/org/codelibs/fess/ingest/EmbeddingIngester.java rename to src/main/java/org/codelibs/fess/multimodal/ingest/EmbeddingIngester.java index e799531..75c12cb 100644 --- a/src/main/java/org/codelibs/fess/ingest/EmbeddingIngester.java +++ b/src/main/java/org/codelibs/fess/multimodal/ingest/EmbeddingIngester.java @@ -13,7 +13,7 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.ingest; +package org.codelibs.fess.multimodal.ingest; import java.util.Map; @@ -23,9 +23,10 @@ import org.apache.logging.log4j.Logger; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.Constants; -import org.codelibs.fess.MultiModalConstants; +import org.codelibs.fess.ingest.Ingester; +import org.codelibs.fess.multimodal.MultiModalConstants; +import org.codelibs.fess.multimodal.util.EmbeddingUtil; import org.codelibs.fess.util.ComponentUtil; -import org.codelibs.fess.util.EmbeddingUtil; public class EmbeddingIngester extends Ingester { private static final Logger logger = LogManager.getLogger(EmbeddingIngester.class); diff --git a/src/main/java/org/codelibs/fess/util/EmbeddingUtil.java b/src/main/java/org/codelibs/fess/multimodal/util/EmbeddingUtil.java similarity index 97% rename from src/main/java/org/codelibs/fess/util/EmbeddingUtil.java rename to src/main/java/org/codelibs/fess/multimodal/util/EmbeddingUtil.java index f4e1c12..5c23c3f 100644 --- a/src/main/java/org/codelibs/fess/util/EmbeddingUtil.java +++ b/src/main/java/org/codelibs/fess/multimodal/util/EmbeddingUtil.java @@ -13,7 +13,7 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.util; +package org.codelibs.fess.multimodal.util; import java.nio.ByteBuffer; import java.util.Base64; diff --git a/src/main/resources/app++.xml b/src/main/resources/app++.xml index ac3da29..f36e101 100644 --- a/src/main/resources/app++.xml +++ b/src/main/resources/app++.xml @@ -3,6 +3,6 @@ "http://dbflute.org/meta/lastadi10.dtd"> + class="org.codelibs.fess.multimodal.client.CasClient"> diff --git a/src/main/resources/crawler/extractor++.xml b/src/main/resources/crawler/extractor++.xml index 61f0228..b6cd598 100644 --- a/src/main/resources/crawler/extractor++.xml +++ b/src/main/resources/crawler/extractor++.xml @@ -5,7 +5,7 @@ + class="org.codelibs.fess.multimodal.crawler.extractor.CasExtractor"> [ "image/gif", diff --git a/src/main/resources/fess_ingest++.xml b/src/main/resources/fess_ingest++.xml index 34fbca0..8ca0252 100644 --- a/src/main/resources/fess_ingest++.xml +++ b/src/main/resources/fess_ingest++.xml @@ -2,7 +2,7 @@ - + diff --git a/src/test/java/org/codelibs/fess/client/CasClientTest.java b/src/test/java/org/codelibs/fess/multimodal/client/CasClientTest.java similarity index 80% rename from src/test/java/org/codelibs/fess/client/CasClientTest.java rename to src/test/java/org/codelibs/fess/multimodal/client/CasClientTest.java index 742110c..ed8bacd 100644 --- a/src/test/java/org/codelibs/fess/client/CasClientTest.java +++ b/src/test/java/org/codelibs/fess/multimodal/client/CasClientTest.java @@ -13,36 +13,36 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.client; +package org.codelibs.fess.multimodal.client; import java.io.InputStream; import java.util.logging.Logger; import org.codelibs.core.io.ResourceUtil; import org.codelibs.curl.CurlException; -import org.codelibs.fess.crawler.extractor.CasExtractorTest; +import org.codelibs.fess.multimodal.crawler.extractor.CasExtractorTest; import org.dbflute.utflute.core.PlainTestCase; public class CasClientTest extends PlainTestCase { static final Logger logger = Logger.getLogger(CasExtractorTest.class.getName()); public void test_encodeImage() throws Exception { - CasClient client = new CasClient(); + final CasClient client = new CasClient(); client.init(); try (InputStream in = ResourceUtil.getResourceAsStream("images/codelibs_cover.jpeg")) { - String data = client.encodeImage(in); + final String data = client.encodeImage(in); assertEquals(70804, data.length()); // FileUtil.writeBytes("test.png", Base64.getDecoder().decode(data)); } } public void test_getImageEmbedding() throws Exception { - CasClient client = new CasClient(); + final CasClient client = new CasClient(); client.init(); try (InputStream in = ResourceUtil.getResourceAsStream("images/codelibs_cover.jpeg")) { - float[] embedding = client.getImageEmbedding(in); + final float[] embedding = client.getImageEmbedding(in); assertEquals(512, embedding.length); - } catch (CurlException e) { + } catch (final CurlException e) { logger.warning(e.getMessage()); } } diff --git a/src/test/java/org/codelibs/fess/crawler/extractor/CasExtractorTest.java b/src/test/java/org/codelibs/fess/multimodal/crawler/extractor/CasExtractorTest.java similarity index 82% rename from src/test/java/org/codelibs/fess/crawler/extractor/CasExtractorTest.java rename to src/test/java/org/codelibs/fess/multimodal/crawler/extractor/CasExtractorTest.java index fdd2f25..311ef93 100644 --- a/src/test/java/org/codelibs/fess/crawler/extractor/CasExtractorTest.java +++ b/src/test/java/org/codelibs/fess/multimodal/crawler/extractor/CasExtractorTest.java @@ -13,19 +13,19 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.crawler.extractor; +package org.codelibs.fess.multimodal.crawler.extractor; import java.io.InputStream; import java.util.logging.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil; -import org.codelibs.fess.MultiModalConstants; -import org.codelibs.fess.client.CasClient; import org.codelibs.fess.crawler.container.StandardCrawlerContainer; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl; -import org.codelibs.fess.util.EmbeddingUtil; +import org.codelibs.fess.multimodal.MultiModalConstants; +import org.codelibs.fess.multimodal.client.CasClient; +import org.codelibs.fess.multimodal.util.EmbeddingUtil; import org.dbflute.utflute.core.PlainTestCase; /** @@ -41,7 +41,7 @@ public class CasExtractorTest extends PlainTestCase { protected void setUp() throws Exception { super.setUp(); - StandardCrawlerContainer container = new StandardCrawlerContainer(); + final StandardCrawlerContainer container = new StandardCrawlerContainer(); container// .singleton("mimeTypeHelper", MimeTypeHelperImpl.class)// .singleton("casExtractor", CasExtractor.class)// @@ -63,9 +63,9 @@ public void test_getTika() { final String content = extractData.getContent(); CloseableUtil.closeQuietly(in); assertEquals(0, content.length()); - String[] values = extractData.getValues(MultiModalConstants.X_FESS_EMBEDDING); + final String[] values = extractData.getValues(MultiModalConstants.X_FESS_EMBEDDING); assertEquals(1, values.length); - float[] embedding = EmbeddingUtil.decodeFloatArray(values[0]); + final float[] embedding = EmbeddingUtil.decodeFloatArray(values[0]); assertEquals(5, embedding.length); } } diff --git a/src/test/java/org/codelibs/fess/multimodal/ingest/EmbeddingIngesterTest.java b/src/test/java/org/codelibs/fess/multimodal/ingest/EmbeddingIngesterTest.java new file mode 100644 index 0000000..71a78fc --- /dev/null +++ b/src/test/java/org/codelibs/fess/multimodal/ingest/EmbeddingIngesterTest.java @@ -0,0 +1,50 @@ +/* + * Copyright 2012-2024 CodeLibs Project and the Others. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +package org.codelibs.fess.multimodal.ingest; + +import java.util.HashMap; +import java.util.Map; + +import org.dbflute.utflute.core.PlainTestCase; + +public class EmbeddingIngesterTest extends PlainTestCase { + private static final String VECTOR_FIELD = "vector_field"; + + public void test_process() { + final EmbeddingIngester ingester = new EmbeddingIngester(); + ingester.embeddingField = VECTOR_FIELD; + + final Map target = new HashMap<>(); + Map result = ingester.process(target); + assertEquals(0, result.size()); + + target.clear(); + target.put(VECTOR_FIELD, new String[] { "P4AAAEAAAABAQAAA" }); + result = ingester.process(target); + assertEquals(1, result.size()); + final float[] array = (float[]) result.get(VECTOR_FIELD); + assertEquals(3, array.length); + assertEquals(1.0f, array[0]); + assertEquals(2.0f, array[1]); + assertEquals(3.0f, array[2]); + + target.clear(); + target.put(VECTOR_FIELD, "P4AAAEAAAABAQAAA"); + result = ingester.process(target); + assertEquals(1, result.size()); + assertEquals("P4AAAEAAAABAQAAA", result.get(VECTOR_FIELD)); + } +} diff --git a/src/test/java/org/codelibs/fess/util/EmbeddingUtilTest.java b/src/test/java/org/codelibs/fess/multimodal/util/EmbeddingUtilTest.java similarity index 83% rename from src/test/java/org/codelibs/fess/util/EmbeddingUtilTest.java rename to src/test/java/org/codelibs/fess/multimodal/util/EmbeddingUtilTest.java index 7e45d39..1c80846 100644 --- a/src/test/java/org/codelibs/fess/util/EmbeddingUtilTest.java +++ b/src/test/java/org/codelibs/fess/multimodal/util/EmbeddingUtilTest.java @@ -13,20 +13,19 @@ * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ -package org.codelibs.fess.util; +package org.codelibs.fess.multimodal.util; -import org.codelibs.fess.util.EmbeddingUtil; import org.dbflute.utflute.core.PlainTestCase; public class EmbeddingUtilTest extends PlainTestCase { public void test_encodeFloatArray() { - float[] array = new float[] { 1.0f, 2.0f, 3.0f }; + final float[] array = { 1.0f, 2.0f, 3.0f }; assertEquals("P4AAAEAAAABAQAAA", EmbeddingUtil.encodeFloatArray(array)); } public void test_decodeFloatArray() { - float[] array = EmbeddingUtil.decodeFloatArray("P4AAAEAAAABAQAAA"); + final float[] array = EmbeddingUtil.decodeFloatArray("P4AAAEAAAABAQAAA"); assertEquals(3, array.length); assertEquals(1.0f, array[0]); assertEquals(2.0f, array[1]);