API: move endpoint for Solr schema output to blocked area #976
pdurbin committed Mar 26, 2015
1 parent a31b592 commit d7b09ba
Showing 4 changed files with 103 additions and 117 deletions.
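
This commit retires the public /api/config/solr/schema endpoint (Config.java is deleted) and re-homes the schema-fragment generator under /api/admin/index/solr/schema, in the admin area the commit title describes as blocked. As an illustration only, not part of the commit (the class name is made up), the sketch below fetches the relocated endpoint with plain java.net.HttpURLConnection, assuming a Dataverse instance on localhost:8080 and that the admin API is reachable from wherever the snippet runs:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class FetchSolrSchemaFragment {

    public static void main(String[] args) throws Exception {
        // The generator now lives under /api/admin; adjust host and port
        // for your own deployment. The old /api/config/solr/schema path
        // no longer exists once Config.java is deleted.
        URL url = new URL("http://localhost:8080/api/admin/index/solr/schema");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                // Prints <field/> lines, then a "---" separator, then <copyField/> lines.
                System.out.println(line);
            }
        } finally {
            conn.disconnect();
        }
    }
}

The output is the same generated fragment that the updated comments in conf/solr/4.6.0/schema.xml now point to.
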
conf/solr/4.6.0/schema.xml: 4 changes (2 additions, 2 deletions)
@@ -324,7 +324,7 @@
<field name="dsPersistentId" type="text_en" multiValued="false" stored="true" indexed="true"/>
<copyField source="dsPersistentId" dest="text" maxChars="3000"/>

<!-- Added for Dataverse 4.0 alpha 1 from output of http://localhost:8080/api/config/solr/schema -->
<!-- Dynamic Dataverse fields from http://localhost:8080/api/admin/index/solr/schema -->
<field name="ARCS1" type="text_en" multiValued="false" stored="true" indexed="true"/>
<field name="ARCS2" type="text_en" multiValued="false" stored="true" indexed="true"/>
<field name="ARCS3" type="text_en" multiValued="false" stored="true" indexed="true"/>
@@ -593,7 +593,7 @@
<!-- <copyField source="*_ss" dest="text" maxChars="3000"/> -->
<!-- <copyField source="*_i" dest="text" maxChars="3000"/> -->

<!-- Added for Dataverse 4.0 alpha 1 from output of http://localhost:8080/api/config/solr/schema -->
<!-- Dataverse copyField from http://localhost:8080/api/admin/index/solr/schema -->
<copyField source="ARCS1" dest="text" maxChars="3000"/>
<copyField source="ARCS2" dest="text" maxChars="3000"/>
<copyField source="ARCS3" dest="text" maxChars="3000"/>
src/main/java/edu/harvard/iq/dataverse/api/Config.java: 113 changes (0 additions, 113 deletions)

This file was deleted.

src/main/java/edu/harvard/iq/dataverse/api/Index.java: 99 changes (99 additions, 0 deletions)
@@ -3,6 +3,8 @@
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFileServiceBean;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseServiceBean;
@@ -11,6 +13,7 @@
import edu.harvard.iq.dataverse.IndexServiceBean;
import edu.harvard.iq.dataverse.RoleAssignment;
import edu.harvard.iq.dataverse.SearchServiceBean;
import edu.harvard.iq.dataverse.SolrField;
import edu.harvard.iq.dataverse.SolrQueryResponse;
import edu.harvard.iq.dataverse.SolrSearchResult;
import edu.harvard.iq.dataverse.authorization.users.User;
@@ -22,10 +25,12 @@
import edu.harvard.iq.dataverse.search.SearchFields;
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.search.SortBy;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.json.Json;
@@ -44,6 +49,8 @@
@Path("admin/index")
public class Index extends AbstractApiBean {

private static final Logger logger = Logger.getLogger(Index.class.getCanonicalName());

@EJB
IndexServiceBean indexService;
@EJB
@@ -62,6 +69,8 @@ public class Index extends AbstractApiBean {
SolrIndexServiceBean SolrIndexService;
@EJB
SearchServiceBean searchService;
@EJB
DatasetFieldServiceBean datasetFieldService;

@GET
public Response indexAllOrSubset(@QueryParam("numPartitions") Long numPartitionsSelected, @QueryParam("partitionIdToProcess") Long partitionIdToProcess, @QueryParam("previewOnly") boolean previewOnly) {
@@ -367,6 +376,96 @@ private JsonObjectBuilder getPermissionsInSolrButNotDatabase() {
.add("dvobjects", stalePermissionList.build().size());
}

/**
* We use the output of this method to generate our Solr schema.xml
*
* @todo Someday we do want to have this return a Response rather than a
* String per https://github.com/IQSS/dataverse/issues/298 but not yet while
* we are trying to ship Dataverse 4.0.
*/
@GET
@Path("solr/schema")
public String getSolrSchema() {

StringBuilder sb = new StringBuilder();

for (DatasetFieldType datasetField : datasetFieldService.findAllOrderedByName()) {
String nameSearchable = datasetField.getSolrField().getNameSearchable();
SolrField.SolrType solrType = datasetField.getSolrField().getSolrType();
String type = solrType.getType();
if (solrType.equals(SolrField.SolrType.EMAIL)) {
/**
* @todo should we also remove all "email" field types (e.g.
* datasetContact) from schema.xml? We are explicitly not
* indexing them for
* https://github.com/IQSS/dataverse/issues/759
*
* "The list of potential collaborators should be searchable"
* according to https://github.com/IQSS/dataverse/issues/747 but
* it's not clear yet if this means a Solr or database search.
* For now we'll keep schema.xml as it is to avoid people having
* to update it. If anything, we can remove the email field type
* when we do a big schema.xml update for
* https://github.com/IQSS/dataverse/issues/754
*/
logger.info("email type detected (" + nameSearchable + ") See also https://github.com/IQSS/dataverse/issues/759");
}
String multivalued = datasetField.getSolrField().isAllowedToBeMultivalued().toString();
// <field name="datasetId" type="text_general" multiValued="false" stored="true" indexed="true"/>
sb.append(" <field name=\"" + nameSearchable + "\" type=\"" + type + "\" multiValued=\"" + multivalued + "\" stored=\"true\" indexed=\"true\"/>\n");
}

List<String> listOfStaticFields = new ArrayList();
Object searchFieldsObject = new SearchFields();
Field[] staticSearchFields = searchFieldsObject.getClass().getDeclaredFields();
for (Field fieldObject : staticSearchFields) {
String name = fieldObject.getName();
String staticSearchField = null;
try {
staticSearchField = (String) fieldObject.get(searchFieldsObject);
} catch (IllegalArgumentException ex) {
} catch (IllegalAccessException ex) {
}

/**
* @todo: if you search for "pdf" should you get all pdfs? do we
* need a copyField source="filetypemime_s" to the catchall?
*/
if (listOfStaticFields.contains(staticSearchField)) {
return error("static search field defined twice: " + staticSearchField);
}
listOfStaticFields.add(staticSearchField);
}

sb.append("---\n");

for (DatasetFieldType datasetField : datasetFieldService.findAllOrderedByName()) {
String nameSearchable = datasetField.getSolrField().getNameSearchable();
String nameFacetable = datasetField.getSolrField().getNameFacetable();

if (listOfStaticFields.contains(nameSearchable)) {
if (nameSearchable.equals(SearchFields.DATASET_DESCRIPTION)) {
// Skip, expected conflict.
} else {
return error("searchable dataset metadata field conflict detected with static field: " + nameSearchable);
}
}

if (listOfStaticFields.contains(nameFacetable)) {
if (nameFacetable.equals(SearchFields.SUBJECT)) {
// Skip, expected conflict.
} else {
return error("facetable dataset metadata field conflict detected with static field: " + nameFacetable);
}
}

// <copyField source="*_i" dest="text" maxChars="3000"/>
sb.append(" <copyField source=\"" + nameSearchable + "\" dest=\"text\" maxChars=\"3000\"/>\n");
}

return sb.toString();
}

/**
* This method is for integration tests of search.
*/
src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java: 4 changes (2 additions, 2 deletions)
@@ -7,8 +7,8 @@
* Note that there are many fields in Solr that are *not* here because their
* values come from the database. For example "authorName" comes from the
* database. We update the Solr schema.xml file by merging the output of `curl
* http://localhost:8080/api/config/solr/schema` into the file in the source
* tree when a metadata block update warrants it.
* http://localhost:8080/api/admin/index/solr/schema` into the file in the
* source tree when a metadata block update warrants it.
*
* Generally speaking, we want the search fields to be readable. This is a
* challenge for long field names but a power user should be able to type
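
The javadoc above keeps describing the same maintenance workflow, now with the admin URL: when a metadata block changes, fetch the generated fragment and merge it by hand into schema.xml. Purely as an illustration (nothing like this ships with the commit; the class name and fragment.txt are made up), a sketch of splitting that fragment at the "---" separator getSolrSchema() emits, so each half can be pasted next to the existing <field/> and <copyField/> entries:

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class SplitSchemaFragment {

    public static void main(String[] args) throws Exception {
        // Assumes the endpoint output was saved locally first, e.g. with
        // curl http://localhost:8080/api/admin/index/solr/schema > fragment.txt
        String fragment = new String(Files.readAllBytes(Paths.get("fragment.txt")), StandardCharsets.UTF_8);

        // getSolrSchema() writes the <field/> lines, then "---" on its own line,
        // then the <copyField/> lines; split on that separator.
        String[] halves = fragment.split("\n---\n", 2);

        System.out.println("== merge among the <field/> entries in schema.xml ==");
        System.out.println(halves[0]);
        System.out.println("== merge among the <copyField/> entries ==");
        System.out.println(halves.length > 1 ? halves[1] : "");
    }
}
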
