API: move endpoint for Solr schema output to blocked area #976
pdurbin committed Mar 26, 2015
1 parent a31b592 commit d7b09ba
Showing 4 changed files with 103 additions and 117 deletions.
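
This commit retires the public /api/config/solr/schema endpoint (Config.java is deleted) and re-homes the schema-fragment generator under /api/admin/index/solr/schema, in the admin area the commit title describes as blocked. As an illustration only, not part of the commit (the class name is made up), the sketch below fetches the relocated endpoint with plain java.net.HttpURLConnection, assuming a Dataverse instance on localhost:8080 and that the admin API is reachable from wherever the snippet runs:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class FetchSolrSchemaFragment {

    public static void main(String[] args) throws Exception {
        // The generator now lives under /api/admin; adjust host and port
        // for your own deployment. The old /api/config/solr/schema path
        // no longer exists once Config.java is deleted.
        URL url = new URL("http://localhost:8080/api/admin/index/solr/schema");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                // Prints <field/> lines, then a "---" separator, then <copyField/> lines.
                System.out.println(line);
            }
        } finally {
            conn.disconnect();
        }
    }
}

The output is the same generated fragment that the updated comments in conf/solr/4.6.0/schema.xml now point to.
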
conf/solr/4.6.0/schema.xml: 4 changes (2 additions, 2 deletions)
@@ -324,7 +324,7 @@
<field name="dsPersistentId" type="text_en" multiValued="false" stored="true" indexed="true"/>
<copyField source="dsPersistentId" dest="text" maxChars="3000"/>

<!-- Added for Dataverse 4.0 alpha 1 from output of http://localhost:8080/api/config/solr/schema -->
<!-- Dynamic Dataverse fields from http://localhost:8080/api/admin/index/solr/schema -->
<field name="ARCS1" type="text_en" multiValued="false" stored="true" indexed="true"/>
<field name="ARCS2" type="text_en" multiValued="false" stored="true" indexed="true"/>
<field name="ARCS3" type="text_en" multiValued="false" stored="true" indexed="true"/>
@@ -593,7 +593,7 @@
<!-- <copyField source="*_ss" dest="text" maxChars="3000"/> -->
<!-- <copyField source="*_i" dest="text" maxChars="3000"/> -->

<!-- Added for Dataverse 4.0 alpha 1 from output of http://localhost:8080/api/config/solr/schema -->
<!-- Dataverse copyField from http://localhost:8080/api/admin/index/solr/schema -->
<copyField source="ARCS1" dest="text" maxChars="3000"/>
<copyField source="ARCS2" dest="text" maxChars="3000"/>
<copyField source="ARCS3" dest="text" maxChars="3000"/>
src/main/java/edu/harvard/iq/dataverse/api/Config.java: 113 changes (0 additions, 113 deletions)

This file was deleted.

src/main/java/edu/harvard/iq/dataverse/api/Index.java: 99 changes (99 additions, 0 deletions)
@@ -3,6 +3,8 @@
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFileServiceBean;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseServiceBean;
@@ -11,6 +13,7 @@
import edu.harvard.iq.dataverse.IndexServiceBean;
import edu.harvard.iq.dataverse.RoleAssignment;
import edu.harvard.iq.dataverse.SearchServiceBean;
import edu.harvard.iq.dataverse.SolrField;
import edu.harvard.iq.dataverse.SolrQueryResponse;
import edu.harvard.iq.dataverse.SolrSearchResult;
import edu.harvard.iq.dataverse.authorization.users.User;
@@ -22,10 +25,12 @@
import edu.harvard.iq.dataverse.search.SearchFields;
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.search.SortBy;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.json.Json;
@@ -44,6 +49,8 @@
@Path("admin/index")
public class Index extends AbstractApiBean {

private static final Logger logger = Logger.getLogger(Index.class.getCanonicalName());

@EJB
IndexServiceBean indexService;
@EJB
@@ -62,6 +69,8 @@ public class Index extends AbstractApiBean {
SolrIndexServiceBean SolrIndexService;
@EJB
SearchServiceBean searchService;
@EJB
DatasetFieldServiceBean datasetFieldService;

@GET
public Response indexAllOrSubset(@QueryParam("numPartitions") Long numPartitionsSelected, @QueryParam("partitionIdToProcess") Long partitionIdToProcess, @QueryParam("previewOnly") boolean previewOnly) {
@@ -367,6 +376,96 @@ private JsonObjectBuilder getPermissionsInSolrButNotDatabase() {
.add("dvobjects", stalePermissionList.build().size());
}

/**
* We use the output of this method to generate our Solr schema.xml
*
* @todo Someday we do want to have this return a Response rather than a
* String per https://github.com/IQSS/dataverse/issues/298 but not yet while
* we are trying to ship Dataverse 4.0.
*/
@GET
@Path("solr/schema")
public String getSolrSchema() {

StringBuilder sb = new StringBuilder();

for (DatasetFieldType datasetField : datasetFieldService.findAllOrderedByName()) {
String nameSearchable = datasetField.getSolrField().getNameSearchable();
SolrField.SolrType solrType = datasetField.getSolrField().getSolrType();
String type = solrType.getType();
if (solrType.equals(SolrField.SolrType.EMAIL)) {
/**
* @todo should we also remove all "email" field types (e.g.
* datasetContact) from schema.xml? We are explicitly not
* indexing them for
* https://github.com/IQSS/dataverse/issues/759
*
* "The list of potential collaborators should be searchable"
* according to https://github.com/IQSS/dataverse/issues/747 but
* it's not clear yet if this means a Solr or database search.
* For now we'll keep schema.xml as it is to avoid people having
* to update it. If anything, we can remove the email field type
* when we do a big schema.xml update for
* https://github.com/IQSS/dataverse/issues/754
*/
logger.info("email type detected (" + nameSearchable + ") See also https://github.com/IQSS/dataverse/issues/759");
}
String multivalued = datasetField.getSolrField().isAllowedToBeMultivalued().toString();
// <field name="datasetId" type="text_general" multiValued="false" stored="true" indexed="true"/>
sb.append(" <field name=\"" + nameSearchable + "\" type=\"" + type + "\" multiValued=\"" + multivalued + "\" stored=\"true\" indexed=\"true\"/>\n");
}

List<String> listOfStaticFields = new ArrayList();
Object searchFieldsObject = new SearchFields();
Field[] staticSearchFields = searchFieldsObject.getClass().getDeclaredFields();
for (Field fieldObject : staticSearchFields) {
String name = fieldObject.getName();
String staticSearchField = null;
try {
staticSearchField = (String) fieldObject.get(searchFieldsObject);
} catch (IllegalArgumentException ex) {
} catch (IllegalAccessException ex) {
}

/**
* @todo: if you search for "pdf" should you get all pdfs? do we
* need a copyField source="filetypemime_s" to the catchall?
*/
if (listOfStaticFields.contains(staticSearchField)) {
return error("static search field defined twice: " + staticSearchField);
}
listOfStaticFields.add(staticSearchField);
}

sb.append("---\n");

for (DatasetFieldType datasetField : datasetFieldService.findAllOrderedByName()) {
String nameSearchable = datasetField.getSolrField().getNameSearchable();
String nameFacetable = datasetField.getSolrField().getNameFacetable();

if (listOfStaticFields.contains(nameSearchable)) {
if (nameSearchable.equals(SearchFields.DATASET_DESCRIPTION)) {
// Skip, expected conflict.
} else {
return error("searchable dataset metadata field conflict detected with static field: " + nameSearchable);
}
}

if (listOfStaticFields.contains(nameFacetable)) {
if (nameFacetable.equals(SearchFields.SUBJECT)) {
// Skip, expected conflict.
} else {
return error("facetable dataset metadata field conflict detected with static field: " + nameFacetable);
}
}

// <copyField source="*_i" dest="text" maxChars="3000"/>
sb.append(" <copyField source=\"" + nameSearchable + "\" dest=\"text\" maxChars=\"3000\"/>\n");
}

return sb.toString();
}

/**
* This method is for integration tests of search.
*/
src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java: 4 changes (2 additions, 2 deletions)
@@ -7,8 +7,8 @@
* Note that there are many fields in Solr that are *not* here because their
* values come from the database. For example "authorName" comes from the
* database. We update the Solr schema.xml file by merging the output of `curl
* http://localhost:8080/api/config/solr/schema` into the file in the source
* tree when a metadata block update warrants it.
* http://localhost:8080/api/admin/index/solr/schema` into the file in the
* source tree when a metadata block update warrants it.
*
* Generally speaking, we want the search fields to be readable. This is a
* challenge for long field names but a power user should be able to type
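
The javadoc above keeps describing the same maintenance workflow, now with the admin URL: when a metadata block changes, fetch the generated fragment and merge it by hand into schema.xml. Purely as an illustration (nothing like this ships with the commit; the class name and fragment.txt are made up), a sketch of splitting that fragment at the "---" separator getSolrSchema() emits, so each half can be pasted next to the existing <field/> and <copyField/> entries:

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class SplitSchemaFragment {

    public static void main(String[] args) throws Exception {
        // Assumes the endpoint output was saved locally first, e.g. with
        // curl http://localhost:8080/api/admin/index/solr/schema > fragment.txt
        String fragment = new String(Files.readAllBytes(Paths.get("fragment.txt")), StandardCharsets.UTF_8);

        // getSolrSchema() writes the <field/> lines, then "---" on its own line,
        // then the <copyField/> lines; split on that separator.
        String[] halves = fragment.split("\n---\n", 2);

        System.out.println("== merge among the <field/> entries in schema.xml ==");
        System.out.println(halves[0]);
        System.out.println("== merge among the <copyField/> entries ==");
        System.out.println(halves.length > 1 ? halves[1] : "");
    }
}
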
