-
Notifications
You must be signed in to change notification settings - Fork 7
Solr Schema Configuration
willprice76 edited this page Dec 19, 2014
·
9 revisions
SI4T's schema.xml must have the following minimal structure:
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
SI4T minimum schema example.
-->
<schema name="staging" version="1.1">
<!--
  Minimum field type definitions.
  Each fieldType binds a type name to a Solr field class; the name is what the
  "type" attribute of the field and dynamicField declarations refers to.
-->
<types>
  <!--
    Sink type: values are neither indexed nor stored.
    multiValued="true" allows a document to carry several values for a field
    of this type (for example repeated metadata entries) instead of the whole
    update being rejected for a field that is thrown away anyway.
  -->
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>

  <!-- Verbatim (untokenized) string. -->
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

  <!-- Trie-encoded integer and date types. -->
  <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
  <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

  <!-- Tokenized full-text type with separate index-time and query-time analysis chains. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <!-- Index time: tokenize, drop stop words listed in stopwords.txt, lowercase. -->
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <!-- Query time: same chain, with synonyms from synonyms.txt applied before lowercasing. -->
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- Trie-encoded long; this is the type the _version_ field is declared with. -->
  <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
</types>
<fields>
  <!-- Fields SI4T requires in every schema -->
  <field name="id"      type="string" indexed="true" stored="true" multiValued="false" required="true"/>
  <field name="url"     type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="pubdate" type="tdate"  indexed="true" stored="true" multiValued="false"/>
  <!-- End of SI4T required fields -->

  <!-- Fields populated by the standard SI4T TBB configuration -->
  <field name="title"         type="string"       indexed="true" stored="true" multiValued="false"/>
  <field name="publicationid" type="tint"         indexed="true" stored="true" multiValued="false"/>
  <field name="schemaid"      type="tint"         indexed="true" stored="true" multiValued="false"/>
  <field name="itemtype"      type="tint"         indexed="true" stored="true" multiValued="false"/>
  <field name="parentsgid"    type="tint"         indexed="true" stored="true" multiValued="false"/>
  <field name="sgid"          type="tint"         indexed="true" stored="true" multiValued="true"/>
  <field name="type"          type="tint"         indexed="true" stored="true" multiValued="false"/>
  <field name="body"          type="text_general" indexed="true" stored="true" multiValued="true"/>
  <field name="_version_"     type="long"         indexed="true" stored="true"/>

  <!--
    Declare any additional custom fields here.
    Example: if the SI4T TBB emits a custom field 'customerid' like this:
      <custom><customerid>200</customerid></custom>
    then the schema needs a matching declaration:
      <field name="customerid" type="tint" indexed="true" stored="true" multiValued="false" />
  -->

  <!-- Catch-all field; searches that do not name a field are run against it -->
  <field name="searchall" type="text_general" indexed="true" stored="true" multiValued="true"/>

  <!--
    Binary extraction example. Two files have to be configured:

    1. solrconfig.xml:
       map the extracted binary content to a binary field and, for instance,
       map the title extracted from the binary to a BinaryEmbeddedTitle field.
       For example:

       <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler">
         <lst name="defaults">
           <str name="uprefix">binary_</str>
           <str name="fmap.content">binary_content</str>
           <str name="fmap.title">BinaryEmbeddedTitle</str>
         </lst>
       </requestHandler>

    2. schema.xml:
       declare a field for every target field name mapped in step 1.
  -->

  <!-- Fields that hold the output of binary extraction -->
  <field name="BinaryEmbeddedTitle" type="string"       indexed="true" stored="true" multiValued="true"/>
  <field name="binary_content"      type="text_general" indexed="true" stored="true" multiValued="false"/>
  <field name="fileType"            type="string"       indexed="true" stored="true" multiValued="false"/>
  <field name="fileSize"            type="string"       indexed="true" stored="true" multiValued="false"/>

  <!--
    Catch-all patterns: a field name with no explicit declaration above maps to
    the "ignored" type. This covers every binary_ prefixed field except
    binary_content, which is declared explicitly.
  -->
  <dynamicField name="*"        type="ignored"/>
  <dynamicField name="binary_*" type="ignored"/>
</fields>
<!-- Copy every source field into "searchall" so that default (unqualified) searches work. -->
<copyField source="*" dest="searchall"/>

<!-- Field whose value determines, and enforces, document uniqueness. -->
<uniqueKey>id</uniqueKey>

<!-- Field the QueryParser uses when a query gives no explicit field name. -->
<defaultSearchField>searchall</defaultSearchField>

<!-- SolrQueryParser configuration; defaultOperator is either "AND" or "OR". -->
<solrQueryParser defaultOperator="OR"/>
</schema>