Skip to content

Commit

Permalink
#891 use better field analyzer for shelfmark
Browse files Browse the repository at this point in the history
  • Loading branch information
xhero committed Jun 28, 2021
1 parent 75873b9 commit 572aed4
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 52 deletions.
21 changes: 1 addition & 20 deletions solr-configuration/muscat/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -189,30 +189,11 @@
</analyzer>
</fieldtype>

<!-- Sort field type built on solr.TextField: the value is kept as one token
     and normalized by the filters below, which run in the order listed. -->
<fieldType name="text_alphanumeric_sort" class="solr.TextField" sortMissingLast="false" omitNorms="true">
<analyzer>
<!-- Keep the entire input string as a single token -->
<tokenizer class="solr.KeywordTokenizerFactory"/>
<!-- Lowercase the token so that sorting is case insensitive -->
<filter class="solr.LowerCaseFilterFactory"/>
<!-- Remove leading and trailing whitespace -->
<filter class="solr.TrimFilterFactory"/>
<!-- Remove a leading article (a, the, les, la, le, l', de la, du, des) -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="^(a |the |les |la |le |l'|de la |du |des )" replacement="" replace="all"
/>
<!-- Left-pad every run of digits with five zeroes -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="(\d+)" replacement="00000$1" replace="all"
/>
<!-- Strip leading zeroes again while keeping at least six digits per number -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="0*([0-9]{6,})" replacement="$1" replace="all"
/>
<!-- Remove every character that is not a lowercase letter or a digit -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z0-9])" replacement="" replace="all"
/>
</analyzer>
</fieldType>
<!-- Sort field type backed by solr.ICUCollationField (no analyzer chain).
     NOTE(review): numeric="true" presumably orders digit runs by numeric value,
     strength="secondary" presumably ignores case differences, and locale=""
     presumably selects the root locale; confirm against the Solr documentation.
     Presumably needs the ICU jars that solrconfig.xml loads from
     contrib/analysis-extras; verify on deployment. -->
<fieldType name="text_alphanumeric_sort" class="solr.ICUCollationField" locale="" numeric="true" strength="secondary" sortMissingLast="true" />
<!-- END ADDED BY RZ FOR MUSCAT -->
</types>
<!-- The "id" field is the unique key -->
<uniqueKey>id</uniqueKey>

<!-- Copy every field matching *_text or *_s into the textSpell field -->
<copyField source="*_text" dest="textSpell"/>
<copyField source="*_s" dest="textSpell"/>
</schema>

2 changes: 0 additions & 2 deletions solr-configuration/muscat/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
<!-- Load the ICU4J jar(s) from the analysis-extras contrib lib directory -->
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" regex="icu4j-\d.*\.jar"/>
<!-- Load the Lucene ICU analyzers jar(s) from analysis-extras/lucene-libs -->
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs"
regex="lucene-analyzers-icu-\d.*\.jar"/>
<!-- Extra lib directory given by relative path, with no regex filter.
     NOTE(review): presumably held project-specific jars; confirm nothing still depends on it. -->
<lib dir="../../jar"/>

<!-- Index data directory, taken from the solr.data.dir property -->
<dataDir>${solr.data.dir:}</dataDir>

Expand Down Expand Up @@ -249,4 +248,3 @@
</lst>
</requestHandler>
</config>

31 changes: 1 addition & 30 deletions solr/configsets/sunspot/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -112,36 +112,7 @@
</analyzer>
</fieldtype>

<!-- Sort field type built on solr.TextField: the value is kept as one token
     and normalized by the filters below, which run in the order listed. -->
<fieldType name="text_alphanumeric_sort" class="solr.TextField" sortMissingLast="false" omitNorms="true">
<analyzer>
<!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token
-->
<tokenizer class="solr.KeywordTokenizerFactory"/>
<!-- The LowerCase TokenFilter lowercases the token, which is useful
when you want your sorting to be case insensitive
-->
<filter class="solr.LowerCaseFilterFactory" />
<!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" />
<!-- Remove a leading article (a, the, les, la, le, l', de la, du, des) -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="^(a |the |les |la |le |l'|de la |du |des )" replacement="" replace="all"
/>
<!-- Left-pad every run of digits with five zeroes -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="(\d+)" replacement="00000$1" replace="all"
/>
<!-- Left-trim zeroes so that each number keeps at least six digits -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="0*([0-9]{6,})" replacement="$1" replace="all"
/>
<!-- Remove every character that is not a lowercase letter or a digit -->
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z0-9])" replacement="" replace="all"
/>
</analyzer>
</fieldType>
<!-- Sort field type backed by solr.ICUCollationField (no analyzer chain).
     NOTE(review): numeric="true" presumably orders digit runs by numeric value,
     strength="secondary" presumably ignores case differences, and locale=""
     presumably selects the root locale; confirm against the Solr documentation.
     Presumably needs the ICU collation jars on the Solr classpath; verify on deployment. -->
<fieldType name="text_alphanumeric_sort" class="solr.ICUCollationField" locale="" numeric="true" strength="secondary" sortMissingLast="true" />
<!-- END ADDED BY RZ FOR MUSCAT -->

</types>
Expand Down

0 comments on commit 572aed4

Please sign in to comment.