-
Notifications
You must be signed in to change notification settings - Fork 2
/
config-others-others.xml
165 lines (146 loc) · 7.61 KB
/
config-others-others.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
<!-- This is the configuration for harvesting all non-CLARIN providers. -->
<config>
<!-- ### configuration settings ### -->
<settings>
  <!-- Directory the harvester uses as its working area. -->
  <workdir>workspace-olac</workdir>

  <!-- Upper bound on attempts for a single record; after that it is given up. -->
  <max-retry-count>2</max-retry-count>

  <!-- Wait between two attempts on the same record, in milliseconds. -->
  <retry-delay>10</retry-delay>

  <!-- Upper bound on harvester threads running at the same time. -->
  <max-jobs>6</max-jobs>

  <!-- How many resources are put into the resource pool. -->
  <resource-pool-size>4</resource-pool-size>

  <!-- Default timeout, in seconds, of one HTTP request (covers both
       connecting and reading). Leaving it out means no timeout at all. -->
  <timeout>60</timeout>

  <!-- File in which harvesting times are logged. -->
  <state-file>state.xml</state-file>

  <!-- Set to true to harvest incrementally. This only takes effect when
       state-file is defined, that file exists, and the provider concerned
       has been harvested before; otherwise the setting is ignored. -->
  <incremental>false</incremental>

  <!-- Harvesting scenario. ListRecords is the name of an OAI-PMH verb;
       NOTE(review): confirm which other values the harvester accepts. -->
  <scenario>ListRecords</scenario>
</settings>
<!-- ### output directories (referenced in the action section) ### -->
<directories>
  <!-- A non-zero 'max-files' causes subdirectories to be created so that
       no single directory ends up with more than that many files.
       Each 'id' below is what the 'dir' attribute of a save action names. -->

  <!-- Disabled: <dir path="oai-rec" id="rec" max-files="0"/> -->
  <dir path="oai-pmh" id="oai" max-files="0"/>
  <dir path="results/cmdi-1_1" id="cmdi-1_1" max-files="0"/>
  <dir path="results/cmdi" id="cmdi-1_2" max-files="0"/>
</directories>
<!-- ### actions to take on metadata formats (in order of preference) ### -->
<actions>
  <!-- CMDI 1.2, matched on namespace: saved to 'oai', then split, stripped
       and saved to 'cmdi-1_2' without any transformation.
       NOTE(review): the CMDI 1.1 namespace value below is a leading
       substring of this one, so this entry presumably has to stay first;
       confirm against the harvester's matching rules. -->
  <format match="namespace" value="http://www.clarin.eu/cmd/1">
    <action type="save" dir="oai" suffix=".xml"/>
    <action type="split"/>
    <!-- Disabled: <action type="save" dir="rec" suffix=".xml"/> -->
    <action type="strip"/>
    <action type="save" dir="cmdi-1_2" suffix=".xml"/>
  </format>

  <!-- CMDI 1.1, matched on namespace: kept as 1.1 in 'cmdi-1_1', then run
       through the 1.1 to 1.2 upgrade stylesheet and saved to 'cmdi-1_2'. -->
  <format match="namespace" value="http://www.clarin.eu/cmd/">
    <action type="save" dir="oai" suffix=".xml"/>
    <action type="split"/>
    <!-- Disabled: <action type="save" dir="rec" suffix=".xml"/> -->
    <action type="strip"/>
    <action type="save" dir="cmdi-1_1" suffix=".xml"/>
    <action type="transform"
            file="https://infra.clarin.eu/CMDI/1.x/upgrade/cmd-record-1_1-to-1_2.xsl"
            cache="cache"/>
    <action type="save" dir="cmdi-1_2" suffix=".xml"/>
  </format>

  <!-- OLAC, matched on metadata prefix: converted with olac2cmdi.xsl, saved
       to 'cmdi-1_1', then upgraded and saved to 'cmdi-1_2'. -->
  <format match="prefix" value="olac">
    <action type="save" dir="oai" suffix=".xml"/>
    <action type="transform" file="resources/addOAISetName.xsl" cache="cache"/>
    <action type="split"/>
    <!-- Disabled: <action type="save" dir="rec" suffix=".xml"/> -->
    <action type="transform"
            file="https://raw.githubusercontent.com/clarin-eric/metadata-conversion/olac-cmdi/olac-cmdi/olac2cmdi.xsl"/>
    <action type="save" dir="cmdi-1_1" suffix=".xml"/>
    <action type="transform"
            file="https://infra.clarin.eu/CMDI/1.x/upgrade/cmd-record-1_1-to-1_2.xsl"
            cache="cache"/>
    <action type="save" dir="cmdi-1_2" suffix=".xml"/>
  </format>

  <!-- DataCite under the 'oai_datacite' prefix: converted with
       datacite_to_cmdi.xsl and saved straight to 'cmdi-1_2'. -->
  <format match="prefix" value="oai_datacite">
    <action type="save" dir="oai" suffix=".xml"/>
    <action type="transform" file="resources/addOAISetName.xsl" cache="cache"/>
    <action type="split"/>
    <action type="strip"/>
    <!-- Disabled: <action type="save" dir="rec" suffix=".xml"/> -->
    <action type="transform"
            file="https://raw.githubusercontent.com/clarin-eric/metadata-conversion/master/datacite-cmdi/datacite_to_cmdi.xsl"/>
    <action type="save" dir="cmdi-1_2" suffix=".xml"/>
  </format>

  <!-- DataCite under the 'datacite' prefix: same action sequence as the
       'oai_datacite' entry. -->
  <format match="prefix" value="datacite">
    <action type="save" dir="oai" suffix=".xml"/>
    <action type="transform" file="resources/addOAISetName.xsl" cache="cache"/>
    <action type="split"/>
    <action type="strip"/>
    <!-- Disabled: <action type="save" dir="rec" suffix=".xml"/> -->
    <action type="transform"
            file="https://raw.githubusercontent.com/clarin-eric/metadata-conversion/master/datacite-cmdi/datacite_to_cmdi.xsl"/>
    <action type="save" dir="cmdi-1_2" suffix=".xml"/>
  </format>

  <!-- Dublin Core, matched on the 'oai_dc' prefix: same action sequence and
       stylesheets as the OLAC entry. -->
  <format match="prefix" value="oai_dc">
    <action type="save" dir="oai" suffix=".xml"/>
    <action type="transform" file="resources/addOAISetName.xsl" cache="cache"/>
    <action type="split"/>
    <!-- Disabled: <action type="save" dir="rec" suffix=".xml"/> -->
    <action type="transform"
            file="https://raw.githubusercontent.com/clarin-eric/metadata-conversion/olac-cmdi/olac-cmdi/olac2cmdi.xsl"/>
    <action type="save" dir="cmdi-1_1" suffix=".xml"/>
    <action type="transform"
            file="https://infra.clarin.eu/CMDI/1.x/upgrade/cmd-record-1_1-to-1_2.xsl"
            cache="cache"/>
    <action type="save" dir="cmdi-1_2" suffix=".xml"/>
  </format>
</actions>
<!-- ### list of providers ### -->
<providers>
  <!-- Entries carrying static="true" point at a single XML document;
       NOTE(review): presumably OAI static repositories, confirm. -->
  <provider url="http://cla.berkeley.edu/olac.xml" static="true"/>
  <provider url="http://www.elra.info/elrac/elra_catalogue.xml"
            static="true"
            name="European Language Resources Association"/>
  <provider url="http://catalog.paradisec.org.au/oai/item"
            name="Pacific And Regional Archive for Digital Sources in Endangered Cultures (PARADISEC)"/>
  <provider url="http://www.language-archives.org/hosted/rosettaproject.org.xml" static="true"/>
  <provider url="https://catalog.ldc.upenn.edu/olac/ldc_catalog.xml"
            static="true"
            name="The LDC Corpus Catalog"/>
  <provider url="http://wals.info/languoid/oai" name="WALS Online"/>
  <provider url="http://wals.info/refdb_oai" name="WALS RefDB"/>
  <provider url="http://redac.univ-tlse2.fr/metadata/oai/oai.pl"
            name="CLLE-ERSS Université de Toulouse-Le Mirail"/>
  <provider url="http://aholab.ehu.es/oai/" name="University of the Basque Country"/>
  <provider url="http://www.e-codices.unifr.ch/oai/oai.php"/>

  <!-- B2SHARE: harvesting is limited to the one set listed. -->
  <provider url="https://b2share.eudat.eu/api/oai2d" name="B2SHARE">
    <set>0afede87-2bf2-4d89-867e-d2ee57251c62</set>
  </provider>

  <provider url="https://europeana-oai.clarin.eu/provider" name="Europeana"/>
  <provider url="https://clarin-belarus.corpus.by/provider" name="corpus.by"/>

  <!-- The LINGUIST List is switched off: its content has to be evaluated
       before the entry is re-enabled.
  <provider url="http://www.language-archives.org/cgi-bin/olaca3.pl" name="The LINGUIST List Language Resources"/>
  -->

  <!-- CHECK: timeouts -->
  <provider url="http://161.116.36.206/~publicacions/oai/oai.pl" name="Universitat de Barcelona"/>

  <!-- CHECK: tech problems -->
  <provider url="http://oai.hab.de/" name="Wolfenbuettel Digital Library"/>

  <provider url="https://oai.datacite.org/oai" name="DataCite">
    <!-- YODA repository at Utrecht University.
         The set spec carries an extra filter expressed as a custom query, see
         https://support.datacite.org/docs/datacite-oai-pmh#section-arbitrary-queries
         The part after '~' is the base64 form of the query:
         base64('subjects.subject:(*6.2* OR *6.5*)') = c3ViamVjdHMuc3ViamVjdDooKjYuMiogT1IgKjYuNSop -->
    <set>DELFT.UU~c3ViamVjdHMuc3ViamVjdDooKjYuMiogT1IgKjYuNSop</set>
    <!-- Swedish Language Bank -->
    <set>SND.SPRKB</set>
  </provider>

  <provider url="https://snd.se/oai-pmh" name="Swedish National Data Service">
    <!-- ISOF, see https://sprakresurser.isof.se/ -->
    <set>principal:isof.se</set>
    <!-- Subject "Language technology" -->
    <set>subject:ssif:10208</set>
  </provider>
</providers>
</config>