-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathharvester-config-dariah.xml
111 lines (92 loc) · 4.3 KB
/
harvester-config-dariah.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
<?xml version="1.0" encoding="utf-8"?>
<config>
<!--
This is one of the configurations for harvesting community providers
for the DASISH Joint Metadata Domain (T5.4). It covers only the
endpoints belonging to DARIAH members. The file is read by OAI Harvest
Manager; see https://github.com/TheLanguageArchive/oai-harvest-manager
for details.
-->
<!-- ### configuration settings ### -->
<settings>
  <!-- Directory the harvester uses as its working directory. -->
  <workdir>/home/work/jmd/data/01-harvested</workdir>
  <!-- How many times a single record is attempted before the
       harvester gives up on it. -->
  <max-retry-count>2</max-retry-count>
  <!-- Pause between two attempts on the same record, in milliseconds. -->
  <retry-delay>10000</retry-delay>
  <!-- Upper bound on the number of harvester threads running
       concurrently. -->
  <max-jobs>8</max-jobs>
  <!-- How many resources are placed in the resource pool. -->
  <resource-pool-size>4</resource-pool-size>
  <!-- Default timeout, in seconds, for a single HTTP request; it applies
       to both connecting and reading. Omitting it means no timeout at
       all (the request may wait indefinitely). -->
  <timeout>60</timeout>
</settings>
<!-- ### output directories (referenced in the action section) ### -->
<directories>
  <!-- Each 'id' is the name that a save action refers to through its
       'dir' attribute. A non-zero 'max-files' caps the number of files
       per directory: subdirectories are created so that no directory
       exceeds that count. -->
  <dir id="cmdi" path="dariah/results/cmdi" max-files="1000"/>
  <dir id="ese" path="dariah/results/ese" max-files="1000"/>
  <dir id="dc" path="dariah/results/dc" max-files="1000"/>
  <dir id="ddi122" path="dariah/results/ddi-1.2.2" max-files="1000"/>
  <dir id="ddi25" path="dariah/results/ddi-2.5" max-files="1000"/>
  <dir id="ddi31" path="dariah/results/ddi-3.1" max-files="1000"/>
</directories>
<!-- ### actions to take on metadata formats (in order of preference) ### -->
<actions>
  <!-- The <format> entries are listed in order of preference; do not
       reorder them without intending to change that preference. -->
  <!-- Matched by namespace; saved to the 'cmdi' directory. -->
  <format match="namespace" value="http://www.clarin.eu/cmd/">
    <action type="strip"/>
    <action type="save" dir="cmdi" suffix=".xml"/>
  </format>
  <!-- The DDI versions below are matched on their schema address
       rather than on namespace: namespace usage appears to vary
       between them, so the schema address is the more reliable
       discriminator. -->
  <format match="schema" value="http://www.icpsr.umich.edu/DDI/Version1-2-2.xsd">
    <action type="strip"/>
    <action type="save" dir="ddi122" suffix=".xml"/>
  </format>
  <format match="schema" value="http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd">
    <action type="strip"/>
    <action type="save" dir="ddi25" suffix=".xml"/>
  </format>
  <format match="schema" value="http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd">
    <action type="strip"/>
    <action type="save" dir="ddi31" suffix=".xml"/>
  </format>
  <!-- The remaining formats are matched by metadata prefix. -->
  <format match="prefix" value="oai_europeana">
    <action type="strip"/>
    <action type="save" dir="ese" suffix=".xml"/>
  </format>
  <format match="prefix" value="oai_dc">
    <action type="strip"/>
    <action type="save" dir="dc" suffix=".xml"/>
  </format>
</actions>
<!-- ### list of providers ### -->
<providers>
  <!-- Endpoints to harvest. Entries that are currently not harvestable
       are kept as comments, each holding the complete element, so that
       one can be re-enabled by removing its comment markers and the
       "Currently not harvestable:" label. -->
  <provider url="http://gams.uni-graz.at/oaiprovider/"/>
  <provider url="http://demo2.dariah.eu/colreg/OAIHandler"/>
  <provider url="http://acdh.oeaw.ac.at/acdh-repo/oai"/>
  <provider url="http://fedora.phaidra.univie.ac.at/oaiprovider/"/>
  <!-- Currently not harvestable: <provider url="http://arachne.uni-koeln.de/OAI-PMH/oai-pmh.xml"/> -->
  <provider url="http://demo.imeji.org/fledgeddata/oai/"/>
  <provider url="http://pubman.mpdl.mpg.de/escidoc-oaiprovider/"/>
  <provider url="http://zuse.zib.de/fledgeddata/oai/"/>
  <provider url="http://edoc.bbaw.de/oai2/oai2.php"/>
  <provider url="http://archaeologydataservice.ac.uk/oai/archives/OAIHandler"/>
  <provider url="http://staroai.theses.fr/OAIHandler"/>
  <provider url="http://www.calames.abes.fr/oai/oai2.aspx"/>
  <!-- Currently not harvestable: <provider url="http://oai.openedition.org"/> -->
  <provider url="http://sldr.org/oai-pmh.php"/>
  <!-- Currently not harvestable: <provider url="http://www.etnoinfolab.org/oai/oai.php"/> -->
  <provider url="http://www.arzenal.si/oai/arzenal"/>
  <provider url="http://isn3.zrc-sazu.si/etnofolk/OAI-2.0/oai.php"/>
  <provider url="http://www.sistory.si/oai.php"/>
</providers>
</config>