-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
a further mess: weird db issues with cmhf - test a couple of different methods of db population; serialization changes and updates.
- Loading branch information
Showing
5 changed files
with
122 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
declare namespace oai = "http://www.openarchives.org/OAI/2.0/"; | ||
|
||
(: Retrieves metadata records for an entire OAI-PMH collection :) | ||
(: Adds records to BaseX database:) | ||
|
||
(: Fetches the first page of an OAI-PMH request built from $base-url, $verb and $set-spec. | ||
   When that page carries a non-empty oai:resumptionToken, the follow-up pages | ||
   obtained through local:resume are appended to the result. :) | ||
declare function local:request($base-url as xs:string, $verb as xs:string, $set-spec as xs:string) as document-node()* | ||
{ | ||
  let $first-page := fn:doc(fn:concat($base-url, $verb, $set-spec)) | ||
  let $next-token := $first-page//oai:resumptionToken/text() | ||
  return ( | ||
    $first-page, | ||
    (: no token text means there is nothing further to fetch :) | ||
    if (fn:exists($next-token)) then local:resume($base-url, $next-token) else () | ||
  ) | ||
}; | ||
|
||
(: Fetches the ListRecords page identified by $token from $base-url and keeps | ||
   following oai:resumptionToken values until a page arrives without token text. | ||
   Returns the fetched pages in retrieval order. :) | ||
declare function local:resume($base-url as xs:string, $token as xs:string) as document-node()* | ||
{ | ||
  (: an ampersand inside an XQuery string literal has to be written as an entity reference :) | ||
  let $verb := "?verb=ListRecords&amp;resumptionToken=" | ||
  (: the token is an opaque server value and may contain URI-reserved characters, | ||
     so it is percent-encoded before being placed in the query string :) | ||
  let $request := $base-url || $verb || fn:encode-for-uri($token) | ||
  let $response := fn:doc($request) | ||
  let $new-token := $response//oai:resumptionToken/text() | ||
  return ( | ||
    $response, | ||
    if (fn:exists($new-token)) then local:resume($base-url, $new-token) else () | ||
  ) | ||
}; | ||
|
||
(: Harvests the 'musicaudio' set in oai_qdc format and adds every oai:record to the | ||
   'cmhf-with-db-add' database, using the record's OAI identifier as its path. :) | ||
let $base-url := "http://digi.countrymusichalloffame.org/oai/oai.php" | ||
(: ampersands inside XQuery string literals have to be written as entity references :) | ||
let $verb := "?verb=ListRecords&amp;metadataPrefix=oai_qdc" | ||
let $set-spec := "&amp;set=musicaudio" | ||
let $records := local:request($base-url, $verb, $set-spec)//oai:record | ||
return ( | ||
  ( | ||
    for $record in $records | ||
    let $id := $record/oai:header/oai:identifier/text() | ||
    return | ||
      (: db:replace('cmhf', $record, $id, map { 'addcache': true() }) :) | ||
      db:add('cmhf-with-db-add', $record, $id, map { 'addcache': true() }) | ||
  ), | ||
  (: optimize once for the whole harvest rather than once per record; | ||
     skipped when nothing was harvested, as before :) | ||
  if (fn:exists($records)) then | ||
    db:optimize('cmhf-with-db-add', true(), map { 'textindex': true(), 'attrindex': true(), 'tokenindex': true(), 'ftindex': true() }) | ||
  else () | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
declare namespace oai = "http://www.openarchives.org/OAI/2.0/"; | ||
|
||
(: Retrieves metadata records for an entire OAI-PMH collection :) | ||
(: Adds records to BaseX database:) | ||
|
||
(: Issues the initial OAI-PMH request ($base-url followed by $verb and $set-spec). | ||
   If the response contains resumption-token text, the remaining pages are | ||
   collected via local:resume and returned after the first page. :) | ||
declare function local:request($base-url as xs:string, $verb as xs:string, $set-spec as xs:string) as document-node()* | ||
{ | ||
  let $first-page := fn:doc(fn:concat($base-url, $verb, $set-spec)) | ||
  let $next-token := $first-page//oai:resumptionToken/text() | ||
  return ( | ||
    $first-page, | ||
    (: an empty token sequence ends the harvest after this page :) | ||
    if (fn:exists($next-token)) then local:resume($base-url, $next-token) else () | ||
  ) | ||
}; | ||
|
||
(: Requests the ListRecords page for $token from $base-url, then recurses on each | ||
   new oai:resumptionToken until a response has no token text. | ||
   Returns all fetched pages in the order they were retrieved. :) | ||
declare function local:resume($base-url as xs:string, $token as xs:string) as document-node()* | ||
{ | ||
  (: an ampersand inside an XQuery string literal has to be written as an entity reference :) | ||
  let $verb := "?verb=ListRecords&amp;resumptionToken=" | ||
  (: percent-encode the opaque token so URI-reserved characters in it | ||
     cannot corrupt the query string :) | ||
  let $request := $base-url || $verb || fn:encode-for-uri($token) | ||
  let $response := fn:doc($request) | ||
  let $new-token := $response//oai:resumptionToken/text() | ||
  return ( | ||
    $response, | ||
    if (fn:exists($new-token)) then local:resume($base-url, $new-token) else () | ||
  ) | ||
}; | ||
|
||
(: Harvests the 'musicaudio' set in oai_qdc format and adds every oai:record to the | ||
   'cmhf-another-test' database, using the record's OAI identifier as its path. :) | ||
let $base-url := "http://digi.countrymusichalloffame.org/oai/oai.php" | ||
(: ampersands inside XQuery string literals have to be written as entity references :) | ||
let $verb := "?verb=ListRecords&amp;metadataPrefix=oai_qdc" | ||
let $set-spec := "&amp;set=musicaudio" | ||
let $records := local:request($base-url, $verb, $set-spec)//oai:record | ||
return ( | ||
  ( | ||
    for $record in $records | ||
    let $id := $record/oai:header/oai:identifier/text() | ||
    return | ||
      (: db:replace('cmhf', $record, $id, map { 'addcache': true() }) :) | ||
      db:add('cmhf-another-test', $record, $id, map { 'addcache': true() }) | ||
  ), | ||
  (: optimize once for the whole harvest rather than once per record; | ||
     skipped when nothing was harvested, as before :) | ||
  if (fn:exists($records)) then | ||
    db:optimize('cmhf-another-test', true()) | ||
  else () | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,30 @@ | ||
import module namespace functx = 'http://www.functx.com'; | ||
|
||
let $target := '/mnt/gwork/repox-data/' | ||
let $coll := 'country_music' | ||
declare namespace oai_qdc = "http://worldcat.org/xmlschemas/qdc-1.0/"; | ||
|
||
for $record in db:open($coll)/record[descendant::*:format[fn:contains(., '6.5 inch metal-based acetate disc') | ||
or fn:contains(., '10 inch metal-based acetate disc') | ||
or fn:contains(., '8 inch metal-based acetate disc')]] | ||
let $target := '/tmp/repox-data/' | ||
let $coll := 'country_music_qdc' | ||
|
||
for $record in db:open($coll)/record[ | ||
descendant::*:format[ | ||
fn:contains(., '6.5 inch metal-based acetate disc') | ||
or fn:contains(., '10 inch metal-based acetate disc') | ||
or fn:contains(., '8 inch metal-based acetate disc') | ||
] | ||
]/*:metadata/oai_qdc:qualifieddc | ||
|
||
let $file-full := functx:substring-after-last(db:path($record), ':') | ||
let $file := if (fn:contains($file-full, '/')) | ||
then (fn:replace($file-full, '/', '_')) | ||
else $file-full | ||
let $path := $target || $coll || '/' || $file || '.xml' | ||
let $path := $target || 'cmhf' || '/' || $file || '.xml' | ||
return ( | ||
file:create-dir(file:parent($path)), | ||
file:write($path, $record) | ||
file:write($path, | ||
$record, | ||
map { "method": "xml", | ||
"encoding": "UTF-8", | ||
"indent": "yes", | ||
"omit-xml-declaration": "no"} | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,29 @@ | ||
import module namespace functx = 'http://www.functx.com'; | ||
|
||
let $target := '/mnt/gwork/repox-data/' | ||
declare namespace oai_dc = "http://www.openarchives.org/OAI/2.0/oai_dc/"; | ||
declare namespace dc = "http://purl.org/dc/elements/1.1/"; | ||
|
||
let $target := '/tmp/repox-data/' | ||
let $coll := 'crossroadsfreedom' | ||
|
||
for $record in db:open($coll)/record[descendant::*:format[contains(., '6.5 inch metal-based acetate disc') | ||
or contains(., '10 inch metal-based acetate disc') | ||
or contains(., '8 inch metal-based acetate disc')]] | ||
for $record in db:open($coll)/record | ||
[every $t in metadata/oai_dc:dc/dc:title | ||
satisfies $t[not(normalize-space(.) = '')]] | ||
[every $r in metadata/oai_dc:dc/dc:rights | ||
satisfies $r[not(normalize-space(.) = '')]] | ||
[some $i in metadata/oai_dc:dc/dc:identifier | ||
satisfies $i[starts-with(., 'http://')]]/metadata/oai_dc:dc | ||
|
||
let $file-full := functx:substring-after-last(db:path($record), ':') | ||
let $file := if (fn:contains($file-full, '/')) | ||
then (fn:replace($file-full, '/', '_')) | ||
else $file-full | ||
let $path := $target || $coll || '/' || $file || '.xml'(: db:path($record) :) | ||
let $path := $target || 'crossroadsfreedom' || '/' || $file || '.xml' | ||
return ( | ||
file:create-dir(file:parent($path)), | ||
file:write($path, $record) | ||
file:write($path, $record, map { "method": "xml", | ||
"encoding": "UTF-8", | ||
"indent": "yes", | ||
"omit-xml-declaration": "no" } | ||
) | ||
) |