-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidate.php
executable file
·688 lines (566 loc) · 29.8 KB
/
validate.php
1
<HTML>
<?php
/**
 * Data Hub LOD Validator.
 *
 * Looks up one CKAN / Data Hub package (name taken from ?package=... or from
 * the POSTed search form), derives its "LOD Cloud Diagram Compliance Level"
 * (0-4) from the package's tags, extras, groups and resources, and prints
 * the missing minimal / enhanced information per level together with some
 * preliminary availability indicators.
 *
 * Names expected from config.php (not defined in this file):
 *   $topic_names, $download_formats, $example_formats, $levels.
 *
 * NOTE(review): all CKAN strings are HTML-escaped as UTF-8 on the assumption
 * that the CKAN client returns JSON-decoded (UTF-8) strings - confirm.
 */
include_once("config.php");
?>
<HEAD>
<TITLE>Data Hub LOD Validator</TITLE>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=ISO-8859-1">
<script type="text/javascript" src="autocomplete/jquery.js"></script>
<script type='text/javascript' src='autocomplete/jquery.bgiframe.min.js'></script>
<script type='text/javascript' src='autocomplete/jquery.ajaxQueue.js'></script>
<script type='text/javascript' src='autocomplete/thickbox-compressed.js'></script>
<script type='text/javascript' src='autocomplete/jquery.autocomplete.js'></script>
<link rel="stylesheet" type="text/css" href="autocomplete/main.css" />
<link rel="stylesheet" type="text/css" href="autocomplete/jquery.autocomplete.css" />
<link rel="stylesheet" type="text/css" href="autocomplete/thickbox.css" />
<script>
    $(document).ready(function() {
        $("#package").autocomplete("autocomplete.php", {
            matchContains: true,
            width: 300,
            selectFirst: false
        });
    });
</script>
<script type="text/javascript">
    // NOTE(review): the search form is named "package", not "form", so
    // document.form is undefined here. Left unchanged because the browser's
    // native Enter handling already submits the form through the submit button
    // (which the server side requires in $_POST).
    function KeyCode(ev) {
        if (ev) {
            TastenWert = ev.which
        } else {
            TastenWert = window.event.keyCode
        }
        if (TastenWert == 13) {
            document.form.submit();
        }
    }
    document.onkeypress = KeyCode;
</script>
<style type="text/css">
    body { background: white; color: black; font-family: sans-serif; line-height: 1.4em; padding: 2.5em 3em; margin: 0; }
    :link { color: #00c; }
    :visited { color: #609; }
    a:link img { border: none; }
    a:visited img { border: none; }
    .source { font-size: 10px; color: #800; }
    h1, h2, h3 { background: white; color: #800; }
    h1 { font: 170% sans-serif; margin: 0; }
    h2 { clear: both; font: 140% sans-serif; margin: 1.5em 0 -0.5em 0; padding-bottom:20px; }
    h3 { font: 120% sans-serif; margin: 0.5em 0; }
    h4 { font: bold 100% sans-serif; }
    h5 { font: italic 100% sans-serif; }
    h6 { font: small-caps 100% sans-serif; }
    .hide { display: none; }
    p { margin: 0.5em 0;}
    pre { background: #fff6bb; font-family: monospace; line-height: 1.2em; padding: 1em 2em; }
    dt { font-weight: bold; margin-top: 0; margin-bottom: 0; }
    dd { margin-top: 0; margin-bottom: 0; }
    code, tt { font-family: monospace; }
    ul.toc { list-style-type: none; }
    ol.toc li a { text-decoration: none; }
    .note { color: red; }
    #header { border-bottom: 1px solid #ccc; }
    #logo { float: right; }
    #logo a img { padding-left: 20px; background: white; }
    #authors { clear: right; float: right; font-size: 80%; text-align: right; }
    #content { clear: both; margin: 2em auto 0 0; text-align: justify }
    #download, #demo { float: left; font-family: sans-serif; margin: 1em 0 1.5em; text-align: center; width: 50%; }
    #download h2, #demo h2 { font-size: 125%; margin: 1.5em 0 -0.2em 0; }
    #download small, #demo small { color: #888; font-size: 80%; }
    #footer { border-top: 1px solid #ccc; color: #aaa; margin: 2em 0 0; }
    .red { color: #800; font-weight: bold; }
    .info { padding-left: 15px; }
    ul, ul.info { padding-left: 30px; }
    td { vertical-align: top; padding-bottom: 8px; }
    blockquote { margin:22px 40px; min-height: 50px; padding:3px; color:#575757; padding: 0px 50px; }
    .medal { background: transparent url("medal.png") no-repeat 0 0; }
    .no-medal { background: transparent url("medal-gray.png") no-repeat 0 0; }
    .levels { margin:22px 40px 22px 10px; }
    .level4 { margin:22px 40px 0px 10px; /* background: transparent url("lodcloud.png") no-repeat 0 0; */ }
    .success { background: url(tick.png) top right no-repeat; padding-right: 17px; font-weight: bold; }
    .error { background: url(cross.png) top right no-repeat; padding-right: 17px; font-weight: bold; }
    .warning { background: url(exclamation.png) top right no-repeat; padding-right: 17px; font-weight: bold; }
    .flag { background: url(images/flag.jpeg) top right no-repeat; padding-right: 17px; font-weight: bold; }
    #feedback { background: #000 url(images/feedback.png) no-repeat scroll 0px 0px; height:90px; left:0; position:fixed; top:45%; width:26px; }
    div#feedback:hover { background-position:+4px 0px; }
    #feedback a { display:block; height:90px; width:26px; }
</style>
</HEAD>
<BODY>
<div id="logo">
    <a href="http://www.fu-berlin.de/"><img src="http://www4.wiwiss.fu-berlin.de/bizer/d2r-server/images/fu-logo.gif" alt="Freie Universit&auml;t Berlin Logo" /></a>
</div>
<div id="header">
    <h1 style="font-size: 250%">Data Hub LOD Validator</h1>
</div>
<div>
    <a href="index.php">LOD Datasets on Data Hub</a> | Validate | <a href="levels.html">Help</a>
</div>
<div id="content">
<div id="feedback"><a href="http://www.surveymonkey.com/s/TDS3TML"></a></div>
    <form action="validate.php" method="post" name="package">
        <p style="margin-bottom:40px;">Search Data Hub dataset:
            <input style="width: 300px;" size="500" id="package" name="package" onKeyPress="KeyCode;"/>
            <input type="submit" name="submit" value=">"/></p>
    </form>
    <p><?php

/**
 * Tells whether $Haystack begins with $Needle.
 *
 * @param string $Haystack String to inspect.
 * @param string $Needle   Prefix to look for.
 * @return bool True when $Needle occurs at offset 0 of $Haystack.
 */
function startsWith($Haystack, $Needle)
{
    return strpos($Haystack, $Needle) === 0;
}

/**
 * Escapes a value for use in HTML text or a quoted HTML attribute.
 *
 * @param mixed $text Value to print; cast to string first.
 * @return string HTML-safe text.
 */
function lodv_html($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
}

/**
 * Encodes a value as a JavaScript string literal that is safe inside an
 * inline <script> element (quotes, <, > and & are emitted as \uXXXX).
 *
 * @param mixed $value Value to embed; cast to string first.
 * @return string Quoted JS literal; '""' when the value cannot be encoded.
 */
function lodv_js_string($value)
{
    $encoded = json_encode((string) $value, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
    return ($encoded === false) ? '""' : $encoded;
}

/**
 * Builds the placeholder markup plus the inline script that asks
 * uri_availability.php whether a resource URL is reachable and then swaps
 * the answer into the placeholder.
 *
 * @param string $id_prefix Element id prefix ("resource" or "example").
 * @param string $kind      Word used in the label ("download" or "example").
 * @param int    $index     Running resource counter, appended to the id.
 * @param string $url       Resource URL to check.
 * @param string $format    Resource format, sent as contentType.
 * @return string HTML fragment.
 */
function lodv_availability_indicator($id_prefix, $kind, $index, $url, $format)
{
    $element_id = $id_prefix . $index;
    return '<div id="' . $element_id . '"><strong>Checking ' . $kind . ' resource availability for</strong>: '
        . lodv_html($url) . ' <img src="images/loading.gif"/></div>'
        . '<script> $(document).bind("ready", function() { $.get("uri_availability.php", {url: '
        . lodv_js_string($url) . ', contentType: ' . lodv_js_string($format)
        . '}, function(result) { $("#' . $element_id . '").html(result); } ); });</script>'
        . '<a href="#' . $element_id . '" onclick="$(\'.verbose\').show()">[more]</a>';
}

// The package name comes from the query string, or from the search form
// (only when it was submitted through its submit button). Non-string input
// such as ?package[]=x is ignored.
if (isset($_GET['package']) && is_string($_GET['package'])) {
    $package_name = $_GET['package'];
} else if (isset($_POST['submit']) && isset($_POST['package']) && is_string($_POST['package'])) {
    $package_name = $_POST['package'];
}

if (isset($package_name)) {
    include_once('Ckan_client-PHP/Ckan_client.php');
    $ckan = new Ckan_client();
    try {
        $package = $ckan->get_package_entity($package_name);
        $tags = $package->tags;
        $safe_name = lodv_html($package->name);
        ?>
        <h1><?php echo lodv_html($package->title); ?></h1>
        <p>
        <ul>
            <li>Dataset name: <?php echo $safe_name; ?></li>
            <li><a href="http://ckan.net/package/<?php echo rawurlencode($package->name); ?>">Dataset on Data Hub</a></li>
        </ul>
        </p>
        <?php
        if (in_array("meta.duplicate", $tags)) {
            echo "<p>Package is a duplicate.</p><p><strong>Description:</strong> " . lodv_html($package->notes) . "</p>";
        } else {
            ?>
            <h3>LOD Cloud Diagram Compliance Level</h3>
            <p>
            <?php
            $resources = $package->resources;
            $extras = get_object_vars($package->extras);

            // ---- facts derived from tags and extras ---------------------

            $topic_set = false;
            foreach (array_keys($topic_names) as $topic) {
                if (in_array($topic, $tags)) {
                    $topic_set = true;
                }
            }

            // A license is either the CKAN license field or a non-empty
            // "license_link" extra.
            $license_information = false;
            if (isset($package->license)) {
                $license_information = true;
            } else {
                foreach ($extras as $key => $value) {
                    if ((trim($key) == "license_link") && (strlen(trim($value)) > 0)) {
                        $license_information = true;
                        break;
                    }
                }
            }

            // ---- facts derived from resources ---------------------------

            $sparql_endpoint = false;
            $download_link = false;
            $example_link = false;
            $void_or_sitemap = false;
            $mapping_link = false;
            $schema_link = false;
            // Indicator snippets, keyed by the level they are shown under.
            $preliminary_indicators = array(0 => array(), 1 => array(), 2 => array(), 3 => array(), 4 => array());
            $r = 0; // running counter that keeps the generated element ids unique

            foreach ($resources as $resource_object) {
                $resource = get_object_vars($resource_object);
                $format = isset($resource["format"]) ? $resource["format"] : "";
                $url = isset($resource["url"]) ? $resource["url"] : "";

                if ($format == "api/sparql") {
                    $sparql_endpoint = true;
                }
                if (in_array($format, $download_formats)) {
                    $download_link = true;
                    $r++;
                    $preliminary_indicators[3][] = lodv_availability_indicator("resource", "download", $r, $url, $format);
                }
                if (in_array($format, $example_formats)) {
                    $example_link = true;
                    $r++;
                    $preliminary_indicators[2][] = lodv_availability_indicator("example", "example", $r, $url, $format);
                }
                if (($format == "meta/void") || ($format == "meta/sitemap")) {
                    $void_or_sitemap = true;
                }
                if ($format == "meta/rdf-schema") {
                    $schema_link = true;
                }
                if (startsWith($format, "mapping/")) {
                    $mapping_link = true;
                }
            }

            $deref_vocab_set = false;
            $deref_vocab_tags = array("deref-vocab", "no-deref-vocab", "no-proprietary-vocab");
            $no_proprietary_vocab = false;
            foreach ($deref_vocab_tags as $tag) {
                if (in_array($tag, $tags)) {
                    $deref_vocab_set = true;
                    if ($tag == "no-proprietary-vocab") {
                        $no_proprietary_vocab = true;
                    }
                }
            }

            $published_by_set = false;
            $published_by_tags = array("published-by-producer", "published-by-third-party");
            foreach ($published_by_tags as $tag) {
                if (in_array($tag, $tags)) {
                    $published_by_set = true;
                }
            }

            // Extras named "links:<target>" carry link counts per target data set.
            $links = false;
            $link_targets = array();
            foreach ($extras as $key => $value) {
                if (preg_match('/^links:(.*)/i', $key, $match)) {
                    $links = true;
                    $link_targets[] = $match[1];
                }
            }

            $format_tags = false;
            foreach ($tags as $value) {
                if (preg_match('/^format-(.*)/i', $value, $match)) {
                    $format_tags = true;
                    break;
                }
            }

            // A short name is only required for titles of 50+ characters.
            $shortname_provided_if_needed = (strlen($package->title) < 50) || isset($extras["shortname"]);

            // Replace group ids by group names.
            $groups = $package->groups;
            foreach ($groups as $id => $group_id) {
                $group = $ckan->get_group_entity($group_id);
                $groups[$id] = $group->name;
            }

            $namespace = !empty($extras["namespace"]);
            $description = isset($package->notes) && (strlen(trim($package->notes)) > 0);

            // ---- compliance level ---------------------------------------

            $level = 0;
            if (in_array("lodcloud", $groups)) {
                $level = 4;
            } else if (isset($package->author) && isset($package->url) && in_array("lod", $tags)) {
                $level = 1;
                if ($topic_set && $example_link) {
                    $level = 2;
                    if ($description && $license_information && $shortname_provided_if_needed && $deref_vocab_set && $published_by_set) {
                        $level = 3;
                    }
                }
            }

            // ---- collect missing information per level -------------------

            $missing_minimal_information = array(0 => array(), 1 => array(), 2 => array(), 3 => array(), 4 => array());
            $missing_enhanced_information = array(0 => array(), 1 => array(), 2 => array(), 3 => array(), 4 => array());

            if (!isset($package->url)) {
                $missing_minimal_information[1][] = "<strong>Missing URL.</strong> Please provide an URL for the data set.";
            }
            if (!isset($package->author)) {
                $missing_minimal_information[1][] = "<strong>Missing authorship.</strong> Please provide the name of publishing org and/or person using the CKAN field <em>Author</em>. It is important to know who created this data set.";
            }
            if (!isset($package->author_email) && !isset($package->maintainer_email)) {
                $missing_enhanced_information[1][] = "<strong>Missing contact email.</strong> Please provide a contact email using the CKAN field <em>Author email</em> or <em>Maintainer email</em>. It is important to know who to contact if there are errors or missing dataset descriptions.";
            }
            if (!in_array("lod", $tags)) {
                $missing_minimal_information[1][] = "<strong>Missing lod tag.</strong> Please tag the data set with <em>lod</em>.";
            }
            if (!$topic_set) {
                $missing_minimal_information[2][] = "<strong>Missing topic.</strong> No topic tag found. Use one of: <em>" . implode(", ", array_keys($topic_names)) . "</em>.";
            }
            if (!$example_link) {
                $missing_minimal_information[2][] = "<strong>Missing example URI.</strong> Please provide an example URI if available in the <em>Downloads & Resources</em> section, using one of the following formats: <em>" . implode(", ", $example_formats) . "</em>. ";
            }
            if (!$sparql_endpoint) {
                $missing_enhanced_information[2][] = "<strong>Missing SPARQL endpoint.</strong> Please provide a link to the SPARQL endpoint if available in the <em>Downloads & Resources</em> section, using the <em>api/sparql</em> format.";
            }
            if (!$download_link) {
                $missing_enhanced_information[2][] = "<strong>Missing download(s).</strong> Please provide a link to the download file(s) if available in the <em>Downloads & Resources</em> section, using one of the following formats: <em>" . implode(", ", $download_formats) . "</em>. ";
            }

            if ($sparql_endpoint) {
                // null means "no row for this package" (or no readable file).
                // A dedicated sentinel keeps a measured 0.0 from being
                // mistaken for "not assessed".
                $availability = null;
                // temporary. will go to a DB later along with other avail metrics
                $file = is_readable('availability.csv') ? fopen('availability.csv', 'r') : false;
                if ($file !== false) {
                    while (($line = fgetcsv($file, 1000, ";")) !== false) {
                        if (isset($line[1]) && ($line[0] == $package->name)) {
                            $availability = (float) $line[1];
                            break;
                        }
                    }
                    fclose($file);
                }

                $lastmonth = "last month <a href='http://labs.mondeca.com/sparqlEndpointsStatus/details/" . rawurlencode($package->name) . ".html'>[?]</a>";
                // NOTE(review): the thresholds below mix 100.00 with 0.85 / 0.5;
                // it is unclear whether the CSV holds a percentage or a
                // fraction, so they are kept exactly as they were - confirm.
                if ($availability === null) {
                    $avail_msg = "not assessed yet. <a href='http://labs.mondeca.com/sparqlEndpointsStatus/'>[more]</a>";
                } else if ($availability == 100.00) {
                    $avail_msg = "<span style='color:green'>$availability%</span> $lastmonth. Congratulations!";
                } else if ($availability > 0.85) {
                    $avail_msg = "<span style='color:green'>$availability%</span> $lastmonth.";
                } else if ($availability > 0.5) {
                    $avail_msg = "<span style='color:red'>$availability%</span> $lastmonth. Can you improve?";
                } else if ($availability == 0.0) {
                    $avail_msg = "<span style='color:red'>$availability%</span> $lastmonth. Please check the URL you provided.";
                } else {
                    $avail_msg = $availability . "% last month.";
                }
                $preliminary_indicators[3][] = "<div><strong>SPARQL endpoint availability:</strong> " . $avail_msg . "</div>";
            }

            if (!isset($extras["triples"]) || !is_numeric($extras["triples"])) {
                $missing_enhanced_information[2][] = "<strong>Missing size.</strong> No data set size found. Please provide the approximate size of the data set in RDF triples using the custom CKAN field <em>triples</em>.";
            }
            if (!$links /*&& !in_array("lodcloud.unconnected", $package->tags)*/) {
                // The placeholder is written with entities so that browsers
                // display it instead of parsing it as an unknown tag.
                $missing_enhanced_information[2][] = "<strong>Missing link count information.</strong> Please provide the custom CKAN field <em>links:&lt;target_data_set&gt;</em> with the number of RDF links pointing at data set target_data_set. Please provide separate <em>links:&lt;target_data_set&gt;</em> statements for each data set to which <em>" . $safe_name . "</em> links.";
            }

            // Every link target must itself exist as a CKAN package.
            $link_targets_unavailable = array();
            foreach ($link_targets as $link_target) {
                try {
                    $ckan->get_package_entity($link_target);
                } catch (Exception $e) {
                    $link_targets_unavailable[] = $link_target;
                }
            }
            if (count($link_targets_unavailable) > 0) {
                $error = "<img src=\"exclamation.png\"/> <strong>Link targets error.</strong> The following link targets do not exist as CKAN entries:<ul>";
                foreach ($link_targets_unavailable as $target) {
                    $error .= "<li>" . lodv_html($target) . "</li>";
                }
                $error .= "</ul>";
                $missing_enhanced_information[2][] = $error;
            }

            if (!$description) {
                $missing_minimal_information[3][] = "<strong>Missing description.</strong> Please provide a description of the data set using the CKAN field <em>Notes</em>.";
            }
            if (!$license_information) {
                $missing_minimal_information[3][] = "<strong>Missing license.</strong> Please provide the data set's license using the CKAN drop-down field <em>License</em>.";
            }
            if (!isset($package->version)) {
                $missing_enhanced_information[3][] = "<strong>Missing version.</strong> Please provide the last modification date or version of the data set using the CKAN field <em>Version</em>.";
            }
            if (!$shortname_provided_if_needed) {
                $missing_minimal_information[3][] = "<strong>Missing short name.</strong> The data set name is longer than 50 characters. Please provide a short name using the custom CKAN field <em>shortname</em>.";
            }
            if (!$namespace) {
                $missing_enhanced_information[3][] = "<strong>Missing instance namespace.</strong> Please provide the namespace used for instances of the dataset. For example, the namespace for DBpedia instances is <em>http://dbpedia.org/resource/</em>. This will be used to detect who links to your dataset.";
            }
            if (!$void_or_sitemap) {
                $missing_enhanced_information[3][] = "<strong>Missing voiD or Semantic Sitemap.</strong> Please provide a link to a voiD description or XML Sitemap if available in the <em>Downloads & Resources</em> section, using one of the following formats: <em>meta/void</em>, <em>meta/sitemap</em>.";
            }
            if (!$published_by_set) {
                $missing_minimal_information[3][] = "<strong>Missing publisher information.</strong> Please provide a tag indicating if the dataset was published by the producer of the data, or by a third party. The tag should be one of: <em>" . implode(", ", $published_by_tags) . "</em>.";
            }
            if (!$deref_vocab_set) {
                $missing_minimal_information[3][] = "<strong>Missing proprietary vocabulary information.</strong> Please provide a tag indicating if the dataset does not contain proprietary vocabulary terms, or if it contains proprietary terms, if they are dereferenceable or not. The tag should be one of: " . implode(", ", $deref_vocab_tags) . ".";
            }
            if (!$mapping_link && !$no_proprietary_vocab) {
                $missing_enhanced_information[3][] = "<strong>Missing mapping(s).</strong> If the data set provides vocabulary mappings to other vocabularies, provide a link to the mapping file in the <em>Resources</em> section, using the following format: <em>mapping/&lt;format&gt;</em>. Replace <em>&lt;format&gt;</em> with the mapping/rule language used, like R2R or RIF.";
            }
            if (!$schema_link && !$no_proprietary_vocab) {
                $missing_enhanced_information[3][] = "<strong>Missing schema.</strong> If the data set uses a proprietary vocabulary, provide a download link to the RDF/OWL Schema used by the data set (in addition to having dereferenceable vocabulary URIs) in the <em>Resources</em> section, using the following format: <em>meta/rdf-schema</em>.";
            }
            if (!$format_tags) {
                $missing_enhanced_information[3][] = "<strong>Missing vocabularies used.</strong> Provide vocabularies used by the data set as tags, e.g. <em>format-skos</em>, <em>format-foaf</em>.";
            }
            if (!in_array("vocab-mappings", $tags) && !in_array("no-vocab-mappings", $tags) && !$no_proprietary_vocab) {
                $missing_enhanced_information[3][] = "<strong>Missing information on vocabulary mappings.</strong> Indicate whether mappings for proprietary vocabulary terms are provided (<code>owl:equivalentClass</code>, <code>owl:equivalentProperty</code>, <code>rdfs:subClassOf</code>, and/or <code>rdfs:subPropertyOf</code> links, or mappings expressed as RIF rules or using the R2R Mapping Language) by using the tag <em>vocab-mappings</em>. Use <em>no-vocab-mappings</em> otherwise.";
            }
            if (!in_array("provenance-metadata", $tags) && !in_array("no-provenance-metadata", $tags)) {
                $missing_enhanced_information[3][] = "<strong>Missing information on provenance metadata.</strong> Indicates whether the data set provides provenance meta-information (creator of the data set, creation date, maybe creation method) as document meta-information or via a voiD description. For instance, using the <code>dc:creator</code> or <code>dc:date</code> properties. Use the tag <em>provenance-metadata</em> / <em>no-provenance-metadata</em>.";
            }
            if (!in_array("license-metadata", $tags) && !in_array("no-license-metadata", $tags)) {
                $missing_enhanced_information[3][] = "<strong>Missing information on licensing metadata.</strong> Indicates whether the data set provides licensing meta-information as document meta-information or via a voiD description. For instance, using the <code>dc:rights</code> property. Use the tag <em>license-metadata</em> / <em>no-license-metadata</em>.";
            }

            // ---- print the per-level overview ---------------------------

            $missing_infos = count($missing_minimal_information[1])
                + count($missing_minimal_information[2])
                + count($missing_minimal_information[3]);

            foreach ($levels as $level_id => $level_name) {
                // Level 0 is only listed while the package has not reached level 1.
                if (($level_id == 0) && ($level > 0)) {
                    continue;
                }

                $minimal_here = $missing_minimal_information[$level_id];
                $enhanced_here = $missing_enhanced_information[$level_id];

                // The level the package currently holds is printed in bold.
                $style = ($level_id == $level) ? " style=\"font-weight:bold;\"" : "";
                echo "<p" . $style . "><a href=\"levels.html#level$level_id\">Level: " . $level_id . " (" . $level_name . ")</a> ";

                $icon = null;
                if (count($minimal_here) > 0) {
                    $icon = "cross.png";
                } elseif ($level == 4) {
                    // Already in the LOD cloud: only warn on level 4 itself.
                    $icon = (($level_id == 4) && ($missing_infos > 0)) ? "exclamation.png" : "tick.png";
                } elseif ($level > 0) {
                    $icon = (($level_id != 4) && ($level_id <= $level)) ? "tick.png" : "cross.png";
                }
                if ($icon !== null) {
                    echo "<img src=\"" . $icon . "\">";
                }
                echo "</p>"; // always close the paragraph, even without an icon

                if ($level_id > 0) {
                    echo "<blockquote class=\"levels";
                    if ($level_id < 4) {
                        echo ((count($minimal_here) == 0) && (count($enhanced_here) == 0)) ? " medal" : " no-medal";
                    }
                    echo "\">";
                    // Messages to encourage going further with dataset description.
                    if (count($minimal_here) > 0) {
                        echo "<img src=\"exclamation.png\">" . implode("<br/> <img src=\"exclamation.png\"> ", $minimal_here);
                        echo "<br/>";
                    }
                    echo implode("<br/>", $enhanced_here) . "<br/></blockquote>";
                }
            }

            // ---- editor warnings (one shared heading) --------------------

            $editor_warnings = array(
                "lodcloud.needsinfo" => "<p><span class=flag>Needs information.</span> An editor has indicated (via lodcloud.needsinfo) that the data provider or dataset homepage does not provide minimum information (and information can't be determined from SPARQL endpoint or downloads).</p>",
                "lodcloud.needsfixing" => "<p><span class=flag>Needs fixing.</span> An editor has indicated (via lodcloud.needsfixing) that the dataset needs to be fixed. Please check the notes for further information.</p>",
                "lodcloud.unconnected" => "<p><span class=flag>Unconnected.</span> An editor has indicated that the dataset has no external RDF links to or from other datasets and thus can not be added to the LOD cloud.</p>",
                "lodcloud.nolinks" => "<p><span class=flag>No external links.</span> An editor has indicated that the dataset has no external RDF links to other datasets.</p>",
            );
            $warning = false;
            foreach ($editor_warnings as $warning_tag => $warning_html) {
                if (!in_array($warning_tag, $tags)) {
                    continue;
                }
                if (!$warning) {
                    echo "<h3>Warning</h3>";
                    $warning = true;
                }
                echo $warning_html;
            }

            // ---- flat lists of everything that is missing ----------------

            if ($missing_infos > 0) {
                echo "<h3>Minimum Information</h3>";
                echo "<p>There is some information missing before the data set can be reviewed for addition to the LOD cloud:</p>";
                echo "<ul>";
                foreach ($missing_minimal_information as $infos) {
                    foreach ($infos as $item) {
                        echo "<li>" . $item . "</li>";
                    }
                }
                echo "</ul>";
            }

            // Count the messages themselves: the outer arrays always hold five
            // (possibly empty) per-level lists, so their own size is never 0.
            $enhanced_total = array_sum(array_map('count', $missing_enhanced_information));
            if ($enhanced_total > 0) {
                echo "<h3>Enhanced Information</h3>";
                echo "<p>Please provide the following additional information about the data set. This information helps the community to know more about the development state of the Web of Linked Data. Moreover, following best practices for dataset description will make easier for users (and search engines) to consume this dataset. Find more help <a href='levels.html'>here</a>.</p>";
                echo "<ul>";
                foreach ($missing_enhanced_information as $infos) {
                    foreach ($infos as $item) {
                        // The link-target error carries its own icon; drop it in list form.
                        $item = str_replace("<img src=\"exclamation.png\"/> ", "", $item);
                        echo "<li>" . $item . "</li>";
                    }
                }
                echo "</ul>";
            }

            $indicator_total = array_sum(array_map('count', $preliminary_indicators));
            if ($indicator_total > 0) {
                echo "<h3>Preliminary assessment:</h3><blockquote>";
                echo "<p>For each resource within the accepted download and example formats, we attempt to connect to your URL to check if the link is broken. Results are below.</p>";
                foreach ($levels as $level_id => $level_name) {
                    if (!empty($preliminary_indicators[$level_id])) {
                        echo implode("<br/>", $preliminary_indicators[$level_id]);
                    }
                }
                echo "<br/></blockquote>";
            }

            echo "<p><a href=\"http://ckan.net/package/edit/" . rawurlencode($package->name) . "\">Edit</a> this package on CKAN.</p>";
            ?>
            </p>
            <?php
        } // else no duplicate
    } catch (Exception $e) {
        // Report the name that was asked for; $package may be unset (lookup
        // failed) or an object (a later CKAN call failed) at this point.
        echo "<p>Package <strong>" . lodv_html($package_name) . "</strong> not found.</p>";
    }
}
?>
    </p>
</div>
</BODY>
</HTML>