-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update the save() method to set relative paths for resources. Fixes frictionlessdata#41. #51
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,20 +183,31 @@ public function save($zip_filename) | |
{ | ||
Package::isZipPresent(); | ||
$zip = new ZipArchive(); | ||
|
||
$packageCopy = $this->copy(); | ||
|
||
$base = tempnam(sys_get_temp_dir(), 'datapackage-zip-'); | ||
$files = [ | ||
'datapackage.json' => $base.'datapackage.json', | ||
]; | ||
$ri = 0; | ||
foreach ($this as $resource) { | ||
foreach ($packageCopy as $resource) { | ||
if ($resource->isRemote()) { | ||
continue; | ||
} | ||
|
||
$resourceFiles = []; | ||
$fileNames = $resource->save($base.'resource-'.$ri); | ||
foreach ($fileNames as $fileName) { | ||
$relname = str_replace($base.'resource-'.$ri, '', $fileName); | ||
$files['resource-'.$ri.$relname] = $fileName; | ||
$resourceFiles[] = 'resource-'.$ri.$relname; | ||
} | ||
$resource->descriptor()->path = count($resourceFiles) == 1 ? $resourceFiles[0] : $resourceFiles; | ||
++$ri; | ||
} | ||
$this->saveDescriptor($files['datapackage.json']); | ||
$packageCopy->saveDescriptor($files['datapackage.json']); | ||
|
||
register_shutdown_function(function () use ($base) { | ||
Utils::removeDir($base); | ||
}); | ||
|
@@ -210,19 +221,29 @@ public function save($zip_filename) | |
} | ||
} | ||
|
||
/** | ||
* Make a new Datapackage object that is a copy of the current Datapackage. Bypasses validation. | ||
* @return $this | ||
* @throws DatapackageValidationFailedException | ||
*/ | ||
protected function copy() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you are here, this could need a PHP DOC saying what it does. I know it is obvious but makes future contributor's work easier. Like the fact that it creates a new Object but skips validation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, docblock, of course. I can add that. I can fix that in a new commit and put it in this same pull request, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes! Once the pull is open you can keep pushing commits into your local/forked same branch. |
||
{ | ||
return new static($this->descriptor, $this->basePath, true); | ||
} | ||
|
||
protected $descriptor; | ||
protected $currentResourcePosition = 0; | ||
protected $basePath; | ||
protected $skipValidations = false; | ||
|
||
/** | ||
* called by the resources iterator for each iteration. | ||
* | ||
* @param object $descriptor | ||
* | ||
* @return \frictionlessdata\datapackage\Resources\BaseResource | ||
* @throws \frictionlessdata\datapackage\Exceptions\ResourceValidationFailedException | ||
*/ | ||
/** | ||
* called by the resources iterator for each iteration. | ||
* | ||
* @param object $descriptor | ||
* | ||
* @return \frictionlessdata\datapackage\Resources\BaseResource | ||
* @throws \frictionlessdata\datapackage\Exceptions\ResourceValidationFailedException | ||
*/ | ||
protected function initResource($descriptor) | ||
{ | ||
return Factory::resource($descriptor, $this->basePath, $this->skipValidations); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -141,6 +141,37 @@ public function testHttpSource() | |
); | ||
} | ||
|
||
public function testHttpSourceSaveAndLoad() | ||
{ | ||
$package = Mocks\MockFactory::datapackage('mock-http://simple_valid_datapackage_mock_http_data.json'); | ||
|
||
$filename = tempnam(sys_get_temp_dir(), 'datapackage-php-tests-').'.zip'; | ||
//save the datapackage | ||
if (is_file($filename)) { | ||
unlink($filename); | ||
} | ||
$package->save($filename); | ||
|
||
//load the new package | ||
$package2 = Mocks\MockFactory::datapackage($filename); | ||
|
||
$this->assertDatapackage( | ||
(object) [ | ||
'name' => 'datapackage-name', | ||
'resources' => [ | ||
(object) [ | ||
'name' => 'resource-name', | ||
'path' => ['mock-http://foo.txt', 'mock-http://foo.txt'], | ||
], | ||
], | ||
], | ||
['resource-name' => ['foo', 'foo']], | ||
$package2 | ||
); | ||
|
||
unlink($filename); | ||
} | ||
|
||
public function testMultiDataDatapackage() | ||
{ | ||
$out = []; | ||
|
@@ -476,6 +507,29 @@ public function testCreateEditDatapackageDescriptor() | |
$zip->close(); | ||
unlink($filename); | ||
$tempdir = $tempdir.DIRECTORY_SEPARATOR; | ||
|
||
//after saving to disk, the paths are updated | ||
$expectedDatapackageDescriptor = (object) [ | ||
'name' => 'my-datapackage-name', | ||
'resources' => [ | ||
(object) [ | ||
'name' => 'my-default-resource', | ||
'path' => ["resource-0-data-0", "resource-0-data-1"], | ||
], | ||
(object) [ | ||
'name' => 'my-renamed-tabular-resource', | ||
'path' => "resource-1.csv", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is where were should test for paths that are also file name and URI maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. Thanks for taking the time to review this! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks to you! This is all great and the needed changes are not many. Good work |
||
'profile' => 'tabular-data-resource', | ||
'schema' => (object) [ | ||
'fields' => [ | ||
(object) ['name' => 'id', 'type' => 'integer'], | ||
(object) ['name' => 'name', 'type' => 'string'], | ||
], | ||
], | ||
], | ||
], | ||
]; | ||
|
||
$this->assertEquals($expectedDatapackageDescriptor, json_decode(file_get_contents($tempdir.'datapackage.json'))); | ||
$this->assertEquals('foo', file_get_contents($tempdir.'resource-0-data-0')); | ||
$this->assertEquals("testing 改善\n", file_get_contents($tempdir.'resource-0-data-1')); | ||
|
@@ -484,6 +538,43 @@ public function testCreateEditDatapackageDescriptor() | |
Utils::removeDir($tempdir); | ||
} | ||
|
||
public function testSaveAndLoadZip() | ||
{ | ||
//generate a csv file | ||
$csv_filepath = tempnam(sys_get_temp_dir(),'example-csv'); | ||
|
||
//create example csv | ||
file_put_contents($csv_filepath, "name,email\nJohn Doe,john@example.com"); | ||
|
||
//create a new datapackage object | ||
$package = Package::create(['name' => 'csv-example','profile' => 'tabular-data-package']); | ||
|
||
//add a csv file | ||
$package->addResource('example.csv', [ | ||
"profile" => "tabular-data-resource", | ||
"schema" => ["fields" => [["name" => "name", "type" => "string"],["name" => "email", "type" => "string"]]], | ||
"path" => $csv_filepath | ||
]); | ||
|
||
//save the datapackage | ||
$filename = tempnam(sys_get_temp_dir(), 'datapackage-php-tests-').'.zip'; | ||
if (is_file($filename)) { | ||
unlink($filename); | ||
} | ||
$package->save($filename); | ||
|
||
//delete example csv | ||
unlink($csv_filepath); | ||
|
||
//load the new package | ||
$package2 = Package::load($filename); | ||
|
||
//assert you get expected content back out | ||
$this->assertEquals([['name' => 'John Doe', 'email' => 'john@example.com']], $package2->resource('example.csv')->read()); | ||
|
||
unlink($filename); | ||
} | ||
|
||
public function testLoadDatapackageZip() | ||
{ | ||
$package = Package::load(dirname(__FILE__).'/fixtures/datapackage_zip.zip'); | ||
|
@@ -626,21 +717,21 @@ public function testCsvDialect() | |
'CommitteeTypeDesc' => 'ועדה משותפת', | ||
'Email' => null, | ||
'StartDate' => Carbon::__set_state(array( | ||
'date' => '2004-08-12 00:00:00.000000', | ||
'timezone_type' => 3, | ||
'timezone' => 'UTC', | ||
)), | ||
'date' => '2004-08-12 00:00:00.000000', | ||
'timezone_type' => 3, | ||
'timezone' => 'UTC', | ||
)), | ||
'FinishDate' => null, | ||
'AdditionalTypeID' => null, | ||
'AdditionalTypeDesc' => null, | ||
'ParentCommitteeID' => null, | ||
'CommitteeParentName' => null, | ||
'IsCurrent' => true, | ||
'LastUpdatedDate' => Carbon::__set_state(array( | ||
'date' => '2015-03-20 12:02:57.000000', | ||
'timezone_type' => 3, | ||
'timezone' => 'UTC', | ||
)), | ||
'date' => '2015-03-20 12:02:57.000000', | ||
'timezone_type' => 3, | ||
'timezone' => 'UTC', | ||
)), | ||
), $row); | ||
} | ||
++$rowNum; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may need to test this further but let me see if I understand this:
path
property according to theschema
can either be a string or an array of strings.datapackage-php/src/Validators/schemas/data-package.json
Lines 262 to 276 in 48e73fc
If you have only one resource file for a resource you use the the file path/string, if not you use the array, but if fileNames (save) is empty (should not happen really) but What happens if the resource has also e.g
URIs
for the same resource?let's say mytable.csv and https://myrepo.org/data/mytable.csv?
Maybe you should take the original paths from the descriptor first and only replace the original absolute paths that match the files you are iterating over but keep the URIs if any?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're correct, I hadn't considered the case when path is a URI. I'll make sure that's covered in the tests and then work to fix the implementation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So I played around with remote URLs some and it turns out the BaseDatapackage->save() method currently writes remote resources to disk as part of saving (even before my pull request changes). As I understand it, that's not how it's supposed to work, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi. You are right, but i would say (personal opinion) this may be seen as a not so good documented expected behavior/facility that can save some steps for people building Packages. Since
fopen
in PHP can deal with URLs same way as with a local file you get this for free. A datastream level on::save
you already saw thisdatapackage-php/src/DataStreams/DefaultDataStream.php
Lines 46 to 51 in 48e73fc
Of course it would be nice if one could flag if the local path is going to be the only one to be kept or also the original remote one,
but does not mean (I guess) that you can not still for a local resource/file add an additional ->path manually to mimic that behavior.
Not sure how the Python and JS versions deal with this. I may want to check with their interfaces to ensure we do this the same way. Probably a different ISSUE/Pull.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree it would be useful to be able to download remote files and save them locally into the datapackage. Perhaps that'd be a good parameter/option for the
Datapackage->save
method? Or on the resource object with maybe amakeLocal()
call or similar. Either way I don't think it should be the default.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree. probably at the Datastream level we should have flag. Will dig into the other language implementations and see how they do this. Will let you know how this goes. thx