This repository has been archived by the owner on May 30, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 104
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
And ensure datasets reference propelml.org URLs, not filesystem paths. Fixes #375
- Loading branch information
Showing
9 changed files
with
255 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/*! | ||
Copyright 2018 Propel http://propel.site/. All rights reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// When people load datasets in Propel, they need to be downloaded over HTTP. | ||
// To avoid making people download the same dataset every time they start | ||
// a training program, we provide a local cache of these datasets. | ||
// The $HOME/.propel/cache directory is where these files will be stored. | ||
|
||
import * as path from "path"; | ||
import * as rimraf from "rimraf"; | ||
import { assert, fetchArrayBuffer, IS_WEB, nodeRequire, URL } from "./util"; | ||
import { mkdirp, propelDir } from "./util_node"; | ||
|
||
/**
 * Storage backend for downloaded data, keyed by the URL it was fetched from.
 */
export interface Cache {
  /** Resolves to the bytes stored for `url`, or null if nothing is stored. */
  get(url: string): Promise<ArrayBuffer | null>;
  /** Stores `ab` as the cached content for `url`. */
  set(url: string, ab: ArrayBuffer): Promise<void>;
  /** Removes everything from the cache. */
  clearAll(): Promise<void>;
}

// The backend used by the exported functions of this module. It is assigned
// at module load time, depending on the platform.
let cacheImpl: Cache;
|
||
// TODO move this function to src/fetch.ts
/**
 * Returns the contents of `url`, preferring the local cache.
 * On a cache miss the data is fetched with fetchArrayBuffer() and stored in
 * the cache before it is returned.
 */
export async function fetchWithCache(url: string): Promise<ArrayBuffer> {
  const cached = await cacheImpl.get(url);
  if (cached != null) {
    return cached;
  }
  const ab = await fetchArrayBuffer(url);
  // Wait for the write to finish: the entry is then guaranteed to be in the
  // cache when this call resolves, and a failed write rejects this call
  // instead of surfacing as an unhandled promise rejection.
  await cacheImpl.set(url, ab);
  return ab;
}
|
||
/** Empties the cache by delegating to the active cache backend. */
export function clearAll(): Promise<void> {
  const pending = cacheImpl.clearAll();
  return pending;
}
|
||
/** Returns the absolute path of the "cache" directory inside propelDir(). */
function cacheBase(): string {
  const root = propelDir();
  return path.resolve(root, "cache");
}
|
||
// Maps a URL to a cache filename. Example:
//   "http://propelml.org/data/mnist/train-images-idx3-ubyte.bin"
// maps to
//   "$HOME/.propel/cache/propelml.org/data/mnist/train-images-idx3-ubyte.bin"
// Throws if the URL is not http(s), if its path contains "..", or if the
// resulting filename is not located inside the cache directory.
export function url2Filename(url: string): string {
  // Throw on browser. We expose this method for testing, but only run it on
  // Node.
  assert(!IS_WEB, "url2Filename is unsupported in the browser");
  const u = new URL(url);
  if (!(u.protocol === "http:" || u.protocol === "https:")) {
    throw Error(`Unsupported protocol '${u.protocol}'`);
  }
  if (u.pathname.indexOf("..") >= 0) {
    throw Error("Cache name cannot include '..'");
  }
  // Note we purposely leave the port out of the cache path because
  // Windows doesn't allow colons in filenames. This is probably fine
  // in 99% of cases and is the simplest solution.
  const base = cacheBase();
  const cacheFn = path.resolve(path.join(base, u.hostname, u.pathname));
  // Compare against the base directory *including* a trailing separator.
  // A bare prefix test would also accept sibling directories whose name
  // merely starts with the base name (e.g. "<base>-other"), and only the
  // pathname - not the hostname - is screened for ".." above.
  const prefix = base.endsWith(path.sep) ? base : base + path.sep;
  assert(cacheFn.startsWith(prefix));
  return cacheFn;
}
|
||
// Select the cache backend for the current platform.
if (IS_WEB) {
  // On web do nothing. No caching.
  // Maybe use local storage?
  cacheImpl = {
    async clearAll(): Promise<void> { },
    async get(url: string): Promise<null | ArrayBuffer> {
      return null;
    },
    async set(url: string, ab: ArrayBuffer): Promise<void> { },
  };
} else {
  // Node caching uses the disk.
  const fs = nodeRequire("fs");

  cacheImpl = {
    // Deletes the whole cache directory.
    async clearAll(): Promise<void> {
      rimraf.sync(cacheBase());
      console.log("Delete cache dir", cacheBase());
    },

    // Reads the cache file for `url`; resolves to null if there is none.
    async get(url: string): Promise<null | ArrayBuffer> {
      const cacheFn = url2Filename(url);
      if (fs.existsSync(cacheFn)) {
        const b = fs.readFileSync(cacheFn, null);
        // Copy out exactly the bytes that belong to this Buffer; the
        // underlying ArrayBuffer can be larger than the Buffer view.
        return b.buffer.slice(b.byteOffset,
                              b.byteOffset + b.byteLength) as ArrayBuffer;
      } else {
        return null;
      }
    },

    // Writes `ab` to the cache file for `url`, creating directories as needed.
    async set(url: string, ab: ArrayBuffer): Promise<void> {
      const cacheFn = url2Filename(url);
      mkdirp(path.dirname(cacheFn));
      // Write to a temporary sibling file and rename it into place, so that
      // an interrupted write cannot leave a truncated file under the name
      // that get() reads from.
      const tmpFn = `${cacheFn}.${process.pid}.tmp`;
      try {
        fs.writeFileSync(tmpFn, Buffer.from(ab));
        fs.renameSync(tmpFn, cacheFn);
      } catch (e) {
        try {
          fs.unlinkSync(tmpFn);
        } catch (unlinkErr) {
          // Best-effort cleanup; the temp file may never have been created.
        }
        throw e;
      }
    },
  };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/*! | ||
Copyright 2018 Propel http://propel.site/. All rights reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// Node-only test. Browsers have caching built-in. | ||
|
||
import * as fs from "fs"; | ||
import { test } from "../tools/tester"; | ||
import * as cache from "./cache"; | ||
import { assert, IS_NODE, nodeRequire } from "./util"; | ||
import { isDir } from "./util_node"; | ||
|
||
// Test helper: calls `cb` with the base URL (ending in "/") of a web server.
// On Node an http-server rooted at build/dev_website is started for the
// duration of the callback and closed afterwards; elsewhere the host of the
// current document is used.
// TODO should be moved to tools/tester eventually.
async function localServer(cb: (url: string) => Promise<void>): Promise<void> {
  if (!IS_NODE) {
    // We don't need a local server, since we're being hosted from one already.
    await cb(`http://${document.location.host}/`);
    return;
  }

  const root = __dirname + "/../build/dev_website";
  const missingMsg = root +
    " does not exist. Run ./tools/dev_website before running this test.";
  assert(isDir(root), missingMsg);

  const httpServer = nodeRequire("http-server");
  const server = httpServer.createServer({ cors: true, root });
  // listen() is called without a port, so the actual port is read back from
  // the underlying server's address.
  server.listen();
  const { port } = server.server.address();
  try {
    await cb(`http://127.0.0.1:${port}/`);
  } finally {
    server.close();
  }
}
|
||
// url2Filename() asserts that it is not running in the browser, so this test
// is only registered on Node.
if (IS_NODE) {
  test(async function cache_url2Filename() {
    const filename = cache.url2Filename(
      "http://propelml.org/data/mnist/train-images-idx3-ubyte.bin");
    const nodePath = nodeRequire("path");
    // Re-join the expected suffix with the platform's path separator
    // (windows compat).
    const parts =
      ".propel/cache/propelml.org/data/mnist/train-images-idx3-ubyte.bin"
        .split("/");
    const suffix = nodePath.join(...parts);
    assert(filename.endsWith(suffix));
  });
}
|
||
// Downloads the MNIST training images through the cache and checks that the
// complete file arrives and, on Node, that a cache file was written for it.
test(async function cache_fetchWithCache() {
  // Wait until the cache has actually been cleared before fetching.
  await cache.clearAll();
  await localServer(async function(url: string) {
    // The base URL handed out by localServer() already ends with "/".
    url += "data/mnist/train-images-idx3-ubyte.bin";
    const ab = await cache.fetchWithCache(url);
    assert(ab.byteLength === 47040016);
    if (IS_NODE) {
      assert(fs.existsSync(cache.url2Filename(url)));
    }
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/*! | ||
Copyright 2018 Propel http://propel.site/. All rights reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
import * as fs from "fs"; | ||
import * as path from "path"; | ||
import { assert } from "./util"; | ||
|
||
/**
 * Returns true if `p` exists and is a directory.
 * Returns false if `p` is not a directory or does not exist - including the
 * case where one of its parent components is a regular file (ENOTDIR).
 * Any other error from stat is rethrown.
 */
export function isDir(p: string): boolean {
  try {
    return fs.statSync(p).isDirectory();
  } catch (e) {
    const code = (e as { code?: string }).code;
    if (code === "ENOENT" || code === "ENOTDIR") return false;
    throw e;
  }
}
|
||
/**
 * Returns the directory where Propel keeps its files: the PROPEL_DIR
 * environment variable if it is set and non-empty, otherwise ".propel/"
 * inside the user's home directory (USERPROFILE on Windows, HOME elsewhere).
 * Throws if the home directory variable is needed but not set.
 */
export function propelDir(): string {
  if (process.env.PROPEL_DIR) {
    return process.env.PROPEL_DIR;
  }
  const homeVar = process.platform === "win32" ? "USERPROFILE" : "HOME";
  const homeDir = process.env[homeVar];
  if (!homeDir) {
    // Without this check path.join() fails with an opaque TypeError when the
    // variable is unset, and yields a relative path when it is empty.
    throw new Error(`Cannot locate the home directory: ${homeVar} is not set`);
  }
  return path.join(homeDir, ".propel/");
}
|
||
/**
 * Recursive mkdir: creates `dirname` and any missing parent directories.
 * Does nothing if `dirname` already is a directory.
 */
export function mkdirp(dirname: string): void {
  if (isDir(dirname)) {
    return;
  }
  const parentDir = path.dirname(dirname);
  // path.dirname() returns its input once the root (or ".") is reached; stop
  // there instead of recursing forever. Directories directly below the root
  // or the working directory are allowed.
  assert(parentDir !== dirname);
  mkdirp(parentDir);
  try {
    fs.mkdirSync(dirname);
  } catch (e) {
    // The directory may have been created by someone else since the isDir()
    // check above; that is fine as long as it is a directory now.
    const code = (e as { code?: string }).code;
    if (code !== "EEXIST" || !isDir(dirname)) throw e;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters