diff --git a/README.md b/README.md index 21fa92fdb4c..fe6034a12d5 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ This client supports the following Google Cloud Platform services: * [Google Prediction API](#google-prediction-api) * [Google Translate API](#google-translate-api) * [Google Cloud Logging](#google-cloud-logging-beta) (Beta) +* [Google Cloud Natural Language](#google-cloud-natural-language-beta) (Beta) * [Google Cloud Resource Manager](#google-cloud-resource-manager-beta) (Beta) * [Google Cloud Vision](#google-cloud-vision-beta) (Beta) @@ -772,6 +773,90 @@ loggingClient.getEntries(function(err, entries) { ``` +## Google Cloud Natural Language (Beta) + +> **This is a Beta release of Google Cloud Natural Language.** This feature is not covered by any SLA or deprecation policy and may be subject to backward-incompatible changes. + +- [API Documentation][gcloud-language-docs] +- [Official Documentation][cloud-language-docs] + +#### Using the all-in-one module + +``` +$ npm install --save google-cloud +``` + +```js +var gcloud = require('google-cloud'); +var language = gcloud.language; +``` + +#### Using the Natural Language API module + +``` +$ npm install --save @google-cloud/language +``` + +```js +var language = require('@google-cloud/language'); +``` + +#### Preview + +```js +// Authenticating on a per-API-basis. You don't need to do this if you auth on a +// global basis (see Authentication section above). + +var languageClient = language({ + projectId: 'grape-spaceship-123', + keyFilename: '/path/to/keyfile.json' +}); + +// Get the entities from a sentence. +languageClient.detectEntities('Stephen of Michigan!', function(err, entities) { + // entities = { + // people: ['Stephen'], + // places: ['Michigan'] + // } +}); + +// Create a document if you plan to run multiple detections. +var document = languageClient.document('Contributions welcome!'); + +// Analyze the sentiment of the document.
+document.detectSentiment(function(err, sentiment) { + // sentiment = 100 // Large numbers represent more positive sentiments. +}); + +// Parse the syntax of the document. +document.annotate(function(err, annotations) { + // annotations = { + // language: 'en', + // sentiment: 100, + // entities: {}, + // sentences: ['Contributions welcome!'], + // tokens: [ + // { + // text: 'Contributions', + // partOfSpeech: 'Noun (common and proper)', + // partOfSpeechTag: 'NOUN' + // }, + // { + // text: 'welcome', + // partOfSpeech: 'Verb (all tenses and modes)', + // partOfSpeechTag: 'VERB' + // }, + // { + // text: '!', + // partOfSpeech: 'Punctuation', + // partOfSpeechTag: 'PUNCT' + // } + // ] + // } +}); +``` + + ## Google Cloud Resource Manager (Beta) > **This is a Beta release of Google Cloud Resource Manager.** This feature is not covered by any SLA or deprecation policy and may be subject to backward-incompatible changes. @@ -803,8 +888,8 @@ var resource = require('@google-cloud/resource'); #### Preview ```js -// Authorizing on a per-API-basis. You don't need to do this if you auth on a -// global basis (see Authorization section above). +// Authenticating on a per-API-basis. You don't need to do this if you auth on a +// global basis (see Authentication section above). var resourceClient = resource({ projectId: 'grape-spaceship-123', @@ -858,8 +943,8 @@ var vision = require('@google-cloud/vision'); #### Preview ```js -// Authorizing on a per-API-basis. You don't need to do this if you auth on a -// global basis (see Authorization section above). +// Authenticating on a per-API-basis. You don't need to do this if you auth on a +// global basis (see Authentication section above). var visionClient = vision({ projectId: 'grape-spaceship-123', @@ -975,6 +1060,7 @@ Apache 2.0 - See [COPYING](COPYING) for more information. 
[gcloud-compute-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/compute [gcloud-datastore-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/datastore [gcloud-dns-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/dns +[gcloud-language-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/language [gcloud-logging-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/logging [gcloud-prediction-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/prediction [gcloud-pubsub-docs]: https://googlecloudplatform.github.io/gcloud-node/#/docs/pubsub @@ -1011,6 +1097,8 @@ Apache 2.0 - See [COPYING](COPYING) for more information. [cloud-dns-docs]: https://cloud.google.com/dns/docs +[cloud-language-docs]: https://cloud.google.com/natural-language/docs + [cloud-logging-docs]: https://cloud.google.com/logging/docs [cloud-prediction-docs]: https://cloud.google.com/prediction/docs diff --git a/docs/json/master/language/.gitkeep b/docs/json/master/language/.gitkeep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/docs/toc.json b/docs/toc.json index 1409599f223..4559bf6387e 100644 --- a/docs/toc.json +++ b/docs/toc.json @@ -137,6 +137,14 @@ "title": "Transaction", "type": "datastore/transaction" }] + }, { + }, { + "title": "Language", + "type": "language", + "nav": [{ + "title": "Document", + "type": "language/document" + }] }, { "title": "Logging", "type": "logging", diff --git a/packages/common/src/service-object.js b/packages/common/src/service-object.js index c738b7bbbfb..17fe8395852 100644 --- a/packages/common/src/service-object.js +++ b/packages/common/src/service-object.js @@ -50,7 +50,7 @@ var util = require('./util.js'); * @param {object} config - Configuration object. * @param {string} config.baseUrl - The base URL to make API requests to. * @param {string} config.createMethod - The method which creates this object. - * @param {string} config.id - The identifier of the object. 
For example, the + * @param {string=} config.id - The identifier of the object. For example, the * name of a Storage bucket or Pub/Sub topic. * @param {object=} config.methods - A map of each method name that should be * inherited. @@ -308,7 +308,7 @@ ServiceObject.prototype.request = function(reqOpts, callback) { var uriComponents = [ this.baseUrl, - this.id, + this.id || '', reqOpts.uri ]; diff --git a/packages/common/test/service-object.js b/packages/common/test/service-object.js index e7d0a45cff4..820f083baf8 100644 --- a/packages/common/test/service-object.js +++ b/packages/common/test/service-object.js @@ -606,6 +606,22 @@ describe('ServiceObject', function() { serviceObject.request(reqOpts, done); }); + it('should not require a service object ID', function(done) { + var expectedUri = [ + serviceObject.baseUrl, + reqOpts.uri + ].join('/'); + + serviceObject.parent.request = function(reqOpts) { + assert.strictEqual(reqOpts.uri, expectedUri); + done(); + }; + + delete serviceObject.id; + + serviceObject.request(reqOpts, assert.ifError); + }); + it('should support absolute uris', function(done) { var expectedUri = 'http://www.google.com'; diff --git a/packages/google-cloud/package.json b/packages/google-cloud/package.json index 31ccf1a4cdf..cf285cd1c31 100644 --- a/packages/google-cloud/package.json +++ b/packages/google-cloud/package.json @@ -93,19 +93,20 @@ "vision" ], "dependencies": { + "extend": "^3.0.0", "@google-cloud/bigquery": "0.1.0", "@google-cloud/bigtable": "0.1.0", "@google-cloud/compute": "0.1.0", "@google-cloud/datastore": "0.1.0", "@google-cloud/dns": "0.1.0", - "@google-cloud/prediction": "0.1.0", + "@google-cloud/language": "0.1.0", "@google-cloud/logging": "0.1.0", + "@google-cloud/prediction": "0.1.0", "@google-cloud/pubsub": "0.1.0", "@google-cloud/resource": "0.1.0", "@google-cloud/storage": "0.1.0", "@google-cloud/translate": "0.1.0", - "@google-cloud/vision": "0.1.0", - "extend": "^3.0.0" + "@google-cloud/vision": "0.1.0" }, 
"devDependencies": { "proxyquire": "^1.7.10" diff --git a/packages/google-cloud/src/index.js b/packages/google-cloud/src/index.js index 97323c6f001..682af7cd867 100644 --- a/packages/google-cloud/src/index.js +++ b/packages/google-cloud/src/index.js @@ -127,6 +127,30 @@ var apis = { */ dns: require('@google-cloud/dns'), + /** + * The [Google Cloud Natural Language](https://cloud.google.com/natural-language/docs) + * API provides natural language understanding technologies to developers, + * including sentiment analysis, entity recognition, and syntax analysis. + * + *
+ * **This is a Beta release of Google Cloud Natural Language.** This API is + * not covered by any SLA or deprecation policy and may be subject to + * backward-incompatible changes. + *
+ * + * @type {module:language} + * + * @return {module:language} + * + * @example + * var gcloud = require('google-cloud'); + * var language = gcloud.language({ + * projectId: 'grape-spaceship-123', + * keyFilename: '/path/to/keyfile.json' + * }); + */ + language: require('@google-cloud/language'), + /** * [Google Cloud Logging](https://cloud.google.com/logging/docs) collects and * stores logs from applications and services on the Google Cloud Platform: diff --git a/packages/google-cloud/test/index.js b/packages/google-cloud/test/index.js index 244e54f77ed..c9f609f8c3d 100644 --- a/packages/google-cloud/test/index.js +++ b/packages/google-cloud/test/index.js @@ -37,6 +37,7 @@ var FakeBigtable = createFakeApi(); var FakeCompute = createFakeApi(); var FakeDatastore = createFakeApi(); var FakeDNS = createFakeApi(); +var FakeLanguage = createFakeApi(); var FakeLogging = createFakeApi(); var FakePrediction = createFakeApi(); var FakePubSub = createFakeApi(); @@ -55,6 +56,7 @@ describe('gcloud', function() { '@google-cloud/compute': FakeCompute, '@google-cloud/datastore': FakeDatastore, '@google-cloud/dns': FakeDNS, + '@google-cloud/language': FakeLanguage, '@google-cloud/logging': FakeLogging, '@google-cloud/prediction': FakePrediction, '@google-cloud/pubsub': FakePubSub, @@ -89,6 +91,10 @@ describe('gcloud', function() { assert.strictEqual(gcloud.dns, FakeDNS); }); + it('should export static language', function() { + assert.strictEqual(gcloud.language, FakeLanguage); + }); + it('should export static logging', function() { assert.strictEqual(gcloud.logging, FakeLogging); }); @@ -193,6 +199,15 @@ describe('gcloud', function() { }); }); + describe('language', function() { + it('should create a new Language', function() { + var language = localGcloud.language(options); + + assert(language instanceof FakeLanguage); + assert.strictEqual(language.calledWith_[0], options); + }); + }); + describe('logging', function() { it('should create a new Logging', function() { var 
logging = localGcloud.logging(options); diff --git a/packages/language/package.json b/packages/language/package.json new file mode 100644 index 00000000000..4bc72b4e34d --- /dev/null +++ b/packages/language/package.json @@ -0,0 +1,77 @@ +{ + "name": "@google-cloud/language", + "version": "0.1.0", + "author": "Google Inc.", + "description": "Google Cloud Natural Language Client Library for Node.js", + "contributors": [ + { + "name": "Burcu Dogan", + "email": "jbd@google.com" + }, + { + "name": "Johan Euphrosine", + "email": "proppy@google.com" + }, + { + "name": "Patrick Costello", + "email": "pcostell@google.com" + }, + { + "name": "Ryan Seys", + "email": "ryan@ryanseys.com" + }, + { + "name": "Silvano Luciani", + "email": "silvano@google.com" + }, + { + "name": "Stephen Sawchuk", + "email": "sawchuk@gmail.com" + } + ], + "main": "./src/index.js", + "files": [ + "./src/*", + "AUTHORS", + "CONTRIBUTORS", + "COPYING" + ], + "repository": "googlecloudplatform/gcloud-node", + "keywords": [ + "google apis client", + "google api client", + "google apis", + "google api", + "google", + "google cloud platform", + "google cloud", + "cloud", + "google cloud natural language", + "google cloud language", + "natural language", + "language" + ], + "dependencies": { + "@google-cloud/common": "^0.1.0", + "@google-cloud/storage": "^0.1.0", + "arrify": "^1.0.1", + "extend": "^3.0.0", + "google-proto-files": "^0.4.0", + "is": "^3.0.1", + "propprop": "^0.3.1", + "string-format-obj": "^1.0.0" + }, + "devDependencies": { + "mocha": "^2.1.0", + "proxyquire": "^1.7.10" + }, + "scripts": { + "publish": "../../scripts/publish.sh", + "test": "mocha test/*.js", + "system-test": "mocha system-test/*.js --no-timeouts --bail" + }, + "license": "Apache-2.0", + "engines": { + "node": ">=0.12.0" + } +} diff --git a/packages/language/src/document.js b/packages/language/src/document.js new file mode 100644 index 00000000000..278941feb50 --- /dev/null +++ b/packages/language/src/document.js @@ -0,0 
+1,758 @@ +/*! + * Copyright 2016 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * @module language/document + */ + +'use strict'; + +var arrify = require('arrify'); +var extend = require('extend'); +var File = require('@google-cloud/storage').File; +var format = require('string-format-obj'); +var is = require('is'); +var prop = require('propprop'); + +/*! Developer Documentation + * + * @param {module:language} language - The parent Language object. + * @param {object|string} config - The content, or a configuration object. + */ +/** + * Create a Natural Language Document object. From this object, you will be able + * to run multiple detections. + * + * @constructor + * @alias module:language/document + * + * @example + * var gcloud = require('google-cloud'); + * + * var language = gcloud.language({ + * projectId: 'grape-spaceship-123' + * }); + * + * var textToAnalyze = [ + * 'Google is an American multinational technology company specializing in', + * 'Internet-related services and products.' + * ].join(' '); + * + * var document = language.document(textToAnalyze); + */ +function Document(language, config) { + var content = config.content || config; + + // `reqOpts` is the payload passed to each `request()`. This object is used as + // the default for all API requests made with this Document.
+ this.reqOpts = { + document: {} + }; + + if (config.encoding) { + var encodingType = config.encoding.toUpperCase().replace(/[ -]/g, ''); + this.reqOpts.encodingType = encodingType; + } + + if (config.language) { + this.reqOpts.document.language = config.language; + } + + if (config.type) { + this.reqOpts.document.type = config.type.toUpperCase(); + + if (this.reqOpts.document.type === 'TEXT') { + this.reqOpts.document.type = 'PLAIN_TEXT'; + } + } else { + // Default to plain text. + this.reqOpts.document.type = 'PLAIN_TEXT'; + } + + if (content instanceof File) { + this.reqOpts.document.gcsContentUri = format('gs://{bucket}/{file}', { + bucket: encodeURIComponent(content.bucket.id), + file: encodeURIComponent(content.id) + }); + } else { + this.reqOpts.document.content = content; + } + + this.request = language.request.bind(language); +} + +/** + * The parts of speech that will be recognized by the Natural Language API. + * + * @private + * @type {object} + */ +Document.PART_OF_SPEECH = { + UNKNOWN: 'Unknown', + ADJ: 'Adjective', + ADP: 'Adposition (preposition and postposition)', + ADV: 'Adverb', + CONJ: 'Conjunction', + DET: 'Determiner', + NOUN: 'Noun (common and proper)', + NUM: 'Cardinal number', + PRON: 'Pronoun', + PRT: 'Particle or other function word', + PUNCT: 'Punctuation', + VERB: 'Verb (all tenses and modes)', + X: 'Other: foreign words, typos, abbreviations', + AFFIX: 'Affix' +}; + +/** + * Run an annotation of the text from the document. + * + * By default, all annotation types are requested: + * + * - The sentiment of the document (positive or negative) + * - The entities of the document (people, places, things, etc.) + * - The syntax of the document (adjectives, nouns, verbs, etc.) + * + * See the examples below for how to request a subset of those types.
+ * + * If you only need one type of annotation, you may appreciate one of these + * other methods from our API: + * + * - {module:language#detectEntities} + * - {module:language#detectSentiment} + * + * @resource [documents.annotateText API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText} + * + * @param {object=} options - Configuration object. + * @param {boolean} options.entities - Detect the entities from this document. + * By default, all features (`entities`, `sentiment`, and `syntax`) are + * enabled. By overriding any of these values, all defaults are switched to + * `false`. + * @param {boolean} options.sentiment - Detect the sentiment from this document. + * By default, all features (`entities`, `sentiment`, and `syntax`) are + * enabled. By overriding any of these values, all defaults are switched to + * `false`. + * @param {boolean} options.syntax - Detect the syntax from this document. By + * default, all features (`entities`, `sentiment`, and `syntax`) are + * enabled. By overriding any of these values, all defaults are switched to + * `false`. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error occurred while making this request. + * @param {object} callback.annotation - The formatted API response. + * @param {string} callback.annotation.language - The language detected from the + * text. + * @param {number} callback.annotation.sentiment - A value in the range of + * `-100` to `100`. Large numbers represent more positive sentiments. + * @param {object} callback.annotation.entities - The recognized entities from + * the text, grouped by the type of entity. + * @param {string[]} callback.annotation.entities.art - Art entities detected + * from the text. This is only present if detections of this type were + * found.
+ * @param {string[]} callback.annotation.entities.events - Event entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.goods - Consumer good entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.organizations - Organization + * entities detected from the text. This is only present if detections of + * this type were found. + * @param {string[]} callback.annotation.entities.other - Other entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.people - People entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.places - Place entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.unknown - Unknown entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.sentences - Sentences detected from the + * text. + * @param {object[]} callback.annotation.tokens - Parts of speech that were + * detected from the text. + * @param {string} callback.annotation.tokens.text - The piece of text + * analyzed. + * @param {string} callback.annotation.tokens.partOfSpeech - A description of + * the part of speech (`Noun (common and proper)`, + * `Verb (all tenses and modes)`, etc.). + * @param {string} callback.annotation.tokens.partOfSpeechTag - A short code + * for the type of speech (`NOUN`, `VERB`, etc.). + * @param {object} callback.apiResponse - The full API response. + * + * @example + * document.annotate(function(err, annotation) { + * if (err) { + * // Error handling omitted.
+ * } + * + * // annotation = { + * // language: 'en', + * // sentiment: 100, + * // entities: { + * // organizations: [ + * // 'Google' + * // ], + * // places: [ + * // 'American' + * // ] + * // }, + * // sentences: [ + * // 'Google is an American multinational technology company ' + + * // 'specializing in Internet-related services and products.' + * // ], + * // tokens: [ + * // { + * // text: 'Google', + * // partOfSpeech: 'Noun (common and proper)', + * // partOfSpeechTag: 'NOUN' + * // }, + * // { + * // text: 'is', + * // partOfSpeech: 'Verb (all tenses and modes)', + * // partOfSpeechTag: 'VERB' + * // }, + * // ... + * // ] + * // } + * }); + * + * //- + * // To request only certain annotation types, provide an options object. + * //- + * var options = { + * entities: true, + * sentiment: true + * }; + * + * document.annotate(options, function(err, annotation) { + * if (err) { + * // Error handling omitted. + * } + * + * // annotation = { + * // language: 'en', + * // sentiment: 100, + * // entities: { + * // organizations: [ + * // 'Google' + * // ], + * // places: [ + * // 'American' + * // ] + * // } + * // } + * }); + * + * //- + * // Verbose mode may also be enabled for more detailed results. + * //- + * var options = { + * verbose: true + * }; + * + * document.annotate(options, function(err, annotation) { + * if (err) { + * // Error handling omitted. 
+ * } + * + * // annotation = { + * // language: 'en', + * // sentiment: { + * // polarity: 100, + * // magnitude: 40 + * // }, + * // entities: { + * // organizations: [ + * // { + * // name: 'Google', + * // type: 'ORGANIZATION', + * // metadata: { + * // wikipedia_url: 'http://en.wikipedia.org/wiki/Google' + * // }, + * // salience: 65.137446, + * // mentions: [ + * // { + * // text: { + * // content: 'Google', + * // beginOffset: -1 + * // } + * // } + * // ] + * // } + * // ], + * // places: [ + * // { + * // name: 'American', + * // type: 'LOCATION', + * // metadata: { + * // wikipedia_url: 'http://en.wikipedia.org/wiki/United_States' + * // }, + * // salience: 13.947370648384094, + * // mentions: [ + * // { + * // text: [ + * // { + * // content: 'American', + * // beginOffset: -1 + * // } + * // ] + * // } + * // ] + * // } + * // ] + * // }, + * // sentences: [ + * // { + * // content: + * // 'Google is an American multinational technology company' + + * // 'specializing in Internet-related services and products.' + * // beginOffset: -1 + * // } + * // ], + * // tokens: [ + * // { + * // text: { + * // content: 'Google', + * // beginOffset: -1 + * // }, + * // partOfSpeech: { + * // tag: 'NOUN' + * // }, + * // dependencyEdge: { + * // headTokenIndex: 1, + * // label: 'NSUBJ' + * // }, + * // lemma: 'Google' + * // }, + * // ... 
+ * // ] + * // } + * }); + */ +Document.prototype.annotate = function(options, callback) { + if (is.fn(options)) { + callback = options; + options = {}; + } + + var features = { + extractDocumentSentiment: true, + extractEntities: true, + extractSyntax: true + }; + + var featuresRequested = { + extractDocumentSentiment: options.sentiment === true, + extractEntities: options.entities === true, + extractSyntax: options.syntax === true + }; + + var numFeaturesRequested = 0; + + for (var featureRequested in featuresRequested) { + if (featuresRequested[featureRequested]) { + numFeaturesRequested++; + } + } + + if (numFeaturesRequested > 0) { + features = featuresRequested; + } + + var verbose = options.verbose === true; + + var grpcOpts = { + service: 'LanguageService', + method: 'annotateText' + }; + + var reqOpts = extend({ + features: features + }, this.reqOpts); + + this.request(grpcOpts, reqOpts, function(err, resp) { + if (err) { + callback(err, null, resp); + return; + } + + var originalResp = extend(true, {}, resp); + + var annotation = { + language: resp.language + }; + + if (resp.documentSentiment) { + var sentiment = resp.documentSentiment; + annotation.sentiment = Document.formatSentiment_(sentiment, verbose); + } + + if (resp.entities) { + annotation.entities = Document.formatEntities_(resp.entities, verbose); + } + + // Only format `sentences` and `tokens` when syntax was requested (or by + // default), so users who never asked for them do not get empty arrays. + if (numFeaturesRequested === 0 || featuresRequested.extractSyntax) { + annotation.sentences = Document.formatSentences_(resp.sentences, verbose); + annotation.tokens = Document.formatTokens_(resp.tokens, verbose); + } + + callback(null, annotation, originalResp); + }); +}; + +/** + * Detect entities from the document.
+ * + * @resource [documents.analyzeEntities API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities} + * + * @param {object=} options - Configuration object. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error occurred while making this request. + * @param {object} callback.entities - The recognized entities from the text, + * grouped by the type of entity. + * @param {string[]} callback.entities.art - Art entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.events - Event entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.goods - Consumer good entities detected + * from the text. This is only present if detections of this type were + * found. + * @param {string[]} callback.entities.organizations - Organization entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.entities.other - Other entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.people - People entities detected from + * the text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.places - Place entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.unknown - Unknown entities detected from + * the text. This is only present if detections of this type were found. + * @param {object} callback.apiResponse - The full API response. 
+ * + * @example + * document.detectEntities(function(err, entities) { + * if (err) { + * // Error handling omitted. + * } + * + * // entities = { + * // organizations: [ + * // 'Google' + * // ], + * // places: [ + * // 'American' + * // ] + * // } + * }); + * + * //- + * // Verbose mode may also be enabled for more detailed results. + * //- + * var options = { + * verbose: true + * }; + * + * document.detectEntities(options, function(err, entities) { + * if (err) { + * // Error handling omitted. + * } + * + * // entities = { + * // organizations: [ + * // { + * // name: 'Google', + * // type: 'ORGANIZATION', + * // metadata: { + * // wikipedia_url: 'http://en.wikipedia.org/wiki/Google' + * // }, + * // salience: 65.137446, + * // mentions: [ + * // { + * // text: { + * // content: 'Google', + * // beginOffset: -1 + * // } + * // } + * // ] + * // } + * // ], + * // places: [ + * // { + * // name: 'American', + * // type: 'LOCATION', + * // metadata: { + * // wikipedia_url: 'http://en.wikipedia.org/wiki/United_States' + * // }, + * // salience: 13.947371, + * // mentions: [ + * // { + * // text: { + * // content: 'American', + * // beginOffset: -1 + * // } + * // } + * // ] + * // } + * // ] + * // } + * }); + */ +Document.prototype.detectEntities = function(options, callback) { + if (is.fn(options)) { + callback = options; + options = {}; + } + + var verbose = options.verbose === true; + + var grpcOpts = { + service: 'LanguageService', + method: 'analyzeEntities' + }; + + this.request(grpcOpts, this.reqOpts, function(err, resp) { + if (err) { + callback(err, null, resp); + return; + } + + var originalResp = extend(true, {}, resp); + var groupedEntities = Document.formatEntities_(resp.entities, verbose); + + callback(null, groupedEntities, originalResp); + }); +}; + +/** + * Detect the sentiment of the text in this document.
+ * + * @resource [documents.analyzeSentiment API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeSentiment} + * + * @param {object=} options - Configuration object. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error occurred while making this request. + * @param {number|object} callback.sentiment - A value in the range of `-100` + * to `100` (larger is more positive), or `{polarity, magnitude}` if verbose. + * @param {object} callback.apiResponse - The full API response. + * + * @example + * document.detectSentiment(function(err, sentiment) { + * if (err) { + * // Error handling omitted. + * } + * + * // sentiment = 100 + * }); + * + * //- + * // Verbose mode may also be enabled for more detailed results. + * //- + * var options = { + * verbose: true + * }; + * + * document.detectSentiment(options, function(err, sentiment) { + * if (err) { + * // Error handling omitted. + * } + * + * // sentiment = { + * // polarity: 100, + * // magnitude: 40 + * // } + * }); + */ +Document.prototype.detectSentiment = function(options, callback) { + if (is.fn(options)) { + callback = options; + options = {}; + } + + var verbose = options.verbose === true; + + var grpcOpts = { + service: 'LanguageService', + method: 'analyzeSentiment' + }; + + this.request(grpcOpts, this.reqOpts, function(err, resp) { + if (err) { + callback(err, null, resp); + return; + } + + var originalResp = extend(true, {}, resp); + var sentiment = Document.formatSentiment_(resp.documentSentiment, verbose); + + callback(null, sentiment, originalResp); + }); +}; + +/** + * Take a raw response from the API and make it more user-friendly. + * + * @private + * + * @param {object[]} entities - A group of entities returned from the API. + * @param {boolean} verbose - Enable verbose mode for more detailed results.
+ * @return {object} - The entities grouped by type, most salient first. + */ +Document.formatEntities_ = function(entities, verbose) { + var GROUP_NAME_TO_TYPE = { + UNKNOWN: 'unknown', + PERSON: 'people', + LOCATION: 'places', + ORGANIZATION: 'organizations', + EVENT: 'events', + WORK_OF_ART: 'art', + CONSUMER_GOOD: 'goods', + OTHER: 'other' + }; + + var groupedEntities = entities.reduce(function(acc, entity) { + entity = extend(true, {}, entity); + + var groupName = GROUP_NAME_TO_TYPE[entity.type]; + + entity.salience *= 100; + + acc[groupName] = arrify(acc[groupName]); + acc[groupName].push(entity); + acc[groupName].sort(Document.sortByProperty_('salience')); + + return acc; + }, {}); + + if (!verbose) { + // Simplify the response to only include an array of `name`s. + for (var groupName in groupedEntities) { + if (groupedEntities.hasOwnProperty(groupName)) { + groupedEntities[groupName] = + groupedEntities[groupName].map(prop('name')); + } + } + } + + return groupedEntities; +}; + +/** + * Take a raw response from the API and make it more user-friendly. + * + * @private + * + * @param {object[]} sentences - A group of sentence detections returned from + * the API. + * @param {boolean} verbose - Enable verbose mode for more detailed results. + * @return {object[]|string[]} - The formatted sentences or sentence descriptor + * objects in verbose mode. + */ +Document.formatSentences_ = function(sentences, verbose) { + sentences = sentences.map(prop('text')); + + if (!verbose) { + sentences = sentences.map(prop('content')); + } + + return sentences; +}; + +/** + * Take a raw response from the API and make it more user-friendly. + * + * @private + * + * @param {object} sentiment - An analysis of the document's sentiment from the + * API. + * @param {boolean} verbose - Enable verbose mode for more detailed results.
+ * @return {number|object} - The sentiment represented as a number in the range
+ *     of `-100` to `100` or an object containing `polarity` and `magnitude`
+ *     measurements in verbose mode.
+ */
+Document.formatSentiment_ = function(sentiment, verbose) {
+  // Compute the scaled values with `*` rather than `*=`: compound assignment
+  // would also overwrite `polarity` and `magnitude` on the caller's object.
+  var polarity = sentiment.polarity * 100;
+  var magnitude = sentiment.magnitude * 100;
+
+  if (!verbose) {
+    return polarity;
+  }
+
+  return {
+    polarity: polarity,
+    magnitude: magnitude
+  };
+};
+
+/**
+ * Take a raw response from the API and make it more user-friendly.
+ *
+ * @private
+ *
+ * @param {object[]} tokens - A group of syntax detections returned from the
+ *     API.
+ * @param {boolean} verbose - Enable verbose mode for more detailed results.
+ * @return {object[]} - An array of slimmed down, simplified token objects
+ *     (`text`, `partOfSpeech`, `partOfSpeechTag`), or the original token
+ *     objects in verbose mode.
+ */
+Document.formatTokens_ = function(tokens, verbose) {
+  if (!verbose) {
+    tokens = tokens.map(function(token) {
+      return {
+        text: token.text.content,
+        // Look up the entry for this tag in `Document.PART_OF_SPEECH`.
+        partOfSpeech: Document.PART_OF_SPEECH[token.partOfSpeech.tag],
+        partOfSpeechTag: token.partOfSpeech.tag
+      };
+    });
+  }
+
+  return tokens;
+};
+
+/**
+ * Comparator function to sort an array of objects by a property, in
+ * descending order (largest value first).
+ *
+ * @private
+ *
+ * @param {string} propertyName - The name of the property to sort by.
+ * @return {function} - The comparator function.
+ */
+Document.sortByProperty_ = function(propertyName) {
+  return function(entityA, entityB) {
+    // A smaller value sorts after a larger one.
+    if (entityA[propertyName] < entityB[propertyName]) {
+      return 1;
+    }
+
+    if (entityA[propertyName] > entityB[propertyName]) {
+      return -1;
+    }
+
+    return 0;
+  };
+};
+
+module.exports = Document;
+
diff --git a/packages/language/src/index.js b/packages/language/src/index.js
new file mode 100644
index 00000000000..3aab2b5f3e3
--- /dev/null
+++ b/packages/language/src/index.js
@@ -0,0 +1,432 @@
+/*!
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * @module language + */ + +'use strict'; + +var extend = require('extend'); +var is = require('is'); +var GrpcService = require('@google-cloud/common').GrpcService; +var googleProtoFiles = require('google-proto-files'); +var nodeutil = require('util'); +var util = require('@google-cloud/common').util; + +/** + * @type {module:language/document} + * @private + */ +var Document = require('./document.js'); + +var PKG = require('../package.json'); + +/** + * The [Google Cloud Natural Language](https://cloud.google.com/natural-language/docs) + * API provides natural language understanding technologies to developers, + * including sentiment analysis, entity recognition, and syntax analysis. This + * API is part of the larger Cloud Machine Learning API. + * + * The Cloud Natural Language API currently supports English for + * [sentiment analysis](https://cloud.google.com/natural-language/docs/reference/rest/v1beta1/documents/analyzeSentiment) + * and English, Spanish, and Japanese for + * [entity analysis](https://cloud.google.com/natural-language/docs/reference/rest/v1beta1/documents/analyzeEntities) + * and + * [syntax analysis](https://cloud.google.com/natural-language/docs/reference/rest/v1beta1/documents/annotateText). 
+ *
+ * @constructor
+ * @alias module:language
+ *
+ * @classdesc
+ * The object returned from `gcloud.language` gives you access to the methods
+ * that will run detections and annotations from your text.
+ *
+ * To learn more about Google Cloud Natural Language, see the official
+ * [Google Cloud Natural Language API Documentation](https://cloud.google.com/natural-language/docs).
+ *
+ * @param {object} options - [Configuration object](#/docs).
+ *
+ * @example
+ * var gcloud = require('google-cloud')({
+ *   keyFilename: '/path/to/keyfile.json',
+ *   projectId: 'grape-spaceship-123'
+ * });
+ *
+ * var language = gcloud.language();
+ */
+function Language(options) {
+  // Allow `Language(options)` to be called without `new`.
+  var calledWithNew = this instanceof Language;
+
+  if (!calledWithNew) {
+    return new Language(util.normalizeArguments(this, options));
+  }
+
+  var languageService = {
+    path: googleProtoFiles.language.v1beta1,
+    service: 'cloud.language'
+  };
+
+  var serviceConfig = {
+    baseUrl: 'language.googleapis.com',
+    service: 'language',
+    apiVersion: 'v1beta1',
+    protoServices: {
+      LanguageService: languageService
+    },
+    scopes: [
+      'https://www.googleapis.com/auth/cloud-platform'
+    ],
+    userAgent: PKG.name + '/' + PKG.version
+  };
+
+  GrpcService.call(this, serviceConfig, options);
+}
+
+nodeutil.inherits(Language, GrpcService);
+
+/**
+ * Run an annotation of a block of text.
+ *
+ * NOTE: This is a convenience method which doesn't require creating a
+ * {module:language/document} object. If you are only running a single
+ * detection, this may be more convenient. However, if you plan to run multiple
+ * detections, it's easier to create a {module:language/document} object.
+ *
+ * @resource [documents.annotateText API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText}
+ *
+ * @param {string|module:storage/file} content - Inline content or a Storage
+ *     File object.
+ * @param {object=} options - Configuration object.
+ * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`.
See
+ *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
+ * @param {string} options.language - The language of the text.
+ * @param {string} options.type - The type of document, either `html` or `text`.
+ * @param {boolean} options.verbose - Enable verbose mode for more detailed
+ *     results. Default: `false`
+ * @param {function} callback - See {module:language/document#annotate}.
+ *
+ * @example
+ * //-
+ * // See {module:language/document#annotate} for a detailed breakdown of
+ * // the arguments your callback will be executed with.
+ * //-
+ * function callback(err, annotations, apiResponse) {}
+ *
+ * language.annotate('Hello!', callback);
+ *
+ * //-
+ * // Or, provide a reference to a file hosted on Google Cloud Storage.
+ * //-
+ * var gcs = gcloud.storage();
+ * var bucket = gcs.bucket('my-bucket');
+ * var file = bucket.file('my-file');
+ *
+ * language.annotate(file, callback);
+ *
+ * //-
+ * // Specify HTML content.
+ * //-
+ * var options = {
+ *   type: 'html'
+ * };
+ *
+ * language.annotate('Hello!', options, callback);
+ *
+ * //-
+ * // Verbose mode may also be enabled for more detailed results.
+ * //-
+ * var options = {
+ *   verbose: true
+ * };
+ *
+ * language.annotate('Hello!', options, callback);
+ */
+Language.prototype.annotate = function(content, options, callback) {
+  // `options` is optional: when it is omitted the callback arrives in its
+  // position.
+  var optionsOmitted = is.fn(options);
+  var done = optionsOmitted ? options : callback;
+
+  // The same merged object both configures the document and is handed to the
+  // detection call.
+  var documentOptions = extend({}, optionsOmitted ? {} : options, {
+    content: content
+  });
+
+  this.document(documentOptions).annotate(documentOptions, done);
+};
+
+/**
+ * Detect the entities from a block of text.
+ *
+ * NOTE: This is a convenience method which doesn't require creating a
+ * {module:language/document} object. If you are only running a single
+ * detection, this may be more convenient. However, if you plan to run multiple
+ * detections, it's easier to create a {module:language/document} object.
+ * + * @resource [documents.analyzeEntities API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities} + * + * @param {string|module:storage/file} content - Inline content or a Storage + * File object. + * @param {object=} options - Configuration object. + * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See + * [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType). + * @param {string} options.language - The language of the text. + * @param {string} options.type - The type of document, either `html` or `text`. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - See {module:language/document#detectEntities}. + * + * @example + * //- + * // See {module:language/document#detectEntities} for a detailed breakdown of + * // the arguments your callback will be executed with. + * //- + * function callback(err, entities, apiResponse) {} + * + * language.detectEntities('Axel Foley is from Detroit', callback); + * + * //- + * // Or, provide a reference to a file hosted on Google Cloud Storage. + * //- + * var gcs = gcloud.storage(); + * var bucket = gcs.bucket('my-bucket'); + * var file = bucket.file('my-file'); + * + * language.detectEntities(file, callback); + * + * //- + * // Specify HTML content. + * //- + * var options = { + * type: 'html' + * }; + * + * language.detectEntities('Axel Foley is from Detroit', options, callback); + * + * //- + * // Verbose mode may also be enabled for more detailed results. 
+ * //-
+ * var options = {
+ *   verbose: true
+ * };
+ *
+ * language.detectEntities('Axel Foley is from Detroit', options, callback);
+ */
+Language.prototype.detectEntities = function(content, options, callback) {
+  // `options` is optional: when it is omitted the callback arrives in its
+  // position.
+  var optionsOmitted = is.fn(options);
+  var done = optionsOmitted ? options : callback;
+
+  // The same merged object both configures the document and is handed to the
+  // detection call.
+  var documentOptions = extend({}, optionsOmitted ? {} : options, {
+    content: content
+  });
+
+  this.document(documentOptions).detectEntities(documentOptions, done);
+};
+
+/**
+ * Detect the sentiment of a block of text.
+ *
+ * NOTE: This is a convenience method which doesn't require creating a
+ * {module:language/document} object. If you are only running a single
+ * detection, this may be more convenient. However, if you plan to run multiple
+ * detections, it's easier to create a {module:language/document} object.
+ *
+ * @resource [documents.analyzeSentiment API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeSentiment}
+ *
+ * @param {string|module:storage/file} content - Inline content or a Storage
+ *     File object.
+ * @param {object=} options - Configuration object.
+ * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
+ *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
+ * @param {string} options.language - The language of the text.
+ * @param {string} options.type - The type of document, either `html` or `text`.
+ * @param {boolean} options.verbose - Enable verbose mode for more detailed
+ *     results. Default: `false`
+ * @param {function} callback - See {module:language/document#detectSentiment}.
+ *
+ * @example
+ * //-
+ * // See {module:language/document#detectSentiment} for a detailed breakdown of
+ * // the arguments your callback will be executed with.
+ * //-
+ * function callback(err, sentiment, apiResponse) {}
+ *
+ * language.detectSentiment('Hello!', callback);
+ *
+ * //-
+ * // Or, provide a reference to a file hosted on Google Cloud Storage.
+ * //-
+ * var gcs = gcloud.storage();
+ * var bucket = gcs.bucket('my-bucket');
+ * var file = bucket.file('my-file');
+ *
+ * language.detectSentiment(file, callback);
+ *
+ * //-
+ * // Specify HTML content.
+ * //-
+ * var options = {
+ *   type: 'html'
+ * };
+ *
+ * language.detectSentiment('<h1>Document Title</h1>', options, callback);
+ *
+ * //-
+ * // Verbose mode may also be enabled for more detailed results.
+ * //-
+ * var options = {
+ *   verbose: true
+ * };
+ *
+ * language.detectSentiment('Hello!', options, callback);
+ */
+Language.prototype.detectSentiment = function(content, options, callback) {
+  // `options` is optional: when it is omitted the callback arrives in its
+  // position.
+  var optionsOmitted = is.fn(options);
+  var done = optionsOmitted ? options : callback;
+
+  // The same merged object both configures the document and is handed to the
+  // detection call.
+  var documentOptions = extend({}, optionsOmitted ? {} : options, {
+    content: content
+  });
+
+  this.document(documentOptions).detectSentiment(documentOptions, done);
+};
+
+/**
+ * Create a Document object for an unknown type. If you know the type, use the
+ * appropriate method below:
+ *
+ *   - {module:language#html} - For HTML documents.
+ *   - {module:language#text} - For text documents.
+ *
+ * @param {object|string|module:storage/file} config - Configuration object, the
+ *     inline content of the document, or a Storage File object.
+ * @param {string|module:storage/file} config.content - If using `config` as an
+ *     object to specify the encoding and/or language of the document, use this
+ *     property to pass the inline content of the document or a Storage File
+ *     object.
+ * @param {string} config.encoding - `UTF8`, `UTF16`, or `UTF32`. See
+ *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
+ * @param {string} config.language - The language of the text.
+ * @return {module:language/document}
+ *
+ * @example
+ * var document = language.document('Inline content of an unknown type.');
+ *
+ * //-
+ * // Or, provide a reference to a file hosted on Google Cloud Storage.
+ * //-
+ * var gcs = gcloud.storage();
+ * var bucket = gcs.bucket('my-bucket');
+ * var file = bucket.file('my-file');
+ *
+ * var document = language.document(file);
+ *
+ * //-
+ * // You can now run detections on the document.
+ * //
+ * // See {module:language/document} for a complete list of methods available.
+ * //-
+ * document.detectEntities(function(err, entities) {});
+ */
+Language.prototype.document = function(config) {
+  // The Document receives this Language instance as its first argument.
+  var languageClient = this;
+
+  return new Document(languageClient, config);
+};
+
+/**
+ * Create a Document object from an HTML document. You may provide either inline
+ * HTML content or a Storage File object (see {module:storage/file}).
+ *
+ * @param {string|module:storage/file} content - Inline HTML content or a
+ *     Storage File object.
+ * @param {object=} options - Configuration object.
+ * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
+ *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
+ * @param {string} options.language - The language of the text.
+ * @return {module:language/document}
+ *
+ * @example
+ * var document = language.html('<h1>Document Title</h1>');
+ *
+ * //-
+ * // Or, provide a reference to a file hosted on Google Cloud Storage.
+ * //-
+ * var gcs = gcloud.storage();
+ * var bucket = gcs.bucket('my-bucket');
+ * var file = bucket.file('my-file.html');
+ *
+ * var document = language.html(file);
+ *
+ * //-
+ * // You can now run detections on the document.
+ * //
+ * // See {module:language/document} for a complete list of methods available.
+ * //-
+ * document.detectEntities(function(err, entities) {});
+ */
+Language.prototype.html = function(content, options) {
+  // Merged last, so these values take precedence over any `type` or `content`
+  // supplied in `options`.
+  var htmlOverrides = {
+    type: 'HTML',
+    content: content
+  };
+
+  return this.document(extend({}, options, htmlOverrides));
+};
+
+/**
+ * Create a Document object from a text-based document. You may provide either
+ * inline text content or a Storage File object (see {module:storage/file}).
+ *
+ * @param {string|module:storage/file} content - Inline text content or a
+ *     Storage File object.
+ * @param {object=} options - Configuration object.
+ * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
+ *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
+ * @param {string} options.language - The language of the text.
+ * @return {module:language/document}
+ *
+ * @example
+ * var document = language.text('This is using inline text content.');
+ *
+ * //-
+ * // Or, provide a reference to a file hosted on Google Cloud Storage.
+ * //-
+ * var gcs = gcloud.storage();
+ * var bucket = gcs.bucket('my-bucket');
+ * var file = bucket.file('my-file.txt');
+ *
+ * var document = language.text(file);
+ *
+ * //-
+ * // You can now run detections on the document.
+ * //
+ * // See {module:language/document} for a complete list of methods available.
+ * //-
+ * document.detectEntities(function(err, entities) {});
+ */
+Language.prototype.text = function(content, options) {
+  // Merged last, so these values take precedence over any `type` or `content`
+  // supplied in `options`.
+  var textOverrides = {
+    type: 'PLAIN_TEXT',
+    content: content
+  };
+
+  return this.document(extend({}, options, textOverrides));
+};
+
+module.exports = Language;
diff --git a/packages/language/system-test/language.js b/packages/language/system-test/language.js
new file mode 100644
index 00000000000..969763ae63c
--- /dev/null
+++ b/packages/language/system-test/language.js
@@ -0,0 +1,463 @@
+/*!
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +var assert = require('assert'); +var is = require('is'); +var Storage = require('@google-cloud/storage'); +var through = require('through2'); +var uuid = require('node-uuid'); + +var env = require('../../../system-test/env.js'); +var Language = require('../'); + +describe('Language', function() { + var language; + + var TESTS_PREFIX = 'gcloud-tests-language-'; + + var GCS; + var BUCKET; + + var TEXT_CONTENT_SENTENCES = [ + 'Hello from stephen and dave!', + 'If you find yourself in michigan, come say hi!' + ]; + + var HTML_CONTENT = [ + '', + '