diff --git a/packages/google-cloud-language/package.json b/packages/google-cloud-language/package.json new file mode 100644 index 00000000000..4bc72b4e34d --- /dev/null +++ b/packages/google-cloud-language/package.json @@ -0,0 +1,77 @@ +{ + "name": "@google-cloud/language", + "version": "0.1.0", + "author": "Google Inc.", + "description": "Google Cloud Natural Language Client Library for Node.js", + "contributors": [ + { + "name": "Burcu Dogan", + "email": "jbd@google.com" + }, + { + "name": "Johan Euphrosine", + "email": "proppy@google.com" + }, + { + "name": "Patrick Costello", + "email": "pcostell@google.com" + }, + { + "name": "Ryan Seys", + "email": "ryan@ryanseys.com" + }, + { + "name": "Silvano Luciani", + "email": "silvano@google.com" + }, + { + "name": "Stephen Sawchuk", + "email": "sawchuk@gmail.com" + } + ], + "main": "./src/index.js", + "files": [ + "./src/*", + "AUTHORS", + "CONTRIBUTORS", + "COPYING" + ], + "repository": "googlecloudplatform/gcloud-node", + "keywords": [ + "google apis client", + "google api client", + "google apis", + "google api", + "google", + "google cloud platform", + "google cloud", + "cloud", + "google cloud natural language", + "google cloud language", + "natural language", + "language" + ], + "dependencies": { + "@google-cloud/common": "^0.1.0", + "@google-cloud/storage": "^0.1.0", + "arrify": "^1.0.1", + "extend": "^3.0.0", + "google-proto-files": "^0.4.0", + "is": "^3.0.1", + "propprop": "^0.3.1", + "string-format-obj": "^1.0.0" + }, + "devDependencies": { + "mocha": "^2.1.0", + "proxyquire": "^1.7.10" + }, + "scripts": { + "publish": "../../scripts/publish.sh", + "test": "mocha test/*.js", + "system-test": "mocha system-test/*.js --no-timeouts --bail" + }, + "license": "Apache-2.0", + "engines": { + "node": ">=0.12.0" + } +} diff --git a/packages/google-cloud-language/src/document.js b/packages/google-cloud-language/src/document.js new file mode 100644 index 00000000000..278941feb50 --- /dev/null +++ 
/*! Developer Documentation
 *
 * @param {module:language} language - The parent Language object.
 * @param {object=} config - Configuration object.
 */
/*
 * Create a Natural Language Document object. From this object, you will be able
 * to run multiple detections.
 *
 * `config` may be the content itself (an inline string or a Storage File
 * object) or an object holding the content under `content`, optionally with
 * `encoding`, `language` and `type`.
 *
 * @constructor
 * @alias module:language/document
 *
 * @example
 * var gcloud = require('google-cloud');
 *
 * var language = gcloud.language({
 *   projectId: 'grape-spaceship-123'
 * });
 *
 * var document = language.document('Google is an American company.');
 */
function Document(language, config) {
  var source = config.content || config;
  var documentOpts = {};

  // `reqOpts` is the default payload sent with every `request()` made by this
  // Document; `documentOpts` is filled in below.
  this.reqOpts = {
    document: documentOpts
  };

  if (config.encoding) {
    // Upper-case and strip spaces/dashes, e.g. `utf-8` becomes `UTF8`.
    this.reqOpts.encodingType =
      config.encoding.toUpperCase().replace(/[ -]/g, '');
  }

  if (config.language) {
    documentOpts.language = config.language;
  }

  // Plain text is the default; `text` is accepted as an alias for it.
  var docType = config.type ? config.type.toUpperCase() : 'PLAIN_TEXT';
  documentOpts.type = docType === 'TEXT' ? 'PLAIN_TEXT' : docType;

  if (source instanceof File) {
    // Storage files are sent by reference rather than inline.
    documentOpts.gcsContentUri = format('gs://{bucket}/{file}', {
      bucket: encodeURIComponent(source.bucket.id),
      file: encodeURIComponent(source.id)
    });
  } else {
    documentOpts.content = source;
  }

  this.request = language.request.bind(language);
}

/**
 * Human-readable descriptions of the part-of-speech tags, keyed by tag.
 *
 * @private
 * @type {object}
 */
Document.PART_OF_SPEECH = {
  UNKNOWN: 'Unknown',
  ADJ: 'Adjective',
  ADP: 'Adposition (preposition and postposition)',
  ADV: 'Adverb',
  CONJ: 'Conjunction',
  DET: 'Determiner',
  NOUN: 'Noun (common and proper)',
  NUM: 'Cardinal number',
  PRON: 'Pronoun',
  PRT: 'Particle or other function word',
  PUNCT: 'Punctuation',
  VERB: 'Verb (all tenses and modes)',
  X: 'Other: foreign words, typos, abbreviations',
  AFFIX: 'Affix'
};
+ * + * If you only need one type of annotation, you may appreciate one of these + * other methods from our API: + * + * - {module:language#detectEntities} + * - {module:language#detectSentiment} + * + * @resource [documents.annotateText API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText} + * + * @param {object=} options - Configuration object. + * @param {boolean} options.entities - Detect the entities from this document. + * By default, all features (`entities`, `sentiment`, and `syntax`) are + * enabled. By overriding any of these values, all defaults are switched to + * `false`. + * @param {boolean} options.sentiment - Detect the sentiment from this document. + * By default, all features (`entities`, `sentiment`, and `syntax`) are + * enabled. By overriding any of these values, all defaults are switched to + * `false`. + * @param {boolean} options.syntax - Detect the syntax from this document. By + * default, all features (`entities`, `sentiment`, and `syntax`) are + * enabled. By overriding any of these values, all defaults are switched to + * `false`. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error occurred while making this request. + * @param {object} callback.annotation - The formatted API response. + * @param {string} callback.annotation.language - The language detected from the + * text. + * @param {number} callback.annotation.sentiment - A value in the range of + * `-100` to `100`. Large numbers represent more positive sentiments. + * @param {object} callback.annotation.entities - The recognized entities from + * the text, grouped by the type of entity. + * @param {string[]} callback.annotation.entities.art - Art entities detected + * from the text. This is only present if detections of this type were + * found. 
+ * @param {string[]} callback.annotation.entities.events - Event entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.goods - Consumer good entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.organizations - Organization + * entities detected from the text. This is only present if detections of + * this type were found. + * @param {string[]} callback.annotation.entities.other - Other entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.people - People entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.places - Place entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.entities.unknown - Unknown entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.annotation.sentences - Sentences detected from the + * text. + * @param {object[]} callback.annotation.tokens - Parts of speech that were + * detected from the text. + * @param {object[]} callback.annotation.tokens.text - The piece of text + * analyzed. + * @param {object[]} callback.annotation.tokens.partOfSpeech - A description of + * the part of speech (`Noun (common and proper)`, + * `Verb (all tenses and modes)`, etc.). + * @param {object[]} callback.annotation.tokens.partOfSpeechTag - A short code + * for the type of speech (`NOUN`, `VERB`, etc.). + * @param {object} callback.apiResponse - The full API response. + * + * @example + * document.annotate(function(err, annotation) { + * if (err) { + * // Error handling omitted. 
+ * } + * + * // annotation = { + * // language: 'en', + * // sentiment: 100, + * // entities: { + * // organizations: [ + * // 'Google' + * // ], + * // places: [ + * // 'American' + * // ] + * // }, + * // sentences: [ + * // 'Google is an American multinational technology company ' + + * // 'specializing in Internet-related services and products.' + * // ], + * // tokens: [ + * // { + * // text: 'Google', + * // partOfSpeech: 'Noun (common and proper)', + * // partOfSpeechTag: 'NOUN' + * // }, + * // { + * // text: 'is', + * // partOfSpeech: 'Verb (all tenses and modes)', + * // partOfSpeechTag: 'VERB' + * // }, + * // ... + * // ] + * // } + * }); + * + * //- + * // To request only certain annotation types, provide an options object. + * //- + * var options = { + * entities: true, + * sentiment: true + * }; + * + * document.annotate(options, function(err, annotation) { + * if (err) { + * // Error handling omitted. + * } + * + * // annotation = { + * // language: 'en', + * // sentiment: 100, + * // entities: { + * // organizations: [ + * // 'Google' + * // ], + * // places: [ + * // 'American' + * // ] + * // } + * // } + * }); + * + * //- + * // Verbose mode may also be enabled for more detailed results. + * //- + * var options = { + * verbose: true + * }; + * + * document.annotate(options, function(err, annotation) { + * if (err) { + * // Error handling omitted. 
+ * } + * + * // annotation = { + * // language: 'en', + * // sentiment: { + * // polarity: 100, + * // magnitude: 40 + * // }, + * // entities: { + * // organizations: [ + * // { + * // name: 'Google', + * // type: 'ORGANIZATION', + * // metadata: { + * // wikipedia_url: 'http://en.wikipedia.org/wiki/Google' + * // }, + * // salience: 65.137446, + * // mentions: [ + * // { + * // text: { + * // content: 'Google', + * // beginOffset: -1 + * // } + * // } + * // ] + * // } + * // ], + * // places: [ + * // { + * // name: 'American', + * // type: 'LOCATION', + * // metadata: { + * // wikipedia_url: 'http://en.wikipedia.org/wiki/United_States' + * // }, + * // salience: 13.947370648384094, + * // mentions: [ + * // { + * // text: [ + * // { + * // content: 'American', + * // beginOffset: -1 + * // } + * // ] + * // } + * // ] + * // } + * // ] + * // }, + * // sentences: [ + * // { + * // content: + * // 'Google is an American multinational technology company' + + * // 'specializing in Internet-related services and products.' + * // beginOffset: -1 + * // } + * // ], + * // tokens: [ + * // { + * // text: { + * // content: 'Google', + * // beginOffset: -1 + * // }, + * // partOfSpeech: { + * // tag: 'NOUN' + * // }, + * // dependencyEdge: { + * // headTokenIndex: 1, + * // label: 'NSUBJ' + * // }, + * // lemma: 'Google' + * // }, + * // ... 
/**
 * Run an annotation of the text from the document.
 *
 * By default, all annotation types are requested (sentiment, entities and
 * syntax). Setting any of `options.sentiment`, `options.entities` or
 * `options.syntax` to `true` switches the remaining ones off.
 *
 * @resource [documents.annotateText API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText}
 *
 * @param {object=} options - Configuration object.
 * @param {boolean} options.entities - Detect the entities from this document.
 * @param {boolean} options.sentiment - Detect the sentiment from this
 *     document.
 * @param {boolean} options.syntax - Detect the syntax from this document.
 * @param {boolean} options.verbose - Enable verbose mode for more detailed
 *     results. Default: `false`
 * @param {function} callback - The callback function.
 * @param {?error} callback.err - An error occurred while making this request.
 * @param {object} callback.annotation - The formatted API response. Contains
 *     `language`, plus `sentiment` and `entities` when the response carries
 *     them, plus `sentences` and `tokens` when syntax was requested.
 * @param {object} callback.apiResponse - The full API response.
 *
 * @example
 * document.annotate({ syntax: true }, function(err, annotation) {
 *   if (err) {
 *     // Error handling omitted.
 *   }
 *
 *   // annotation.sentences and annotation.tokens are populated.
 * });
 */
Document.prototype.annotate = function(options, callback) {
  if (is.fn(options)) {
    callback = options;
    options = {};
  }

  var featuresRequested = {
    extractDocumentSentiment: options.sentiment === true,
    extractEntities: options.entities === true,
    extractSyntax: options.syntax === true
  };

  var anyFeatureRequested = Object.keys(featuresRequested)
    .some(function(featureName) {
      return featuresRequested[featureName];
    });

  // With no explicit selection, every feature is enabled.
  var features = anyFeatureRequested ? featuresRequested : {
    extractDocumentSentiment: true,
    extractEntities: true,
    extractSyntax: true
  };

  var verbose = options.verbose === true;

  var grpcOpts = {
    service: 'LanguageService',
    method: 'annotateText'
  };

  var reqOpts = extend({
    features: features
  }, this.reqOpts);

  this.request(grpcOpts, reqOpts, function(err, resp) {
    if (err) {
      callback(err, null, resp);
      return;
    }

    // Snapshot the response before the formatters touch it.
    var originalResp = extend(true, {}, resp);

    var annotation = {
      language: resp.language
    };

    if (resp.documentSentiment) {
      var sentiment = resp.documentSentiment;
      annotation.sentiment = Document.formatSentiment_(sentiment, verbose);
    }

    if (resp.entities) {
      annotation.entities = Document.formatEntities_(resp.entities, verbose);
    }

    // This prevents empty `sentences` and `tokens` arrays being returned to
    // users who never wanted sentences or tokens to begin with. The check must
    // use the `extractSyntax` key: that is the name the feature map uses.
    if (features.extractSyntax) {
      annotation.sentences = Document.formatSentences_(resp.sentences, verbose);
      annotation.tokens = Document.formatTokens_(resp.tokens, verbose);
    }

    callback(null, annotation, originalResp);
  });
};
+ * + * @resource [documents.analyzeEntities API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities} + * + * @param {object=} options - Configuration object. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error occurred while making this request. + * @param {object} callback.entities - The recognized entities from the text, + * grouped by the type of entity. + * @param {string[]} callback.entities.art - Art entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.events - Event entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.goods - Consumer good entities detected + * from the text. This is only present if detections of this type were + * found. + * @param {string[]} callback.entities.organizations - Organization entities + * detected from the text. This is only present if detections of this type + * were found. + * @param {string[]} callback.entities.other - Other entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.people - People entities detected from + * the text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.places - Place entities detected from the + * text. This is only present if detections of this type were found. + * @param {string[]} callback.entities.unknown - Unknown entities detected from + * the text. This is only present if detections of this type were found. + * @param {object} callback.apiResponse - The full API response. 
/**
 * Detect entities from the document.
 *
 * @resource [documents.analyzeEntities API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities}
 *
 * @param {object=} options - Configuration object.
 * @param {boolean} options.verbose - Enable verbose mode for more detailed
 *     results. Default: `false`
 * @param {function} callback - The callback function.
 * @param {?error} callback.err - An error occurred while making this request.
 * @param {object} callback.entities - The recognized entities from the text,
 *     grouped by the type of entity.
 * @param {object} callback.apiResponse - The full API response.
 *
 * @example
 * document.detectEntities(function(err, entities) {
 *   if (err) {
 *     // Error handling omitted.
 *   }
 *
 *   // entities = {
 *   //   organizations: [
 *   //     'Google'
 *   //   ],
 *   //   places: [
 *   //     'American'
 *   //   ]
 *   // }
 * });
 */
Document.prototype.detectEntities = function(options, callback) {
  // `options` is optional: a function in its place is the callback.
  var hasOptions = !is.fn(options);
  var done = hasOptions ? callback : options;
  var isVerbose = hasOptions && options.verbose === true;

  this.request({
    service: 'LanguageService',
    method: 'analyzeEntities'
  }, this.reqOpts, function onResponse(err, resp) {
    if (err) {
      done(err, null, resp);
      return;
    }

    // Copy the raw response first so formatting cannot alter what the caller
    // receives as `apiResponse`.
    var rawResponse = extend(true, {}, resp);
    var entitiesByGroup = Document.formatEntities_(resp.entities, isVerbose);

    done(null, entitiesByGroup, rawResponse);
  });
};
/**
 * Detect the sentiment of the text in this document.
 *
 * @resource [documents.analyzeSentiment API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeSentiment}
 *
 * @param {object=} options - Configuration object.
 * @param {boolean} options.verbose - Enable verbose mode for more detailed
 *     results. Default: `false`
 * @param {function} callback - The callback function.
 * @param {?error} callback.err - An error occurred while making this request.
 * @param {number|object} callback.sentiment - A value in the range of `-100`
 *     to `100` (large numbers represent more positive sentiments), or an
 *     object with `polarity` and `magnitude` in verbose mode.
 * @param {object} callback.apiResponse - The full API response.
 *
 * @example
 * document.detectSentiment(function(err, sentiment) {
 *   if (err) {
 *     // Error handling omitted.
 *   }
 *
 *   // sentiment = 100
 * });
 */
Document.prototype.detectSentiment = function(options, callback) {
  // `options` is optional: a function in its place is the callback.
  var hasOptions = !is.fn(options);
  var done = hasOptions ? callback : options;
  var isVerbose = hasOptions && options.verbose === true;

  this.request({
    service: 'LanguageService',
    method: 'analyzeSentiment'
  }, this.reqOpts, function onResponse(err, resp) {
    if (err) {
      done(err, null, resp);
      return;
    }

    // Copy the raw response before formatting so `apiResponse` stays
    // untouched by the formatter.
    var rawResponse = extend(true, {}, resp);
    var formatted = Document.formatSentiment_(resp.documentSentiment, isVerbose);

    done(null, formatted, rawResponse);
  });
};
/**
 * Take a raw response from the API and make it more user-friendly.
 *
 * Entities are copied (the input is not modified), their `salience` is scaled
 * by 100, and they are grouped by entity type. Each group is ordered by
 * descending salience.
 *
 * @private
 *
 * @param {object[]} entities - A group of entities returned from the API. A
 *     missing value is treated as an empty list.
 * @param {boolean} verbose - Enable verbose mode for more detailed results.
 * @return {object} - The entities grouped by type: arrays of entity names, or
 *     arrays of full entity objects in verbose mode.
 */
Document.formatEntities_ = function(entities, verbose) {
  var TYPE_TO_GROUP_NAME = {
    UNKNOWN: 'unknown',
    PERSON: 'people',
    LOCATION: 'places',
    ORGANIZATION: 'organizations',
    EVENT: 'events',
    WORK_OF_ART: 'art',
    CONSUMER_GOOD: 'goods',
    OTHER: 'other'
  };

  var groupedEntities = {};

  arrify(entities).forEach(function(entity) {
    // Work on a copy so the caller's response object is left intact.
    var formattedEntity = extend(true, {}, entity);
    formattedEntity.salience *= 100;

    var groupName = TYPE_TO_GROUP_NAME[formattedEntity.type];
    groupedEntities[groupName] = groupedEntities[groupName] || [];
    groupedEntities[groupName].push(formattedEntity);
  });

  // Sort each group once, after all of its members are known, instead of
  // re-sorting on every insertion.
  Object.keys(groupedEntities).forEach(function(groupName) {
    var group = groupedEntities[groupName];

    group.sort(Document.sortByProperty_('salience'));

    if (!verbose) {
      // Simplify the response to only include an array of `name`s.
      groupedEntities[groupName] = group.map(prop('name'));
    }
  });

  return groupedEntities;
};

/**
 * Take a raw response from the API and make it more user-friendly.
 *
 * @private
 *
 * @param {object[]} sentences - A group of sentence detections returned from
 *     the API.
 * @param {boolean} verbose - Enable verbose mode for more detailed results.
 * @return {object[]|string[]} - The sentence contents, or each sentence's
 *     `text` descriptor object in verbose mode.
 */
Document.formatSentences_ = function(sentences, verbose) {
  return sentences.map(function(sentence) {
    return verbose ? sentence.text : sentence.text.content;
  });
};
+ * @return {number|object} - The sentiment represented as a number in the range + * of `-100` to `100` or an object containing `polarity` and `magnitude` + * measurements in verbose mode. + */ +Document.formatSentiment_ = function(sentiment, verbose) { + sentiment = { + polarity: sentiment.polarity *= 100, + magnitude: sentiment.magnitude *= 100 + }; + + if (!verbose) { + sentiment = sentiment.polarity; + } + + return sentiment; +}; + +/** + * Take a raw response from the API and make it more user-friendly. + * + * @private + * + * @param {object[]} tokens - A group of syntax detections returned from the + * API. + * @param {boolean} verbose - Enable verbose mode for more detailed results. + * @return {number|object} - A slimmed down, simplified object or the original + * object in verbose mode. + */ +Document.formatTokens_ = function(tokens, verbose) { + if (!verbose) { + tokens = tokens.map(function(token) { + return { + text: token.text.content, + partOfSpeech: Document.PART_OF_SPEECH[token.partOfSpeech.tag], + partOfSpeechTag: token.partOfSpeech.tag + }; + }); + } + + return tokens; +}; + +/** + * Comparator function to sort an array of objects by a property. + * + * @private + * + * @param {string} propertyName - The name of the property to sort by. + * @return {function} - The comparator function. + */ +Document.sortByProperty_ = function(propertyName) { + return function(entityA, entityB) { + if (entityA[propertyName] < entityB[propertyName]) { + return 1; + } + + if (entityA[propertyName] > entityB[propertyName]) { + return -1; + } + + return 0; + }; +}; + +module.exports = Document; + diff --git a/packages/google-cloud-language/src/index.js b/packages/google-cloud-language/src/index.js new file mode 100644 index 00000000000..3aab2b5f3e3 --- /dev/null +++ b/packages/google-cloud-language/src/index.js @@ -0,0 +1,432 @@ +/*! + * Copyright 2016 Google Inc. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * @module language + */ + +'use strict'; + +var extend = require('extend'); +var is = require('is'); +var GrpcService = require('@google-cloud/common').GrpcService; +var googleProtoFiles = require('google-proto-files'); +var nodeutil = require('util'); +var util = require('@google-cloud/common').util; + +/** + * @type {module:language/document} + * @private + */ +var Document = require('./document.js'); + +var PKG = require('../package.json'); + +/** + * The [Google Cloud Natural Language](https://cloud.google.com/natural-language/docs) + * API provides natural language understanding technologies to developers, + * including sentiment analysis, entity recognition, and syntax analysis. This + * API is part of the larger Cloud Machine Learning API. + * + * The Cloud Natural Language API currently supports English for + * [sentiment analysis](https://cloud.google.com/natural-language/docs/reference/rest/v1beta1/documents/analyzeSentiment) + * and English, Spanish, and Japanese for + * [entity analysis](https://cloud.google.com/natural-language/docs/reference/rest/v1beta1/documents/analyzeEntities) + * and + * [syntax analysis](https://cloud.google.com/natural-language/docs/reference/rest/v1beta1/documents/annotateText). 
/**
 * The [Google Cloud Natural Language](https://cloud.google.com/natural-language/docs)
 * API provides natural language understanding technologies to developers,
 * including sentiment analysis, entity recognition, and syntax analysis.
 *
 * @constructor
 * @alias module:language
 *
 * @classdesc
 * The object returned from `gcloud.language` gives you access to the methods
 * that will run detections and annotations from your text.
 *
 * To learn more about Google Cloud Natural Language, see the official
 * [Google Cloud Natural Language API Documentation](https://cloud.google.com/natural-language/docs).
 *
 * @param {object} options - [Configuration object](#/docs).
 *
 * @example
 * var gcloud = require('google-cloud')({
 *   keyFilename: '/path/to/keyfile.json',
 *   projectId: 'grape-spaceship-123'
 * });
 *
 * var language = gcloud.language();
 */
function Language(options) {
  var calledWithNew = this instanceof Language;

  if (!calledWithNew) {
    // Invoked as a plain function: normalize the arguments against the
    // calling context and construct a proper instance.
    return new Language(util.normalizeArguments(this, options));
  }

  var languageService = {
    path: googleProtoFiles.language.v1beta1,
    service: 'cloud.language'
  };

  GrpcService.call(this, {
    baseUrl: 'language.googleapis.com',
    service: 'language',
    apiVersion: 'v1beta1',
    protoServices: {
      LanguageService: languageService
    },
    scopes: [
      'https://www.googleapis.com/auth/cloud-platform'
    ],
    userAgent: PKG.name + '/' + PKG.version
  }, options);
}

nodeutil.inherits(Language, GrpcService);
/**
 * Run an annotation of a block of text.
 *
 * NOTE: This is a convenience method which doesn't require creating a
 * {module:language/document} object. It builds a document from `content` and
 * `options` and calls its `annotate` method with the same options.
 *
 * @resource [documents.annotateText API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText}
 *
 * @param {string|module:storage/file} content - Inline content or a Storage
 *     File object.
 * @param {object=} options - Configuration object.
 * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
 *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
 * @param {string} options.language - The language of the text.
 * @param {string} options.type - The type of document, either `html` or `text`.
 * @param {boolean} options.verbose - Enable verbose mode for more detailed
 *     results. Default: `false`
 * @param {function} callback - See {module:language/document#annotate}.
 *
 * @example
 * function callback(err, annotation, apiResponse) {}
 *
 * language.annotate('Hello!', callback);
 *
 * //-
 * // Specify HTML content and enable verbose results.
 * //-
 * var options = {
 *   type: 'html',
 *   verbose: true
 * };
 *
 * language.annotate('Hello!', options, callback);
 */
Language.prototype.annotate = function(content, options, callback) {
  // `options` is optional: a function in its place is the callback.
  var hasOptions = !is.fn(options);
  var done = hasOptions ? callback : options;

  // The explicit `content` argument always wins over `options.content`.
  var documentOptions = extend({}, hasOptions ? options : {}, {
    content: content
  });

  this.document(documentOptions).annotate(documentOptions, done);
};
+ * + * @resource [documents.analyzeEntities API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities} + * + * @param {string|module:storage/file} content - Inline content or a Storage + * File object. + * @param {object=} options - Configuration object. + * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See + * [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType). + * @param {string} options.language - The language of the text. + * @param {string} options.type - The type of document, either `html` or `text`. + * @param {boolean} options.verbose - Enable verbose mode for more detailed + * results. Default: `false` + * @param {function} callback - See {module:language/document#detectEntities}. + * + * @example + * //- + * // See {module:language/document#detectEntities} for a detailed breakdown of + * // the arguments your callback will be executed with. + * //- + * function callback(err, entities, apiResponse) {} + * + * language.detectEntities('Axel Foley is from Detroit', callback); + * + * //- + * // Or, provide a reference to a file hosted on Google Cloud Storage. + * //- + * var gcs = gcloud.storage(); + * var bucket = gcs.bucket('my-bucket'); + * var file = bucket.file('my-file'); + * + * language.detectEntities(file, callback); + * + * //- + * // Specify HTML content. + * //- + * var options = { + * type: 'html' + * }; + * + * language.detectEntities('Axel Foley is from Detroit', options, callback); + * + * //- + * // Verbose mode may also be enabled for more detailed results. 
/**
 * Detect the entities from a block of text.
 *
 * NOTE: This is a convenience method which doesn't require creating a
 * {module:language/document} object. It builds a document from `content` and
 * `options` and calls its `detectEntities` method with the same options.
 *
 * @resource [documents.analyzeEntities API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities}
 *
 * @param {string|module:storage/file} content - Inline content or a Storage
 *     File object.
 * @param {object=} options - Configuration object.
 * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
 *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
 * @param {string} options.language - The language of the text.
 * @param {string} options.type - The type of document, either `html` or `text`.
 * @param {boolean} options.verbose - Enable verbose mode for more detailed
 *     results. Default: `false`
 * @param {function} callback - See {module:language/document#detectEntities}.
 *
 * @example
 * function callback(err, entities, apiResponse) {}
 *
 * language.detectEntities('Axel Foley is from Detroit', callback);
 *
 * //-
 * // Verbose mode may also be enabled for more detailed results.
 * //-
 * var options = {
 *   verbose: true
 * };
 *
 * language.detectEntities('Axel Foley is from Detroit', options, callback);
 */
Language.prototype.detectEntities = function(content, options, callback) {
  // `options` is optional: a function in its place is the callback.
  var hasOptions = !is.fn(options);
  var done = hasOptions ? callback : options;

  // The explicit `content` argument always wins over `options.content`.
  var documentOptions = extend({}, hasOptions ? options : {}, {
    content: content
  });

  this.document(documentOptions).detectEntities(documentOptions, done);
};
/**
 * Run sentiment detection on a block of text without first creating a
 * {module:language/document} object.
 *
 * This is shorthand for building a document from `content` and `options` and
 * then calling its `detectSentiment` method. If you need several detections
 * for the same content, create the document once and reuse it instead.
 *
 * @resource [documents.analyzeSentiment API Documentation]{@link https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeSentiment}
 *
 * @param {string|module:storage/file} content - Inline content or a Storage
 *     File object.
 * @param {object=} options - Configuration object. May be omitted, in which
 *     case the second argument is used as the callback.
 * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
 *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
 * @param {string} options.language - The language of the text.
 * @param {string} options.type - The type of document, either `html` or `text`.
 * @param {boolean} options.verbose - Enable verbose mode for more detailed
 *     results. Default: `false`
 * @param {function} callback - See {module:language/document#detectSentiment}.
 *
 * @example
 * function callback(err, sentiment, apiResponse) {}
 *
 * language.detectSentiment('Hello!', callback);
 *
 * //-
 * // A file hosted on Google Cloud Storage may be used in place of inline
 * // content.
 * //-
 * var gcs = gcloud.storage();
 * var file = gcs.bucket('my-bucket').file('my-file');
 *
 * language.detectSentiment(file, callback);
 *
 * //-
 * // Options are forwarded to the document and to the detection call.
 * //-
 * language.detectSentiment('<h1>Document Title</h1>', {
 *   type: 'html',
 *   verbose: true
 * }, callback);
 */
Language.prototype.detectSentiment = function(content, options, callback) {
  var userOptions = options;
  var onSentiment = callback;

  // Support the `(content, callback)` call form.
  if (is.fn(userOptions)) {
    onSentiment = userOptions;
    userOptions = {};
  }

  // Work on a copy so the caller's options object is left untouched.
  var documentConfig = extend({}, userOptions, { content: content });
  var doc = this.document(documentConfig);

  doc.detectSentiment(documentConfig, onSentiment);
};
/**
 * Create a Document object for content of an unknown type. When the type is
 * known, prefer one of the typed factories:
 *
 *   - {module:language#html} - For HTML documents.
 *   - {module:language#text} - For text documents.
 *
 * @param {object|string|module:storage/file} config - Configuration object, the
 *     inline content of the document, or a Storage File object.
 * @param {string|module:storage/file} config.content - When `config` is an
 *     object, the inline content of the document or a Storage File object.
 * @param {string} config.encoding - `UTF8`, `UTF16`, or `UTF32`. See
 *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
 * @param {string} config.language - The language of the text.
 * @return {module:language/document}
 *
 * @example
 * var document = language.document('Inline content of an unknown type.');
 *
 * //-
 * // A file hosted on Google Cloud Storage may be used in place of inline
 * // content.
 * //-
 * var gcs = gcloud.storage();
 * var file = gcs.bucket('my-bucket').file('my-file');
 *
 * var document = language.document(file);
 *
 * //-
 * // See {module:language/document} for the detections that can be run.
 * //-
 * document.detectEntities(function(err, entities) {});
 */
Language.prototype.document = function(config) {
  var language = this;

  return new Document(language, config);
};

/**
 * Create a Document object from an HTML document, given either as inline HTML
 * or as a Storage File object (see {module:storage/file}).
 *
 * The `type` and `content` values set here take precedence over any values of
 * the same name in `options`.
 *
 * @param {string|module:storage/file} content - Inline HTML content or a
 *     Storage File object.
 * @param {object=} options - Configuration object.
 * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
 *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
 * @param {string} options.language - The language of the text.
 * @return {module:language/document}
 *
 * @example
 * var document = language.html('<h1>Document Title</h1>');
 *
 * //-
 * // A file hosted on Google Cloud Storage may be used in place of inline
 * // content.
 * //-
 * var gcs = gcloud.storage();
 * var file = gcs.bucket('my-bucket').file('my-file.html');
 *
 * var document = language.html(file);
 *
 * //-
 * // See {module:language/document} for the detections that can be run.
 * //-
 * document.detectEntities(function(err, entities) {});
 */
Language.prototype.html = function(content, options) {
  // Copy the options so the caller's object is not modified.
  var htmlConfig = extend({}, options, {
    type: 'HTML',
    content: content
  });

  return this.document(htmlConfig);
};
/**
 * Create a Document object from a text-based document, given either as inline
 * text or as a Storage File object (see {module:storage/file}).
 *
 * The `type` and `content` values set here take precedence over any values of
 * the same name in `options`.
 *
 * @param {string|module:storage/file} content - Inline text content or a
 *     Storage File object.
 * @param {object=} options - Configuration object.
 * @param {string} options.encoding - `UTF8`, `UTF16`, or `UTF32`. See
 *     [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1beta1/EncodingType).
 * @param {string} options.language - The language of the text.
 * @return {module:language/document}
 *
 * @example
 * var document = language.text('This is using inline text content.');
 *
 * //-
 * // A file hosted on Google Cloud Storage may be used in place of inline
 * // content.
 * //-
 * var gcs = gcloud.storage();
 * var file = gcs.bucket('my-bucket').file('my-file.txt');
 *
 * var document = language.text(file);
 *
 * //-
 * // See {module:language/document} for the detections that can be run.
 * //-
 * document.detectEntities(function(err, entities) {});
 */
Language.prototype.text = function(content, options) {
  // Copy the options so the caller's object is not modified.
  var textConfig = extend({}, options, {
    type: 'PLAIN_TEXT',
    content: content
  });

  return this.document(textConfig);
};

module.exports = Language;
'use strict';

var assert = require('assert');
var is = require('is');
var Storage = require('@google-cloud/storage');
var through = require('through2');
var uuid = require('node-uuid');

var env = require('../../../system-test/env.js');
var Language = require('../');

// System tests for the Language client. The same annotation, entity and
// sentiment checks are run for HTML, text and untyped documents, each one
// supplied both inline and as a file stored in a Cloud Storage bucket.
describe('Language', function() {
  var language;

  // Every bucket and file created by this suite starts with this prefix; the
  // `after` hook relies on it to find the buckets to delete.
  var TESTS_PREFIX = 'gcloud-tests-language-';

  var GCS;
  var BUCKET;

  var TEXT_CONTENT_SENTENCES = [
    'Hello from stephen and dave!',
    'If you find yourself in michigan, come say hi!'
  ];

  // NOTE(review): this fixture is used with `type: 'html'`, yet the first and
  // last entries are empty strings and the sentences carry no markup. Confirm
  // that HTML tags were not lost from these literals.
  var HTML_CONTENT = [
    '',
    ' ' + TEXT_CONTENT_SENTENCES[0] + '',
    ' ' + TEXT_CONTENT_SENTENCES[1] + '',
    ''
  ].join('\n');

  var TEXT_CONTENT = TEXT_CONTENT_SENTENCES.join(' ');

  // Create the client instances and the bucket used by the "GCS file" cases.
  before(function(done) {
    language = new Language(env);
    GCS = new Storage(env);
    BUCKET = GCS.bucket(generateName('bucket'));

    BUCKET.create(done);
  });

  // Empty and delete every bucket whose name starts with TESTS_PREFIX.
  after(function(done) {
    GCS.getBuckets({ prefix: TESTS_PREFIX })
      .on('error', done)
      .pipe(through.obj(function(bucket, _, next) {
        bucket.deleteFiles({ force: true }, function(err) {
          if (err) {
            next(err);
            return;
          }

          bucket.delete(next);
        });
      }))
      .on('error', done)
      .on('finish', done);
  });

  // Test matrix. Each entry describes one document type (`vars`) and the ways
  // of obtaining a document for it (`describes`). `getDocument` is invoked with
  // the outer entry as `this`, which is why it can read `this.vars`.
  var DESCRIBES = [
    {
      name: 'HTML',

      vars: {
        content: HTML_CONTENT,
        type: 'html'
      },

      describes: [
        {
          name: 'inline',

          getDocument: function(callback) {
            callback(null, language.html(this.vars.content));
          }
        },

        {
          name: 'GCS file',

          getDocument: function(callback) {
            createFile(this.vars.content, function(err, file) {
              if (err) {
                callback(err);
                return;
              }

              callback(null, language.html(file));
            });
          }
        }
      ]
    },

    {
      name: 'Text',

      vars: {
        content: TEXT_CONTENT,
        type: 'text'
      },

      describes: [
        {
          name: 'inline',

          getDocument: function(callback) {
            callback(null, language.text(this.vars.content));
          }
        },

        {
          name: 'GCS file',

          getDocument: function(callback) {
            createFile(this.vars.content, function(err, file) {
              if (err) {
                callback(err);
                return;
              }

              callback(null, language.text(file));
            });
          }
        }
      ]
    },

    {
      // No `type` is given here, so the "without creating a document" tests
      // below call the convenience methods without an options argument.
      name: 'Unknown',

      vars: {
        content: TEXT_CONTENT
      },

      describes: [
        {
          name: 'inline',

          getDocument: function(callback) {
            callback(null, language.document(this.vars.content));
          }
        },

        {
          name: 'GCS file',

          getDocument: function(callback) {
            createFile(this.vars.content, function(err, file) {
              if (err) {
                callback(err);
                return;
              }

              callback(null, language.document(file));
            });
          }
        }
      ]
    }
  ];

  // Generate one nested `describe` per document type and per document source.
  DESCRIBES.forEach(function(describeObj) {
    var CONTENT = describeObj.vars.content;
    var CONTENT_TYPE = describeObj.vars.type;

    describe(describeObj.name, function() {
      var innerDescribes = describeObj.describes;

      innerDescribes.forEach(function(innerDescribeObj) {
        // Document under test, assigned by the `before` hook below.
        var DOC;

        describe(innerDescribeObj.name, function() {
          before(function(done) {
            var getDocument = innerDescribeObj.getDocument;

            // Bind `this` to the outer entry so `this.vars` resolves.
            getDocument.call(describeObj, function(err, doc) {
              if (err) {
                done(err);
                return;
              }

              DOC = doc;
              done();
            });
          });

          describe('annotation', function() {
            it('should work without creating a document', function(done) {
              if (!CONTENT_TYPE) {
                language.annotate(CONTENT, validateAnnotationSimple(done));
                return;
              }

              language.annotate(
                CONTENT,
                { type: CONTENT_TYPE },
                validateAnnotationSimple(done)
              );
            });

            it('should return the correct simplified response', function(done) {
              DOC.annotate(validateAnnotationSimple(done));
            });

            it('should support verbose mode', function(done) {
              DOC.annotate({ verbose: true }, validateAnnotationVerbose(done));
            });

            it('should return only a single feature', function(done) {
              DOC.annotate({
                entities: true
              }, validateAnnotationSingleFeatureSimple(done));
            });

            it('should return a single feat in verbose mode', function(done) {
              DOC.annotate({
                entities: true,
                verbose: true
              }, validateAnnotationSingleFeatureVerbose(done));
            });
          });

          describe('entities', function() {
            it('should work without creating a document', function(done) {
              if (!CONTENT_TYPE) {
                language.detectEntities(CONTENT, validateEntitiesSimple(done));
                return;
              }

              language.detectEntities(
                CONTENT,
                { type: CONTENT_TYPE },
                validateEntitiesSimple(done)
              );
            });

            it('should return the correct simplified response', function(done) {
              DOC.detectEntities(validateEntitiesSimple(done));
            });

            it('should support verbose mode', function(done) {
              DOC.detectEntities({
                verbose: true
              }, validateEntitiesVerbose(done));
            });
          });

          describe('sentiment', function() {
            it('should work without creating a document', function(done) {
              if (!CONTENT_TYPE) {
                language.detectSentiment(
                  CONTENT,
                  validateSentimentSimple(done)
                );
                return;
              }

              language.detectSentiment(
                CONTENT,
                { type: CONTENT_TYPE },
                validateSentimentSimple(done)
              );
            });

            it('should return the correct simplified response', function(done) {
              DOC.detectSentiment(validateSentimentSimple(done));
            });

            it('should support verbose mode', function(done) {
              DOC.detectSentiment({
                verbose: true
              }, validateSentimentVerbose(done));
            });
          });
        });
      });
    });
  });

  // Build a resource name: TESTS_PREFIX, the resource type, a dash, and the
  // first 10 characters of a v4 UUID.
  function generateName(resourceType) {
    var id = uuid.v4().substr(0, 10);
    return TESTS_PREFIX + resourceType + '-' + id;
  }

  // Save `content` to a newly named file in BUCKET and pass the File object to
  // `callback(err, file)`.
  function createFile(content, callback) {
    var file = BUCKET.file(generateName('file'));

    file.save(content, function(err) {
      if (err) {
        callback(err);
        return;
      }

      callback(null, file);
    });
  }

  // Each validate* helper below returns an API callback that asserts on the
  // response and then calls `callback` (mocha's `done`) with no arguments.

  // Full annotation, non-verbose.
  function validateAnnotationSimple(callback) {
    return function(err, annotation, apiResponse) {
      assert.ifError(err);

      assert.strictEqual(annotation.language, 'en');

      assert(is.number(annotation.sentiment));

      assert.deepEqual(annotation.entities, {
        people: ['stephen', 'dave'],
        places: ['michigan']
      });

      assert.deepEqual(annotation.sentences, TEXT_CONTENT_SENTENCES);

      assert(is.array(annotation.tokens));
      assert.deepEqual(annotation.tokens[0], {
        text: 'Hello',
        partOfSpeech: 'Other: foreign words, typos, abbreviations',
        partOfSpeechTag: 'X'
      });

      assert(is.object(apiResponse));

      callback();
    };
  }

  // Full annotation, verbose: values are objects rather than simplified
  // primitives.
  function validateAnnotationVerbose(callback) {
    return function(err, annotation, apiResponse) {
      assert.ifError(err);

      assert.strictEqual(annotation.language, 'en');

      assert(is.object(annotation.sentiment));

      assert(is.array(annotation.entities.people));
      assert.strictEqual(annotation.entities.people.length, 2);
      assert(is.object(annotation.entities.people[0]));

      assert(is.array(annotation.sentences));
      assert(is.object(annotation.sentences[0]));

      assert(is.array(annotation.tokens));
      assert(is.object(annotation.tokens[0]));
      assert.strictEqual(annotation.tokens[0].text.content, 'Hello');

      assert(is.object(apiResponse));

      callback();
    };
  }

  // Entities-only annotation, non-verbose: the other features must be absent.
  function validateAnnotationSingleFeatureSimple(callback) {
    return function(err, annotation, apiResponse) {
      assert.ifError(err);

      assert.strictEqual(annotation.language, 'en');

      assert.deepEqual(annotation.entities, {
        people: ['stephen', 'dave'],
        places: ['michigan']
      });

      assert.strictEqual(annotation.sentences, undefined);
      assert.strictEqual(annotation.sentiment, undefined);
      assert.strictEqual(annotation.tokens, undefined);

      assert(is.object(apiResponse));

      callback();
    };
  }

  // Entities-only annotation, verbose: the other features must be absent.
  function validateAnnotationSingleFeatureVerbose(callback) {
    return function(err, annotation, apiResponse) {
      assert.ifError(err);

      assert.strictEqual(annotation.language, 'en');

      assert(is.array(annotation.entities.people));
      assert.strictEqual(annotation.entities.people.length, 2);
      assert(is.object(annotation.entities.people[0]));

      assert.strictEqual(annotation.sentences, undefined);
      assert.strictEqual(annotation.sentiment, undefined);
      assert.strictEqual(annotation.tokens, undefined);

      assert(is.object(apiResponse));

      callback();
    };
  }

  // detectEntities, non-verbose.
  function validateEntitiesSimple(callback) {
    return function(err, entities, apiResponse) {
      assert.ifError(err);

      assert.deepEqual(entities, {
        people: ['stephen', 'dave'],
        places: ['michigan']
      });

      assert(is.object(apiResponse));

      callback();
    };
  }

  // detectEntities, verbose.
  function validateEntitiesVerbose(callback) {
    return function(err, entities, apiResponse) {
      assert.ifError(err);

      assert(is.array(entities.people));
      assert.strictEqual(entities.people.length, 2);
      assert(is.object(entities.people[0]));

      assert(is.object(apiResponse));

      callback();
    };
  }

  // detectSentiment, non-verbose: the result is a single number.
  function validateSentimentSimple(callback) {
    return function(err, sentiment, apiResponse) {
      assert.ifError(err);

      assert(is.number(sentiment));

      assert(is.object(apiResponse));

      callback();
    };
  }

  // detectSentiment, verbose: the result is an object with numeric `polarity`
  // and `magnitude`.
  function validateSentimentVerbose(callback) {
    return function(err, sentiment, apiResponse) {
      assert.ifError(err);

      assert(is.object(sentiment));
      assert(is.number(sentiment.polarity));
      assert(is.number(sentiment.magnitude));

      assert(is.object(apiResponse));

      callback();
    };
  }
});
'use strict';

var assert = require('assert');
var extend = require('extend');
var prop = require('propprop');
var proxyquire = require('proxyquire');
var util = require('@google-cloud/common').util;

// Stand-in for the Storage `File` class, injected through proxyquire below.
function FakeFile() {}

// Unit tests for the Document class. API calls are intercepted by replacing
// `document.request`, and the static formatters are stubbed where a test only
// needs to observe how they are called.
describe('Document', function() {
  // Copy of Document's properties taken right after loading; used to undo the
  // stubs that individual tests assign onto `Document`.
  var DocumentCache;
  var Document;
  var document;

  var LANGUAGE = {
    request: util.noop
  };
  var CONFIG = 'inline content';

  before(function() {
    Document = proxyquire('../src/document.js', {
      '@google-cloud/storage': {
        File: FakeFile
      }
    });

    DocumentCache = extend(true, {}, Document);
  });

  // Restore every cached property on Document, then build a fresh instance.
  beforeEach(function() {
    for (var property in DocumentCache) {
      if (DocumentCache.hasOwnProperty(property)) {
        Document[property] = DocumentCache[property];
      }
    }

    document = new Document(LANGUAGE, CONFIG);
  });

  describe('instantiation', function() {
    it('should set the correct reqOpts for inline content', function() {
      assert.deepEqual(document.reqOpts, {
        document: {
          content: CONFIG,
          type: 'PLAIN_TEXT'
        }
      });
    });

    it('should set the correct reqOpts for content with encoding', function() {
      var document = new Document(LANGUAGE, {
        content: CONFIG,
        encoding: 'utf-8'
      });

      assert.deepEqual(document.reqOpts, {
        document: {
          content: CONFIG,
          type: 'PLAIN_TEXT'
        },
        encodingType: 'UTF8'
      });
    });

    it('should set the correct reqOpts for content with language', function() {
      var document = new Document(LANGUAGE, {
        content: CONFIG,
        language: 'EN'
      });

      assert.deepEqual(document.reqOpts, {
        document: {
          content: CONFIG,
          type: 'PLAIN_TEXT',
          language: 'EN'
        }
      });
    });

    it('should set the correct reqOpts for content with type', function() {
      var document = new Document(LANGUAGE, {
        content: CONFIG,
        type: 'html'
      });

      assert.deepEqual(document.reqOpts, {
        document: {
          content: CONFIG,
          type: 'HTML'
        }
      });
    });

    it('should set the correct reqOpts for text', function() {
      var document = new Document(LANGUAGE, {
        content: CONFIG,
        type: 'text'
      });

      assert.deepEqual(document.reqOpts, {
        document: {
          content: CONFIG,
          type: 'PLAIN_TEXT'
        }
      });
    });

    it('should set the GCS content URI from a File', function() {
      var file = new FakeFile();

      // Leave spaces in to check that it is URI-encoded:
      file.bucket = { id: 'bucket id' };
      file.id = 'file name';

      var document = new Document(LANGUAGE, {
        content: file
      });

      assert.deepEqual(document.reqOpts, {
        document: {
          gcsContentUri: [
            'gs://',
            encodeURIComponent(file.bucket.id),
            '/',
            encodeURIComponent(file.id),
          ].join(''),
          type: 'PLAIN_TEXT'
        }
      });
    });

    it('should create a request function', function(done) {
      // The request must be invoked with the Language instance as `this`.
      var LanguageInstance = {
        request: function() {
          assert.strictEqual(this, LanguageInstance);
          done();
        }
      };

      var document = new Document(LanguageInstance, CONFIG);
      document.request();
    });
  });

  describe('PART_OF_SPEECH', function() {
    var expectedPartOfSpeech = {
      UNKNOWN: 'Unknown',
      ADJ: 'Adjective',
      ADP: 'Adposition (preposition and postposition)',
      ADV: 'Adverb',
      CONJ: 'Conjunction',
      DET: 'Determiner',
      NOUN: 'Noun (common and proper)',
      NUM: 'Cardinal number',
      PRON: 'Pronoun',
      PRT: 'Particle or other function word',
      PUNCT: 'Punctuation',
      VERB: 'Verb (all tenses and modes)',
      X: 'Other: foreign words, typos, abbreviations',
      AFFIX: 'Affix'
    };

    it('should define the correct parts of speech', function() {
      assert.deepEqual(Document.PART_OF_SPEECH, expectedPartOfSpeech);
    });
  });

  describe('annotate', function() {
    it('should make the correct API request', function(done) {
      document.request = function(grpcOpts, reqOpts) {
        assert.deepEqual(grpcOpts, {
          service: 'LanguageService',
          method: 'annotateText'
        });

        // With no options, all three features are expected to be requested.
        assert.deepEqual(reqOpts, extend(
          {
            features: {
              extractDocumentSentiment: true,
              extractEntities: true,
              extractSyntax: true
            }
          },
          document.reqOpts
        ));

        done();
      };

      document.annotate(assert.ifError);
    });

    it('should allow specifying individual features', function(done) {
      document.request = function(grpcOpts, reqOpts) {
        assert.deepEqual(reqOpts.features, {
          extractDocumentSentiment: false,
          extractEntities: true,
          extractSyntax: true
        });

        done();
      };

      document.annotate({
        entities: true,
        syntax: true
      }, assert.ifError);
    });

    describe('error', function() {
      var apiResponse = {};
      var error = new Error('Error.');

      beforeEach(function() {
        document.request = function(grpcOpts, reqOpts, callback) {
          callback(error, apiResponse);
        };
      });

      it('should exec callback with error and API response', function(done) {
        document.annotate(function(err, annotation, apiResponse_) {
          assert.strictEqual(err, error);
          assert.strictEqual(annotation, null);
          assert.strictEqual(apiResponse_, apiResponse);
          done();
        });
      });
    });

    describe('success', function() {
      // Canned API responses, one per feature combination under test.
      var apiResponses = {
        default: {
          language: 'EN',
          sentences: [],
          tokens: []
        },

        withSentiment: {
          documentSentiment: {}
        },

        withEntities: {
          entities: []
        },

        withSyntax: {
          sentences: {},
          tokens: {}
        }
      };

      apiResponses.withAll = extend(
        {},
        apiResponses.default,
        apiResponses.withSentiment,
        apiResponses.withEntities,
        apiResponses.withSyntax
      );

      // Build a `request` stub that succeeds with the given response.
      function createRequestWithResponse(apiResponse) {
        return function(grpcOpts, reqOpts, callback) {
          callback(null, apiResponse, apiResponse);
        };
      }

      // Neutralize the formatters; individual tests override the ones they
      // want to observe.
      beforeEach(function() {
        Document.formatSentiment_ = util.noop;
        Document.formatEntities_ = util.noop;
        Document.formatSentences_ = util.noop;
        Document.formatTokens_ = util.noop;
      });

      it('should always return the language', function(done) {
        var apiResponse = apiResponses.default;
        document.request = createRequestWithResponse(apiResponse);

        document.annotate(function(err, annotation, apiResponse_) {
          assert.ifError(err);
          assert.strictEqual(annotation.language, apiResponse.language);
          assert.deepEqual(apiResponse_, apiResponse);
          done();
        });
      });

      it('should return syntax when no features are requested', function(done) {
        var apiResponse = apiResponses.default;
        document.request = createRequestWithResponse(apiResponse);

        var formattedSentences = [];
        Document.formatSentences_ = function(sentences, verbose) {
          assert.strictEqual(sentences, apiResponse.sentences);
          assert.strictEqual(verbose, false);
          return formattedSentences;
        };

        var formattedTokens = [];
        Document.formatTokens_ = function(tokens, verbose) {
          assert.strictEqual(tokens, apiResponse.tokens);
          assert.strictEqual(verbose, false);
          return formattedTokens;
        };

        document.annotate(function(err, annotation, apiResponse_) {
          assert.ifError(err);
          assert.strictEqual(annotation.sentences, formattedSentences);
          assert.strictEqual(annotation.tokens, formattedTokens);
          assert.deepEqual(apiResponse_, apiResponse);
          done();
        });
      });

      it('should return the formatted sentiment if available', function(done) {
        var apiResponse = apiResponses.withSentiment;
        document.request = createRequestWithResponse(apiResponse);

        var formattedSentiment = {};
        Document.formatSentiment_ = function(sentiment, verbose) {
          assert.strictEqual(sentiment, apiResponse.documentSentiment);
          assert.strictEqual(verbose, false);
          return formattedSentiment;
        };

        document.annotate(function(err, annotation, apiResponse_) {
          assert.ifError(err);

          assert.strictEqual(annotation.language, apiResponse.language);
          assert.strictEqual(annotation.sentiment, formattedSentiment);

          assert.deepEqual(apiResponse_, apiResponse);

          done();
        });
      });

      it('should return the formatted entities if available', function(done) {
        var apiResponse = apiResponses.withEntities;
        document.request = createRequestWithResponse(apiResponse);

        var formattedEntities = [];
        Document.formatEntities_ = function(entities, verbose) {
          assert.strictEqual(entities, apiResponse.entities);
          assert.strictEqual(verbose, false);
          return formattedEntities;
        };

        document.annotate(function(err, annotation, apiResponse_) {
          assert.ifError(err);

          assert.strictEqual(annotation.language, apiResponse.language);
          assert.strictEqual(annotation.entities, formattedEntities);

          assert.deepEqual(apiResponse_, apiResponse);

          done();
        });
      });

      it('should not return syntax analyses when not wanted', function(done) {
        var apiResponse = apiResponses.default;
        document.request = createRequestWithResponse(apiResponse);

        document.annotate({
          entities: true,
          sentiment: true
        }, function(err, annotation) {
          assert.ifError(err);

          assert.strictEqual(annotation.sentences, undefined);
          assert.strictEqual(annotation.tokens, undefined);

          done();
        });
      });

      it('should allow verbose mode', function(done) {
        var apiResponse = apiResponses.withAll;
        document.request = createRequestWithResponse(apiResponse);

        // Counts formatter calls that received `verbose === true`; the four
        // stubbed formatters below are each expected to contribute one.
        var numCallsWithCorrectVerbosityArgument = 0;

        function incrementVerbosityVar(_, verbose) {
          if (verbose === true) {
            numCallsWithCorrectVerbosityArgument++;
          }
        }

        Document.formatSentiment_ = incrementVerbosityVar;
        Document.formatEntities_ = incrementVerbosityVar;
        Document.formatSentences_ = incrementVerbosityVar;
        Document.formatTokens_ = incrementVerbosityVar;

        document.annotate({
          verbose: true
        }, function(err) {
          assert.ifError(err);

          assert.strictEqual(numCallsWithCorrectVerbosityArgument, 4);

          done();
        });
      });
    });
  });

  describe('detectEntities', function() {
    it('should make the correct API request', function(done) {
      document.request = function(grpcOpts, reqOpts) {
        assert.deepEqual(grpcOpts, {
          service: 'LanguageService',
          method: 'analyzeEntities'
        });

        // Same object reference expected, not a copy.
        assert.strictEqual(reqOpts, document.reqOpts);

        done();
      };

      document.detectEntities(assert.ifError);
    });

    describe('error', function() {
      var apiResponse = {};
      var error = new Error('Error.');

      beforeEach(function() {
        document.request = function(grpcOpts, reqOpts, callback) {
          callback(error, apiResponse);
        };
      });

      it('should exec callback with error and API response', function(done) {
        document.detectEntities(function(err, entities, apiResponse_) {
          assert.strictEqual(err, error);
          assert.strictEqual(entities, null);
          assert.strictEqual(apiResponse_, apiResponse);
          done();
        });
      });
    });

    describe('success', function() {
      var apiResponse = {
        entities: []
      };

      // Shallow snapshot used to check the response content handed back.
      var originalApiResponse = extend({}, apiResponse);

      beforeEach(function() {
        document.request = function(grpcOpts, reqOpts, callback) {
          callback(null, apiResponse);
        };
      });

      it('should format the entities', function(done) {
        var formattedEntities = {};

        Document.formatEntities_ = function(entities, verbose) {
          assert.strictEqual(entities, apiResponse.entities);
          assert.strictEqual(verbose, false);
          return formattedEntities;
        };

        document.detectEntities(function(err, entities) {
          assert.ifError(err);
          assert.strictEqual(entities, formattedEntities);
          done();
        });
      });

      it('should clone the response object', function(done) {
        document.detectEntities(function(err, entities, apiResponse_) {
          assert.ifError(err);
          // Equal content, but a different object than the one from request().
          assert.notStrictEqual(apiResponse_, apiResponse);
          assert.deepEqual(apiResponse_, originalApiResponse);
          done();
        });
      });

      it('should allow verbose mode', function(done) {
        Document.formatEntities_ = function(entities, verbose) {
          assert.strictEqual(verbose, true);
          done();
        };

        document.detectEntities({
          verbose: true
        }, assert.ifError);
      });
    });
  });

  describe('detectSentiment', function() {
    it('should make the correct API request', function(done) {
      document.request = function(grpcOpts, reqOpts) {
        assert.deepEqual(grpcOpts, {
          service: 'LanguageService',
          method: 'analyzeSentiment'
        });

        // Same object reference expected, not a copy.
        assert.strictEqual(reqOpts, document.reqOpts);

        done();
      };

      document.detectSentiment(assert.ifError);
    });

    describe('error', function() {
      var apiResponse = {};
      var error = new Error('Error.');

      beforeEach(function() {
        document.request = function(grpcOpts, reqOpts, callback) {
          callback(error, apiResponse);
        };
      });

      it('should exec callback with error and API response', function(done) {
        document.detectSentiment(function(err, sentiment, apiResponse_) {
          assert.strictEqual(err, error);
          assert.strictEqual(sentiment, null);
          assert.strictEqual(apiResponse_, apiResponse);
          done();
        });
      });
    });

    describe('success', function() {
      var apiResponse = {
        documentSentiment: {}
      };

      // Shallow snapshot used to check the response content handed back.
      var originalApiResponse = extend({}, apiResponse);

      beforeEach(function() {
        Document.formatSentiment_ = util.noop;

        document.request = function(grpcOpts, reqOpts, callback) {
          callback(null, apiResponse);
        };
      });

      it('should format the sentiment', function(done) {
        var formattedSentiment = {};

        Document.formatSentiment_ = function(sentiment, verbose) {
          assert.strictEqual(sentiment, apiResponse.documentSentiment);
          assert.strictEqual(verbose, false);
          return formattedSentiment;
        };

        document.detectSentiment(function(err, sentiment) {
          assert.ifError(err);
          assert.strictEqual(sentiment, formattedSentiment);
          done();
        });
      });

      it('should clone the response object', function(done) {
        document.detectSentiment(function(err, sentiment, apiResponse_) {
          assert.ifError(err);
          // Equal content, but a different object than the one from request().
          assert.notStrictEqual(apiResponse_, apiResponse);
          assert.deepEqual(apiResponse_, originalApiResponse);
          done();
        });
      });

      it('should allow verbose mode', function(done) {
        Document.formatSentiment_ = function(sentiment, verbose) {
          assert.strictEqual(verbose, true);
          done();
        };

        document.detectSentiment({
          verbose: true
        }, assert.ifError);
      });
    });
  });

  describe('formatEntities_', function() {
    // Two entities per type, listed with the lower salience first so that the
    // expected output (higher salience first) differs from the input order.
    var ENTITIES = [
      { type: 'UNKNOWN', salience: -1, name: 'second' },
      { type: 'UNKNOWN', salience: 1, name: 'first' },

      { type: 'PERSON', salience: -1, name: 'second' },
      { type: 'PERSON', salience: 1, name: 'first' },

      { type: 'LOCATION', salience: -1, name: 'second' },
      { type: 'LOCATION', salience: 1, name: 'first' },

      { type: 'ORGANIZATION', salience: -1, name: 'second' },
      { type: 'ORGANIZATION', salience: 1, name: 'first' },

      { type: 'EVENT', salience: -1, name: 'second' },
      { type: 'EVENT', salience: 1, name: 'first' },

      { type: 'WORK_OF_ART', salience: -1, name: 'second' },
      { type: 'WORK_OF_ART', salience: 1, name: 'first' },

      { type: 'CONSUMER_GOOD', salience: -1, name: 'second' },
      { type: 'CONSUMER_GOOD', salience: 1, name: 'first' },

      { type: 'OTHER', salience: -1, name: 'second' },
      { type: 'OTHER', salience: 1, name: 'first' }
    ];

    var VERBOSE = false;

    // Deep copy, so scaling the salience below does not modify ENTITIES.
    var entitiesCopy = extend(true, {}, ENTITIES);
    var FORMATTED_ENTITIES = {
      unknown: [ entitiesCopy[1], entitiesCopy[0] ],
      people: [ entitiesCopy[3], entitiesCopy[2] ],
      places: [ entitiesCopy[5], entitiesCopy[4] ],
      organizations: [ entitiesCopy[7], entitiesCopy[6] ],
      events: [ entitiesCopy[9], entitiesCopy[8] ],
      art: [ entitiesCopy[11], entitiesCopy[10] ],
      goods: [ entitiesCopy[13], entitiesCopy[12] ],
      other: [ entitiesCopy[15], entitiesCopy[14] ],
    };

    // The expected salience is the input value multiplied by 100.
    for (var entityType in FORMATTED_ENTITIES) {
      FORMATTED_ENTITIES[entityType] = FORMATTED_ENTITIES[entityType]
        .map(function(entity) {
          entity.salience *= 100;
          return entity;
        });
    }

    var EXPECTED_FORMATTED_ENTITIES = {
      default: extend(true, {}, FORMATTED_ENTITIES),
      verbose: extend(true, {}, FORMATTED_ENTITIES)
    };

    for (var entityGroupType in EXPECTED_FORMATTED_ENTITIES.default) {
      // Only the `name` property is returned by default:
      EXPECTED_FORMATTED_ENTITIES.default[entityGroupType] =
        EXPECTED_FORMATTED_ENTITIES.default[entityGroupType].map(prop('name'));
    }

    it('should group and sort entities correctly', function() {
      var formattedEntities = Document.formatEntities_(ENTITIES, VERBOSE);

      // NOTE(review): this stub is assigned after `formatEntities_` has
      // already returned, so its assertion cannot run during that call.
      // Confirm whether it was meant to be installed before the call.
      Document.sortByProperty_ = function(propertyName) {
        assert.strictEqual(propertyName, 'salience');
        return function() { return -1; };
      };

      assert.deepEqual(formattedEntities, EXPECTED_FORMATTED_ENTITIES.default);
    });

    it('should group and sort entities correctly in verbose mode', function() {
      var formattedEntities = Document.formatEntities_(ENTITIES, true);

      // NOTE(review): as in the previous test, this stub is assigned only
      // after the call under test has returned.
      Document.sortByProperty_ = function(propertyName) {
        assert.strictEqual(propertyName, 'salience');
        return function() { return -1; };
      };

      assert.deepEqual(formattedEntities, EXPECTED_FORMATTED_ENTITIES.verbose);
    });
  });

  describe('formatSentences_', function() {
    var SENTENCES = [
      {
        text: {
          content: 'Sentence text',
          property: 'value'
        }
      },
      {
        text: {
          content: 'Another sentence',
          property: 'value'
        }
      }
    ];

    var VERBOSE = false;

    // Default mode expects the `text.content` strings; verbose mode expects
    // the whole `text` objects.
    var EXPECTED_FORMATTED_SENTENCES = {
      default: SENTENCES.map(prop('text')).map(prop('content')),
      verbose: SENTENCES.map(prop('text'))
    };

    it('should correctly format sentences', function() {
      var formattedSentences = Document.formatSentences_(SENTENCES, VERBOSE);

      assert.deepEqual(
        formattedSentences,
        EXPECTED_FORMATTED_SENTENCES.default
      );
    });

    it('should correctly format sentences in verbose mode', function() {
      var formattedSentences = Document.formatSentences_(SENTENCES, true);

      assert.deepEqual(
        formattedSentences,
        EXPECTED_FORMATTED_SENTENCES.verbose
      );
    });
  });

  describe('formatSentiment_', function() {
    var SENTIMENT = {
      polarity: -0.5,
      magnitude: 0.5
    };

    var VERBOSE = false;

    // Default mode expects only the polarity, multiplied by 100; verbose mode
    // expects both values, each multiplied by 100.
    var EXPECTED_FORMATTED_SENTIMENT = {
      default: SENTIMENT.polarity * 100,
      verbose: {
        polarity: SENTIMENT.polarity * 100,
        magnitude: SENTIMENT.magnitude * 100
      }
    };

    it('should format the sentiment correctly', function() {
      // Pass a copy so the shared SENTIMENT fixture is not handed to the
      // formatter directly.
      var sentiment = extend({}, SENTIMENT);
      var formattedSentiment = Document.formatSentiment_(sentiment, VERBOSE);

      assert.deepEqual(
        formattedSentiment,
        EXPECTED_FORMATTED_SENTIMENT.default
      );
    });

    it('should format the sentiment correctly in verbose mode', function() {
      var sentiment = extend({}, SENTIMENT);
      var formattedSentiment = Document.formatSentiment_(sentiment, true);

      assert.deepEqual(
        formattedSentiment,
        EXPECTED_FORMATTED_SENTIMENT.verbose
      );
    });
  });

  describe('formatTokens_', function() {
    var TOKENS = [
      {
        text: {
          content: 'Text content'
        },
        partOfSpeech: {
          tag: 'PART_OF_SPEECH_TAG'
        },
        property: 'value'
      }
    ];

    var VERBOSE = false;

    // Replacement lookup table installed on Document for these tests.
    var PART_OF_SPEECH = {
      PART_OF_SPEECH_TAG: 'part of speech value'
    };

    // Default mode expects a simplified token; verbose mode expects the
    // tokens as given.
    var EXPECTED_FORMATTED_TOKENS = {
      default: TOKENS.map(function(token) {
        return {
          text: token.text.content,
          partOfSpeech: PART_OF_SPEECH.PART_OF_SPEECH_TAG,
          partOfSpeechTag: 'PART_OF_SPEECH_TAG'
        };
      }),

      verbose: TOKENS
    };

    beforeEach(function() {
      Document.PART_OF_SPEECH = PART_OF_SPEECH;
    });

    it('should correctly format tokens', function() {
      var formattedTokens = Document.formatTokens_(TOKENS, VERBOSE);

      assert.deepEqual(formattedTokens, EXPECTED_FORMATTED_TOKENS.default);
    });

    it('should correctly format tokens in verbose mode', function() {
      var formattedTokens = Document.formatTokens_(TOKENS, true);

      assert.deepEqual(formattedTokens, EXPECTED_FORMATTED_TOKENS.verbose);
    });
  });

  describe('sortByProperty_', function() {
    var sortFn;

    beforeEach(function() {
      sortFn = Document.sortByProperty_('sortedProperty');
    });

    it('should sort by a property name', function() {
      // The comparator is expected to order larger property values first.
      assert.strictEqual(
        sortFn({ sortedProperty: 0 }, { sortedProperty: 1 }),
        1
      );

      assert.strictEqual(
        sortFn({ sortedProperty: 1 }, { sortedProperty: -1 }),
        -1
      );

      assert.strictEqual(
        sortFn({ sortedProperty: 0 }, { sortedProperty: 0 }),
        0
      );
    });
  });
});
b/packages/google-cloud-language/test/index.js new file mode 100644 index 00000000000..13a9af7d227 --- /dev/null +++ b/packages/google-cloud-language/test/index.js @@ -0,0 +1,303 @@ +/** + * Copyright 2016 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +var assert = require('assert'); +var extend = require('extend'); +var googleProtoFiles = require('google-proto-files'); +var proxyquire = require('proxyquire'); +var util = require('@google-cloud/common').util; + +var PKG = require('../package.json'); + +var fakeUtil = extend(true, {}, util); + +function FakeDocument() { + this.calledWith_ = arguments; +} + +function FakeGrpcService() { + this.calledWith_ = arguments; +} + +describe('Language', function() { + var Language; + var language; + + var OPTIONS = {}; + + before(function() { + Language = proxyquire('../src/index.js', { + '@google-cloud/common': { + util: fakeUtil, + GrpcService: FakeGrpcService + }, + './document.js': FakeDocument + }); + }); + + beforeEach(function() { + language = new Language(OPTIONS); + }); + + describe('instantiation', function() { + it('should normalize the arguments', function() { + var options = { + projectId: 'project-id', + credentials: 'credentials', + email: 'email', + keyFilename: 'keyFile' + }; + + var normalizeArguments = fakeUtil.normalizeArguments; + var normalizeArgumentsCalled = false; + var fakeContext = {}; + + fakeUtil.normalizeArguments = 
function(context, options_) { + normalizeArgumentsCalled = true; + assert.strictEqual(context, fakeContext); + assert.strictEqual(options, options_); + return options_; + }; + + Language.call(fakeContext, options); + assert(normalizeArgumentsCalled); + + fakeUtil.normalizeArguments = normalizeArguments; + }); + + it('should inherit from GrpcService', function() { + assert(language instanceof FakeGrpcService); + + var calledWith = language.calledWith_[0]; + + assert.deepEqual(calledWith, { + baseUrl: 'language.googleapis.com', + service: 'language', + apiVersion: 'v1beta1', + protoServices: { + LanguageService: { + path: googleProtoFiles.language.v1beta1, + service: 'cloud.language' + } + }, + scopes: [ + 'https://www.googleapis.com/auth/cloud-platform' + ], + userAgent: PKG.name + '/' + PKG.version + }); + }); + }); + + describe('annotate', function() { + var CONTENT = '...'; + var OPTIONS = { + property: 'value' + }; + + var EXPECTED_OPTIONS = { + withCustomOptions: extend({}, OPTIONS, { + content: CONTENT + }), + + withoutCustomOptions: extend({}, { + content: CONTENT + }) + }; + + it('should call annotate on a Document', function(done) { + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS.withCustomOptions); + + return { + annotate: function(options, callback) { + assert.deepEqual(options, EXPECTED_OPTIONS.withCustomOptions); + callback(); // done() + } + }; + }; + + language.annotate(CONTENT, OPTIONS, done); + }); + + it('should not require options', function(done) { + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS.withoutCustomOptions); + + return { + annotate: function(options, callback) { + assert.deepEqual(options, EXPECTED_OPTIONS.withoutCustomOptions); + callback(); // done() + } + }; + }; + + language.annotate(CONTENT, done); + }); + }); + + describe('detectEntities', function() { + var CONTENT = '...'; + var OPTIONS = { + property: 'value' + }; + + var EXPECTED_OPTIONS = { + 
withCustomOptions: extend({}, OPTIONS, { + content: CONTENT + }), + + withoutCustomOptions: extend({}, { + content: CONTENT + }) + }; + + it('should call detectEntities on a Document', function(done) { + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS.withCustomOptions); + + return { + detectEntities: function(options, callback) { + assert.deepEqual(options, EXPECTED_OPTIONS.withCustomOptions); + callback(); // done() + } + }; + }; + + language.detectEntities(CONTENT, OPTIONS, done); + }); + + it('should not require options', function(done) { + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS.withoutCustomOptions); + + return { + detectEntities: function(options, callback) { + assert.deepEqual(options, EXPECTED_OPTIONS.withoutCustomOptions); + callback(); // done() + } + }; + }; + + language.detectEntities(CONTENT, done); + }); + }); + + describe('detectSentiment', function() { + var CONTENT = '...'; + var OPTIONS = { + property: 'value' + }; + + var EXPECTED_OPTIONS = { + withCustomOptions: extend({}, OPTIONS, { + content: CONTENT + }), + + withoutCustomOptions: extend({}, { + content: CONTENT + }) + }; + + it('should call detectSentiment on a Document', function(done) { + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS.withCustomOptions); + + return { + detectSentiment: function(options, callback) { + assert.deepEqual(options, EXPECTED_OPTIONS.withCustomOptions); + callback(); // done() + } + }; + }; + + language.detectSentiment(CONTENT, OPTIONS, done); + }); + + it('should not require options', function(done) { + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS.withoutCustomOptions); + + return { + detectSentiment: function(options, callback) { + assert.deepEqual(options, EXPECTED_OPTIONS.withoutCustomOptions); + callback(); // done() + } + }; + }; + + language.detectSentiment(CONTENT, done); + }); + }); + + 
describe('document', function() { + var CONFIG = {}; + + it('should create a Document', function() { + var document = language.document(CONFIG); + + assert.strictEqual(document.calledWith_[0], language); + assert.strictEqual(document.calledWith_[1], CONFIG); + }); + }); + + describe('html', function() { + var CONTENT = '...'; + var OPTIONS = { + property: 'value' + }; + + var EXPECTED_OPTIONS = extend({}, OPTIONS, { + type: 'HTML', + content: CONTENT + }); + + it('should create a Document', function() { + var document = {}; + + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS); + return document; + }; + + assert.strictEqual(language.html(CONTENT, OPTIONS), document); + }); + }); + + describe('text', function() { + var CONTENT = '...'; + var OPTIONS = { + property: 'value' + }; + + var EXPECTED_OPTIONS = extend({}, OPTIONS, { + type: 'PLAIN_TEXT', + content: CONTENT + }); + + it('should create a Document', function() { + var document = {}; + + language.document = function(options) { + assert.deepEqual(options, EXPECTED_OPTIONS); + return document; + }; + + assert.strictEqual(language.text(CONTENT, OPTIONS), document); + }); + }); +});