diff --git a/test/language/document.js b/test/language/document.js
index e69de29bb2d1..6903212ea7a3 100644
--- a/test/language/document.js
+++ b/test/language/document.js
@@ -0,0 +1,843 @@
+/**
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+'use strict';
+
+var assert = require('assert');
+var extend = require('extend');
+var mockery = require('mockery-next');
+var prop = require('propprop');
+
+var util = require('../../lib/common/util.js');
+
+// Stand-in for lib/storage/file.js, registered with mockery in before().
+function FakeFile() {}
+
+// Unit tests for lib/language/document.js. The storage File module is swapped
+// for FakeFile through mockery, and the static properties of Document are
+// restored before every test so that individual tests can stub them freely.
+describe('Document', function() {
+  var DocumentCache;
+  var Document;
+  var document;
+
+  var LANGUAGE = {
+    request: util.noop
+  };
+  var CONFIG = 'inline content';
+
+  before(function() {
+    mockery.registerMock('../../lib/storage/file.js', FakeFile);
+
+    mockery.enable({
+      useCleanCache: true,
+      warnOnUnregistered: false
+    });
+
+    Document = require('../../lib/language/document.js');
+    DocumentCache = extend(true, {}, Document);
+  });
+
+  after(function() {
+    mockery.deregisterAll();
+    mockery.disable();
+  });
+
+  beforeEach(function() {
+    // Put back any static Document property that a previous test replaced.
+    for (var property in DocumentCache) {
+      if (DocumentCache.hasOwnProperty(property)) {
+        Document[property] = DocumentCache[property];
+      }
+    }
+
+    document = new Document(LANGUAGE, CONFIG);
+  });
+
+  describe('instantiation', function() {
+    it('should set the correct reqOpts for inline content', function() {
+      assert.deepEqual(document.reqOpts, {
+        document: {
+          content: CONFIG,
+          type: 'PLAIN_TEXT'
+        }
+      });
+    });
+
+    it('should set the correct reqOpts for content with encoding', function() {
+      var document = new Document(LANGUAGE, {
+        content: CONFIG,
+        encoding: 'utf-8'
+      });
+
+      assert.deepEqual(document.reqOpts, {
+        document: {
+          content: CONFIG,
+          type: 'PLAIN_TEXT'
+        },
+        encodingType: 'UTF8'
+      });
+    });
+
+    it('should set the correct reqOpts for content with language', function() {
+      var document = new Document(LANGUAGE, {
+        content: CONFIG,
+        language: 'EN'
+      });
+
+      assert.deepEqual(document.reqOpts, {
+        document: {
+          content: CONFIG,
+          type: 'PLAIN_TEXT',
+          language: 'EN'
+        }
+      });
+    });
+
+    it('should set the correct reqOpts for content with type', function() {
+      var document = new Document(LANGUAGE, {
+        content: CONFIG,
+        type: 'html'
+      });
+
+      assert.deepEqual(document.reqOpts, {
+        document: {
+          content: CONFIG,
+          type: 'HTML'
+        }
+      });
+    });
+
+    it('should set the correct reqOpts for text', function() {
+      var document = new Document(LANGUAGE, {
+        content: CONFIG,
+        type: 'text'
+      });
+
+      assert.deepEqual(document.reqOpts, {
+        document: {
+          content: CONFIG,
+          type: 'PLAIN_TEXT'
+        }
+      });
+    });
+
+    it('should set the GCS content URI from a File', function() {
+      var file = new FakeFile();
+
+      // Leave spaces in to check that it is URI-encoded:
+      file.bucket = { id: 'bucket id' };
+      file.id = 'file name';
+
+      var document = new Document(LANGUAGE, {
+        content: file
+      });
+
+      assert.deepEqual(document.reqOpts, {
+        document: {
+          gcsContentUri: [
+            'gs://',
+            encodeURIComponent(file.bucket.id),
+            '/',
+            encodeURIComponent(file.id),
+          ].join(''),
+          type: 'PLAIN_TEXT'
+        }
+      });
+    });
+
+    it('should create a request function', function(done) {
+      var LanguageInstance = {
+        request: function() {
+          assert.strictEqual(this, LanguageInstance);
+          done();
+        }
+      };
+
+      var document = new Document(LanguageInstance, CONFIG);
+      document.request();
+    });
+  });
+
+  describe('PART_OF_SPEECH', function() {
+    var expectedPartOfSpeech = {
+      UNKNOWN: 'Unknown',
+      ADJ: 'Adjective',
+      ADP: 'Adposition (preposition and postposition)',
+      ADV: 'Adverb',
+      CONJ: 'Conjunction',
+      DET: 'Determiner',
+      NOUN: 'Noun (common and proper)',
+      NUM: 'Cardinal number',
+      PRON: 'Pronoun',
+      PRT: 'Particle or other function word',
+      PUNCT: 'Punctuation',
+      VERB: 'Verb (all tenses and modes)',
+      X: 'Other: foreign words, typos, abbreviations',
+      AFFIX: 'Affix'
+    };
+
+    it('should define the correct parts of speech', function() {
+      assert.deepEqual(Document.PART_OF_SPEECH, expectedPartOfSpeech);
+    });
+  });
+
+  describe('annotate', function() {
+    it('should make the correct API request', function(done) {
+      document.request = function(grpcOpts, reqOpts) {
+        assert.deepEqual(grpcOpts, {
+          service: 'LanguageService',
+          method: 'annotateText'
+        });
+
+        assert.deepEqual(reqOpts, extend(
+          {
+            features: {
+              extractDocumentSentiment: true,
+              extractEntities: true,
+              extractSyntax: true
+            }
+          },
+          document.reqOpts
+        ));
+
+        done();
+      };
+
+      document.annotate(assert.ifError);
+    });
+
+    it('should allow specifying individual features', function(done) {
+      document.request = function(grpcOpts, reqOpts) {
+        assert.deepEqual(reqOpts.features, {
+          extractDocumentSentiment: false,
+          extractEntities: true,
+          extractSyntax: true
+        });
+
+        done();
+      };
+
+      document.annotate({
+        entities: true,
+        syntax: true
+      }, assert.ifError);
+    });
+
+    describe('error', function() {
+      var apiResponse = {};
+      var error = new Error('Error.');
+
+      beforeEach(function() {
+        document.request = function(grpcOpts, reqOpts, callback) {
+          callback(error, apiResponse);
+        };
+      });
+
+      it('should exec callback with error and API response', function(done) {
+        document.annotate(function(err, annotation, apiResponse_) {
+          assert.strictEqual(err, error);
+          assert.strictEqual(annotation, null);
+          assert.strictEqual(apiResponse_, apiResponse);
+          done();
+        });
+      });
+    });
+
+    describe('success', function() {
+      var apiResponses = {
+        default: {
+          language: 'EN',
+          sentences: [],
+          tokens: []
+        },
+
+        withSentiment: {
+          documentSentiment: {}
+        },
+
+        withEntities: {
+          entities: []
+        },
+
+        withSyntax: {
+          sentences: {},
+          tokens: {}
+        }
+      };
+
+      apiResponses.withAll = extend(
+        {},
+        apiResponses.default,
+        apiResponses.withSentiment,
+        apiResponses.withEntities,
+        apiResponses.withSyntax
+      );
+
+      function createRequestWithResponse(apiResponse) {
+        return function(grpcOpts, reqOpts, callback) {
+          callback(null, apiResponse, apiResponse);
+        };
+      }
+
+      beforeEach(function() {
+        Document.formatSentiment_ = util.noop;
+        Document.formatEntities_ = util.noop;
+        Document.formatSentences_ = util.noop;
+        Document.formatTokens_ = util.noop;
+      });
+
+      it('should return the language and syntax by default', function(done) {
+        var apiResponse = apiResponses.default;
+        document.request = createRequestWithResponse(apiResponse);
+
+        var formattedSentences = [];
+        Document.formatSentences_ = function(sentences, verbose) {
+          assert.strictEqual(sentences, apiResponse.sentences);
+          assert.strictEqual(verbose, false);
+          return formattedSentences;
+        };
+
+        var formattedTokens = [];
+        Document.formatTokens_ = function(tokens, verbose) {
+          assert.strictEqual(tokens, apiResponse.tokens);
+          assert.strictEqual(verbose, false);
+          return formattedTokens;
+        };
+
+        document.annotate(function(err, annotation, apiResponse_) {
+          assert.ifError(err);
+
+          assert.strictEqual(annotation.language, apiResponse.language);
+          assert.strictEqual(annotation.sentences, formattedSentences);
+          assert.strictEqual(annotation.tokens, formattedTokens);
+
+          assert.deepEqual(apiResponse_, apiResponse);
+
+          done();
+        });
+      });
+
+      it('should return the formatted sentiment if available', function(done) {
+        var apiResponse = apiResponses.withSentiment;
+        document.request = createRequestWithResponse(apiResponse);
+
+        var formattedSentiment = {};
+        Document.formatSentiment_ = function(sentiment, verbose) {
+          assert.strictEqual(sentiment, apiResponse.documentSentiment);
+          assert.strictEqual(verbose, false);
+          return formattedSentiment;
+        };
+
+        document.annotate(function(err, annotation, apiResponse_) {
+          assert.ifError(err);
+
+          assert.strictEqual(annotation.language, apiResponse.language);
+          assert.strictEqual(annotation.sentiment, formattedSentiment);
+
+          assert.deepEqual(apiResponse_, apiResponse);
+
+          done();
+        });
+      });
+
+      it('should return the formatted entities if available', function(done) {
+        var apiResponse = apiResponses.withEntities;
+        document.request = createRequestWithResponse(apiResponse);
+
+        var formattedEntities = [];
+        Document.formatEntities_ = function(entities, verbose) {
+          assert.strictEqual(entities, apiResponse.entities);
+          assert.strictEqual(verbose, false);
+          return formattedEntities;
+        };
+
+        document.annotate(function(err, annotation, apiResponse_) {
+          assert.ifError(err);
+
+          assert.strictEqual(annotation.language, apiResponse.language);
+          assert.strictEqual(annotation.entities, formattedEntities);
+
+          assert.deepEqual(apiResponse_, apiResponse);
+
+          done();
+        });
+      });
+
+      it('should not return syntax analyses when not wanted', function(done) {
+        var apiResponse = apiResponses.default;
+        document.request = createRequestWithResponse(apiResponse);
+
+        document.annotate({
+          entities: true,
+          sentiment: true
+        }, function(err, annotation) {
+          assert.ifError(err);
+
+          assert.strictEqual(annotation.sentences, undefined);
+          assert.strictEqual(annotation.tokens, undefined);
+
+          done();
+        });
+      });
+
+      it('should allow verbose mode', function(done) {
+        var apiResponse = apiResponses.withAll;
+        document.request = createRequestWithResponse(apiResponse);
+
+        var numCallsWithCorrectVerbosityArgument = 0;
+
+        function incrementVerbosityVar(_, verbose) {
+          if (verbose === true) {
+            numCallsWithCorrectVerbosityArgument++;
+          }
+        }
+
+        Document.formatSentiment_ = incrementVerbosityVar;
+        Document.formatEntities_ = incrementVerbosityVar;
+        Document.formatSentences_ = incrementVerbosityVar;
+        Document.formatTokens_ = incrementVerbosityVar;
+
+        document.annotate({
+          verbose: true
+        }, function(err) {
+          assert.ifError(err);
+
+          assert.strictEqual(numCallsWithCorrectVerbosityArgument, 4);
+
+          done();
+        });
+      });
+    });
+  });
+
+  describe('detectEntities', function() {
+    it('should make the correct API request', function(done) {
+      document.request = function(grpcOpts, reqOpts) {
+        assert.deepEqual(grpcOpts, {
+          service: 'LanguageService',
+          method: 'analyzeEntities'
+        });
+
+        assert.strictEqual(reqOpts, document.reqOpts);
+
+        done();
+      };
+
+      document.detectEntities(assert.ifError);
+    });
+
+    describe('error', function() {
+      var apiResponse = {};
+      var error = new Error('Error.');
+
+      beforeEach(function() {
+        document.request = function(grpcOpts, reqOpts, callback) {
+          callback(error, apiResponse);
+        };
+      });
+
+      it('should exec callback with error and API response', function(done) {
+        document.detectEntities(function(err, entities, apiResponse_) {
+          assert.strictEqual(err, error);
+          assert.strictEqual(entities, null);
+          assert.strictEqual(apiResponse_, apiResponse);
+          done();
+        });
+      });
+    });
+
+    describe('success', function() {
+      var apiResponse = {
+        entities: []
+      };
+
+      var originalApiResponse = extend({}, apiResponse);
+
+      beforeEach(function() {
+        document.request = function(grpcOpts, reqOpts, callback) {
+          callback(null, apiResponse);
+        };
+      });
+
+      it('should format the entities', function(done) {
+        var formattedEntities = {};
+
+        Document.formatEntities_ = function(entities, verbose) {
+          assert.strictEqual(entities, apiResponse.entities);
+          assert.strictEqual(verbose, false);
+          return formattedEntities;
+        };
+
+        document.detectEntities(function(err, entities) {
+          assert.ifError(err);
+          assert.strictEqual(entities, formattedEntities);
+          done();
+        });
+      });
+
+      it('should clone the response object', function(done) {
+        document.detectEntities(function(err, entities, apiResponse_) {
+          assert.ifError(err);
+          assert.notStrictEqual(apiResponse_, apiResponse);
+          assert.deepEqual(apiResponse_, originalApiResponse);
+          done();
+        });
+      });
+
+      it('should allow verbose mode', function(done) {
+        Document.formatEntities_ = function(entities, verbose) {
+          assert.strictEqual(verbose, true);
+          done();
+        };
+
+        document.detectEntities({
+          verbose: true
+        }, assert.ifError);
+      });
+    });
+  });
+
+  describe('detectSentiment', function() {
+    it('should make the correct API request', function(done) {
+      document.request = function(grpcOpts, reqOpts) {
+        assert.deepEqual(grpcOpts, {
+          service: 'LanguageService',
+          method: 'analyzeSentiment'
+        });
+
+        assert.strictEqual(reqOpts, document.reqOpts);
+
+        done();
+      };
+
+      document.detectSentiment(assert.ifError);
+    });
+
+    describe('error', function() {
+      var apiResponse = {};
+      var error = new Error('Error.');
+
+      beforeEach(function() {
+        document.request = function(grpcOpts, reqOpts, callback) {
+          callback(error, apiResponse);
+        };
+      });
+
+      it('should exec callback with error and API response', function(done) {
+        document.detectSentiment(function(err, sentiment, apiResponse_) {
+          assert.strictEqual(err, error);
+          assert.strictEqual(sentiment, null);
+          assert.strictEqual(apiResponse_, apiResponse);
+          done();
+        });
+      });
+    });
+
+    describe('success', function() {
+      var apiResponse = {
+        documentSentiment: {}
+      };
+
+      var originalApiResponse = extend({}, apiResponse);
+
+      beforeEach(function() {
+        Document.formatSentiment_ = util.noop;
+
+        document.request = function(grpcOpts, reqOpts, callback) {
+          callback(null, apiResponse);
+        };
+      });
+
+      it('should format the sentiment', function(done) {
+        var formattedSentiment = {};
+
+        Document.formatSentiment_ = function(sentiment, verbose) {
+          assert.strictEqual(sentiment, apiResponse.documentSentiment);
+          assert.strictEqual(verbose, false);
+          return formattedSentiment;
+        };
+
+        document.detectSentiment(function(err, sentiment) {
+          assert.ifError(err);
+          assert.strictEqual(sentiment, formattedSentiment);
+          done();
+        });
+      });
+
+      it('should clone the response object', function(done) {
+        document.detectSentiment(function(err, sentiment, apiResponse_) {
+          assert.ifError(err);
+          assert.notStrictEqual(apiResponse_, apiResponse);
+          assert.deepEqual(apiResponse_, originalApiResponse);
+          done();
+        });
+      });
+
+      it('should allow verbose mode', function(done) {
+        Document.formatSentiment_ = function(sentiment, verbose) {
+          assert.strictEqual(verbose, true);
+          done();
+        };
+
+        document.detectSentiment({
+          verbose: true
+        }, assert.ifError);
+      });
+    });
+  });
+
+  describe('formatEntities_', function() {
+    var ENTITIES = [
+      { type: 'UNKNOWN', salience: -1, name: 'second' },
+      { type: 'UNKNOWN', salience: 1, name: 'first' },
+
+      { type: 'PERSON', salience: -1, name: 'second' },
+      { type: 'PERSON', salience: 1, name: 'first' },
+
+      { type: 'LOCATION', salience: -1, name: 'second' },
+      { type: 'LOCATION', salience: 1, name: 'first' },
+
+      { type: 'ORGANIZATION', salience: -1, name: 'second' },
+      { type: 'ORGANIZATION', salience: 1, name: 'first' },
+
+      { type: 'EVENT', salience: -1, name: 'second' },
+      { type: 'EVENT', salience: 1, name: 'first' },
+
+      { type: 'WORK_OF_ART', salience: -1, name: 'second' },
+      { type: 'WORK_OF_ART', salience: 1, name: 'first' },
+
+      { type: 'CONSUMER_GOOD', salience: -1, name: 'second' },
+      { type: 'CONSUMER_GOOD', salience: 1, name: 'first' },
+
+      { type: 'OTHER', salience: -1, name: 'second' },
+      { type: 'OTHER', salience: 1, name: 'first' }
+    ];
+
+    var VERBOSE = false;
+
+    // Deep copy, so scaling salience below leaves the ENTITIES input untouched.
+    var entitiesCopy = extend(true, {}, ENTITIES);
+    var FORMATTED_ENTITIES = {
+      unknown: [ entitiesCopy[1], entitiesCopy[0] ],
+      people: [ entitiesCopy[3], entitiesCopy[2] ],
+      places: [ entitiesCopy[5], entitiesCopy[4] ],
+      organizations: [ entitiesCopy[7], entitiesCopy[6] ],
+      events: [ entitiesCopy[9], entitiesCopy[8] ],
+      art: [ entitiesCopy[11], entitiesCopy[10] ],
+      goods: [ entitiesCopy[13], entitiesCopy[12] ],
+      other: [ entitiesCopy[15], entitiesCopy[14] ],
+    };
+
+    // The expected fixtures carry each salience multiplied by 100.
+    for (var entityType in FORMATTED_ENTITIES) {
+      FORMATTED_ENTITIES[entityType] = FORMATTED_ENTITIES[entityType]
+        .map(function(entity) {
+          entity.salience *= 100;
+          return entity;
+        });
+    }
+
+    var EXPECTED_FORMATTED_ENTITIES = {
+      default: extend(true, {}, FORMATTED_ENTITIES),
+      verbose: extend(true, {}, FORMATTED_ENTITIES)
+    };
+
+    for (var entityGroupType in EXPECTED_FORMATTED_ENTITIES.default) {
+      // Only the `name` property is returned by default:
+      EXPECTED_FORMATTED_ENTITIES.default[entityGroupType] =
+        EXPECTED_FORMATTED_ENTITIES.default[entityGroupType].map(prop('name'));
+    }
+
+    it('should group and sort entities correctly', function() {
+      var formattedEntities = Document.formatEntities_(ENTITIES, VERBOSE);
+
+      // NOTE(review): this stub is assigned after formatEntities_ has already
+      // returned, so its assertion appears never to run here - confirm intent.
+      Document.sortByProperty_ = function(propertyName) {
+        assert.strictEqual(propertyName, 'salience');
+        return function() { return -1; };
+      };
+
+      assert.deepEqual(formattedEntities, EXPECTED_FORMATTED_ENTITIES.default);
+    });
+
+    it('should group and sort entities correctly in verbose mode', function() {
+      var formattedEntities = Document.formatEntities_(ENTITIES, true);
+
+      // NOTE(review): this stub is assigned after formatEntities_ has already
+      // returned, so its assertion appears never to run here - confirm intent.
+      Document.sortByProperty_ = function(propertyName) {
+        assert.strictEqual(propertyName, 'salience');
+        return function() { return -1; };
+      };
+
+      assert.deepEqual(formattedEntities, EXPECTED_FORMATTED_ENTITIES.verbose);
+    });
+  });
+
+  describe('formatSentences_', function() {
+    var SENTENCES = [
+      {
+        text: {
+          content: 'Sentence text',
+          property: 'value'
+        }
+      },
+      {
+        text: {
+          content: 'Another sentence',
+          property: 'value'
+        }
+      }
+    ];
+
+    var VERBOSE = false;
+
+    var EXPECTED_FORMATTED_SENTENCES = {
+      default: SENTENCES.map(prop('text')).map(prop('content')),
+      verbose: SENTENCES.map(prop('text'))
+    };
+
+    it('should correctly format sentences', function() {
+      var formattedSentences = Document.formatSentences_(SENTENCES, VERBOSE);
+
+      assert.deepEqual(
+        formattedSentences,
+        EXPECTED_FORMATTED_SENTENCES.default
+      );
+    });
+
+    it('should correctly format sentences in verbose mode', function() {
+      var formattedSentences = Document.formatSentences_(SENTENCES, true);
+
+      assert.deepEqual(
+        formattedSentences,
+        EXPECTED_FORMATTED_SENTENCES.verbose
+      );
+    });
+  });
+
+  describe('formatSentiment_', function() {
+    var SENTIMENT = {
+      polarity: -0.5,
+      magnitude: 0.5
+    };
+
+    var VERBOSE = false;
+
+    var EXPECTED_FORMATTED_SENTIMENT = {
+      default: SENTIMENT.polarity * 100,
+      verbose: {
+        polarity: SENTIMENT.polarity * 100,
+        magnitude: SENTIMENT.magnitude * 100
+      }
+    };
+
+    it('should format the sentiment correctly', function() {
+      var sentiment = extend({}, SENTIMENT);
+      var formattedSentiment = Document.formatSentiment_(sentiment, VERBOSE);
+
+      assert.deepEqual(
+        formattedSentiment,
+        EXPECTED_FORMATTED_SENTIMENT.default
+      );
+    });
+
+    it('should format the sentiment correctly in verbose mode', function() {
+      var sentiment = extend({}, SENTIMENT);
+      var formattedSentiment = Document.formatSentiment_(sentiment, true);
+
+      assert.deepEqual(
+        formattedSentiment,
+        EXPECTED_FORMATTED_SENTIMENT.verbose
+      );
+    });
+  });
+
+  describe('formatTokens_', function() {
+    var TOKENS = [
+      {
+        text: {
+          content: 'Text content'
+        },
+        partOfSpeech: {
+          tag: 'PART_OF_SPEECH_TAG'
+        },
+        property: 'value'
+      }
+    ];
+
+    var VERBOSE = false;
+
+    var PART_OF_SPEECH = {
+      PART_OF_SPEECH_TAG: 'part of speech value'
+    };
+
+    var EXPECTED_FORMATTED_TOKENS = {
+      default: TOKENS.map(function(token) {
+        return {
+          text: token.text.content,
+          partOfSpeech: PART_OF_SPEECH.PART_OF_SPEECH_TAG,
+          partOfSpeechTag: 'PART_OF_SPEECH_TAG'
+        };
+      }),
+
+      verbose: TOKENS
+    };
+
+    beforeEach(function() {
+      Document.PART_OF_SPEECH = PART_OF_SPEECH;
+    });
+
+    it('should correctly format tokens', function() {
+      var formattedTokens = Document.formatTokens_(TOKENS, VERBOSE);
+
+      assert.deepEqual(formattedTokens, EXPECTED_FORMATTED_TOKENS.default);
+    });
+
+    it('should correctly format tokens in verbose mode', function() {
+      var formattedTokens = Document.formatTokens_(TOKENS, true);
+
+      assert.deepEqual(formattedTokens, EXPECTED_FORMATTED_TOKENS.verbose);
+    });
+  });
+
+  describe('sortByProperty_', function() {
+    var sortFn;
+
+    beforeEach(function() {
+      sortFn = Document.sortByProperty_('sortedProperty');
+    });
+
+    it('should sort by a property name', function() {
+      assert.strictEqual(
+        sortFn({ sortedProperty: 0 }, { sortedProperty: 1 }),
+        1
+      );
+
+      assert.strictEqual(
+        sortFn({ sortedProperty: 1 }, { sortedProperty: -1 }),
+        -1
+      );
+
+      assert.strictEqual(
+        sortFn({ sortedProperty: 0 }, { sortedProperty: 0 }),
+        0
+      );
+    });
+  });
+});