From 030e0cf432a10ba7706b922d0ba34a6c0ebbafcd Mon Sep 17 00:00:00 2001
From: Ace Nassri <anassri@google.com>
Date: Wed, 18 Oct 2017 15:14:51 -0700
Subject: [PATCH] Add DLP samples (BigQuery, DeID, RiskAnalysis) (#474)

* Add BigQuery samples + a few minor tweaks

* Update comments + fix failing test

* Sync w/codegen changes

* Add DeID samples

* Add DeID tests + remove infoTypes from DeID samples

* Remove unused option

* Add risk analysis samples

* Update README

* Add region tags + fix comment
---
 dlp/deid.js                      | 163 ++++++++++++++
 dlp/inspect.js                   | 142 +++++++++++--
 dlp/metadata.js                  |   4 +-
 dlp/package.json                 |  27 ++-
 dlp/quickstart.js                |   2 +-
 dlp/redact.js                    |   4 +-
 dlp/risk.js                      | 350 +++++++++++++++++++++++++++++++
 dlp/system-test/deid.test.js     |  64 ++++++
 dlp/system-test/inspect.test.js  |  21 +-
 dlp/system-test/metadata.test.js |   1 +
 dlp/system-test/risk.test.js     |  96 +++++++++
 11 files changed, 842 insertions(+), 32 deletions(-)
 create mode 100644 dlp/deid.js
 create mode 100644 dlp/risk.js
 create mode 100644 dlp/system-test/deid.test.js
 create mode 100644 dlp/system-test/risk.test.js

diff --git a/dlp/deid.js b/dlp/deid.js
new file mode 100644
index 0000000000..31e7083664
--- /dev/null
+++ b/dlp/deid.js
@@ -0,0 +1,163 @@
+/**
+ * Copyright 2017, Google, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+'use strict';
+
+function deidentifyWithMask (string, maskingCharacter, numberToMask) {
+  // [START deidentify_masking]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // The string to deidentify
+  // const string = 'My SSN is 372819127';
+
+  // (Optional) The maximum number of sensitive characters to mask in a match
+  // If omitted from the request or set to 0, the API will mask any matching characters
+  // const numberToMask = 5;
+
+  // (Optional) The character to mask matching sensitive data with
+  // const maskingCharacter = 'x';
+
+  // Construct deidentification request
+  const items = [{ type: 'text/plain', value: string }];
+  const request = {
+    deidentifyConfig: {
+      infoTypeTransformations: {
+        transformations: [{
+          primitiveTransformation: {
+            characterMaskConfig: {
+              maskingCharacter: maskingCharacter,
+              numberToMask: numberToMask
+            }
+          }
+        }]
+      }
+    },
+    items: items
+  };
+
+  // Run deidentification request
+  dlp.deidentifyContent(request)
+    .then((response) => {
+      const deidentifiedItems = response[0].items;
+      console.log(deidentifiedItems[0].value);
+    })
+    .catch((err) => {
+      console.log(`Error in deidentifyWithMask: ${err.message || err}`);
+    });
+  // [END deidentify_masking]
+}
+
+function deidentifyWithFpe (string, alphabet, keyName, wrappedKey) {
+  // [START deidentify_fpe]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // The string to deidentify
+  // const string = 'My SSN is 372819127';
+
+  // The set of characters to replace sensitive ones with
+  // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify#FfxCommonNativeAlphabet
+  // const alphabet = 'ALPHA_NUMERIC';
+
+  // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
+  // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME';
+
+  // The encrypted ('wrapped') AES-256 key to use
+  // This key should be encrypted using the Cloud KMS key specified above
+  // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY';
+
+  // Construct deidentification request
+  const items = [{ type: 'text/plain', value: string }];
+  const request = {
+    deidentifyConfig: {
+      infoTypeTransformations: {
+        transformations: [{
+          primitiveTransformation: {
+            cryptoReplaceFfxFpeConfig: {
+              cryptoKey: {
+                kmsWrapped: {
+                  wrappedKey: wrappedKey,
+                  cryptoKeyName: keyName
+                }
+              },
+              commonAlphabet: alphabet
+            }
+          }
+        }]
+      }
+    },
+    items: items
+  };
+
+  // Run deidentification request
+  dlp.deidentifyContent(request)
+    .then((response) => {
+      const deidentifiedItems = response[0].items;
+      console.log(deidentifiedItems[0].value);
+    })
+    .catch((err) => {
+      console.log(`Error in deidentifyWithFpe: ${err.message || err}`);
+    });
+  // [END deidentify_fpe]
+}
+
+const cli = require(`yargs`)
+  .demand(1)
+  .command(
+    `mask <string>`,
+    `Deidentify sensitive data by masking it with a character.`,
+  {
+    maskingCharacter: {
+      type: 'string',
+      alias: 'c',
+      default: ''
+    },
+    numberToMask: {
+      type: 'number',
+      alias: 'n',
+      default: 0
+    }
+  },
+    (opts) => deidentifyWithMask(opts.string, opts.maskingCharacter, opts.numberToMask)
+  )
+  .command(
+    `fpe <string> <wrappedKey> <keyName>`,
+    `Deidentify sensitive data using Format Preserving Encryption (FPE).`,
+  {
+    alphabet: {
+      type: 'string',
+      alias: 'a',
+      default: 'ALPHA_NUMERIC',
+      choices: ['NUMERIC', 'HEXADECIMAL', 'UPPER_CASE_ALPHA_NUMERIC', 'ALPHA_NUMERIC']
+    }
+  },
+    (opts) => deidentifyWithFpe(opts.string, opts.alphabet, opts.keyName, opts.wrappedKey)
+  )
+  .example(`node $0 mask "My SSN is 372819127"`)
+  .example(`node $0 fpe "My SSN is 372819127" <YOUR_ENCRYPTED_AES_256_KEY> <YOUR_KEY_NAME>`)
+  .wrap(120)
+  .recommendCommands()
+  .epilogue(`For more information, see https://cloud.google.com/dlp/docs.`);
+
+if (module === require.main) {
+  cli.help().strict().argv; // eslint-disable-line
+}
diff --git a/dlp/inspect.js b/dlp/inspect.js
index 2e4cc073fc..b01032e467 100644
--- a/dlp/inspect.js
+++ b/dlp/inspect.js
@@ -25,7 +25,7 @@ function inspectString (string, minLikelihood, maxFindings, infoTypes, includeQu
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The string to inspect
   // const string = 'My name is Gary and my email is gary@example.com';
@@ -37,7 +37,7 @@ function inspectString (string, minLikelihood, maxFindings, infoTypes, includeQu
   // const maxFindings = 0;
 
   // The infoTypes of information to match
-  // const infoTypes = [{ name: 'US_MALE_NAME', name: 'US_FEMALE_NAME' }];
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
 
   // Whether to include the matching string
   // const includeQuote = true;
@@ -85,7 +85,7 @@ function inspectFile (filepath, minLikelihood, maxFindings, infoTypes, includeQu
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The path to a local file to inspect. Can be a text, JPG, or PNG file.
   // const fileName = 'path/to/image.png';
@@ -97,7 +97,7 @@ function inspectFile (filepath, minLikelihood, maxFindings, infoTypes, includeQu
   // const maxFindings = 0;
 
   // The infoTypes of information to match
-  // const infoTypes = [{ name: 'US_MALE_NAME' }, { name: 'US_FEMALE_NAME' }];
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
 
   // Whether to include the matching string
   // const includeQuote = true;
@@ -148,7 +148,7 @@ function promiseInspectGCSFile (bucketName, fileName, minLikelihood, maxFindings
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The name of the bucket where the file resides.
   // const bucketName = 'YOUR-BUCKET';
@@ -164,7 +164,7 @@ function promiseInspectGCSFile (bucketName, fileName, minLikelihood, maxFindings
   // const maxFindings = 0;
 
   // The infoTypes of information to match
-  // const infoTypes = [{ name: 'US_MALE_NAME' }, { name: 'US_FEMALE_NAME' }];
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
 
   // Get reference to the file to be inspected
   const storageItems = {
@@ -222,7 +222,7 @@ function eventInspectGCSFile (bucketName, fileName, minLikelihood, maxFindings,
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The name of the bucket where the file resides.
   // const bucketName = 'YOUR-BUCKET';
@@ -238,7 +238,7 @@ function eventInspectGCSFile (bucketName, fileName, minLikelihood, maxFindings,
   // const maxFindings = 0;
 
   // The infoTypes of information to match
-  // const infoTypes = [{ name: 'US_MALE_NAME' }, { name: 'US_FEMALE_NAME' }];
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
 
   // Get reference to the file to be inspected
   const storageItems = {
@@ -307,7 +307,7 @@ function inspectDatastore (projectId, namespaceId, kind, minLikelihood, maxFindi
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // (Optional) The project ID containing the target Datastore
   // const projectId = process.env.GCLOUD_PROJECT;
@@ -326,9 +326,9 @@ function inspectDatastore (projectId, namespaceId, kind, minLikelihood, maxFindi
   // const maxFindings = 0;
 
   // The infoTypes of information to match
-  // const infoTypes = [{ name: 'US_MALE_NAME' }, { name: 'US_FEMALE_NAME' }];
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
 
-  // Get reference to the file to be inspected
+  // Construct items to be inspected
   const storageItems = {
     datastoreOptions: {
       partitionId: {
@@ -384,6 +384,86 @@ function inspectDatastore (projectId, namespaceId, kind, minLikelihood, maxFindi
   // [END inspect_datastore]
 }
 
+function inspectBigquery (projectId, datasetId, tableId, minLikelihood, maxFindings, infoTypes, includeQuote) {
+  // [START inspect_bigquery]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // (Optional) The ID of the project that contains the BigQuery table to inspect
+  // const projectId = process.env.GCLOUD_PROJECT;
+
+  // The ID of the dataset to inspect, e.g. 'my_dataset'
+  // const datasetId = 'my_dataset';
+
+  // The ID of the table to inspect, e.g. 'my_table'
+  // const tableId = 'my_table';
+
+  // The minimum likelihood required before returning a match
+  // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';
+
+  // The maximum number of findings to report (0 = server maximum)
+  // const maxFindings = 0;
+
+  // The infoTypes of information to match
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
+
+  // Construct items to be inspected
+  const storageItems = {
+    bigQueryOptions: {
+      tableReference: {
+        projectId: projectId,
+        datasetId: datasetId,
+        tableId: tableId
+      }
+    }
+  };
+
+  // Construct request for creating an inspect job
+  const request = {
+    inspectConfig: {
+      infoTypes: infoTypes,
+      minLikelihood: minLikelihood,
+      maxFindings: maxFindings
+    },
+    storageConfig: storageItems
+  };
+
+  // Run inspect-job creation request
+  dlp.createInspectOperation(request)
+    .then((createJobResponse) => {
+      const operation = createJobResponse[0];
+
+      // Start polling for job completion
+      return operation.promise();
+    })
+    .then((completeJobResponse) => {
+      // When job is complete, get its results
+      const jobName = completeJobResponse[0].name;
+      return dlp.listInspectFindings({
+        name: jobName
+      });
+    })
+    .then((results) => {
+      const findings = results[0].result.findings;
+      if (findings.length > 0) {
+        console.log(`Findings:`);
+        findings.forEach((finding) => {
+          console.log(`\tInfo type: ${finding.infoType.name}`);
+          console.log(`\tLikelihood: ${finding.likelihood}`);
+        });
+      } else {
+        console.log(`No findings.`);
+      }
+    })
+    .catch((err) => {
+      console.log(`Error in inspectBigquery: ${err.message || err}`);
+    });
+  // [END inspect_bigquery]
+}
+
 const cli = require(`yargs`) // eslint-disable-line
   .demand(1)
   .command(
@@ -434,6 +514,26 @@ const cli = require(`yargs`) // eslint-disable-line
       opts.infoTypes
     )
   )
+  .command(
+    `bigquery <datasetName> <tableName>`,
+    `Inspects a BigQuery table using the Data Loss Prevention API.`,
+  {
+    projectId: {
+      type: 'string',
+      alias: 'p',
+      default: process.env.GCLOUD_PROJECT
+    }
+  },
+    (opts) => inspectBigquery(
+      opts.projectId,
+      opts.datasetName,
+      opts.tableName,
+      opts.minLikelihood,
+      opts.maxFindings,
+      opts.infoTypes,
+      opts.includeQuote
+    )
+  )
   .command(
     `datastore <kind>`,
     `Inspect a Datastore instance using the Data Loss Prevention API.`,
@@ -449,7 +549,15 @@ const cli = require(`yargs`) // eslint-disable-line
       default: ''
     }
   },
-    (opts) => inspectDatastore(opts.projectId, opts.namespaceId, opts.kind, opts.minLikelihood, opts.maxFindings, opts.infoTypes, opts.includeQuote)
+    (opts) => inspectDatastore(
+      opts.projectId,
+      opts.namespaceId,
+      opts.kind,
+      opts.minLikelihood,
+      opts.maxFindings,
+      opts.infoTypes,
+      opts.includeQuote
+    )
   )
   .option('m', {
     alias: 'minLikelihood',
@@ -477,15 +585,9 @@ const cli = require(`yargs`) // eslint-disable-line
     type: 'boolean',
     global: true
   })
-  .option('l', {
-    alias: 'languageCode',
-    default: 'en-US',
-    type: 'string',
-    global: true
-  })
   .option('t', {
     alias: 'infoTypes',
-    default: [],
+    default: ['PHONE_NUMBER', 'EMAIL_ADDRESS', 'CREDIT_CARD_NUMBER'],
     type: 'array',
     global: true,
     coerce: (infoTypes) => infoTypes.map((type) => {
@@ -496,6 +598,8 @@ const cli = require(`yargs`) // eslint-disable-line
   .example(`node $0 file resources/test.txt`)
   .example(`node $0 gcsFilePromise my-bucket my-file.txt`)
   .example(`node $0 gcsFileEvent my-bucket my-file.txt`)
+  .example(`node $0 bigquery my-dataset my-table`)
+  .example(`node $0 datastore my-datastore-kind`)
   .wrap(120)
   .recommendCommands()
   .epilogue(`For more information, see https://cloud.google.com/dlp/docs. Optional flags are explained at https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/inspect#InspectConfig`);
diff --git a/dlp/metadata.js b/dlp/metadata.js
index be492f5c03..4725d0794d 100644
--- a/dlp/metadata.js
+++ b/dlp/metadata.js
@@ -21,7 +21,7 @@ function listInfoTypes (category, languageCode) {
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The category of info types to list.
   // const category = 'CATEGORY_TO_LIST';
@@ -52,7 +52,7 @@ function listRootCategories (languageCode) {
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
diff --git a/dlp/package.json b/dlp/package.json
index 05a02a77f8..24ac62fa84 100644
--- a/dlp/package.json
+++ b/dlp/package.json
@@ -20,6 +20,10 @@
   "cloud-repo-tools": {
     "requiresKeyFile": true,
     "requiresProjectId": true,
+    "requiredEnvVars": [
+      "DLP_DEID_WRAPPED_KEY",
+      "DLP_DEID_KEY_NAME"
+    ],
     "product": "dlp",
     "samples": [
       {
@@ -42,15 +46,30 @@
         "file": "metadata.js",
         "docs_link": "https://cloud.google.com/dlp/docs",
         "usage": "node metadata.js --help"
+      },
+      {
+        "id": "deid",
+        "name": "DeID",
+        "file": "deid.js",
+        "docs_link": "https://cloud.google.com/dlp/docs",
+        "usage": "node deid.js --help"
+      },
+      {
+        "id": "risk",
+        "name": "Risk Analysis",
+        "file": "risk.js",
+        "docs_link": "https://cloud.google.com/dlp/docs",
+        "usage": "node risk.js --help"
       }
     ]
   },
   "dependencies": {
+    "@google-cloud/bigquery": "^0.9.6",
     "@google-cloud/dlp": "^0.1.0",
     "google-auth-library": "0.10.0",
-    "google-auto-auth": "0.7.1",
-    "google-proto-files": "0.12.1",
-    "mime": "1.3.6",
+    "google-auto-auth": "0.7.2",
+    "google-proto-files": "0.13.0",
+    "mime": "1.4.0",
     "request": "2.81.0",
     "request-promise": "4.2.1",
     "safe-buffer": "5.1.1",
@@ -58,6 +77,6 @@
   },
   "devDependencies": {
     "@google-cloud/nodejs-repo-tools": "1.4.17",
-    "ava": "0.21.0"
+    "ava": "0.22.0"
   }
 }
diff --git a/dlp/quickstart.js b/dlp/quickstart.js
index 370348330c..392e8008fb 100644
--- a/dlp/quickstart.js
+++ b/dlp/quickstart.js
@@ -20,7 +20,7 @@
 const DLP = require('@google-cloud/dlp');
 
 // Instantiates a client
-const dlp = DLP();
+const dlp = new DLP.DlpServiceClient();
 
 // The string to inspect
 const string = 'Robert Frost';
diff --git a/dlp/redact.js b/dlp/redact.js
index e299592df5..2bc1c23902 100644
--- a/dlp/redact.js
+++ b/dlp/redact.js
@@ -21,7 +21,7 @@ function redactString (string, replaceString, minLikelihood, infoTypes) {
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The string to inspect
   // const string = 'My name is Gary and my email is gary@example.com';
@@ -74,7 +74,7 @@ function redactImage (filepath, minLikelihood, infoTypes, outputPath) {
   const DLP = require('@google-cloud/dlp');
 
   // Instantiates a client
-  const dlp = DLP();
+  const dlp = new DLP.DlpServiceClient();
 
   // The path to a local file to inspect. Can be a JPG or PNG image file.
   // const fileName = 'path/to/image.png';
diff --git a/dlp/risk.js b/dlp/risk.js
new file mode 100644
index 0000000000..6faa1c4a7e
--- /dev/null
+++ b/dlp/risk.js
@@ -0,0 +1,350 @@
+/**
+ * Copyright 2017, Google, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+'use strict';
+
+function numericalRiskAnalysis (projectId, datasetId, tableId, columnName) {
+  // [START numerical_risk]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // (Optional) The ID of the project that contains the BigQuery table to analyze
+  // const projectId = process.env.GCLOUD_PROJECT;
+
+  // The ID of the dataset to inspect, e.g. 'my_dataset'
+  // const datasetId = 'my_dataset';
+
+  // The ID of the table to inspect, e.g. 'my_table'
+  // const tableId = 'my_table';
+
+  // The name of the column to compute risk metrics for, e.g. 'age'
+  // Note that this column must be a numeric data type
+  // const columnName = 'age';
+
+  const sourceTable = {
+    projectId: projectId,
+    datasetId: datasetId,
+    tableId: tableId
+  };
+
+  // Construct request for creating a risk analysis job
+  const request = {
+    privacyMetric: {
+      numericalStatsConfig: {
+        field: {
+          columnName: columnName
+        }
+      }
+    },
+    sourceTable: sourceTable
+  };
+
+  // Create helper function for unpacking values
+  const getValue = (obj) => obj[Object.keys(obj)[0]];
+
+  // Run risk analysis job
+  dlp.analyzeDataSourceRisk(request)
+    .then((response) => {
+      const operation = response[0];
+      return operation.promise();
+    })
+    .then((completedJobResponse) => {
+      const results = completedJobResponse[0].numericalStatsResult;
+
+      console.log(`Value Range: [${getValue(results.minValue)}, ${getValue(results.maxValue)}]`);
+
+      // Print unique quantile values
+      let tempValue = null;
+      results.quantileValues.forEach((result, percent) => {
+        const value = getValue(result);
+
+        // Only print new values
+        if ((tempValue !== value) &&
+            !(tempValue && tempValue.equals && tempValue.equals(value))) {
+          console.log(`Value at ${percent}% quantile: ${value}`);
+          tempValue = value;
+        }
+      });
+    })
+    .catch((err) => {
+      console.log(`Error in numericalRiskAnalysis: ${err.message || err}`);
+    });
+  // [END numerical_risk]
+}
+
+function categoricalRiskAnalysis (projectId, datasetId, tableId, columnName) {
+  // [START categorical_risk]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // (Optional) The ID of the project that contains the BigQuery table to analyze
+  // const projectId = process.env.GCLOUD_PROJECT;
+
+  // The ID of the dataset to inspect, e.g. 'my_dataset'
+  // const datasetId = 'my_dataset';
+
+  // The ID of the table to inspect, e.g. 'my_table'
+  // const tableId = 'my_table';
+
+  // The name of the column to compute risk metrics for, e.g. 'firstName'
+  // const columnName = 'firstName';
+
+  const sourceTable = {
+    projectId: projectId,
+    datasetId: datasetId,
+    tableId: tableId
+  };
+
+  // Construct request for creating a risk analysis job
+  const request = {
+    privacyMetric: {
+      categoricalStatsConfig: {
+        field: {
+          columnName: columnName
+        }
+      }
+    },
+    sourceTable: sourceTable
+  };
+
+  // Create helper function for unpacking values
+  const getValue = (obj) => obj[Object.keys(obj)[0]];
+
+  // Run risk analysis job
+  dlp.analyzeDataSourceRisk(request)
+    .then((response) => {
+      const operation = response[0];
+      return operation.promise();
+    })
+    .then((completedJobResponse) => {
+      const results = completedJobResponse[0].categoricalStatsResult.valueFrequencyHistogramBuckets[0];
+      console.log(`Most common value occurs ${results.valueFrequencyUpperBound} time(s)`);
+      console.log(`Least common value occurs ${results.valueFrequencyLowerBound} time(s)`);
+      console.log(`${results.bucketSize} unique values total.`);
+      results.bucketValues.forEach((bucket) => {
+        console.log(`Value ${getValue(bucket.value)} occurs ${bucket.count} time(s).`);
+      });
+    })
+    .catch((err) => {
+      console.log(`Error in categoricalRiskAnalysis: ${err.message || err}`);
+    });
+  // [END categorical_risk]
+}
+
+function kAnonymityAnalysis (projectId, datasetId, tableId, quasiIds) {
+  // [START k_anonymity]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // (Optional) The ID of the project that contains the BigQuery table to analyze
+  // const projectId = process.env.GCLOUD_PROJECT;
+
+  // The ID of the dataset to inspect, e.g. 'my_dataset'
+  // const datasetId = 'my_dataset';
+
+  // The ID of the table to inspect, e.g. 'my_table'
+  // const tableId = 'my_table';
+
+  // A set of columns that form a composite key ('quasi-identifiers')
+  // const quasiIds = [{ columnName: 'age' }, { columnName: 'city' }];
+
+  const sourceTable = {
+    projectId: projectId,
+    datasetId: datasetId,
+    tableId: tableId
+  };
+
+  // Construct request for creating a risk analysis job
+  const request = {
+    privacyMetric: {
+      kAnonymityConfig: {
+        quasiIds: quasiIds
+      }
+    },
+    sourceTable: sourceTable
+  };
+
+  // Create helper function for unpacking values
+  const getValue = (obj) => obj[Object.keys(obj)[0]];
+
+  // Run risk analysis job
+  dlp.analyzeDataSourceRisk(request)
+    .then((response) => {
+      const operation = response[0];
+      return operation.promise();
+    })
+    .then((completedJobResponse) => {
+      const results = completedJobResponse[0].kAnonymityResult.equivalenceClassHistogramBuckets[0];
+      console.log(`Bucket size range: [${results.equivalenceClassSizeLowerBound}, ${results.equivalenceClassSizeUpperBound}]`);
+
+      results.bucketValues.forEach((bucket) => {
+        const quasiIdValues = bucket.quasiIdsValues.map(getValue).join(', ');
+        console.log(`  Quasi-ID values: {${quasiIdValues}}`);
+        console.log(`  Class size: ${bucket.equivalenceClassSize}`);
+      });
+    })
+    .catch((err) => {
+      console.log(`Error in kAnonymityAnalysis: ${err.message || err}`);
+    });
+  // [END k_anonymity]
+}
+
+function lDiversityAnalysis (projectId, datasetId, tableId, sensitiveAttribute, quasiIds) {
+  // [START l_diversity]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // (Optional) The ID of the project that contains the BigQuery table to analyze
+  // const projectId = process.env.GCLOUD_PROJECT;
+
+  // The ID of the dataset to inspect, e.g. 'my_dataset'
+  // const datasetId = 'my_dataset';
+
+  // The ID of the table to inspect, e.g. 'my_table'
+  // const tableId = 'my_table';
+
+  // The column to measure l-diversity relative to, e.g. 'firstName'
+  // const sensitiveAttribute = 'firstName';
+
+  // A set of columns that form a composite key ('quasi-identifiers')
+  // const quasiIds = [{ columnName: 'age' }, { columnName: 'city' }];
+
+  const sourceTable = {
+    projectId: projectId,
+    datasetId: datasetId,
+    tableId: tableId
+  };
+
+  // Construct request for creating a risk analysis job
+  const request = {
+    privacyMetric: {
+      lDiversityConfig: {
+        quasiIds: quasiIds,
+        sensitiveAttribute: {
+          columnName: sensitiveAttribute
+        }
+      }
+    },
+    sourceTable: sourceTable
+  };
+
+  // Create helper function for unpacking values
+  const getValue = (obj) => obj[Object.keys(obj)[0]];
+
+  // Run risk analysis job
+  dlp.analyzeDataSourceRisk(request)
+    .then((response) => {
+      const operation = response[0];
+      return operation.promise();
+    })
+    .then((completedJobResponse) => {
+      const results = completedJobResponse[0].lDiversityResult.sensitiveValueFrequencyHistogramBuckets[0];
+
+      console.log(`Bucket size range: [${results.sensitiveValueFrequencyLowerBound}, ${results.sensitiveValueFrequencyUpperBound}]`);
+      results.bucketValues.forEach((bucket) => {
+        const quasiIdValues = bucket.quasiIdsValues.map(getValue).join(', ');
+        console.log(`  Quasi-ID values: {${quasiIdValues}}`);
+        console.log(`  Class size: ${bucket.equivalenceClassSize}`);
+        bucket.topSensitiveValues.forEach((valueObj) => {
+          console.log(`    Sensitive value ${getValue(valueObj.value)} occurs ${valueObj.count} time(s).`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.log(`Error in lDiversityAnalysis: ${err.message || err}`);
+    });
+  // [END l_diversity]
+}
+
+const cli = require(`yargs`) // eslint-disable-line
+  .demand(1)
+  .command(
+    `numerical <datasetId> <tableId> <columnName>`,
+    `Computes risk metrics of a column of numbers in a Google BigQuery table.`,
+    {},
+    (opts) => numericalRiskAnalysis(
+      opts.projectId,
+      opts.datasetId,
+      opts.tableId,
+      opts.columnName
+    )
+  )
+  .command(
+    `categorical <datasetId> <tableId> <columnName>`,
+    `Computes risk metrics of a column of data in a Google BigQuery table.`,
+    {},
+    (opts) => categoricalRiskAnalysis(
+      opts.projectId,
+      opts.datasetId,
+      opts.tableId,
+      opts.columnName
+    )
+  )
+  .command(
+    `kAnonymity <datasetId> <tableId> [quasiIdColumnNames..]`,
+    `Computes the k-anonymity of a column set in a Google BigQuery table.`,
+    {},
+    (opts) => kAnonymityAnalysis(
+      opts.projectId,
+      opts.datasetId,
+      opts.tableId,
+      opts.quasiIdColumnNames.map((f) => {
+        return { columnName: f };
+      })
+    )
+  )
+  .command(
+    `lDiversity <datasetId> <tableId> <sensitiveAttribute> [quasiIdColumnNames..]`,
+    `Computes the l-diversity of a column set in a Google BigQuery table.`,
+    {},
+    (opts) => lDiversityAnalysis(
+      opts.projectId,
+      opts.datasetId,
+      opts.tableId,
+      opts.sensitiveAttribute,
+      opts.quasiIdColumnNames.map((f) => {
+        return { columnName: f };
+      })
+    )
+  )
+  .option('p', {
+    type: 'string',
+    alias: 'projectId',
+    default: process.env.GCLOUD_PROJECT,
+    global: true
+  })
+  .example(`node $0 numerical nhtsa_traffic_fatalities accident_2015 state_number -p bigquery-public-data`)
+  .example(`node $0 categorical nhtsa_traffic_fatalities accident_2015 state_name -p bigquery-public-data`)
+  .example(`node $0 kAnonymity nhtsa_traffic_fatalities accident_2015 state_number county -p bigquery-public-data`)
+  .example(`node $0 lDiversity nhtsa_traffic_fatalities accident_2015 city state_number county -p bigquery-public-data`)
+  .wrap(120)
+  .recommendCommands()
+  .epilogue(`For more information, see https://cloud.google.com/dlp/docs.`);
+
+if (module === require.main) {
+  cli.help().strict().argv; // eslint-disable-line
+}
diff --git a/dlp/system-test/deid.test.js b/dlp/system-test/deid.test.js
new file mode 100644
index 0000000000..b7348a9314
--- /dev/null
+++ b/dlp/system-test/deid.test.js
@@ -0,0 +1,64 @@
+/**
+ * Copyright 2017, Google, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+'use strict';
+
+const path = require('path');
+const test = require('ava');
+const tools = require('@google-cloud/nodejs-repo-tools');
+
+const cmd = 'node deid';
+const cwd = path.join(__dirname, `..`);
+
+const harmfulString = 'My SSN is 372819127';
+const harmlessString = 'My favorite color is blue';
+
+const wrappedKey = process.env.DLP_DEID_WRAPPED_KEY;
+const keyName = process.env.DLP_DEID_KEY_NAME;
+
+test.before(tools.checkCredentials);
+
+// deidentify_masking: `mask` command; masks findings, leaves harmless text alone, reports errors
+test(`should mask sensitive data in a string`, async (t) => {
+  const masked = await tools.runAsync(`${cmd} mask "${harmfulString}" -c x -n 5`, cwd);
+  t.is(masked, 'My SSN is xxxxx9127');
+});
+
+test(`should ignore insensitive data when masking a string`, async (t) => {
+  const untouched = await tools.runAsync(`${cmd} mask "${harmlessString}"`, cwd);
+  t.is(untouched, harmlessString);
+});
+
+test(`should handle masking errors`, async (t) => {
+  const stdout = await tools.runAsync(`${cmd} mask "${harmfulString}" -n -1`, cwd);
+  t.regex(stdout, /Error in deidentifyWithMask/);
+});
+
+// deidentify_fpe: `fpe` command, keyed by DLP_DEID_WRAPPED_KEY / DLP_DEID_KEY_NAME from the environment
+test(`should FPE encrypt sensitive data in a string`, async (t) => {
+  const encrypted = await tools.runAsync(`${cmd} fpe "${harmfulString}" ${wrappedKey} ${keyName} -a NUMERIC`, cwd);
+  t.regex(encrypted, /My SSN is \d{9}/);
+  t.not(encrypted, harmfulString);
+});
+
+test(`should ignore insensitive data when FPE encrypting a string`, async (t) => {
+  const untouched = await tools.runAsync(`${cmd} fpe "${harmlessString}" ${wrappedKey} ${keyName}`, cwd);
+  t.is(untouched, harmlessString);
+});
+
+test(`should handle FPE encryption errors`, async (t) => {
+  const stdout = await tools.runAsync(`${cmd} fpe "${harmfulString}" ${wrappedKey} BAD_KEY_NAME`, cwd);
+  t.regex(stdout, /Error in deidentifyWithFpe/);
+});
diff --git a/dlp/system-test/inspect.test.js b/dlp/system-test/inspect.test.js
index 246f52fd40..922f83e96e 100644
--- a/dlp/system-test/inspect.test.js
+++ b/dlp/system-test/inspect.test.js
@@ -75,7 +75,6 @@ test.serial(`should inspect multiple GCS text files with event handlers`, async
   t.regex(output, /Processed \d+ of approximately \d+ bytes./);
   t.regex(output, /Info type: PHONE_NUMBER/);
   t.regex(output, /Info type: EMAIL_ADDRESS/);
-  t.regex(output, /Info type: CREDIT_CARD_NUMBER/);
 });
 
 test.serial(`should handle a GCS file with no sensitive data with event handlers`, async (t) => {
@@ -100,7 +99,6 @@ test.serial(`should inspect multiple GCS text files with promises`, async (t) =>
   const output = await tools.runAsync(`${cmd} gcsFilePromise nodejs-docs-samples-dlp *.txt`, cwd);
   t.regex(output, /Info type: PHONE_NUMBER/);
   t.regex(output, /Info type: EMAIL_ADDRESS/);
-  t.regex(output, /Info type: CREDIT_CARD_NUMBER/);
 });
 
 test.serial(`should handle a GCS file with no sensitive data with promises`, async (t) => {
@@ -116,7 +114,6 @@ test.serial(`should report GCS file handling errors with promises`, async (t) =>
 // inspect_datastore
 test.serial(`should inspect Datastore`, async (t) => {
   const output = await tools.runAsync(`${cmd} datastore Person --namespaceId DLP`, cwd);
-  t.regex(output, /Info type: PHONE_NUMBER/);
   t.regex(output, /Info type: EMAIL_ADDRESS/);
 });
 
@@ -125,11 +122,27 @@ test.serial(`should handle Datastore with no sensitive data`, async (t) => {
   t.is(output, 'No findings.');
 });
 
-test.serial(`should report Datastore file handling errors`, async (t) => {
+test.serial(`should report Datastore errors`, async (t) => {
   const output = await tools.runAsync(`${cmd} datastore Harmless --namespaceId DLP -t BAD_TYPE`, cwd);
   t.regex(output, /Error in inspectDatastore/);
 });
 
+// inspect_bigquery: `bigquery` command on the harmful and harmless tables, plus an invalid `-t` value
+test.serial(`should inspect a Bigquery table`, async (t) => {
+  const output = await tools.runAsync(`${cmd} bigquery integration_tests_dlp harmful`, cwd);
+  t.regex(output, /Info type: CREDIT_CARD_NUMBER/);
+});
+
+test.serial(`should handle a Bigquery table with no sensitive data`, async (t) => {
+  const output = await tools.runAsync(`${cmd} bigquery integration_tests_dlp harmless`, cwd);
+  t.is(output, 'No findings.');
+});
+
+test.serial(`should report Bigquery table handling errors`, async (t) => {
+  const output = await tools.runAsync(`${cmd} bigquery integration_tests_dlp harmless -t BAD_TYPE`, cwd);
+  t.regex(output, /Error in inspectBigquery/);
+});
+
 // CLI options
 test(`should have a minLikelihood option`, async (t) => {
   const promiseA = tools.runAsync(`${cmd} string "My phone number is (123) 456-7890." -m POSSIBLE`, cwd);
diff --git a/dlp/system-test/metadata.test.js b/dlp/system-test/metadata.test.js
index 086ab9cf22..5f088a4620 100644
--- a/dlp/system-test/metadata.test.js
+++ b/dlp/system-test/metadata.test.js
@@ -27,6 +27,7 @@ test.before(tools.checkCredentials);
 test(`should list info types for a given category`, async (t) => {
   const output = await tools.runAsync(`${cmd} infoTypes GOVERNMENT`, cwd);
   t.regex(output, /US_DRIVERS_LICENSE_NUMBER/);
+  t.notRegex(output, /AMERICAN_BANKERS_CUSIP_ID/); // negative check: must not be listed for GOVERNMENT
 });
 
 test(`should inspect categories`, async (t) => {
diff --git a/dlp/system-test/risk.test.js b/dlp/system-test/risk.test.js
new file mode 100644
index 0000000000..8481ad911e
--- /dev/null
+++ b/dlp/system-test/risk.test.js
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2017, Google, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+'use strict';
+
+const path = require('path');
+const test = require('ava');
+const tools = require('@google-cloud/nodejs-repo-tools');
+
+const cmd = 'node risk';
+const cwd = path.join(__dirname, `..`);
+
+const dataset = 'integration_tests_dlp';
+const uniqueField = 'Name';
+const repeatedField = 'Mystery';
+const numericField = 'Age';
+
+test.before(tools.checkCredentials);
+
+// numericalRiskAnalysis: `numerical` command, success and error paths
+test(`should perform numerical risk analysis`, async (t) => {
+  const report = await tools.runAsync(`${cmd} numerical ${dataset} harmful ${numericField}`, cwd);
+  t.regex(report, /Value at 0% quantile: \d{2}/);
+  t.regex(report, /Value at \d{2}% quantile: \d{2}/);
+});
+
+test(`should handle numerical risk analysis errors`, async (t) => {
+  const stdout = await tools.runAsync(`${cmd} numerical ${dataset} nonexistent ${numericField}`, cwd);
+  t.regex(stdout, /Error in numericalRiskAnalysis/);
+});
+
+// categoricalRiskAnalysis: `categorical` command on a string column, a numeric column, and a missing table
+test(`should perform categorical risk analysis on a string field`, async (t) => {
+  const report = await tools.runAsync(`${cmd} categorical ${dataset} harmful ${uniqueField}`, cwd);
+  t.regex(report, /Most common value occurs \d time\(s\)/);
+});
+
+test(`should perform categorical risk analysis on a number field`, async (t) => {
+  const report = await tools.runAsync(`${cmd} categorical ${dataset} harmful ${numericField}`, cwd);
+  t.regex(report, /Most common value occurs \d time\(s\)/);
+});
+
+test(`should handle categorical risk analysis errors`, async (t) => {
+  const stdout = await tools.runAsync(`${cmd} categorical ${dataset} nonexistent ${uniqueField}`, cwd);
+  t.regex(stdout, /Error in categoricalRiskAnalysis/);
+});
+
+// kAnonymityAnalysis: `kAnonymity` command with one quasi-ID column, two quasi-ID columns, and a missing table
+test(`should perform k-anonymity analysis on a single field`, async (t) => {
+  const report = await tools.runAsync(`${cmd} kAnonymity ${dataset} harmful ${numericField}`, cwd);
+  t.regex(report, /Quasi-ID values: \{\d{2}\}/);
+  t.regex(report, /Class size: \d/);
+});
+
+test(`should perform k-anonymity analysis on multiple fields`, async (t) => {
+  const report = await tools.runAsync(`${cmd} kAnonymity ${dataset} harmful ${numericField} ${repeatedField}`, cwd);
+  t.regex(report, /Quasi-ID values: \{\d{2}, \d{4} \d{4} \d{4} \d{4}\}/);
+  t.regex(report, /Class size: \d/);
+});
+
+test(`should handle k-anonymity analysis errors`, async (t) => {
+  const stdout = await tools.runAsync(`${cmd} kAnonymity ${dataset} nonexistent ${numericField}`, cwd);
+  t.regex(stdout, /Error in kAnonymityAnalysis/);
+});
+
+// lDiversityAnalysis: `lDiversity` command; sensitive attribute first, then one or two quasi-ID columns
+test(`should perform l-diversity analysis on a single field`, async (t) => {
+  const report = await tools.runAsync(`${cmd} lDiversity ${dataset} harmful ${uniqueField} ${numericField}`, cwd);
+  t.regex(report, /Quasi-ID values: \{\d{2}\}/);
+  t.regex(report, /Class size: \d/);
+  t.regex(report, /Sensitive value James occurs \d time\(s\)/);
+});
+
+test(`should perform l-diversity analysis on multiple fields`, async (t) => {
+  const report = await tools.runAsync(`${cmd} lDiversity ${dataset} harmful ${uniqueField} ${numericField} ${repeatedField}`, cwd);
+  t.regex(report, /Quasi-ID values: \{\d{2}, \d{4} \d{4} \d{4} \d{4}\}/);
+  t.regex(report, /Class size: \d/);
+  t.regex(report, /Sensitive value James occurs \d time\(s\)/);
+});
+
+test(`should handle l-diversity analysis errors`, async (t) => {
+  const stdout = await tools.runAsync(`${cmd} lDiversity ${dataset} nonexistent ${uniqueField} ${numericField}`, cwd);
+  t.regex(stdout, /Error in lDiversityAnalysis/);
+});