Skip to content

Commit

Permalink
feat(datacatalog): add sample to create a fileset entry (#9590)
Browse files Browse the repository at this point in the history
Fixes #9589
  • Loading branch information
ricardolsmendes authored and tswast committed Nov 5, 2019
1 parent a5e5344 commit 045f35c
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 0 deletions.
26 changes: 26 additions & 0 deletions packages/google-cloud-datacatalog/samples/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,29 @@ def random_entry_group_id(client, project_id):
project_id, "us-central1", random_entry_group_id
)
client.delete_entry_group(entry_group_name)


@pytest.fixture
def random_entry_name(client, entry_group_name):
    """Yield a unique entry resource name and delete that entry on teardown.

    The entry itself is not created here; only its name is built, as
    ``<entry_group_name>/entries/example_entry_<timestamp>_<8 hex chars>``.
    After the test finishes, ``client.delete_entry`` is called with that name.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    suffix = uuid.uuid4().hex[:8]
    entry_id = "example_entry_{}_{}".format(timestamp, suffix)
    entry_name = "{}/entries/{}".format(entry_group_name, entry_id)

    yield entry_name

    # Teardown: remove the entry that was created under this name.
    client.delete_entry(entry_name)


@pytest.fixture
def entry_group_name(client, project_id):
    """Create a uniquely named entry group, yield its name, then delete it.

    The group is created in the ``us-central1`` location of ``project_id``
    with an empty entry-group payload. The yielded value is the ``name``
    attribute of the object returned by ``client.create_entry_group``.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    suffix = uuid.uuid4().hex[:8]
    group_id = "python_entry_group_sample_{}_{}".format(timestamp, suffix)

    parent = datacatalog_v1beta1.DataCatalogClient.location_path(
        project_id, "us-central1"
    )
    created_group = client.create_entry_group(parent, group_id, {})

    yield created_group.name

    # Teardown: remove the entry group created above.
    client.delete_entry_group(created_group.name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import re

from ..v1beta1 import create_fileset_entry


def test_create_fileset_entry(capsys, client, random_entry_name):
    """Run the fileset-entry sample and check the name it reports.

    ``random_entry_name`` is split into its entry-group name and entry ID,
    both are passed to the sample, and the captured stdout must contain
    ``Created entry <random_entry_name>``.
    """
    # Split "<entry group name>/entries/<entry id>" into its two parts.
    name_match = re.match(
        "(?P<entry_group_name>.+?)/entries/(?P<entry_id>.+?$)", random_entry_name
    )
    entry_group_name, entry_id = name_match.group("entry_group_name", "entry_id")

    create_fileset_entry.create_fileset_entry(client, entry_group_name, entry_id)

    out, err = capsys.readouterr()
    expected_message = "Created entry {}".format(random_entry_name)
    assert expected_message in out
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def create_fileset_entry(client, entry_group_name, entry_id):
    """Create a sample FILESET entry with a small schema and print its name.

    Args:
        client: Data Catalog client used to send the create request.
        entry_group_name: Name of the entry group the entry will belong to.
        entry_id: ID of the entry to create.
    """

    # [START datacatalog_create_fileset_tag]
    from google.cloud import datacatalog_v1beta1

    # TODO(developer): Construct a Data Catalog client object.
    # client = datacatalog_v1beta1.DataCatalogClient()

    # TODO(developer): Set entry_group_name to the name of the entry group
    # the entry will belong to.
    # entry_group_name = "your_entry_group_name"

    # TODO(developer): Set entry_id to the ID of the entry to create.
    # entry_id = "your_entry_id"

    # Build the full Entry object that will be sent to the API.
    entry = datacatalog_v1beta1.types.Entry()
    entry.display_name = "My Fileset"
    entry.description = "This Fileset consists of ..."
    entry.gcs_fileset_spec.file_patterns.append("gs://my_bucket/*")
    entry.type = datacatalog_v1beta1.enums.EntryType.FILESET

    # Sub columns nested under the "addresses" parent column.
    address_subcolumns = [
        datacatalog_v1beta1.types.ColumnSchema(
            column="city", description="City", mode="NULLABLE", type="STRING"
        ),
        datacatalog_v1beta1.types.ColumnSchema(
            column="state", description="State", mode="NULLABLE", type="STRING"
        ),
    ]

    # Describe the schema, for example when the fileset holds csv files.
    schema_columns = [
        datacatalog_v1beta1.types.ColumnSchema(
            column="first_name",
            description="First name",
            mode="REQUIRED",
            type="STRING",
        ),
        datacatalog_v1beta1.types.ColumnSchema(
            column="last_name",
            description="Last name",
            mode="REQUIRED",
            type="STRING",
        ),
        datacatalog_v1beta1.types.ColumnSchema(
            column="addresses",
            description="Addresses",
            mode="REPEATED",
            subcolumns=address_subcolumns,
            type="RECORD",
        ),
    ]
    entry.schema.columns.extend(schema_columns)

    # Send the entry to the API for creation.
    # Raises google.api_core.exceptions.AlreadyExists if the Entry already
    # exists within the project.
    entry = client.create_entry(entry_group_name, entry_id, entry)
    print("Created entry {}".format(entry.name))
    # [END datacatalog_create_fileset_tag]

0 comments on commit 045f35c

Please sign in to comment.