Skip to content

Commit

Permalink
#85 Savepoint
Browse files Browse the repository at this point in the history
  • Loading branch information
docktermj committed Sep 27, 2024
1 parent 6474927 commit d5c487e
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 52 deletions.
3 changes: 0 additions & 3 deletions rootfs/notebooks/load_truthsets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,6 @@
"metadata": {},
"outputs": [],
"source": [
"sz_abstract_factory = SzAbstractFactory(\n",
" grpc_channel=grpc.insecure_channel(\"localhost:8261\")\n",
")\n",
"sz_config = sz_abstract_factory.create_sz_config()\n",
"sz_configmanager = sz_abstract_factory.create_sz_configmanager()\n",
"sz_diagnostic = sz_abstract_factory.create_sz_diagnostic()\n",
Expand Down
79 changes: 53 additions & 26 deletions rootfs/notebooks/load_user_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"id": "671ae8b2-3d6a-4802-a544-2ca4c66a2497",
"metadata": {},
"source": [
"# Load Senzing truth-sets\n",
"# Load user data\n",
"\n",
"These instructions show how to load user data into the Senzing engine."
]
Expand Down Expand Up @@ -52,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "de03a1c2-d7b1-40a9-8154-1df47cd49fef",
"metadata": {},
"outputs": [],
Expand All @@ -72,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "ff9189d1-852d-46b0-9e2e-36f524c9038e",
"metadata": {},
"outputs": [],
Expand All @@ -85,12 +85,12 @@
"id": "a2f2238a-dc2c-4385-9c19-1633ee035bcd",
"metadata": {},
"source": [
"![Modify](img/pencil2.png) Identify uploaded file."
"![Modify](img/pencil2.png) **Modify the following.** Identify uploaded file."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "04555a6e-9c48-4a9b-b8b0-e7285b602fc8",
"metadata": {},
"outputs": [],
Expand All @@ -116,10 +116,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "7f5cecdf-649a-4a8b-ad64-d2fc3f37166d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found the following DATA_SOURCE values in the data: ['MY_DATASOURCE']\n"
]
}
],
"source": [
"datasources = []\n",
"\n",
Expand All @@ -130,8 +138,7 @@
" if datasource not in datasources:\n",
" datasources.append(datasource)\n",
"\n",
"print(f\"Found the following DATA_SOURCE values in the data: {datasources}\")\n",
" "
"print(f\"Found the following DATA_SOURCE values in the data: {datasources}\")"
]
},
{
Expand All @@ -152,7 +159,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "3272f76c-dc95-4ff8-b573-8bddd55fb779",
"metadata": {},
"outputs": [],
Expand All @@ -172,14 +179,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "301ce7eb-9069-4c8d-a1c4-ebe8f78d1bf6",
"metadata": {},
"outputs": [],
"source": [
"sz_abstract_factory = SzAbstractFactory(\n",
" grpc_channel=grpc.insecure_channel(\"localhost:8261\")\n",
")\n",
"sz_config = sz_abstract_factory.create_sz_config()\n",
"sz_configmanager = sz_abstract_factory.create_sz_configmanager()\n",
"sz_diagnostic = sz_abstract_factory.create_sz_diagnostic()\n",
Expand All @@ -196,14 +200,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "fbf17cc3-3273-4097-90a2-f0b9a21674a1",
"metadata": {},
"outputs": [],
"source": [
"old_config_id = sz_configmanager.get_default_config_id()\n",
"old_json_config = sz_configmanager.get_config(old_config_id)\n",
"config_handle = sz_config.import_config(old_json_config) "
"config_handle = sz_config.import_config(old_json_config)"
]
},
{
Expand All @@ -216,10 +220,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "97854ac2-ad01-4663-9f6c-4afd816230f9",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"id\":\"SZSDK60014001\",\"reason\":\"SENZ2208|Data source code [MY_DATASOURCE] already exists.\"}\n"
]
}
],
"source": [
"for datasource in datasources:\n",
" try:\n",
Expand All @@ -238,16 +250,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "32217742-9e09-4778-b7fe-1e23817251ec",
"metadata": {},
"outputs": [],
"source": [
"new_json_config = sz_config.export_config(config_handle)\n",
"new_config_id = sz_configmanager.add_config(\n",
" new_json_config, \"Add TruthSet datasources\"\n",
" new_json_config, \"Add user datasources\"\n",
")\n",
"sz_configmanager.replace_default_config_id(old_config_id, new_config_id) "
"sz_configmanager.replace_default_config_id(old_config_id, new_config_id)"
]
},
{
Expand All @@ -260,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"id": "df805089-7a9a-4e4e-b959-c320819e855b",
"metadata": {},
"outputs": [],
Expand All @@ -287,14 +299,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "cb8318ec-cd73-4e43-8349-586c8dd0dbfb",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"157616467\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"115212881\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"645217904\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"212359276\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"526429955\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"604947448\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"513789670\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n",
"{\"DATA_SOURCE\":\"MY_DATASOURCE\",\"RECORD_ID\":\"212442489\",\"AFFECTED_ENTITIES\":[],\"INTERESTING_ENTITIES\":{\"ENTITIES\":[]}}\n"
]
}
],
"source": [
"with open(filepath, \"r\") as file:\n",
" for line in file:\n",
" try: \n",
" try:\n",
" line_as_dict = json.loads(line)\n",
" info = sz_engine.add_record(\n",
" line_as_dict.get(\"DATA_SOURCE\"),\n",
Expand All @@ -304,7 +331,7 @@
" )\n",
" print(info)\n",
" except SzError as err:\n",
" print(err) "
" print(err)"
]
}
],
Expand Down
6 changes: 4 additions & 2 deletions src/senzing_quickstart/hello_world.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# coding: utf-8

# # Senzing Hello World
#
#
# The following is a quick test of connectivity to Senzing using Python over gRPC.
#
#

# Import python packages.

Expand All @@ -16,6 +16,7 @@
import grpc
from senzing_grpc import SzAbstractFactory


# Create an abstract factory for accessing Senzing via gRPC.

# In[ ]:
Expand All @@ -40,3 +41,4 @@


print(json.dumps(json.loads(sz_product.get_version()), indent=2))

22 changes: 12 additions & 10 deletions src/senzing_quickstart/load_truthsets.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# coding: utf-8

# # Load Senzing truth-sets
#
#
# These instructions load the [Senzing truth-sets] into the Senzing engine.
#
#
# [Senzing truth-sets]: https://github.com/Senzing/truth-sets

# ## Prepare Python environment
Expand All @@ -21,6 +21,7 @@
import requests
from senzing_grpc import SzAbstractFactory, SzEngineFlags, SzError


# Set environment specific variables.

# In[ ]:
Expand Down Expand Up @@ -68,6 +69,7 @@
datasources.append(datasource)

print(f"Found the following DATA_SOURCE values in the data: {datasources}")



# ## Update Senzing configuration
Expand All @@ -87,9 +89,6 @@
# In[ ]:


sz_abstract_factory = SzAbstractFactory(
grpc_channel=grpc.insecure_channel("localhost:8261")
)
sz_config = sz_abstract_factory.create_sz_config()
sz_configmanager = sz_abstract_factory.create_sz_configmanager()
sz_diagnostic = sz_abstract_factory.create_sz_diagnostic()
Expand All @@ -103,7 +102,7 @@

old_config_id = sz_configmanager.get_default_config_id()
old_json_config = sz_configmanager.get_config(old_config_id)
config_handle = sz_config.import_config(old_json_config)
config_handle = sz_config.import_config(old_json_config)


# Add DataSources to Senzing configuration.
Expand All @@ -124,8 +123,10 @@


new_json_config = sz_config.export_config(config_handle)
new_config_id = sz_configmanager.add_config(new_json_config, "Add TruthSet datasources")
sz_configmanager.replace_default_config_id(old_config_id, new_config_id)
new_config_id = sz_configmanager.add_config(
new_json_config, "Add TruthSet datasources"
)
sz_configmanager.replace_default_config_id(old_config_id, new_config_id)


# With the change in Senzing configuration, Senzing objects need to be updated.
Expand All @@ -148,7 +149,7 @@
filepath = home_path + filename
with open(filepath, "r") as file:
for line in file:
try:
try:
line_as_dict = json.loads(line)
info = sz_engine.add_record(
line_as_dict.get("DATA_SOURCE"),
Expand All @@ -158,7 +159,7 @@
)
print(info)
except SzError as err:
print(err)
print(err)


# ## View results
Expand All @@ -183,3 +184,4 @@
}
search_result = sz_engine.search_by_attributes(json.dumps(search_query))
print(json.dumps(json.loads(search_result), indent=2))

18 changes: 7 additions & 11 deletions src/senzing_quickstart/load_user_data.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# coding: utf-8

# # Load Senzing truth-sets
# # Load user data
#
# These instructions show how to load user data into the Senzing engine.

Expand Down Expand Up @@ -37,7 +37,7 @@
home_path = "/notebooks/"


# ![Modify](img/pencil2.png) Identify uploaded file.
# ![Modify](img/pencil2.png) **Modify the following.** Identify uploaded file.

# In[ ]:

Expand All @@ -62,7 +62,6 @@
datasources.append(datasource)

print(f"Found the following DATA_SOURCE values in the data: {datasources}")



# ## Update Senzing configuration
Expand All @@ -82,9 +81,6 @@
# In[ ]:


sz_abstract_factory = SzAbstractFactory(
grpc_channel=grpc.insecure_channel("localhost:8261")
)
sz_config = sz_abstract_factory.create_sz_config()
sz_configmanager = sz_abstract_factory.create_sz_configmanager()
sz_diagnostic = sz_abstract_factory.create_sz_diagnostic()
Expand All @@ -98,7 +94,7 @@

old_config_id = sz_configmanager.get_default_config_id()
old_json_config = sz_configmanager.get_config(old_config_id)
config_handle = sz_config.import_config(old_json_config)
config_handle = sz_config.import_config(old_json_config)


# Add DataSources to Senzing configuration.
Expand All @@ -120,9 +116,9 @@

new_json_config = sz_config.export_config(config_handle)
new_config_id = sz_configmanager.add_config(
new_json_config, "Add TruthSet datasources"
new_json_config, "Add user datasources"
)
sz_configmanager.replace_default_config_id(old_config_id, new_config_id)
sz_configmanager.replace_default_config_id(old_config_id, new_config_id)


# With the change in Senzing configuration, Senzing objects need to be updated.
Expand All @@ -143,7 +139,7 @@

with open(filepath, "r") as file:
for line in file:
try:
try:
line_as_dict = json.loads(line)
info = sz_engine.add_record(
line_as_dict.get("DATA_SOURCE"),
Expand All @@ -153,5 +149,5 @@
)
print(info)
except SzError as err:
print(err)
print(err)

0 comments on commit d5c487e

Please sign in to comment.