Merge branch 'datahub-project:master' into master
anshbansal authored Feb 7, 2025
2 parents 5fa4c9d + bd47b11 commit e1d1d1b
Showing 6 changed files with 67 additions and 305 deletions.
2 changes: 1 addition & 1 deletion docs/cli.md
@@ -735,7 +735,7 @@ Please see our [Integrations page](https://datahubproject.io/integrations) if yo
| [bigquery](./generated/ingestion/sources/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source |
| [datahub-lineage-file](./generated/ingestion/sources/file-based-lineage.md) | _no additional dependencies_ | Lineage File source |
| [datahub-business-glossary](./generated/ingestion/sources/business-glossary.md) | _no additional dependencies_ | Business Glossary File source |
-| [dbt](./generated/ingestion/sources/dbt.md) | _no additional dependencies_ | dbt source |
+| [dbt](./generated/ingestion/sources/dbt.md) | `pip install 'acryl-datahub[dbt]'` | dbt source |
| [dremio](./generated/ingestion/sources/dremio.md) | `pip install 'acryl-datahub[dremio]'` | Dremio Source |
| [druid](./generated/ingestion/sources/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source |
| [feast](./generated/ingestion/sources/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source (0.26.0) |
@@ -125,6 +125,50 @@ The Helm chart [datahub-executor-worker](https://executor-helm.acryl.io/index.ya
--set image.tag=v0.3.1 \
acryl datahub-executor-worker
```
9. As of DataHub Cloud `v0.3.8.2`, it is possible to pass secrets to ingestion recipes using Kubernetes Secret CRDs, as shown below. This allows secrets to be updated at runtime without restarting the Remote Executor process.
```yaml
# 1. Create a K8s Secret object in the remote executor namespace, e.g.
apiVersion: v1
kind: Secret
metadata:
  name: datahub-secret-store
data:
  REDSHIFT_PASSWORD: cmVkc2hpZnQtc2VjcmV0Cg==
  SNOWFLAKE_PASSWORD: c25vd2ZsYWtlLXNlY3JldAo=

# 2. Add the secret to your Remote Executor deployment:
extraVolumes:
  - name: datahub-secret-store
    secret:
      secretName: datahub-secret-store

# 3. Mount it under the /mnt/secrets directory
extraVolumeMounts:
  - mountPath: /mnt/secrets
    name: datahub-secret-store
```
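If you prefer not to base64-encode values by hand, a Kubernetes Secret also accepts plaintext values through `stringData` (the API server encodes them on write). A minimal equivalent sketch of the Secret above, using the placeholder passwords that the example's base64 values decode to:
```yaml
apiVersion: v1
kind: Secret
metadata:
  name: datahub-secret-store
stringData: # plaintext here; Kubernetes stores it base64-encoded under `data`
  REDSHIFT_PASSWORD: redshift-secret
  SNOWFLAKE_PASSWORD: snowflake-secret
```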
You can then reference the mounted secrets directly in the ingestion recipe:
```yaml
source:
  type: redshift
  config:
    host_port: '<redshift host:port>'
    username: connector_test
    table_lineage_mode: mixed
    include_table_lineage: true
    include_tables: true
    include_views: true
    profiling:
      enabled: true
      profile_table_level_only: false
    stateful_ingestion:
      enabled: true
    password: '${REDSHIFT_PASSWORD}'
```
By default, the executor looks for secret files mounted under `/mnt/secrets`. To use a different location, set the environment variable `DATAHUB_EXECUTOR_FILE_SECRET_BASEDIR` (default: `/mnt/secrets`).

Each secret file is expected to be at most 1MB by default. To raise this limit, set `DATAHUB_EXECUTOR_FILE_SECRET_MAXLEN` to a larger size in bytes (default: `1024768`).
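For example, assuming the Helm chart exposes an `extraEnvs` list for setting environment variables on the worker container (a hypothetical values key; check the datahub-executor-worker chart for the actual mechanism), the overrides could look like:
```yaml
# Sketch only: `extraEnvs` is an assumed values key, not confirmed from the chart.
extraEnvs:
  - name: DATAHUB_EXECUTOR_FILE_SECRET_BASEDIR
    value: /etc/datahub/secrets # read secret files from here instead of /mnt/secrets
  - name: DATAHUB_EXECUTOR_FILE_SECRET_MAXLEN
    value: "4194304" # accept secret files up to 4 MB (value in bytes)
```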

## FAQ

2 changes: 1 addition & 1 deletion docs/managed-datahub/release-notes/v_0_3_8.md
@@ -3,7 +3,7 @@

Release Availability Date
---
-21-Jan-2025
+29-Jan-2025

Recommended CLI/SDK
---
@@ -6,6 +6,7 @@
from typing import Callable, Dict, Iterable, List, MutableMapping, Optional

from datahub.ingestion.api.report import SupportsAsObj
+from datahub.ingestion.source.common.subtypes import DatasetSubTypes
from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
from datahub.ingestion.source.snowflake.snowflake_query import (
@@ -100,6 +101,9 @@ class SnowflakeTable(BaseTable):
    def is_hybrid(self) -> bool:
        return self.type is not None and self.type == "HYBRID TABLE"

+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.TABLE


@dataclass
class SnowflakeView(BaseView):
@@ -109,6 +113,9 @@ class SnowflakeView(BaseView):
    column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
    is_secure: bool = False

+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.VIEW


@dataclass
class SnowflakeSchema:
@@ -154,6 +161,9 @@ class SnowflakeStream:
    column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
    last_altered: Optional[datetime] = None

+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.SNOWFLAKE_STREAM


class _SnowflakeTagCache:
    def __init__(self) -> None:
@@ -21,7 +21,6 @@
from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
from datahub.ingestion.source.common.subtypes import (
    DatasetContainerSubTypes,
-    DatasetSubTypes,
)
from datahub.ingestion.source.snowflake.constants import (
    GENERIC_PERMISSION_ERROR_KEY,
@@ -467,7 +466,13 @@ def _process_schema(
            context=f"{db_name}.{schema_name}",
        )

-    def _process_tags(self, snowflake_schema, schema_name, db_name, domain):
+    def _process_tags(
+        self,
+        snowflake_schema: SnowflakeSchema,
+        schema_name: str,
+        db_name: str,
+        domain: str,
+    ) -> None:
        snowflake_schema.tags = self.tag_extractor.get_tags_on_object(
            schema_name=schema_name, db_name=db_name, domain=domain
        )
@@ -837,15 +842,7 @@ def gen_dataset_workunits(
        if dpi_aspect:
            yield dpi_aspect

-        subTypes = SubTypes(
-            typeNames=(
-                [DatasetSubTypes.SNOWFLAKE_STREAM]
-                if isinstance(table, SnowflakeStream)
-                else [DatasetSubTypes.VIEW]
-                if isinstance(table, SnowflakeView)
-                else [DatasetSubTypes.TABLE]
-            )
-        )
+        subTypes = SubTypes(typeNames=[table.get_subtype()])

        yield MetadataChangeProposalWrapper(
            entityUrn=dataset_urn, aspect=subTypes
@@ -932,9 +929,9 @@ def get_dataset_properties(
"OWNER_ROLE_TYPE": table.owner_role_type,
"TABLE_NAME": table.table_name,
"BASE_TABLES": table.base_tables,
"STALE_AFTER": table.stale_after.isoformat()
if table.stale_after
else None,
"STALE_AFTER": (
table.stale_after.isoformat() if table.stale_after else None
),
}.items()
if v
}