Skip to content

Commit

Permalink
remove redundant imports and add separate functions for getting datasets
Browse files (browse the repository at this point in the history)
  • Loading branch information
Binh Vu committed Jan 22, 2024
1 parent 13db96e commit 96e21ff
Showing 1 changed file with 28 additions and 17 deletions.
45 changes: 28 additions & 17 deletions kgdata/wikidata/datasets/meta_graph_stats.py
Original file line number | Diff line number | Diff line change
@@ -1,50 +1,61 @@
from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass
from functools import partial
from operator import add
from typing import Dict, Iterable, List, Optional, Tuple, TypeAlias, Union
from typing import Optional

import orjson
from sm.misc.funcs import filter_duplication

from kgdata.dataset import Dataset
from kgdata.db import deser_from_dict, ser_to_dict
from kgdata.wikidata.config import WikidataDirCfg
from kgdata.wikidata.datasets.entities import entities
from kgdata.wikidata.datasets.entity_outlinks import entity_outlinks
from kgdata.wikidata.datasets.entity_types import entity_types
from kgdata.wikidata.datasets.meta_graph import MetaEntity, meta_graph
from kgdata.wikidata.models.wdentity import WDEntity
from kgdata.wikidata.models.wdvalue import WDValue, WDValueKind


def meta_graph_stats():
def get_predicate_count_dataset(with_dep: bool = True):
cfg = WikidataDirCfg.get_instance()

# have information about the domains and ranges of predicates
predicate_count_ds = Dataset(
return Dataset(
cfg.meta_graph_stats / "predicate_count/*.gz",
deserialize=partial(deser_from_dict, PCount),
name="meta-graph-stats/predicate-count",
dependencies=[meta_graph()],
dependencies=[meta_graph()] if with_dep else [],
)

predicate_conn_ds = Dataset(

def get_predicate_conn_dataset(with_dep: bool = True):
cfg = WikidataDirCfg.get_instance()

# have information about the domains and ranges of predicates
return Dataset(
cfg.meta_graph_stats / "predicate_conn/*.gz",
deserialize=partial(deser_from_dict, PConnection),
name="meta-graph-stats/predicate-conn",
dependencies=[meta_graph()],
dependencies=[meta_graph()] if with_dep else [],
)

predicate_occurrence_ds = Dataset(

def get_predicate_occurrence_dataset(with_dep: bool = True):
cfg = WikidataDirCfg.get_instance()

# have information about the domains and ranges of predicates
return Dataset(
cfg.meta_graph_stats / "predicate_occurrence/*.gz",
deserialize=partial(deser_from_dict, PConnection),
deserialize=partial(deser_from_dict, POccurrence),
name="meta-graph-stats/predicate-occurrence",
dependencies=[meta_graph()],
dependencies=[meta_graph()] if with_dep else [],
)


def meta_graph_stats():
cfg = WikidataDirCfg.get_instance()

predicate_count_ds = get_predicate_count_dataset()
# have information about the domains and ranges of predicates
predicate_conn_ds = get_predicate_conn_dataset()
predicate_occurrence_ds = get_predicate_occurrence_dataset()

if not predicate_count_ds.has_complete_data():
(
meta_graph()
Expand Down

0 comments on commit 96e21ff

Please sign in to comment.