-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Devesh Sarda
committed
Feb 12, 2024
1 parent
d3a3073
commit 90807f0
Showing
24 changed files
with
1,421 additions
and
1,012 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"dataset_name" : "ogbn_arxiv", | ||
"features_stats" : { | ||
"page_size" : "16 KB", | ||
"feature_dimension" : 128, | ||
"feature_size" : "float32" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"dataset_name" : "ogbn_papers100m" | ||
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import os | ||
import json | ||
import argparse | ||
import random | ||
|
||
from src.dataset_loader import * | ||
from src.features_loader import * | ||
from src.sampler import * | ||
from src.visualizer import * | ||
|
||
|
||
def read_config_file(config_file):
    """Parse the JSON simulation config stored at ``config_file``.

    Args:
        config_file: Path of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    with open(config_file, "r") as config_handle:
        parsed_config = json.load(config_handle)
    return parsed_config
|
||
|
||
def read_arguments():
    """Parse the command line of the simulator.

    Returns:
        The argparse namespace; its ``config_file`` attribute holds the single
        required positional argument.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "config_file",
        type=str,
        help="The config file containing the details for the simulation",
    )
    parsed = cli.parse_args()
    return parsed
|
||
|
||
# Directory (relative path) under which main() saves the generated plot.
IMAGES_SAVE_DIR = "images"
|
||
|
||
def main():
    """Run the page-access simulation described by the command-line config file.

    Builds the dataset, feature and sampler objects from the config, asks the
    sampler for the number of pages read for every node, and plots the
    non-zero counts. The plot is saved under IMAGES_SAVE_DIR and named after
    the config file, with a ``.png`` extension.
    """
    arguments = read_arguments()
    config = read_config_file(arguments.config_file)

    # Create the loaders
    data_loader = DatasetLoader(config["dataset_name"])
    features_loader = FeaturesLoader(data_loader, config["features_stats"])
    sampler = SubgraphSampler(data_loader, features_loader)

    # Perform sampling
    # NOTE(review): this treats node IDs as exactly 0..num_nodes-1 — confirm
    # against the dataset's ID assignment.
    nodes_to_sample = list(range(data_loader.get_num_nodes()))
    random.shuffle(nodes_to_sample)

    pages_loaded = []
    for curr_node in nodes_to_sample:
        num_pages_read = sampler.perform_sampling_for_node(curr_node)
        # Nodes that read no pages are left out of the plot.
        if num_pages_read > 0:
            pages_loaded.append(num_pages_read)
    print("Got result for", len(pages_loaded), "nodes out of", len(nodes_to_sample), "nodes")

    # Save the histogram
    # Swap only the extension: str.replace("json", "png") would also rewrite
    # any "json" that appears elsewhere in the file name.
    config_stem, _ = os.path.splitext(os.path.basename(arguments.config_file))
    os.makedirs(IMAGES_SAVE_DIR, exist_ok=True)
    save_path = os.path.join(IMAGES_SAVE_DIR, config_stem + ".png")
    visualize_results(pages_loaded, save_path, config["dataset_name"])
|
||
|
||
# Run the simulation only when this file is executed as a script.
if __name__ == "__main__":
    main()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import subprocess | ||
import os | ||
import numpy as np | ||
from collections import defaultdict | ||
|
||
|
||
class DatasetLoader:
    """Load a preprocessed graph dataset and expose its training edges.

    On construction, ``marius_preprocess`` is run if the dataset directory
    ``<SAVE_DIR>/<name>`` does not exist yet; the training edge list is then
    read into an in-memory adjacency map.
    """

    # Root directory holding one sub-directory per dataset.
    SAVE_DIR = "datasets"
    # Path of the binary edge list, relative to a dataset's directory.
    EDGES_PATH = "edges/train_edges.bin"

    def __init__(self, name):
        """Prepare (if needed) and load the dataset called ``name``."""
        self.name = name
        os.makedirs(DatasetLoader.SAVE_DIR, exist_ok=True)
        self.save_dir = os.path.join(DatasetLoader.SAVE_DIR, self.name)
        if not os.path.exists(self.save_dir):
            self.create_dataset()
        self.load_dataset()

    def create_dataset(self):
        """Run ``marius_preprocess`` to materialize the dataset in ``self.save_dir``.

        Raises:
            subprocess.CalledProcessError: If the command exits with a non-zero status.
        """
        # Pass an argument list and skip the shell: the dataset name comes from
        # a config file and must not be interpreted as shell syntax.
        command_to_run = [
            "marius_preprocess",
            "--dataset", self.name,
            "--output_directory", self.save_dir,
        ]
        print("Running command", " ".join(command_to_run))
        subprocess.check_output(command_to_run)

    def load_dataset(self):
        """Read the edge file and build the node set, edge count and adjacency map.

        Raises:
            ValueError: If the file does not hold an even number of int32 values.
        """
        # Load the file
        edges_path = os.path.join(self.save_dir, DatasetLoader.EDGES_PATH)
        with open(edges_path, "rb") as reader:
            edges_bytes = reader.read()

        # The file is interpreted as a flat run of int32 values forming
        # (source, target) pairs.
        # NOTE(review): dtype and two-column layout are assumed — confirm
        # against the preprocessing output.
        edges_flat = np.frombuffer(edges_bytes, dtype=np.int32)
        if edges_flat.size % 2 != 0:
            raise ValueError(
                f"{edges_path} holds {edges_flat.size} int32 values, "
                "which cannot be split into (source, target) pairs"
            )
        edges_arr = edges_flat.reshape((-1, 2))
        self.num_edges = len(edges_arr)

        # Deduplicate in NumPy, then convert once to plain ints.
        self.nodes = set(np.unique(edges_flat).tolist())

        # Create the adjacency map: source -> targets, in file order.
        self.adjacency_map = {}
        for source, target in edges_arr.tolist():
            self.adjacency_map.setdefault(source, []).append(target)

    def get_num_nodes(self):
        """Return the number of distinct node IDs that appear in the loaded edges."""
        return len(self.nodes)

    def get_neighbors_for_node(self, node_id):
        """Return the targets of ``node_id``'s outgoing edges, or ``[]`` if it has none."""
        return self.adjacency_map.get(node_id, [])

    def get_neigbhors_for_node(self, node_id):
        """Misspelled original name, kept for existing callers; see get_neighbors_for_node."""
        return self.get_neighbors_for_node(node_id)

    def get_num_edges(self):
        """Return the number of edges read from the edge file."""
        return self.num_edges
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import humanfriendly | ||
import os | ||
import math | ||
|
||
|
||
class FeaturesLoader:
    """Model how per-node feature vectors are packed into fixed-size pages."""

    def __init__(self, data_loader, features_stat):
        """Derive the page layout from the dataset size and the feature statistics.

        Args:
            data_loader: Object providing ``get_num_nodes()``.
            features_stat: Mapping with the keys ``"page_size"`` (human-readable
                size string), ``"feature_size"`` (element type name such as
                ``"float32"``) and ``"feature_dimension"`` (elements per node).
        """
        self.data_loader = data_loader
        self.page_size = humanfriendly.parse_size(features_stat["page_size"])
        # Size of a single feature element in bytes, the same unit as page_size.
        self.feature_size = self._dtype_size_in_bytes(features_stat["feature_size"])
        self.node_feature_size = self.feature_size * features_stat["feature_dimension"]

        # A feature vector larger than a page is still counted as one node per page.
        self.nodes_per_page = max(int(self.page_size // self.node_feature_size), 1)
        self.total_pages = int(math.ceil(data_loader.get_num_nodes() / self.nodes_per_page))

    @staticmethod
    def _dtype_size_in_bytes(dtype_name):
        """Return the byte width encoded in a type name such as ``"float32"``.

        The digits in the name are read as a bit width and converted to bytes;
        using the bit width directly would overstate each node's feature size
        by a factor of eight.

        Raises:
            ValueError: If ``dtype_name`` contains no digits.
        """
        bit_width = int("".join(c for c in dtype_name if c.isdigit()))
        return max(int(math.ceil(bit_width / 8)), 1)

    def get_node_page(self, node_id):
        """Return the index of the page that holds ``node_id``'s features."""
        # Integer division keeps the result exact for any non-negative ID.
        return int(node_id) // self.nodes_per_page

    def get_total_file_size(self):
        """Return the size of all feature pages as a human-readable string."""
        total_bytes = self.page_size * self.total_pages
        return humanfriendly.format_size(total_bytes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
class SubgraphSampler:
    """Count the feature pages touched when a node's neighbors are read."""

    def __init__(self, data_loader, features_loader):
        """Keep the neighbor source and the node-to-page mapping for later queries."""
        self.features_loader = features_loader
        self.data_loader = data_loader

    def perform_sampling_for_node(self, node_id):
        """Return how many distinct pages hold the neighbors of ``node_id``.

        A node without neighbors yields 0.
        """
        neighbors = self.data_loader.get_neigbhors_for_node(node_id)
        distinct_pages = {
            self.features_loader.get_node_page(neighbor) for neighbor in neighbors
        }
        return len(distinct_pages)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import matplotlib.pyplot as plt | ||
import os | ||
|
||
|
||
def visualize_results(pages_loaded, save_path, dataset_name, num_bins=50, max_pages=50):
    """Plot the cumulative distribution of pages loaded per node and save it.

    Args:
        pages_loaded: Sequence with one page count per node.
        save_path: File path the figure is written to.
        dataset_name: Name appended to the plot title.
        num_bins: Number of bins of the cumulative histogram.
        max_pages: Upper limit of the x-axis (the lower limit is 0).
    """
    # Make sure the target directory exists so saving cannot fail on a fresh checkout.
    output_dir = os.path.dirname(save_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Create the histogram
    figure = plt.figure()
    try:
        plt.ecdf(pages_loaded, label="CDF")
        plt.hist(pages_loaded, bins=num_bins, histtype="step", density=True, cumulative=True, label="Cumulative histogram")
        plt.xlabel("Number of pages loaded for node inference")
        # NOTE(review): both curves are normalized to the 0-1 range, so the
        # axis shows fractions although the label says percentage.
        plt.ylabel("Percentage of nodes")
        plt.title("Number of pages loaded for node inference on " + dataset_name)
        plt.xlim(0, max_pages)
        plt.legend()

        # Save the result
        print("Saving the result to", save_path)
        plt.savefig(save_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(figure)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.