Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

276 store spark info into db #277

Merged
merged 26 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4023bab
Update notebook and server code to include SparkAppModel and SparkApp…
xuwenyihust Jul 11, 2024
c1a0e16
Update SparkApp service to include create_spark_app method in spark_a…
xuwenyihust Jul 11, 2024
e86b11b
Update SparkAppModel primary key in spark_app.py
xuwenyihust Jul 11, 2024
e17c252
Update SparkApp service to include create_spark_app method in spark_a…
xuwenyihust Jul 11, 2024
19571d4
Update SparkApp service to include create_spark_app method and test c…
xuwenyihust Jul 11, 2024
89dd483
Update SparkApp service to fix encoding issue in test_spark_app_servi…
xuwenyihust Jul 11, 2024
9268d73
Add print statement to test_spark_app_service.py for debugging purposes
xuwenyihust Jul 11, 2024
63e2421
Fix notebook_path in test_create_spark_app method of test_spark_app_s…
xuwenyihust Jul 11, 2024
521b6ad
Remove print statement in test_spark_app_service.py
xuwenyihust Jul 11, 2024
d0119e4
Add NotebookModel import and update notebook spark_app_id check in te…
xuwenyihust Jul 11, 2024
84de568
Update SparkApp service to include get_spark_app_by_notebook_path met…
xuwenyihust Jul 11, 2024
bab3054
Add database tables for spark_apps, directories, and notebooks in ini…
xuwenyihust Jul 11, 2024
d89cbc1
Update NotebookModel and NotebookSparkAppModel to remove spark_app_id…
xuwenyihust Jul 11, 2024
219d54b
Fix spark_app_dict assertion in test_spark_app_service.py
xuwenyihust Jul 11, 2024
b4766ca
Update notebook_id assignment in SparkApp service to use the 'id' att…
xuwenyihust Jul 11, 2024
4491594
Refactor SparkApp service to create and update spark app in database
xuwenyihust Jul 11, 2024
c6cafa1
Refactor SparkApp service to associate spark app with notebook in dat…
xuwenyihust Jul 11, 2024
b95adcf
Refactor SparkApp service to use 'id' attribute instead of 'notebook_…
xuwenyihust Jul 11, 2024
c43930f
Refactor SparkApp service to use NotebookModel query instead of Noteb…
xuwenyihust Jul 11, 2024
306e2d4
Refactor NotebookSparkAppModelTestCase to use NotebookModel and Spark…
xuwenyihust Jul 11, 2024
2510c5a
Fix error handling in Notebook.get_spark_app_by_notebook_path method
xuwenyihust Jul 11, 2024
6dc149b
Refactor Notebook.get_spark_app_by_notebook_path method to use 'id' a…
xuwenyihust Jul 11, 2024
fe62643
Refactor Notebook.get_spark_app_by_notebook_path method to use 'id' a…
xuwenyihust Jul 11, 2024
e779921
Refactor Notebook.get_spark_app_by_notebook_path method to use 'id' a…
xuwenyihust Jul 11, 2024
c73b934
Refactor Notebook.get_spark_app_by_notebook_path method to use 'id' a…
xuwenyihust Jul 11, 2024
db31e2c
Refactor database initialization script and update notebook names in …
xuwenyihust Jul 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion docker/postgres/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,45 @@ CREATE TABLE directories (
path VARCHAR(100) NOT NULL
);

CREATE TABLE spark_apps (
spark_app_id VARCHAR(100) PRIMARY KEY
);

CREATE TABLE notebook_spark_apps (
id SERIAL PRIMARY KEY,
notebook_id INT REFERENCES notebooks(id),
spark_app_id VARCHAR(100) REFERENCES spark_apps(spark_app_id)
);

GRANT ALL PRIVILEGES ON TABLE notebooks TO server;
GRANT ALL PRIVILEGES ON SEQUENCE notebooks_id_seq TO server;

GRANT ALL PRIVILEGES ON TABLE directories TO server;
GRANT ALL PRIVILEGES ON SEQUENCE directories_id_seq TO server;
GRANT ALL PRIVILEGES ON SEQUENCE directories_id_seq TO server;

GRANT ALL PRIVILEGES ON TABLE spark_apps TO server;

GRANT ALL PRIVILEGES ON TABLE notebook_spark_apps TO server;
GRANT ALL PRIVILEGES ON SEQUENCE notebook_spark_apps_id_seq TO server;

-- Add some initial data
INSERT INTO notebooks (name, path) VALUES ('demo.ipynb', 'work/demo.ipynb');
INSERT INTO notebooks (name, path) VALUES ('notebook.ipynb', 'work/notebook.ipynb');

INSERT INTO directories (name, path) VALUES ('work', '/work');
INSERT INTO directories (name, path) VALUES ('word-count', '/work/word-count');
INSERT INTO directories (name, path) VALUES ('sg-resale-flat-prices', '/work/sg-resale-flat-prices');
INSERT INTO directories (name, path) VALUES ('output', '/work/sg-resale-flat-prices/output');

INSERT INTO spark_apps (spark_app_id) VALUES ('app-0000-0000');
INSERT INTO spark_apps (spark_app_id) VALUES ('app-0000-0001');
INSERT INTO spark_apps (spark_app_id) VALUES ('app-0000-0002');
INSERT INTO spark_apps (spark_app_id) VALUES ('app-0000-0003');
INSERT INTO spark_apps (spark_app_id) VALUES ('app-0000-0004');

INSERT INTO notebook_spark_apps (notebook_id, spark_app_id) VALUES (1, 'app-0000-0000');
INSERT INTO notebook_spark_apps (notebook_id, spark_app_id) VALUES (1, 'app-0000-0001');
INSERT INTO notebook_spark_apps (notebook_id, spark_app_id) VALUES (1, 'app-0000-0002');

INSERT INTO notebook_spark_apps (notebook_id, spark_app_id) VALUES (2, 'app-0000-0003');
INSERT INTO notebook_spark_apps (notebook_id, spark_app_id) VALUES (2, 'app-0000-0004');
32 changes: 0 additions & 32 deletions examples/1/111.ipynb

This file was deleted.

1 change: 0 additions & 1 deletion server/app/models/notebook.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from flask import jsonify
from database import db

class NotebookModel(db.Model):
Expand Down
20 changes: 20 additions & 0 deletions server/app/models/notebook_spark_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from database import db

class NotebookSparkAppModel(db.Model):

__tablename__ = 'notebook_spark_apps'

id = db.Column(db.Integer, primary_key=True, autoincrement=True)
notebook_id = db.Column(db.Integer, db.ForeignKey('notebooks.id'), nullable=False)
spark_app_id = db.Column(db.String, db.ForeignKey('spark_apps.spark_app_id'), nullable=False)

def __init__(self, notebook_id, spark_app_id):
self.notebook_id = notebook_id
self.spark_app_id = spark_app_id

def to_dict(self):
return {
'id': self.id,
'notebook_id': self.notebook_id,
'spark_app_id': self.spark_app_id
}
16 changes: 16 additions & 0 deletions server/app/models/spark_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from database import db


class SparkAppModel(db.Model):

__tablename__ = 'spark_apps'

spark_app_id = db.Column(db.String, primary_key=True, nullable=False)

def __init__(self, spark_app_id):
self.spark_app_id = spark_app_id

def to_dict(self):
return {
'spark_app_id': self.spark_app_id,
}
5 changes: 5 additions & 0 deletions server/app/routes/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,10 @@ def rename_or_move_notebook(notebook_path):
new_notebook_path = data.get('newPath', None)
return Notebook.move_notebook(notebook_path=notebook_path, new_notebook_path=new_notebook_path)

@notebook_blueprint.route('/notebook/spark_app/<path:notebook_path>', methods=['GET'])
def get_spark_app_by_notebook_path(notebook_path):
logging.info(f"Get spark apps by notebook path: {notebook_path}")
return Notebook.get_spark_app_by_notebook_path(notebook_path)



13 changes: 13 additions & 0 deletions server/app/routes/spark_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from flask import Blueprint, jsonify, request
from app.services.spark_app import SparkApp
import logging

spark_app_blueprint = Blueprint('spark_app', __name__)

logging.basicConfig(level=logging.INFO)

@spark_app_blueprint.route('/spark_app/<path:spark_app_id>', methods=['POST'])
def create_spark_app(spark_app_id):
data = request.get_json()
notebook_path = data.get('notebookPath', None)
return SparkApp.create_spark_app(spark_app_id=spark_app_id, notebook_path=notebook_path)
42 changes: 41 additions & 1 deletion server/app/services/notebook.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from app.models.notebook import NotebookModel
from app.models.notebook_spark_app import NotebookSparkAppModel
from app.models.spark_app import SparkAppModel
from flask import Response
from datetime import datetime
import requests
Expand Down Expand Up @@ -312,4 +314,42 @@ def move_notebook(notebook_path: str = None, new_notebook_path: str = None):

return Response(
response=json.dumps({'message': 'Notebook moved'}),
status=200)
status=200)

@staticmethod
def get_spark_app_by_notebook_path(notebook_path: str = None):
logger.info(f"Getting spark app with notebook path: {notebook_path}")

try:
notebook = NotebookModel.query.filter_by(path=notebook_path).first()
notebook_id = notebook.id
except Exception as e:
return Response(
response=json.dumps({'message': 'Error getting notebook from DB: ' + str(e)}),
status=404)

try:
notebook_spark_apps = NotebookSparkAppModel.query.filter_by(notebook_id=notebook_id)
spark_app_ids = [notebook_spark_app.spark_app_id for notebook_spark_app in notebook_spark_apps]
except Exception as e:
return Response(
response=json.dumps({'message': 'Error getting notebook - spark app relation from DB: ' + str(e)}),
status=404)
try:
spark_apps = []
for spark_app_id in spark_app_ids:
spark_app = SparkAppModel.query.filter_by(spark_app_id=spark_app_id).first()
spark_apps.append(spark_app)
except Exception as e:
return Response(
response=json.dumps({'message': 'Error getting spark app from DB: ' + str(e)}),
status=404)

if notebook is None:
return Response(
response=json.dumps({'message': 'Spark app not found in DB'}),
status=404)
else:
return Response(
response=json.dumps([x.to_dict() for x in spark_apps]),
status=200)
91 changes: 91 additions & 0 deletions server/app/services/spark_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from app.models.spark_app import SparkAppModel
from app.models.notebook import NotebookModel
from app.models.notebook_spark_app import NotebookSparkAppModel
from flask import Response
from datetime import datetime
import json
from database import db
from flask import current_app as app
import logging

logger = logging.getLogger(__name__)

class SparkApp:

@staticmethod
def get_all_spark_apps():
spark_apps = SparkAppModel.query.all()

# Convert the spark apps to dictionaries
spark_apps_dict = [spark_app.to_dict() for spark_app in spark_apps]

return Response(
response=json.dumps(spark_apps_dict),
status=200
)

@staticmethod
def get_spark_app_by_id(spark_app_id: str = None):
logger.info(f"Getting spark app with id: {spark_app_id}")

try:
spark_app = SparkAppModel.query.filter_by(spark_app_id=spark_app_id).first()
logger.info(f"Spark app found in DB: {spark_app}")
except Exception as e:
return Response(
response=json.dumps({'message': 'Error getting spark app from DB: ' + str(e)}),
status=404)

return Response(
response=json.dumps(spark_app.to_dict()),
status=200
)

@staticmethod
def create_spark_app(spark_app_id: str = None, notebook_path: str = None):
logger.info(f"Creating spark app with id: {spark_app_id} for notebook path: {notebook_path}")

if spark_app_id is None:
logger.error("Spark app id is None")
return Response(
response=json.dumps({'message': 'Spark app id is None'}),
status=404)

if notebook_path is None:
logger.error("Notebook path is None")
return Response(
response=json.dumps({'message': 'Notebook path is None'}),
status=404)

try:
# Create the spark app
spark_app = SparkAppModel(
spark_app_id=spark_app_id,
)

db.session.add(spark_app)
db.session.commit()

# Update the notebook_spark_app relationship
notebook = NotebookModel.query.filter_by(path=notebook_path).first()
notebook_id = notebook.id

notebook_spark_app = NotebookSparkAppModel(
notebook_id=notebook_id,
spark_app_id=spark_app_id
)

db.session.add(notebook_spark_app)
db.session.commit()

logger.info(f"Spark app created: {spark_app}")
except Exception as e:
logger.error(f"Error creating spark app: {e}")
return Response(
response=json.dumps({'message': 'Error creating spark app: ' + str(e)}),
status=404)

return Response(
response=json.dumps(spark_app.to_dict()),
status=200
)
2 changes: 2 additions & 0 deletions server/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from app.routes.directory import directory_blueprint
from app.routes.session import session_blueprint
from app.routes.kernel import kernel_blueprint
from app.routes.spark_app import spark_app_blueprint
from config import DevelopmentConfig, IntegrationTestingConfig, TestingConfig

def create_app():
Expand All @@ -32,6 +33,7 @@ def create_app():
app.register_blueprint(directory_blueprint)
app.register_blueprint(session_blueprint)
app.register_blueprint(kernel_blueprint)
app.register_blueprint(spark_app_blueprint)


if __name__ == '__main__':
Expand Down
44 changes: 44 additions & 0 deletions server/tests/models/test_notebook_spark_app_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import unittest
from flask_cors import CORS
from run import create_app
from database import db
from app.models.notebook_spark_app import NotebookSparkAppModel
from app.models.notebook import NotebookModel
from app.models.spark_app import SparkAppModel

class NotebookSparkAppModelTestCase(unittest.TestCase):
def setUp(self):
self.app = create_app()
self.client = self.app.test_client()
with self.app.app_context():
db.create_all()

def tearDown(self):
with self.app.app_context():
db.session.remove()
db.drop_all()

def test_notebook_spark_app_model(self):
with self.app.app_context():
notebook = NotebookModel(name='Test Notebook', path='')
db.session.add(notebook)
db.session.commit()

spark_app = SparkAppModel(spark_app_id='Test Spark App')
db.session.add(spark_app)
db.session.commit()

notebook_spark_app = NotebookSparkAppModel(notebook_id=notebook.id, spark_app_id='Test Spark App')
db.session.add(notebook_spark_app)
db.session.commit()

self.assertIsNotNone(notebook_spark_app.id)
self.assertEqual(notebook_spark_app.notebook_id, 1)
self.assertEqual(notebook_spark_app.spark_app_id, 'Test Spark App')

notebook_spark_app_dict = notebook_spark_app.to_dict()
self.assertEqual(notebook_spark_app_dict, {
'id': notebook_spark_app.id,
'notebook_id': 1,
'spark_app_id': 'Test Spark App'
})
31 changes: 31 additions & 0 deletions server/tests/models/test_spark_app_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest
from flask_cors import CORS
from run import create_app
from database import db
from app.models.spark_app import SparkAppModel

class SparkAppModelTestCase(unittest.TestCase):

def setUp(self):
self.app = create_app()
self.client = self.app.test_client()
with self.app.app_context():
db.create_all()

def tearDown(self):
with self.app.app_context():
db.session.remove()
db.drop_all()

def test_spark_app_model(self):
with self.app.app_context():
spark_app = SparkAppModel(spark_app_id='Test Spark App')
db.session.add(spark_app)
db.session.commit()

self.assertEqual(spark_app.spark_app_id, 'Test Spark App')

spark_app_dict = spark_app.to_dict()
self.assertEqual(spark_app_dict, {
'spark_app_id': 'Test Spark App'
})
Loading
Loading