Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔧 Increase max and default page limits #629

Merged
merged 5 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
command: |
python3 -m venv venv
. venv/bin/activate
pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0"
pip install -r dev-requirements.txt
pip install -r requirements.txt
pip install -e .
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ COPY bin/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Setup dataservice
COPY requirements.txt /app/
RUN pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0"
RUN pip install -r /app/requirements.txt

COPY manage.py setup.py config.py /app/
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ https://realpython.com/intro-to-pyenv/
# Setup python environment and install dependencies
pyenv virtualenv 3.7.11 dataservice_venv
pyenv local dataservice_venv

# Important but *temporary*
# See https://github.com/yaml/pyyaml/issues/724
pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0"
Copy link
Member

@devbyaccident devbyaccident Aug 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's an annoying issue. Is there something that we need pyyaml at 5.4.0 for, or can we take a newer version like pyyaml<6.0? Would rather not hard-code a specific version to change later if we can avoid it.

Also, yaml/pyyaml#724 (comment)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@devbyaccident Lol that image is so accurate.

We've always had pyyaml pinned to 5.4.0, and I believe its bc a while ago, something broke and caused us to pin it. So I don't want to change anything that we don't absolutely have to in this PR. I can try changing it in another PR so the change isolated

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotchya, that makes sense. Ok, I'm happy to give it the 👍 , but I can't speak as much for the non-docker/package changes. Would still like someone from @kids-first/dataservice-reviewers to take a look.


pip install -r dev-requirements.txt
pip install -r requirements.txt
pip install -e .
Expand All @@ -103,7 +108,7 @@ source ./env_local.sh
flask db upgrade

# Run the flask web application
flask run
./manage.py
```

## Database
Expand Down
4 changes: 2 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ class Config:
PG_USER, PG_PASS, PG_HOST, PG_PORT, PG_NAME)

# Default number of results per request
DEFAULT_PAGE_LIMIT = 10
DEFAULT_PAGE_LIMIT = 100
# Determines the maximum number of results per request
MAX_PAGE_LIMIT = 100
MAX_PAGE_LIMIT = 1000

INDEXD_URL = os.environ.get('INDEXD_URL', None)
INDEXD_USER = os.environ.get('INDEXD_USER', 'test')
Expand Down
Empty file modified env_local.sh
100644 → 100755
Empty file.
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ boto3==1.7.8
botocore==1.10.8
Jinja2==2.10
requests==2.24.0
PyYAML==5.4
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@
ENDPOINT_ENTITY_MAP = {v: k for k, v in ENTITY_ENDPOINT_MAP.items()}
ENDPOINTS = list(ENTITY_ENDPOINT_MAP.values())

DEFAULT_PAGE_LIMIT = 100
MAX_PAGE_LIMIT = 1000


def _create_entity_params(filepath):
# Read data from file
Expand Down
6 changes: 3 additions & 3 deletions tests/genomic_file/test_genomic_file_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dataservice.api.sequencing_experiment.models import SequencingExperiment
from dataservice.api.genomic_file.models import GenomicFile
from tests.conftest import make_entities
from tests.conftest import ENTITY_TOTAL
from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT
from tests.mocks import MockIndexd


Expand Down Expand Up @@ -159,8 +159,8 @@ def test_get_list(client, indexd, genomic_files):

assert resp['_status']['code'] == 200
assert resp['total'] == GenomicFile.query.count()
assert len(resp['results']) == 10
assert indexd.get.call_count == 10
assert len(resp['results']) == DEFAULT_PAGE_LIMIT
assert indexd.get.call_count == DEFAULT_PAGE_LIMIT


def test_get_list_with_missing_files(client, indexd, genomic_files):
Expand Down
7 changes: 3 additions & 4 deletions tests/study_file/test_study_file_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from dataservice.api.study_file.models import StudyFile
from dataservice.api.study.models import Study
from tests.utils import FlaskTestCase
from tests.conftest import make_entities
from tests.conftest import ENTITY_TOTAL
from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT, make_entities
from unittest.mock import MagicMock, patch
from tests.mocks import MockIndexd

Expand Down Expand Up @@ -132,8 +131,8 @@ def test_get_list(client, indexd, study_files):

assert resp['_status']['code'] == 200
assert resp['total'] == StudyFile.query.count()
assert len(resp['results']) == 10
assert indexd.get.call_count == 10
assert len(resp['results']) == DEFAULT_PAGE_LIMIT
assert indexd.get.call_count == DEFAULT_PAGE_LIMIT


def test_get_list_with_missing_files(client, indexd, study_files):
Expand Down
16 changes: 9 additions & 7 deletions tests/test_filter_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
from tests.conftest import (
ENTITY_ENDPOINT_MAP,
ENDPOINTS,
ENTITY_PARAMS
ENTITY_PARAMS,
MAX_PAGE_LIMIT,
DEFAULT_PAGE_LIMIT
)


Expand Down Expand Up @@ -56,8 +58,8 @@ def test_filter_params(self, client, entities, model):

# Check status code
assert response.status_code == 200
assert resp['limit'] == 10
assert len(resp['results']) == min(expected_total, 10)
assert resp['limit'] == DEFAULT_PAGE_LIMIT
assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
assert resp['total'] == expected_total
# All results have correct field values
for result in resp['results']:
Expand Down Expand Up @@ -122,8 +124,8 @@ def test_unknown_filter_params(self, client, entities, model):
assert response.status_code == 200
# Check content
resp = json.loads(response.data.decode('utf-8'))
assert resp['limit'] == 10
assert len(resp['results']) == min(expected_total, 10)
assert resp['limit'] == DEFAULT_PAGE_LIMIT
assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
assert resp['total'] == expected_total

@pytest.mark.parametrize('field', ['created_at', 'modified_at'])
Expand Down Expand Up @@ -154,8 +156,8 @@ def test_generated_date_filters(self, client, entities, model, field):
assert response.status_code == 200
# Check content
resp = json.loads(response.data.decode('utf-8'))
assert resp['limit'] == 10
assert len(resp['results']) == min(expected_total, 10)
assert resp['limit'] == DEFAULT_PAGE_LIMIT
assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
assert resp['total'] == expected_total
# All results have correct field values
for result in resp['results']:
Expand Down
104 changes: 52 additions & 52 deletions tests/test_pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

from unittest.mock import MagicMock, patch
from tests.mocks import MockIndexd
from tests.conftest import ENDPOINTS
from tests.conftest import ENDPOINTS, MAX_PAGE_LIMIT, DEFAULT_PAGE_LIMIT

pytest_plugins = ['tests.mocks']

Expand All @@ -46,34 +46,34 @@ class TestPagination:
def participants(client):

# Add a bunch of studies for pagination
for i in range(101):
for i in range(MAX_PAGE_LIMIT + 1):
params = {
'external_id': f'Study_{i}',
'short_code': f'KF_{i}'
}
s = Study(**params)
db.session.add(s)

for i in range(101):
for i in range(MAX_PAGE_LIMIT + 1):
ca = CavaticaApp(name='app', revision=0)
db.session.add(ca)

# Add a bunch of study files
s0 = Study.query.filter_by(external_id='Study_0').one()
s1 = Study.query.filter_by(external_id='Study_1').one()
for i in range(101):
for i in range(MAX_PAGE_LIMIT+1):
sf = StudyFile(file_name='blah', study_id=s0.kf_id)
db.session.add(sf)

# Add a bunch of investigators
for _ in range(102):
for _ in range(MAX_PAGE_LIMIT + 2):
inv = Investigator(name='test')
inv.studies.extend([s0, s1])
db.session.add(inv)

# Add a bunch of families
families = []
for i in range(101):
for i in range(MAX_PAGE_LIMIT + 1):
families.append(Family(external_id='Family_{}'.format(i)))
db.session.add_all(families)
db.session.flush()
Expand All @@ -82,9 +82,9 @@ def participants(client):
f0 = Family.query.filter_by(external_id='Family_0').one()
f1 = Family.query.filter_by(external_id='Family_1').one()
seq_cen = None
for i in range(102):
f = f0 if i < 50 else f1
s = s0 if i < 50 else s1
for i in range(MAX_PAGE_LIMIT + 2):
f = f0 if i < int(MAX_PAGE_LIMIT/2) else f1
s = s0 if i < int(MAX_PAGE_LIMIT/2) else s1
data = {
'external_id': "test",
'is_proband': True,
Expand Down Expand Up @@ -168,26 +168,26 @@ def participants(client):
db.session.commit()

@pytest.mark.parametrize('endpoint, expected_total', [
('/participants', 50),
('/study-files', 101),
('/participants', int(MAX_PAGE_LIMIT/2)),
('/study-files', MAX_PAGE_LIMIT+1),
('/investigators', 1),
('/biospecimens', 50),
('/sequencing-experiments', 50),
('/diagnoses', 50),
('/outcomes', 50),
('/phenotypes', 50),
('/biospecimens', int(MAX_PAGE_LIMIT/2)),
('/sequencing-experiments', int(MAX_PAGE_LIMIT/2)),
('/diagnoses', int(MAX_PAGE_LIMIT/2)),
('/outcomes', int(MAX_PAGE_LIMIT/2)),
('/phenotypes', int(MAX_PAGE_LIMIT/2)),
('/families', 1),
('/family-relationships', 50),
('/genomic-files', 50),
('/read-groups', 50),
('/family-relationships', int(MAX_PAGE_LIMIT/2)),
('/genomic-files', int(MAX_PAGE_LIMIT/2)),
('/read-groups', int(MAX_PAGE_LIMIT/2)),
('/sequencing-centers', 1),
('/cavatica-apps', 1),
('/tasks', 50),
('/task-genomic-files', 50),
('/read-group-genomic-files', 50),
('/sequencing-experiment-genomic-files', 50),
('/biospecimen-genomic-files', 50),
('/biospecimen-diagnoses', 50)
('/tasks', int(MAX_PAGE_LIMIT/2)),
('/task-genomic-files', int(MAX_PAGE_LIMIT/2)),
('/read-group-genomic-files', int(MAX_PAGE_LIMIT/2)),
('/sequencing-experiment-genomic-files', int(MAX_PAGE_LIMIT/2)),
('/biospecimen-genomic-files', int(MAX_PAGE_LIMIT/2)),
('/biospecimen-diagnoses', int(MAX_PAGE_LIMIT/2))
])
def test_study_filter(self, client, participants,
endpoint, expected_total):
Expand All @@ -198,8 +198,8 @@ def test_study_filter(self, client, participants,
endpoint = '{}?study_id={}'.format(endpoint, s.kf_id)
resp = client.get(endpoint)
resp = json.loads(resp.data.decode('utf-8'))
assert len(resp['results']) == min(expected_total, 10)
assert resp['limit'] == 10
assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
assert resp['limit'] == DEFAULT_PAGE_LIMIT
assert resp['total'] == expected_total

ids_seen = []
Expand Down Expand Up @@ -233,7 +233,7 @@ def test_non_exist_study_filter(self, client, participants,
resp = json.loads(resp.data.decode('utf-8'))

assert len(resp['results']) == 0
assert resp['limit'] == 10
assert resp['limit'] == DEFAULT_PAGE_LIMIT
assert resp['total'] == 0

@pytest.mark.parametrize('study_id', ['blah', 3489, 'PT_00001111'])
Expand Down Expand Up @@ -261,32 +261,32 @@ def test_invalid_study_filter(self, client, participants,
assert 'study_id' in resp['_status']['message']

@pytest.mark.parametrize('endpoint, expected_total', [
('/studies', 101),
('/investigators', 102),
('/participants', 102),
('/outcomes', 102),
('/phenotypes', 102),
('/diagnoses', 102),
('/family-relationships', 101),
('/study-files', 101),
('/families', 101),
('/read-groups', 102),
('/studies', MAX_PAGE_LIMIT+1),
('/investigators', MAX_PAGE_LIMIT+2),
('/participants', MAX_PAGE_LIMIT+2),
('/outcomes', MAX_PAGE_LIMIT+2),
('/phenotypes', MAX_PAGE_LIMIT+2),
('/diagnoses', MAX_PAGE_LIMIT+2),
('/family-relationships', MAX_PAGE_LIMIT+1),
('/study-files', MAX_PAGE_LIMIT+1),
('/families', MAX_PAGE_LIMIT+1),
('/read-groups', MAX_PAGE_LIMIT+2),
('/sequencing-centers', 1),
('/cavatica-apps', 101),
('/tasks', 102),
('/task-genomic-files', 102),
('/read-group-genomic-files', 102),
('/sequencing-experiment-genomic-files', 102),
('/biospecimen-genomic-files', 102),
('/biospecimen-diagnoses', 102)
('/cavatica-apps', MAX_PAGE_LIMIT+1),
('/tasks', MAX_PAGE_LIMIT+2),
('/task-genomic-files', MAX_PAGE_LIMIT+2),
('/read-group-genomic-files', MAX_PAGE_LIMIT+2),
('/sequencing-experiment-genomic-files', MAX_PAGE_LIMIT+2),
('/biospecimen-genomic-files', MAX_PAGE_LIMIT+2),
('/biospecimen-diagnoses', MAX_PAGE_LIMIT+2)
])
def test_pagination(self, client, participants, endpoint, expected_total):
""" Test pagination of resource """
resp = client.get(endpoint)
resp = json.loads(resp.data.decode('utf-8'))

assert len(resp['results']) == min(expected_total, 10)
assert resp['limit'] == 10
assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
assert resp['limit'] == DEFAULT_PAGE_LIMIT
assert resp['total'] == expected_total

ids_seen = []
Expand All @@ -311,7 +311,7 @@ def test_same_created_at(self, client):
"""
created_at = datetime.now()
studies = [Study(external_id=f'Study_{i}', short_code=f'KF-ST{i}')
for i in range(50)]
for i in range(int(MAX_PAGE_LIMIT/2))]
db.session.add_all(studies)
db.session.flush()
for study in studies:
Expand Down Expand Up @@ -343,21 +343,21 @@ def test_limit(self, client, participants, endpoint):
assert len(response['results']) == 5
assert response['limit'] == 5

response = client.get(endpoint + '?limit=200')
response = client.get(endpoint + f'?limit={MAX_PAGE_LIMIT * 2}')
response = json.loads(response.data.decode('utf-8'))
if '/sequencing-centers' in endpoint:
assert len(response['results']) == 1
else:
assert len(response['results']) == 100
assert len(response['results']) == MAX_PAGE_LIMIT

# Check unexpected limit param uses default
response = client.get(endpoint + '?limit=dog')
response = json.loads(response.data.decode('utf-8'))
if '/sequencing-centers' in endpoint:
assert len(response['results']) == 1
else:
assert len(response['results']) == 10
assert response['limit'] == 10
assert len(response['results']) == DEFAULT_PAGE_LIMIT
assert response['limit'] == DEFAULT_PAGE_LIMIT

@pytest.mark.parametrize('endpoint', [
(ept) for ept in ENDPOINTS
Expand Down