Skip to content
This repository has been archived by the owner on Jan 24, 2018. It is now read-only.

Peer service #1556

Merged
merged 31 commits into from
Mar 6, 2017
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
06d70c0
Peer service initial commit
david4096 Feb 7, 2017
626f93e
Announce persistence to sqlite
david4096 Feb 7, 2017
93600b5
Simulated datarepo
david4096 Feb 7, 2017
7f16b8f
Add tests for adding and removing peers via repo cli
david4096 Feb 8, 2017
24d95e0
Add /info endpoint and improve simulated peers representation
david4096 Feb 8, 2017
fab13cd
Add peer to prepare compliance
david4096 Feb 15, 2017
a7b72b6
Merge remote-tracking branch 'upstream/master' into 1507_peers
david4096 Feb 15, 2017
1bf4012
Change URI to URL
david4096 Feb 15, 2017
1ce73b0
Add add peer data model, config setting for peer list
david4096 Feb 16, 2017
2d6035b
Add some comments, change uri to url
david4096 Feb 23, 2017
5ebdf5f
Merge remote-tracking branch 'upstream/master' into 1507_peers
david4096 Feb 23, 2017
eb66c55
Use datamodel in repo manager CLI
david4096 Feb 23, 2017
9f24c8b
Nicely add peer list from a file
david4096 Feb 24, 2017
25f6aef
Add updated client, add tests
david4096 Feb 24, 2017
7c82e2d
Set up client tests, add fallback for announcement
david4096 Feb 28, 2017
d23c681
Merge remote-tracking branch 'upstream/master' into 1507_peers
david4096 Feb 28, 2017
01bda2e
Add comments
david4096 Feb 28, 2017
2d0128e
Use iterator model
david4096 Feb 28, 2017
3c7553a
Handle announce a bit more cleanly
david4096 Feb 28, 2017
5b4914a
Remove redundant code paths
david4096 Feb 28, 2017
44138b2
Add announcement fields to database
david4096 Mar 1, 2017
38b6e1e
Add docs for configuration and managing peers
david4096 Mar 1, 2017
e406086
Use requests in place of client
david4096 Mar 1, 2017
a1625bd
Note the methods used only in testing
david4096 Mar 1, 2017
c9e3915
Move network methods to their own package
david4096 Mar 1, 2017
e766a1e
Flake fixes
david4096 Mar 2, 2017
60bb886
Add urlparse validation
david4096 Mar 2, 2017
49cc31e
Refactor m > models
david4096 Mar 2, 2017
8f13749
Add repo list and clear announcements
david4096 Mar 2, 2017
bc32445
Update constraints.txt
ejacox Mar 6, 2017
7cb30fa
Merge branch 'master' into 1507_peers
ejacox Mar 6, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@
# the pinned packages in requirements.txt are used instead.
#
git+git://github.com/ga4gh/ga4gh-common.git@master#egg=ga4gh_common
git+git://github.com/ga4gh/schemas.git@master#egg=ga4gh_schemas
git+git://github.com/ga4gh/ga4gh-client.git@master#egg=ga4gh_client
git+git://github.com/david4096/schemas.git@760_peers#egg=ga4gh_schemas
git+git://github.com/david4096/ga4gh-client.git@60_peers#egg=ga4gh_client
6 changes: 6 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ REQUEST_VALIDATION
they conform to the protocol. This may result in clients with poor standards
compliance receiving errors rather than the expected results.

INITIAL_PEERS
At startup, the server can contact a list of initial peers read from a
simple text file. Add one URL per line for each peer you would like to add
to the registry as an initial peer. Each time the server starts, this set
of peers will be announced and added to the registry.

LANDING_MESSAGE_HTML
The server provides a simple landing page at its root. By setting this
value to point at a file containing an HTML block element it is possible to
Expand Down
38 changes: 38 additions & 0 deletions docs/datarepo.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,44 @@ well in their repository.

$ ga4gh_repo verify registry.db

--------
add-peer
--------

The server maintains a list of known peers. To add a peer to this list use
the ``add-peer`` command.

.. argparse::
:module: ga4gh.server.cli.repomanager
:func: getRepoManagerParser
:prog: ga4gh_repo
:path: add-peer
:nodefault:

**Examples:**

.. code-block:: bash

$ ga4gh_repo add-peer http://1kgenomes.ga4gh.org

-----------
remove-peer
-----------

You can remove a peer from the list of peers by its URL.

.. argparse::
:module: ga4gh.server.cli.repomanager
:func: getRepoManagerParser
:prog: ga4gh_repo
:path: remove-peer
:nodefault:

**Examples:**

.. code-block:: bash

$ ga4gh_repo remove-peer http://1kgenomes.ga4gh.org

-----------
add-dataset
Expand Down
68 changes: 68 additions & 0 deletions ga4gh/server/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,15 @@ def expressionLevelsGenerator(self, request):
request, rnaQuant)
return iterator

def peersGenerator(self, request):
    """
    Builds the paging iterator for a peers request. The iterator is
    constructed from the request and this backend's data repository
    and yields (peer, nextPageToken) pairs.
    """
    dataRepository = self.getDataRepository()
    return paging.PeerIterator(request, dataRepository)

###########################################################
#
# Public API methods. Each of these methods implements the
Expand Down Expand Up @@ -676,6 +685,65 @@ def runGetCallSet(self, id_):
callSet = variantSet.getCallSet(id_)
return self.runGetRequest(callSet)

def runGetInfo(self, request):
    """
    Produces the JSON serialization of a GetInfoResponse whose
    protocol_version is the version of the protocol module in use.
    The request argument is not inspected.
    """
    infoResponse = protocol.GetInfoResponse(
        protocol_version=protocol.version)
    return protocol.toJson(infoResponse)

def runAddAnnouncement(self, flaskrequest):
    """
    Takes a flask request from the frontend and attempts to parse
    into an AnnouncePeerRequest. If successful, it will log the
    announcement to the `announcement` table with some other metadata
    gathered from the request.

    :param flaskrequest: the flask request carrying the JSON body, or
        (in testing) the serialized AnnouncePeerRequest itself.
    :returns: the JSON serialization of an AnnouncePeerResponse with
        success set to True.
    :raises InvalidJsonException: if the body cannot be parsed into an
        AnnouncePeerRequest.
    :raises BadRequestException: if the announcement could not be
        stored in the data repository.
    """
    announcement = {}
    # We want to parse the request ourselves to collect a little more
    # data about it.
    try:
        requestData = protocol.fromJson(
            flaskrequest.get_data(), protocol.AnnouncePeerRequest)
        announcement['hostname'] = flaskrequest.host_url
        announcement['remote_addr'] = flaskrequest.remote_addr
        announcement['user_agent'] = flaskrequest.headers.get(
            'User-Agent')
    except AttributeError:
        # Sometimes in testing we will send protocol requests instead
        # of flask requests and so the hostname and user agent won't
        # be present.
        try:
            requestData = protocol.fromJson(
                flaskrequest, protocol.AnnouncePeerRequest)
        except Exception as e:
            raise exceptions.InvalidJsonException(e)
    except Exception as e:
        raise exceptions.InvalidJsonException(e)

    # Validate the url before accepting the announcement; constructing
    # the Peer runs its URL check.
    peer = datamodel.peers.Peer(requestData.peer.url)
    peer.setAttributesJson(protocol.toJson(
        requestData.peer.attributes))
    announcement['url'] = peer.getUrl()
    announcement['attributes'] = peer.getAttributes()
    try:
        self.getDataRepository().insertAnnouncement(announcement)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported to the
        # client as a bad request.
        raise exceptions.BadRequestException(announcement['url'])
    return protocol.toJson(
        protocol.AnnouncePeerResponse(success=True))

def runListPeers(self, request):
    """
    Handles a ListPeersRequest by delegating to the generic search
    machinery, which produces a ListPeersResponse and honours the
    page_token and page_size of the request if provided.
    """
    requestClass = protocol.ListPeersRequest
    responseClass = protocol.ListPeersResponse
    return self.runSearchRequest(
        request, requestClass, responseClass, self.peersGenerator)

def runGetVariant(self, id_):
"""
Returns a variant with the given id
Expand Down
49 changes: 49 additions & 0 deletions ga4gh/server/cli/repomanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import ga4gh.server.datamodel.sequence_annotations as sequence_annotations
import ga4gh.server.datamodel.continuous as continuous
import ga4gh.server.datamodel.variants as variants
import ga4gh.server.datamodel.peers as peers
import ga4gh.server.datarepo as datarepo
import ga4gh.server.exceptions as exceptions
import ga4gh.server.repo.rnaseq2ga as rnaseq2ga
Expand Down Expand Up @@ -508,6 +509,33 @@ def func():
self._updateRepo(self._repo.removeIndividual, individual)
self._confirmDelete("Individual", individual.getLocalId(), func)

def addPeer(self):
    """
    Opens the repo, builds a peer from the ``url`` and JSON
    ``attributes`` command line arguments and inserts it into the
    repo. A malformed URL or attributes message is reported as a
    RepoManagerException.
    """
    self._openRepo()
    try:
        # Parsing and construction share one try block so that both
        # failure modes are translated by the handlers below.
        parsedAttributes = json.loads(self._args.attributes)
        newPeer = peers.Peer(self._args.url, parsedAttributes)
    except exceptions.BadUrlException:
        raise exceptions.RepoManagerException(
            "The URL for the peer was malformed.")
    except ValueError as e:
        message = "The attributes message was malformed. {}".format(e)
        raise exceptions.RepoManagerException(message)
    self._updateRepo(self._repo.insertPeer, newPeer)

def removePeer(self):
    """
    Opens the repo and, once the deletion has been confirmed, removes
    the peer identified by the ``url`` command line argument.
    """
    self._openRepo()
    peerUrl = self._args.url

    def deletePeer():
        # Runs only if _confirmDelete decides to invoke it.
        self._updateRepo(self._repo.removePeer, peerUrl)
    self._confirmDelete("Peer", peerUrl, deletePeer)

def removeOntology(self):
"""
Removes an ontology from the repo.
Expand Down Expand Up @@ -654,6 +682,12 @@ def addOntologyNameArgument(cls, subparser):
"ontologyName",
help="the name of the ontology")

@classmethod
def addUrlArgument(cls, subparser):
    """
    Registers the positional ``url`` argument on the given subparser.
    """
    helpText = "The URL of the given resource"
    subparser.add_argument("url", help=helpText)

@classmethod
def addReadGroupSetNameArgument(cls, subparser):
subparser.add_argument(
Expand Down Expand Up @@ -783,6 +817,21 @@ def getParser(cls):
listParser.set_defaults(runner="list")
cls.addRepoArgument(listParser)

addPeerParser = common_cli.addSubparser(
subparsers, "add-peer", "Add a peer to the registry by URL.")
addPeerParser.set_defaults(runner="addPeer")
cls.addRepoArgument(addPeerParser)
cls.addUrlArgument(addPeerParser)
cls.addAttributesArgument(addPeerParser)

removePeerParser = common_cli.addSubparser(
subparsers, "remove-peer", "Remove a peer from "
"the registry by URL.")
removePeerParser.set_defaults(runner="removePeer")
cls.addRepoArgument(removePeerParser)
cls.addUrlArgument(removePeerParser)
cls.addForceOption(removePeerParser)

addDatasetParser = common_cli.addSubparser(
subparsers, "add-dataset", "Add a dataset to the data repo")
addDatasetParser.set_defaults(runner="addDataset")
Expand Down
104 changes: 104 additions & 0 deletions ga4gh/server/datamodel/peers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Peer datamodel for exchanging data about GA4GH services.
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import json
import re

import ga4gh.server.exceptions as exceptions

import ga4gh.schemas.protocol as protocol


# Compiled once at import time instead of on every call to isUrl.
# From http://stackoverflow.com/questions/7160737/
# TODO: urlparse on its own was found to be too lenient for validating
# addresses, so a regex is kept for now. This check is general enough to
# be moved into a lower level shared package (tracked in issue #1594).
_URL_REGEX = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)'
    r'+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)


def isUrl(urlString):
    """
    Attempts to return whether a given URL string is valid by applying
    a regex match.

    Returns the (truthy) regex match object when the string looks like
    an http(s) or ftp(s) URL and None otherwise. Input that is not a
    string (for example None) is treated as not being a URL instead of
    raising a TypeError.
    """
    try:
        return _URL_REGEX.match(urlString)
    except TypeError:
        # re raises TypeError for non-string input; such a value can
        # never be a valid URL.
        return None


class Peer(object):
    """
    This class represents an abstract Peer object.
    It sets default values and getters, as well as the
    toProtocolElement function.
    """
    def __init__(self, url, attributes=None, record=None):
        """
        Creates a peer from a URL and an optional attributes dictionary.

        :param url: the URL of the peer; it is validated by setUrl, so a
            BadUrlException is raised if it is malformed.
        :param attributes: optional attributes dictionary; defaults to a
            fresh empty dictionary for every instance.
        :param record: optional model record; when given, the peer is
            populated from it after url and attributes have been set.
        """
        self._url = ""
        self._attributes = {}
        # A None default avoids sharing a single mutable dictionary
        # between every Peer created without explicit attributes.
        if attributes is None:
            attributes = {}
        self.setUrl(url) \
            .setAttributes(attributes)
        if record is not None:
            self.populateFromRow(record)

    def setUrl(self, url):
        """
        Attempt to safely set the URL by string. Raises a
        BadUrlException if the URL does not pass validation. Returns
        this peer to allow call chaining.
        """
        if isUrl(url):
            self._url = url
        else:
            raise exceptions.BadUrlException(url)
        return self

    def getUrl(self):
        """
        Returns the URL of this peer.
        """
        return self._url

    def setAttributes(self, attributes):
        """
        Sets the attributes message to the provided value. Returns this
        peer to allow call chaining.
        """
        self._attributes = attributes
        return self

    def setAttributesJson(self, attributesJson):
        """
        Sets the attributes dictionary from a JSON string. Raises an
        InvalidJsonException if the string cannot be decoded. Returns
        this peer to allow call chaining.
        """
        try:
            self._attributes = json.loads(attributesJson)
        except (TypeError, ValueError):
            # json.loads signals undecodable input with ValueError and
            # non-string input with TypeError; anything else (such as
            # KeyboardInterrupt) is allowed to propagate.
            raise exceptions.InvalidJsonException(attributesJson)
        return self

    def serializeAttributes(self, msg):
        """
        Sets the attributes of a message during serialization. Returns
        the message that was passed in.
        """
        attributes = self.getAttributes()
        for key in attributes:
            protocol.setAttribute(
                msg.attributes.attr[key].values, attributes[key])
        return msg

    def getAttributes(self):
        """
        Returns the attributes for the DatamodelObject.
        """
        return self._attributes

    def toProtocolElement(self):
        """
        Returns the protocol Peer message for this peer, carrying its
        URL and serialized attributes.
        """
        peer = protocol.Peer()
        peer.url = self._url
        self.serializeAttributes(peer)
        return peer

    def populateFromRow(self, peerRecord):
        """
        This method accepts a model record and sets class variables.
        The record's url is validated and its attributes are decoded
        from JSON. Returns this peer.
        """
        self.setUrl(peerRecord.url) \
            .setAttributesJson(peerRecord.attributes)
        return self
Loading