Skip to content

Commit

Permalink
Merge pull request #28 from hynek/intersphinx
Browse files Browse the repository at this point in the history
Use intersphinx files for symbol mining


Fixes #27
  • Loading branch information
hynek committed Aug 5, 2014
2 parents 8d524aa + 3e35f80 commit 00edea8
Show file tree
Hide file tree
Showing 13 changed files with 203 additions and 74 deletions.
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
include *.rst *.txt LICENSE tox.ini .travis.yml M
recursive-include tests *.py *.html
include *.rst *.txt LICENSE tox.ini .travis.yml
recursive-include tests *.py *.html *.inv
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ Full usage: ::
set index html file for docset


Hints
-----
For Sphinx, you get the best results from the intersphinx_ parser, which is used automatically if a version 2 ``objects.inv`` file is present.
This approach avoids parsing problems entirely by reading that machine-readable file through Sphinx’s own APIs.


Installation
------------

Expand All @@ -94,3 +100,4 @@ If you haven’t pip_ yet, installation should be as easy as::
.. _Twisted: http://twistedmatrix.com/
.. _homebrew: http://mxcl.github.com/homebrew/
.. _pip: http://www.pip-installer.org/en/latest/installing.html#alternative-installation-procedures
.. _intersphinx: http://sphinx-doc.org/latest/ext/intersphinx.html
14 changes: 11 additions & 3 deletions doc2dash/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
from . import pydoctor, sphinx
from __future__ import absolute_import, division, print_function

from . import pydoctor, sphinx, intersphinx

DOCTYPES = [sphinx.SphinxParser, pydoctor.PyDoctorParser]

DOCTYPES = [
intersphinx.InterSphinxParser,
sphinx.SphinxParser,
pydoctor.PyDoctorParser,
]


def get_doctype(path):
"""Gets the apropriate doctype for *path*."""
"""
Gets the apropriate doctype for *path*.
"""
for dt in DOCTYPES:
if dt.detect(path):
return dt
Expand Down
10 changes: 8 additions & 2 deletions doc2dash/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,17 @@ def start(*args, **kwargs):
return start


APPLE_REF_TEMPLATE = '//apple_ref/cpp/{}/{}'


class _BaseParser(object):
"""
Abstract parser base class.
"""
APPLE_REF = '//apple_ref/cpp/{}/{}'
APPLE_REF = APPLE_REF_TEMPLATE
"""
Backward compatibility only, don't use in new code.
"""

def __init__(self, docpath):
self.docpath = docpath
Expand All @@ -51,7 +57,7 @@ def detect(cl, path):
that file.
"""
try:
with open(os.path.join(path, cl.DETECT_FILE)) as f:
with open(os.path.join(path, cl.DETECT_FILE), "rb") as f:
return cl.DETECT_PATTERN in f.read()
except IOError as e:
if e.errno == errno.ENOENT:
Expand Down
78 changes: 78 additions & 0 deletions doc2dash/parsers/intersphinx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import absolute_import, division, print_function

import logging
import os

from six import iteritems
from sphinx.ext.intersphinx import read_inventory_v2

from . import types
from .base import _BaseParser
from .sphinx import find_and_patch_entry


log = logging.getLogger(__name__)


# Lookup table used by ``_inv_to_elements``: the text after the last ":" of an
# inventory key is looked up here to obtain the entry type constant from
# ``types``.  Keys that have no entry in this table are skipped there.
INV_TO_TYPE = {
    "attribute": types.ATTRIBUTE,
    "class": types.CLASS,
    "classmethod": types.METHOD,
    "data": types.VALUE,
    "envvar": types.ENV,
    "exception": types.EXCEPTION,
    "function": types.FUNCTION,
    "interface": types.INTERFACE,
    "macro": types.MACRO,
    "member": types.ATTRIBUTE,
    "method": types.METHOD,
    "module": types.PACKAGE,
    "opcode": types.OPCODE,
    "option": types.OPTION,
    "staticmethod": types.METHOD,
    "type": types.TYPE,
    "variable": types.VARIABLE,
    "var": types.VARIABLE,
}


class InterSphinxParser(_BaseParser):
    """
    Parser for Sphinx-based documentation that ships an ``objects.inv`` file
    as generated for the intersphinx extension.
    """
    name = "intersphinx"

    DETECT_FILE = "objects.inv"
    DETECT_PATTERN = b"# Sphinx inventory version 2"

    def parse(self):
        """
        Parse the inventory file found in ``self.docpath``.

        Yields tuples of symbol name, type and path.
        """
        log.info('Creating database...')
        inventory_path = os.path.join(self.docpath, "objects.inv")
        with open(inventory_path, "rb") as inventory_file:
            # The version line has already been verified during detection;
            # consume it so the inventory reader starts on the line after it.
            inventory_file.readline()
            inventory = read_inventory_v2(inventory_file, "", os.path.join)
            # Explicit re-yield loop instead of ``yield from``.
            for element in _inv_to_elements(inventory):
                yield element

    def find_and_patch_entry(self, soup, entry):  # pragma: nocover
        """
        Delegate to the module-level ``find_and_patch_entry`` helper.
        """
        return find_and_patch_entry(soup, entry)


def _inv_to_elements(inv):
    """
    Iterate over a dictionary as returned from Sphinx's objects.inv parser and
    yield `name, type, path` tuples.

    The type is looked up in ``INV_TO_TYPE`` using the part of each top-level
    key after its last ":".  Keys without a mapping are skipped.
    """
    for type_key, val in iteritems(inv):
        try:
            t = INV_TO_TYPE[type_key.split(":")[-1]]
        except KeyError:
            # No mapping for this inventory type: skip all of its entries.
            continue
        # Kept outside the ``try`` so that only the type lookup is guarded;
        # a KeyError raised while reading the entries themselves is no
        # longer silently swallowed.
        for el, data in iteritems(val):
            yield el, t, data[2]
20 changes: 11 additions & 9 deletions doc2dash/parsers/pydoctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,28 @@
from bs4 import BeautifulSoup

from . import types
from .base import _BaseParser
from .base import _BaseParser, APPLE_REF_TEMPLATE


log = logging.getLogger(__name__)


class PyDoctorParser(_BaseParser):

"""Parser for pydoctor-based documentation: mainly Twisted."""

"""
Parser for pydoctor-based documentation: mainly Twisted.
"""
name = 'pydoctor'

DETECT_FILE = 'index.html'
DETECT_PATTERN = '''\
DETECT_PATTERN = b'''\
This documentation was automatically generated by
<a href="http://codespeak.net/~mwh/pydoctor/">pydoctor</a>'''

def parse(self):
"""Parse pydoctor docs at *docpath*.
"""
Parse pydoctor docs at *docpath*.
yield tuples of symbol name, type and path
"""
soup = BeautifulSoup(
open(os.path.join(self.docpath, 'nameIndex.html')),
Expand All @@ -42,15 +42,17 @@ def find_and_patch_entry(self, soup, entry):
link = soup.find('a', attrs={'name': entry.anchor})
if link:
tag = soup.new_tag('a')
tag['name'] = self.APPLE_REF.format(entry.type, entry.name)
tag['name'] = APPLE_REF_TEMPLATE.format(entry.type, entry.name)
link.insert_before(tag)
return True
else:
return False


def _guess_type(name, path):
"""Employ voodoo magic to guess the type of *name* in *path*."""
"""
Employ voodoo magic to guess the type of *name* in *path*.
"""
if name.rsplit('.', 1)[-1][0].isupper() and '#' not in path:
return types.CLASS
elif name.islower() and '#' not in path:
Expand Down
48 changes: 27 additions & 21 deletions doc2dash/parsers/sphinx.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@
from bs4 import BeautifulSoup

from . import types
from .base import _BaseParser
from .base import _BaseParser, APPLE_REF_TEMPLATE


log = logging.getLogger(__name__)


class SphinxParser(_BaseParser):

"""Parser for Sphinx-based documenation: Python, Django, Pyramid..."""

"""
Parser for Sphinx-based documenation: Python, Django, Pyramid...
"""
name = 'sphinx'

DETECT_FILE = '_static/searchtools.js'
DETECT_PATTERN = '* Sphinx JavaScript util'
DETECT_PATTERN = b'* Sphinx JavaScript util'

def parse(self):
"""Parse sphinx docs at *path*.
yield tuples of symbol name, type and path
"""
Parse sphinx docs at *path*.
yield tuples of symbol `name, type and path`
"""
for idx in POSSIBLE_INDEXES:
try:
Expand All @@ -40,19 +40,8 @@ def parse(self):
for t in _parse_soup(soup):
yield t

def find_and_patch_entry(self, soup, entry):
"""Modify soup so dash can generate TOCs on the fly."""
link = soup.find('a', {'class': 'headerlink'}, href='#' + entry.anchor)
tag = soup.new_tag('a')
tag['name'] = self.APPLE_REF.format(entry.type, entry.name)
if link:
link.parent.insert(0, tag)
return True
elif entry.anchor.startswith('module-'):
soup.h1.parent.insert(0, tag)
return True
else:
return False
def find_and_patch_entry(self, soup, entry): # pragma: nocover
return find_and_patch_entry(soup, entry)


POSSIBLE_INDEXES = [
Expand Down Expand Up @@ -158,3 +147,20 @@ def _get_type_and_name(text):
return type_, name
else:
return None, None


def find_and_patch_entry(soup, entry):
    """
    Modify soup so dash can generate TOCs on the fly.

    Searches *soup* for an ``<a class="headerlink">`` whose ``href`` is
    ``'#' + entry.anchor`` and inserts a new named ``<a>`` tag as the first
    child of that link's parent.  If there is no such link but the anchor
    starts with ``module-``, the tag is inserted as the first child of the
    parent of the first ``<h1>`` instead.

    Returns True if a tag was inserted and False otherwise, including the
    case of a ``module-`` anchor on a page that has no ``<h1>``.
    """
    link = soup.find('a', {'class': 'headerlink'}, href='#' + entry.anchor)
    if link:
        target = link.parent
    elif entry.anchor.startswith('module-') and soup.h1 is not None:
        # ``soup.h1`` is None when the page has no <h1>; report "not found"
        # instead of failing with an AttributeError on ``None.parent``.
        target = soup.h1.parent
    else:
        return False

    tag = soup.new_tag('a')
    tag['name'] = APPLE_REF_TEMPLATE.format(entry.type, entry.name)
    target.insert(0, tag)
    return True
21 changes: 15 additions & 6 deletions doc2dash/parsers/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
CLASS = 'cl'
PACKAGE = 'Module'
METHOD = 'clm'
FUNCTION = 'func'
ATTRIBUTE = 'Attribute'
CONSTANT = 'clconst'
ATTRIBUTE = "Attribute"
CLASS = "Class"
CONSTANT = "Constant"
ENV = "Environment"
EXCEPTION = "Exception"
FUNCTION = "Function"
INTERFACE = "Interface"
MACRO = "Macro"
METHOD = "Method"
OPCODE = "Operator"
OPTION = "Option"
PACKAGE = "Module"
TYPE = "Type"
VALUE = "Value"
VARIABLE = "Variable"
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ def find_version(*file_paths):
],
},
install_requires=[
"Sphinx==1.2.2",
"beautifulsoup4==4.3.2",
"lxml==3.3.5",
"six==1.7.3",
],
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down
Binary file added tests/parsers/intersphinx/objects.inv
Binary file not shown.
18 changes: 18 additions & 0 deletions tests/parsers/intersphinx/test_intersphinx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from __future__ import absolute_import, division, print_function

import os

from doc2dash.parsers.intersphinx import InterSphinxParser


HERE = os.path.dirname(__file__)


class TestInterSphinxParser(object):
    def test_parses(self):
        """
        Exhausting the parser for this test directory raises no exception.
        """
        parser = InterSphinxParser(HERE)
        for _ in parser.parse():
            pass
11 changes: 7 additions & 4 deletions tests/parsers/sphinx/test_sphinx.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,8 @@ def test_strip_annotation():


def test_patcher():
p = sphinx.SphinxParser('foo')
soup = BeautifulSoup(open(os.path.join(HERE, 'function_example.html')))
assert p.find_and_patch_entry(
assert sphinx.find_and_patch_entry(
soup,
Entry(
'pyramid.config.Configurator.add_route',
Expand All @@ -128,5 +127,9 @@ def test_patcher():
'add_route'}
)
assert toc_link
assert not p.find_and_patch_entry(soup, Entry('invented', 'cl', 'nonex'))
assert p.find_and_patch_entry(soup, Entry('somemodule', 'cl', 'module-sm'))
assert not sphinx.find_and_patch_entry(
soup, Entry('invented', 'cl', 'nonex')
)
assert sphinx.find_and_patch_entry(
soup, Entry('somemodule', 'cl', 'module-sm')
)
Loading

0 comments on commit 00edea8

Please sign in to comment.