diff --git a/.gitignore b/.gitignore index 3c9e8e43..04023115 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,10 @@ /dist/ /docs/_build/ /src/*.egg-info/ -__pycache__/ +__pycache__/* .idea/ + +# vscode +.vscode +.env +changelog.txt diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 1b5cd58f..a25f61fb 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,6 +1,6 @@ Be cordial or be on your way. --Kenneth Reitz -https://www.kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way +https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way # Contributor Covenant Code of Conduct diff --git a/camelot/core.py b/camelot/core.py index 4448cabe..cd9859f7 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -521,12 +521,12 @@ def set_edges(self, vertical, horizontal, joint_tol=2): def set_border(self): """Sets table border edges to True.""" - for r in range(len(self.rows)): - self.cells[r][0].left = True - self.cells[r][len(self.cols) - 1].right = True - for c in range(len(self.cols)): - self.cells[0][c].top = True - self.cells[len(self.rows) - 1][c].bottom = True + for index, row in enumerate(self.rows): + self.cells[index][0].left = True + self.cells[index][len(self.cols) - 1].right = True + for index, col in enumerate(self.cols): + self.cells[0][index].top = True + self.cells[len(self.rows) - 1][index].bottom = True return self def set_span(self): diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 01d17d96..5d3c7710 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -156,7 +156,7 @@ def implements_convert(): return backend @staticmethod - def _reduce_index(t, idx, shift_text): + def _reduce_index(table, idx, shift_text): """Reduces index of a text object if it lies within a spanning cell. @@ -181,32 +181,28 @@ def _reduce_index(t, idx, shift_text): indices = [] for r_idx, c_idx, text in idx: for d in shift_text: - if d == "l": - if t.cells[r_idx][c_idx].hspan: - while not t.cells[r_idx][c_idx].left: - c_idx -= 1 - if d == "r": - if t.cells[r_idx][c_idx].hspan: - while not t.cells[r_idx][c_idx].right: - c_idx += 1 - if d == "t": - if t.cells[r_idx][c_idx].vspan: - while not t.cells[r_idx][c_idx].top: - r_idx -= 1 - if d == "b": - if t.cells[r_idx][c_idx].vspan: - while not t.cells[r_idx][c_idx].bottom: - r_idx += 1 + if d == "l" and table.cells[r_idx][c_idx].hspan: + while not table.cells[r_idx][c_idx].left: + c_idx -= 1 + if d == "r" and table.cells[r_idx][c_idx].hspan: + while not table.cells[r_idx][c_idx].right: + c_idx += 1 + if d == "t" and table.cells[r_idx][c_idx].vspan: + while not table.cells[r_idx][c_idx].top: + r_idx -= 1 + if d == "b" and table.cells[r_idx][c_idx].vspan: + while not table.cells[r_idx][c_idx].bottom: + r_idx += 1 indices.append((r_idx, c_idx, text)) return indices @staticmethod - def _copy_spanning_text(t, copy_text=None): + def _copy_spanning_text(table, copy_text=None): """Copies over text in empty spanning cells. Parameters ---------- - t : camelot.core.Table + table : camelot.core.Table copy_text : list, optional (default: None) {'h', 'v'} Select one or more strings from above and pass them as a list @@ -215,23 +211,23 @@ def _copy_spanning_text(t, copy_text=None): Returns ------- - t : camelot.core.Table + table : camelot.core.Table """ for f in copy_text: if f == "h": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].text.strip() == "": - if t.cells[i][j].hspan and not t.cells[i][j].left: - t.cells[i][j].text = t.cells[i][j - 1].text + for i in range(len(table.cells)): + for j in range(len(table.cells[i])): + if table.cells[i][j].text.strip() == "": + if table.cells[i][j].hspan and not table.cells[i][j].left: + table.cells[i][j].text = table.cells[i][j - 1].text elif f == "v": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].text.strip() == "": - if t.cells[i][j].vspan and not t.cells[i][j].top: - t.cells[i][j].text = t.cells[i - 1][j].text - return t + for i in range(len(table.cells)): + for j in range(len(table.cells[i])): + if table.cells[i][j].text.strip() == "": + if table.cells[i][j].vspan and not table.cells[i][j].top: + table.cells[i][j].text = table.cells[i - 1][j].text + return table def _generate_table_bbox(self): def scale_areas(areas): diff --git a/camelot/utils.py b/camelot/utils.py index 29939f68..680ffaec 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -79,7 +79,10 @@ def download_url(url): """ filename = f"{random_string(6)}.pdf" with tempfile.NamedTemporaryFile("wb", delete=False) as f: - headers = {"User-Agent": "Mozilla/5.0"} + headers = { + "User-Agent": "Mozilla/5.0", + "Accept-Encoding": "gzip;q=1.0, deflate;q=0.9, br;q=0.8, compress;q=0.7, *;q=0.1" + } request = Request(url, None, headers) obj = urlopen(request) content_type = obj.info().get_content_type()