You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/pptx/parts/image.py", line 11, in <module>
from PIL import Image as PIL_Image
File "/usr/lib/python3/dist-packages/PIL/Image.py", line 60, in <module>
from . import _imaging as core
ImportError: cannot import name '_imaging' from 'PIL' (/usr/lib/python3/dist-packages/PIL/__init__.py)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/textract/parsers/__init__.py", line 69, in process
rel_module, 'textract.parsers'
File "/usr/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/usr/local/lib/python3.7/dist-packages/textract/parsers/pptx_parser.py", line 1, in <module>
import pptx
File "/usr/local/lib/python3.7/dist-packages/pptx/__init__.py", line 14, in <module>
from pptx.api import Presentation # noqa
File "/usr/local/lib/python3.7/dist-packages/pptx/api.py", line 15, in <module>
from .package import Package
File "/usr/local/lib/python3.7/dist-packages/pptx/package.py", line 14, in <module>
from .parts.image import Image, ImagePart
File "/usr/local/lib/python3.7/dist-packages/pptx/parts/image.py", line 13, in <module>
import Image as PIL_Image
ModuleNotFoundError: No module named 'Image'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anirudh/Desktop/pdo_ext/text_process.py", line 15, in <module>
main()
File "/home/anirudh/Desktop/pdo_ext/text_process.py", line 8, in main
startprocess('/home/anirudh/Desktop/new_mock')
File "/home/anirudh/Desktop/pdo_ext/extraction.py", line 23, in extraction_process
textex(allfiles, alltext) # Starting the text extraction on all the files available.
File "/home/anirudh/Desktop/pdo_ext/textext.py", line 21, in text_extraction
output = textract.process(file_name).decode('utf-8')
File "/usr/local/lib/python3.7/dist-packages/textract/parsers/__init__.py", line 72, in process
raise exceptions.ExtensionNotSupported(ext)
textract.exceptions.ExtensionNotSupported: The filename extension .pptx is not yet supported by
textract. Please suggest this filename extension here:
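Well, .pptx is supported. If you read the traceback carefully, the real error is `ModuleNotFoundError: No module named 'Image'`. It is reported as an unsupported extension because `textract/parsers/__init__.py` wraps the parser import in a try/except that only tests whether the parser imported correctly, not whether the parser exists, so any import failure inside the parser surfaces as `ExtensionNotSupported`.
So basically, if you install the Image module (in practice, a working Pillow installation, since the traceback starts with `from PIL import Image` failing on `_imaging`), this will be solved for you.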
I am attaching the error it is throwing
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/pptx/parts/image.py", line 11, in <module>
from PIL import Image as PIL_Image
File "/usr/lib/python3/dist-packages/PIL/Image.py", line 60, in <module>
from . import _imaging as core
ImportError: cannot import name '_imaging' from 'PIL' (/usr/lib/python3/dist-packages/PIL/__init__.py)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/textract/parsers/__init__.py", line 69, in process
rel_module, 'textract.parsers'
File "/usr/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/usr/local/lib/python3.7/dist-packages/textract/parsers/pptx_parser.py", line 1, in <module>
import pptx
File "/usr/local/lib/python3.7/dist-packages/pptx/__init__.py", line 14, in <module>
from pptx.api import Presentation # noqa
File "/usr/local/lib/python3.7/dist-packages/pptx/api.py", line 15, in <module>
from .package import Package
File "/usr/local/lib/python3.7/dist-packages/pptx/package.py", line 14, in <module>
from .parts.image import Image, ImagePart
File "/usr/local/lib/python3.7/dist-packages/pptx/parts/image.py", line 13, in <module>
import Image as PIL_Image
ModuleNotFoundError: No module named 'Image'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anirudh/Desktop/pdo_ext/text_process.py", line 15, in <module>
main()
File "/home/anirudh/Desktop/pdo_ext/text_process.py", line 8, in main
startprocess('/home/anirudh/Desktop/new_mock')
File "/home/anirudh/Desktop/pdo_ext/extraction.py", line 23, in extraction_process
textex(allfiles, alltext) # Starting the text extraction on all the files available.
File "/home/anirudh/Desktop/pdo_ext/textext.py", line 21, in text_extraction
output = textract.process(file_name).decode('utf-8')
File "/usr/local/lib/python3.7/dist-packages/textract/parsers/__init__.py", line 72, in process
raise exceptions.ExtensionNotSupported(ext)
textract.exceptions.ExtensionNotSupported: The filename extension .pptx is not yet supported by
textract. Please suggest this filename extension here:
Available extensions include: .csv, .doc, .docx, .eml, .epub, .gif, .htm, .html, .jpeg, .jpg, .json, .log, .mp3, .msg, .odt, .ogg, .pdf, .png, .pptx, .ps, .psv, .rtf, .tff, .tif, .tiff, .tsv, .txt, .wav, .xls, .xlsx
The text was updated successfully, but these errors were encountered: