jiaaro · jiaaro · May 24, 2018 · Mar 20, 2018 · Mar 20, 2018 · Mar 20, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -8,7 +8,7 @@ before_install:
   - sudo add-apt-repository ppa:mc3man/trusty-media -y
   - sudo apt-get update --fix-missing
 install:
-  - sudo apt-get install -y ${CONVERTER} libopus-dev
+  - sudo apt-get install -y ${CONVERTER} libopus-dev python-scipy python3-scipy
 python:
   - "2.7"
   - "3.3"

diff --git a/pydub/audio_segment.py b/pydub/audio_segment.py
@@ -8,7 +8,9 @@
 import sys
 import struct
 from .logging_utils import log_conversion, log_subprocess_output
+from .utils import mediainfo_json, fsdecode
 import base64
+from collections import namedtuple
 
 try:
     from StringIO import StringIO
@@ -82,6 +84,67 @@ def classproperty(func):
 }
 
 
+WavSubChunk = namedtuple('WavSubChunk', 'id position size')
+WavData = namedtuple(
+    'WavData', 'audio_format channels sample_rate bits_per_sample raw_data')
+
+
+def extract_wav_headers(data):
+    """Return WavSubChunk(id, position, size) for each subchunk up to 'data'."""
+    pos = 12  # The size of the RIFF chunk descriptor
+    subchunks = []
+    # `<=`, not `<`: an 8-byte header ending exactly at len(data) is complete
+    # (e.g. an empty 'data' subchunk). At most 10 subchunks are scanned.
+    while pos + 8 <= len(data) and len(subchunks) < 10:
+        subchunk_id = data[pos:pos + 4]
+        subchunk_size = struct.unpack_from('<I', data, pos + 4)[0]
+        subchunks.append(WavSubChunk(subchunk_id, pos, subchunk_size))
+        if subchunk_id == b'data':
+            break  # 'data' is the last subchunk
+        pos += subchunk_size + 8
+    return subchunks
+
+
+def read_wav_audio(data, headers=None):
+    """Parse wav bytes into a WavData tuple.
+
+    `headers` may carry a precomputed extract_wav_headers(data) result.
+    Raises CouldntDecodeError if the fmt or data subchunk cannot be used.
+    """
+    subchunks = headers or extract_wav_headers(data)
+    fmt_hdr = next((h for h in subchunks if h.id == b'fmt '), None)
+    if fmt_hdr is None or fmt_hdr.size < 16:
+        raise CouldntDecodeError("Couldn't find fmt header in wav data")
+    base = fmt_hdr.position + 8  # first byte after the subchunk id and size
+    audio_format, = struct.unpack_from('<H', data, base)
+    if audio_format not in (1, 0xFFFE):
+        raise CouldntDecodeError("Unknown audio format 0x%X in wav data" %
+                                 audio_format)
+    channels, = struct.unpack_from('<H', data, base + 2)
+    sample_rate, = struct.unpack_from('<I', data, base + 4)
+    bits_per_sample, = struct.unpack_from('<H', data, base + 14)
+
+    last = subchunks[-1]
+    if last.id != b'data':
+        raise CouldntDecodeError("Couldn't find data header in wav data")
+    start = last.position + 8
+    return WavData(audio_format, channels, sample_rate, bits_per_sample,
+                   data[start:start + last.size])
+
+
+def fix_wav_headers(data):
+    """Rewrite, in place, the RIFF and 'data' size fields of mutable `data`.
+
+    Does nothing unless the last subchunk found is 'data'.
+    """
+    subchunks = extract_wav_headers(data)
+    if subchunks and subchunks[-1].id == b'data':
+        total = len(data)
+        data_pos = subchunks[-1].position
+        struct.pack_into('<I', data, 4, total - 8)  # RIFF descriptor size
+        struct.pack_into('<I', data, data_pos + 4, total - data_pos - 8)
+
+
 class AudioSegment(object):
     """
     AudioSegments are *immutable* objects representing segments of audio
@@ -152,19 +215,15 @@ def __init__(self, data=None, *args, **kwargs):
                     reader = data.read(2**31-1)
                 data = d
 
-            raw = wave.open(StringIO(data), 'rb')
+            wav_data = read_wav_audio(data)
+            if not wav_data:
+                raise CouldntDecodeError("Couldn't read wav audio from data")
 
-            raw.rewind()
-            self.channels = raw.getnchannels()
-            self.sample_width = raw.getsampwidth()
-            self.frame_rate = raw.getframerate()
+            self.channels = wav_data.channels
+            self.sample_width = wav_data.bits_per_sample // 8
+            self.frame_rate = wav_data.sample_rate
             self.frame_width = self.channels * self.sample_width
-
-            raw.rewind()
-
-            # the "or b''" base case is a work-around for a python 3.4
-            # see https://github.com/jiaaro/pydub/pull/107
-            self._data = raw.readframes(float('inf')) or b''
+            self._data = wav_data.raw_data
 
         # Convert 24-bit audio to 32-bit audio.
         # (stdlib audioop and array modules do not support 24-bit data)
@@ -185,7 +244,6 @@ def __init__(self, data=None, *args, **kwargs):
                 old_bytes = struct.pack(pack_fmt, b0, b1, b2)
                 byte_buffer.write(old_bytes)
 
-
             self._data = byte_buffer.getvalue()
             self.sample_width = 4
             self.frame_width = self.channels * self.sample_width
@@ -433,7 +491,7 @@ def from_mono_audiosegments(cls, *mono_segments):
         )
 
     @classmethod
-    def from_file(cls, file, format=None, codec=None, parameters=None, **kwargs):
+    def from_file_using_temporary_files(cls, file, format=None, codec=None, parameters=None, **kwargs):
         orig_file = file
         file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False)
 
@@ -447,11 +505,8 @@ def is_format(f):
                 return True
             if isinstance(orig_file, basestring):
                 return orig_file.lower().endswith(".{0}".format(f))
-            if sys.version_info >= (3, 6):
-                if isinstance(orig_file, os.PathLike):
-                    path = os.fsdecode(orig_file)
-                    return path.lower().endswith(".{0}".format(f))
-
+            if isinstance(orig_file, bytes):
+                return orig_file.lower().endswith((".{0}".format(f)).encode('utf8'))
             return False
 
         if is_format("wav"):
@@ -532,6 +587,110 @@ def is_format(f):
 
         return obj
 
+    @classmethod
+    def from_file(cls, file, format=None, codec=None, parameters=None, **kwargs):
+        """Load audio from a path or file object without temporary files.
+
+        wav is parsed directly; raw/pcm needs the sample_width, frame_rate and
+        channels kwargs; other input is piped through `cls.converter` as wav.
+        Raises CouldntDecodeError if the converter fails or outputs nothing.
+        """
+        orig_file = file
+        try:
+            filename = fsdecode(file)
+        except TypeError:
+            filename = None
+        file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False)
+
+        if format:
+            format = format.lower()
+            format = AUDIO_FILE_EXT_ALIASES.get(format, format)
+
+        def is_format(f):
+            f = f.lower()
+            if format == f:
+                return True
+            if filename:
+                return filename.lower().endswith(".{0}".format(f))
+            return False
+
+        if is_format("wav"):
+            try:
+                return cls._from_safe_wav(file)
+            except Exception:  # not bare: let KeyboardInterrupt propagate
+                file.seek(0)  # rewind and fall back to the converter
+        elif is_format("raw") or is_format("pcm"):
+            sample_width = kwargs['sample_width']
+            frame_rate = kwargs['frame_rate']
+            channels = kwargs['channels']
+            metadata = {
+                'sample_width': sample_width,
+                'frame_rate': frame_rate,
+                'channels': channels,
+                'frame_width': channels * sample_width
+            }
+            return cls(data=file.read(), metadata=metadata)
+
+        conversion_command = [cls.converter,
+                              '-y',  # always overwrite existing files
+                              ]
+        # If format is not defined
+        # ffmpeg/avconv will detect it automatically
+        if format:
+            conversion_command += ["-f", format]
+        if codec:
+            # force audio decoder
+            conversion_command += ["-acodec", codec]
+
+        if filename:
+            conversion_command += ["-i", filename]
+            stdin_parameter = None
+            stdin_data = None
+            # The converter reads the path itself; don't leak our handle.
+            file.close()
+        else:
+            conversion_command += ["-i", "-"]
+            stdin_parameter = subprocess.PIPE
+            stdin_data = file.read()
+
+        info = mediainfo_json(orig_file)
+        audio_streams = [x for x in (info or {}).get('streams', [])
+                         if x.get('codec_type') == 'audio']
+        # No audio stream reported: let the converter choose the codec.
+        if audio_streams:
+            # This is a workaround for some ffprobe versions that always say
+            # that mp3/mp4/aac/webm/ogg files contain fltp samples
+            if (audio_streams[0].get('sample_fmt') == 'fltp' and
+                (is_format("mp3") or is_format("mp4") or is_format("aac") or
+                 is_format("webm") or is_format("ogg"))):
+                bits_per_sample = 16
+            else:
+                bits_per_sample = audio_streams[0].get('bits_per_sample')
+            if bits_per_sample:  # skip when missing or 0 (size unknown)
+                acodec = 'pcm_s%dle' % bits_per_sample
+                conversion_command += ["-acodec", acodec]
+
+        conversion_command += [
+            "-vn",  # Drop any video streams if there are any
+            "-f", "wav",  # output options (filename last)
+            "-"
+        ]
+        if parameters is not None:
+            # extend arguments with arbitrary set
+            conversion_command.extend(parameters)
+
+        log_conversion(conversion_command)
+        p = subprocess.Popen(conversion_command, stdin=stdin_parameter,
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p_out, p_err = p.communicate(input=stdin_data)
+        if p.returncode != 0 or len(p_out) == 0:
+            raise CouldntDecodeError("Decoding failed. ffmpeg returned error code: {0}\n\nOutput from ffmpeg/avlib:\n\n{1}".format(p.returncode, p_err))
+
+        p_out = bytearray(p_out)
+        fix_wav_headers(p_out)
+        obj = cls._from_safe_wav(BytesIO(p_out))
+        file.close()
+        return obj
+
     @classmethod
     def from_mp3(cls, file, parameters=None):
         return cls.from_file(file, 'mp3', parameters)

diff --git a/pydub/pyaudioop.py b/pydub/pyaudioop.py
@@ -1,4 +1,9 @@
-import __builtin__
+try:
+    from __builtin__ import max as builtin_max
+    from __builtin__ import min as builtin_min
+except ImportError:
+    from builtins import max as builtin_max
+    from builtins import min as builtin_min
 import math
 import struct
 from fractions import gcd
@@ -79,7 +84,7 @@ def _get_minval(size, signed=True):
 def _get_clipfn(size, signed=True):
     maxval = _get_maxval(size, signed)
     minval = _get_minval(size, signed)
-    return lambda val: __builtin__.max(min(val, maxval), minval)
+    return lambda val: builtin_max(min(val, maxval), minval)
 
 
 def _overflow(val, size, signed=True):
@@ -109,16 +114,16 @@ def max(cp, size):
     if len(cp) == 0:
         return 0
 
-    return __builtin__.max(abs(sample) for sample in _get_samples(cp, size))
+    return builtin_max(abs(sample) for sample in _get_samples(cp, size))
 
 
 def minmax(cp, size):
     _check_params(len(cp), size)
 
     max_sample, min_sample = 0, 0
     for sample in _get_samples(cp, size):
-        max_sample = __builtin__.max(sample, max_sample)
-        min_sample = __builtin__.min(sample, min_sample)
+        max_sample = builtin_max(sample, max_sample)
+        min_sample = builtin_min(sample, min_sample)
 
     return min_sample, max_sample
 
@@ -542,4 +547,4 @@ def lin2adpcm(cp, size, state):
 
 
 def adpcm2lin(cp, size, state):
-    raise NotImplementedError()
+    raise NotImplementedError()