Use bitstream filter to allow ADTS AAC audio in stream (#74151)

This commit is contained in:
uvjustin 2022-06-29 16:15:22 +08:00 committed by GitHub
parent 99329ef04f
commit f45afe7379
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 36 additions and 43 deletions

View File

@ -2,7 +2,7 @@
"domain": "generic", "domain": "generic",
"name": "Generic Camera", "name": "Generic Camera",
"config_flow": true, "config_flow": true,
"requirements": ["ha-av==10.0.0b3", "pillow==9.1.1"], "requirements": ["ha-av==10.0.0b4", "pillow==9.1.1"],
"documentation": "https://www.home-assistant.io/integrations/generic", "documentation": "https://www.home-assistant.io/integrations/generic",
"codeowners": ["@davet2001"], "codeowners": ["@davet2001"],
"iot_class": "local_push" "iot_class": "local_push"

View File

@ -2,7 +2,7 @@
"domain": "stream", "domain": "stream",
"name": "Stream", "name": "Stream",
"documentation": "https://www.home-assistant.io/integrations/stream", "documentation": "https://www.home-assistant.io/integrations/stream",
"requirements": ["PyTurboJPEG==1.6.6", "ha-av==10.0.0b3"], "requirements": ["PyTurboJPEG==1.6.6", "ha-av==10.0.0b4"],
"dependencies": ["http"], "dependencies": ["http"],
"codeowners": ["@hunterjm", "@uvjustin", "@allenporter"], "codeowners": ["@hunterjm", "@uvjustin", "@allenporter"],
"quality_scale": "internal", "quality_scale": "internal",

View File

@ -108,6 +108,7 @@ class StreamMuxer:
hass: HomeAssistant, hass: HomeAssistant,
video_stream: av.video.VideoStream, video_stream: av.video.VideoStream,
audio_stream: av.audio.stream.AudioStream | None, audio_stream: av.audio.stream.AudioStream | None,
audio_bsf: av.BitStreamFilterContext | None,
stream_state: StreamState, stream_state: StreamState,
stream_settings: StreamSettings, stream_settings: StreamSettings,
) -> None: ) -> None:
@ -118,6 +119,7 @@ class StreamMuxer:
self._av_output: av.container.OutputContainer = None self._av_output: av.container.OutputContainer = None
self._input_video_stream: av.video.VideoStream = video_stream self._input_video_stream: av.video.VideoStream = video_stream
self._input_audio_stream: av.audio.stream.AudioStream | None = audio_stream self._input_audio_stream: av.audio.stream.AudioStream | None = audio_stream
self._audio_bsf = audio_bsf
self._output_video_stream: av.video.VideoStream = None self._output_video_stream: av.video.VideoStream = None
self._output_audio_stream: av.audio.stream.AudioStream | None = None self._output_audio_stream: av.audio.stream.AudioStream | None = None
self._segment: Segment | None = None self._segment: Segment | None = None
@ -192,7 +194,9 @@ class StreamMuxer:
# Check if audio is requested # Check if audio is requested
output_astream = None output_astream = None
if input_astream: if input_astream:
output_astream = container.add_stream(template=input_astream) output_astream = container.add_stream(
template=self._audio_bsf or input_astream
)
return container, output_vstream, output_astream return container, output_vstream, output_astream
def reset(self, video_dts: int) -> None: def reset(self, video_dts: int) -> None:
@ -234,6 +238,12 @@ class StreamMuxer:
self._part_has_keyframe |= packet.is_keyframe self._part_has_keyframe |= packet.is_keyframe
elif packet.stream == self._input_audio_stream: elif packet.stream == self._input_audio_stream:
if self._audio_bsf:
self._audio_bsf.send(packet)
while packet := self._audio_bsf.recv():
packet.stream = self._output_audio_stream
self._av_output.mux(packet)
return
packet.stream = self._output_audio_stream packet.stream = self._output_audio_stream
self._av_output.mux(packet) self._av_output.mux(packet)
@ -355,12 +365,6 @@ class PeekIterator(Iterator):
"""Return and consume the next item available.""" """Return and consume the next item available."""
return self._next() return self._next()
def replace_underlying_iterator(self, new_iterator: Iterator) -> None:
"""Replace the underlying iterator while preserving the buffer."""
self._iterator = new_iterator
if not self._buffer:
self._next = self._iterator.__next__
def _pop_buffer(self) -> av.Packet: def _pop_buffer(self) -> av.Packet:
"""Consume items from the buffer until exhausted.""" """Consume items from the buffer until exhausted."""
if self._buffer: if self._buffer:
@ -422,10 +426,12 @@ def is_keyframe(packet: av.Packet) -> Any:
return packet.is_keyframe return packet.is_keyframe
def unsupported_audio(packets: Iterator[av.Packet], audio_stream: Any) -> bool: def get_audio_bitstream_filter(
"""Detect ADTS AAC, which is not supported by pyav.""" packets: Iterator[av.Packet], audio_stream: Any
) -> av.BitStreamFilterContext | None:
"""Return the aac_adtstoasc bitstream filter if ADTS AAC is detected."""
if not audio_stream: if not audio_stream:
return False return None
for count, packet in enumerate(packets): for count, packet in enumerate(packets):
if count >= PACKETS_TO_WAIT_FOR_AUDIO: if count >= PACKETS_TO_WAIT_FOR_AUDIO:
# Some streams declare an audio stream and never send any packets # Some streams declare an audio stream and never send any packets
@ -436,10 +442,15 @@ def unsupported_audio(packets: Iterator[av.Packet], audio_stream: Any) -> bool:
if audio_stream.codec.name == "aac" and packet.size > 2: if audio_stream.codec.name == "aac" and packet.size > 2:
with memoryview(packet) as packet_view: with memoryview(packet) as packet_view:
if packet_view[0] == 0xFF and packet_view[1] & 0xF0 == 0xF0: if packet_view[0] == 0xFF and packet_view[1] & 0xF0 == 0xF0:
_LOGGER.warning("ADTS AAC detected - disabling audio stream") _LOGGER.debug(
return True "ADTS AAC detected. Adding aac_adtstoaac bitstream filter"
)
bsf = av.BitStreamFilter("aac_adtstoasc")
bsf_context = bsf.create()
bsf_context.set_input_stream(audio_stream)
return bsf_context
break break
return False return None
def stream_worker( def stream_worker(
@ -500,12 +511,8 @@ def stream_worker(
# Use a peeking iterator to peek into the start of the stream, ensuring # Use a peeking iterator to peek into the start of the stream, ensuring
# everything looks good, then go back to the start when muxing below. # everything looks good, then go back to the start when muxing below.
try: try:
if audio_stream and unsupported_audio(container_packets.peek(), audio_stream): # Get the required bitstream filter
audio_stream = None audio_bsf = get_audio_bitstream_filter(container_packets.peek(), audio_stream)
container_packets.replace_underlying_iterator(
filter(dts_validator.is_valid, container.demux(video_stream))
)
# Advance to the first keyframe for muxing, then rewind so the muxing # Advance to the first keyframe for muxing, then rewind so the muxing
# loop below can consume. # loop below can consume.
first_keyframe = next( first_keyframe = next(
@ -535,7 +542,12 @@ def stream_worker(
) from ex ) from ex
muxer = StreamMuxer( muxer = StreamMuxer(
stream_state.hass, video_stream, audio_stream, stream_state, stream_settings stream_state.hass,
video_stream,
audio_stream,
audio_bsf,
stream_state,
stream_settings,
) )
muxer.reset(start_dts) muxer.reset(start_dts)

View File

@ -780,7 +780,7 @@ guppy3==3.1.2
# homeassistant.components.generic # homeassistant.components.generic
# homeassistant.components.stream # homeassistant.components.stream
ha-av==10.0.0b3 ha-av==10.0.0b4
# homeassistant.components.ffmpeg # homeassistant.components.ffmpeg
ha-ffmpeg==3.0.2 ha-ffmpeg==3.0.2

View File

@ -559,7 +559,7 @@ guppy3==3.1.2
# homeassistant.components.generic # homeassistant.components.generic
# homeassistant.components.stream # homeassistant.components.stream
ha-av==10.0.0b3 ha-av==10.0.0b4
# homeassistant.components.ffmpeg # homeassistant.components.ffmpeg
ha-ffmpeg==3.0.2 ha-ffmpeg==3.0.2

View File

@ -552,25 +552,6 @@ async def test_audio_packets_not_found(hass):
assert len(decoded_stream.audio_packets) == 0 assert len(decoded_stream.audio_packets) == 0
async def test_adts_aac_audio(hass):
"""Set up an ADTS AAC audio stream and disable audio."""
py_av = MockPyAv(audio=True)
num_packets = PACKETS_TO_WAIT_FOR_AUDIO + 1
packets = list(PacketSequence(num_packets))
packets[1].stream = AUDIO_STREAM
packets[1].dts = int(packets[0].dts / VIDEO_FRAME_RATE * AUDIO_SAMPLE_RATE)
packets[1].pts = int(packets[0].pts / VIDEO_FRAME_RATE * AUDIO_SAMPLE_RATE)
# The following packet data is a sign of ADTS AAC
packets[1][0] = 255
packets[1][1] = 241
decoded_stream = await async_decode_stream(hass, packets, py_av=py_av)
assert len(decoded_stream.audio_packets) == 0
# All decoded video packets are still preserved
assert len(decoded_stream.video_packets) == num_packets - 1
async def test_audio_is_first_packet(hass): async def test_audio_is_first_packet(hass):
"""Set up an audio stream and audio packet is the first packet in the stream.""" """Set up an audio stream and audio packet is the first packet in the stream."""
py_av = MockPyAv(audio=True) py_av = MockPyAv(audio=True)