diff --git a/homeassistant/components/stream/worker.py b/homeassistant/components/stream/worker.py
index 773170449e1..8d1df37d039 100644
--- a/homeassistant/components/stream/worker.py
+++ b/homeassistant/components/stream/worker.py
@@ -208,6 +208,16 @@ def stream_worker(source, options, segment_buffer, quit_event):
                     missing_dts += 1
                     continue
                 if packet.stream == audio_stream:
+                    # detect ADTS AAC and disable audio
+                    if audio_stream.codec.name == "aac" and packet.size > 2:
+                        with memoryview(packet) as packet_view:
+                            if packet_view[0] == 0xFF and packet_view[1] & 0xF0 == 0xF0:
+                                _LOGGER.warning(
+                                    "ADTS AAC detected - disabling audio stream"
+                                )
+                                container_packets = container.demux(video_stream)
+                                audio_stream = None
+                                continue
                     found_audio = True
                 elif (
                     segment_start_pts is None
diff --git a/tests/components/stream/test_worker.py b/tests/components/stream/test_worker.py
index 2c202a290ce..bef5d366a8f 100644
--- a/tests/components/stream/test_worker.py
+++ b/tests/components/stream/test_worker.py
@@ -57,6 +57,11 @@ class FakePyAvStream:
         self.time_base = fractions.Fraction(1, rate)
         self.profile = "ignored-profile"
 
+        class FakeCodec:
+            name = "aac"
+
+        self.codec = FakeCodec()
+
 
 VIDEO_STREAM = FakePyAvStream(VIDEO_STREAM_FORMAT, VIDEO_FRAME_RATE)
 AUDIO_STREAM = FakePyAvStream(AUDIO_STREAM_FORMAT, AUDIO_SAMPLE_RATE)
@@ -87,13 +92,18 @@ class PacketSequence:
             raise StopIteration
         self.packet += 1
 
-        class FakePacket:
+        class FakePacket(bytearray):
+            # Be a bytearray so that memoryview works
+            def __init__(self):
+                super().__init__(3)
+
             time_base = fractions.Fraction(1, VIDEO_FRAME_RATE)
             dts = self.packet * PACKET_DURATION / time_base
             pts = self.packet * PACKET_DURATION / time_base
             duration = PACKET_DURATION / time_base
             stream = VIDEO_STREAM
             is_keyframe = True
+            size = 3
 
         return FakePacket()
 
@@ -107,8 +117,8 @@ class FakePyAvContainer:
         self.packets = PacketSequence(0)
 
         class FakePyAvStreams:
-            video = video_stream
-            audio = audio_stream
+            video = [video_stream] if video_stream else []
+            audio = [audio_stream] if audio_stream else []
 
         self.streams = FakePyAvStreams()
 
@@ -171,8 +181,8 @@ class MockPyAv:
 
     def __init__(self, video=True, audio=False):
         """Initialize the MockPyAv."""
-        video_stream = [VIDEO_STREAM] if video else []
-        audio_stream = [AUDIO_STREAM] if audio else []
+        video_stream = VIDEO_STREAM if video else None
+        audio_stream = AUDIO_STREAM if audio else None
         self.container = FakePyAvContainer(
             video_stream=video_stream, audio_stream=audio_stream
         )
@@ -413,6 +423,23 @@ async def test_audio_packets_not_found(hass):
     assert len(decoded_stream.audio_packets) == 0
 
 
+async def test_adts_aac_audio(hass):
+    """Set up an ADTS AAC audio stream and disable audio."""
+    py_av = MockPyAv(audio=True)
+
+    num_packets = PACKETS_TO_WAIT_FOR_AUDIO + 1
+    packets = list(PacketSequence(num_packets))
+    packets[1].stream = AUDIO_STREAM
+    packets[1].dts = packets[0].dts / VIDEO_FRAME_RATE * AUDIO_SAMPLE_RATE
+    packets[1].pts = packets[0].pts / VIDEO_FRAME_RATE * AUDIO_SAMPLE_RATE
+    # The following packet data is a sign of ADTS AAC
+    packets[1][0] = 255
+    packets[1][1] = 241
+
+    decoded_stream = await async_decode_stream(hass, iter(packets), py_av=py_av)
+    assert len(decoded_stream.audio_packets) == 0
+
+
 async def test_audio_is_first_packet(hass):
     """Set up an audio stream and audio packet is the first packet in the stream."""
     py_av = MockPyAv(audio=True)