taglib: Efficient lookup for an ID3v2 tag in MPEG files with garbage

This commit is contained in:
MilhouseVH 2017-01-26 20:19:54 +00:00
parent 2dd739dd19
commit 51142dbeb6

View File

@ -0,0 +1,72 @@
From d2e0e5522308ce4c905c205c7bf5972ea71cd6e5 Mon Sep 17 00:00:00 2001
From: Tsuda Kageyu <tsuda.kageyu@gmail.com>
Date: Fri, 20 Jan 2017 21:14:38 +0900
Subject: [PATCH] Efficient lookup for an ID3v2 tag in MPEG files with garbage.
This looks for an ID3v2 tag until reaching the first valid MPEG frame in O(n) time.
---
taglib/mpeg/mpegfile.cpp | 45 +++++++++++++++++++++++++++++----------------
1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/taglib/mpeg/mpegfile.cpp b/taglib/mpeg/mpegfile.cpp
index af7253f..c634eeb 100644
--- a/taglib/mpeg/mpegfile.cpp
+++ b/taglib/mpeg/mpegfile.cpp
@@ -488,28 +488,41 @@ long MPEG::File::findID3v2()
const ByteVector headerID = ID3v2::Header::fileIdentifier();
seek(0);
+ if(readBlock(headerID.size()) == headerID)
+ return 0;
- const ByteVector data = readBlock(headerID.size());
- if(data.size() < headerID.size())
+ Header firstHeader(this, 0, true);
+ if(firstHeader.isValid())
return -1;
- if(data == headerID)
- return 0;
+ // Look for an ID3v2 tag until reaching the first valid MPEG frame.
- if(firstSyncByte(data[0]) && secondSynchByte(data[1]))
- return -1;
+ char frameSyncBytes[2] = {};
+ char tagHeaderBytes[4] = {};
+ long position = 0;
- // Look for the entire file, if neither an MEPG frame or ID3v2 tag was found
- // at the beginning of the file.
- // We don't care about the inefficiency of the code, since this is a seldom case.
+ while(true) {
+ seek(position);
+ const ByteVector buffer = readBlock(bufferSize());
+ if(buffer.isEmpty())
+ return -1;
- const long tagOffset = find(headerID);
- if(tagOffset < 0)
- return -1;
+ for(unsigned int i = 0; i < buffer.size(); ++i) {
+ frameSyncBytes[0] = frameSyncBytes[1];
+ frameSyncBytes[1] = buffer[i];
+ if(firstSyncByte(frameSyncBytes[0]) && secondSynchByte(frameSyncBytes[1])) {
+ Header header(this, position + i - 1, true);
+ if(header.isValid())
+ return -1;
+ }
- const long frameOffset = firstFrameOffset();
- if(frameOffset < tagOffset)
- return -1;
+ tagHeaderBytes[0] = tagHeaderBytes[1];
+ tagHeaderBytes[1] = tagHeaderBytes[2];
+ tagHeaderBytes[2] = buffer[i];
+ if(headerID == tagHeaderBytes)
+ return position + i - 2;
+ }
- return tagOffset;
+ position += bufferSize();
+ }
}