123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406 |
- From dc24f76cc621d56d11abb015654c427f7a7c0a31 Mon Sep 17 00:00:00 2001
- From: Axel Spoerl <axel.spoerl@qt.io>
- Date: Fri, 30 Jun 2023 12:43:59 +0200
- Subject: [PATCH] QXmlStreamReader: Raise error on unexpected tokens
- QXmlStreamReader accepted multiple DOCTYPE elements, containing DTD
- fragments in the XML prolog, and in the XML body.
- Well-formed but invalid XML files - with multiple DTD fragments in
- prolog and body, combined with recursive entity expansions - have
- caused infinite loops in QXmlStreamReader.
- This patch implements a token check in QXmlStreamReader.
- A stream is allowed to start with an XML prolog. StartDocument
- and DOCTYPE elements are only allowed in this prolog, which
- may also contain ProcessingInstruction and Comment elements.
- As soon as anything else is seen, the prolog ends.
- After that, the prolog-specific elements are treated as unexpected.
- Furthermore, the prolog can contain at most one DOCTYPE element.
- Update the documentation to reflect the new behavior.
- Add an autotest that checks the new error cases are correctly detected,
- and no error is raised for legitimate input.
- The original OSS-Fuzz files (see bug reports) are not included in this
- patch for file size reasons. They have been tested manually. Each of
- them has more than one DOCTYPE element, causing infinite loops in
- recursive entity expansions. The newly implemented functionality
- detects those invalid DTD fragments. By raising an error, it aborts
- stream reading before an infinite loop occurs.
- Thanks to OSS-Fuzz for finding this.
- Fixes: QTBUG-92113
- Fixes: QTBUG-95188
- Pick-to: 6.6 6.5 6.2 5.15
- Change-Id: I0a082b9188b2eee50b396c4d5b1c9e1fd237bbdd
- Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
- Fixes: https://security-tracker.debian.org/tracker/CVE-2023-38197
- Upstream: https://github.com/qt/qtbase/commit/c4301be7d5f94852e1b17f2c2989d5ca807855d4
- [Thomas: minor conflicts resolved when backporting]
- Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
- ---
- src/corelib/serialization/qxmlstream.cpp | 145 +++++++++++++++++-
- src/corelib/serialization/qxmlstream_p.h | 11 ++
- .../qxmlstream/tokenError/dtdInBody.xml | 20 +++
- .../qxmlstream/tokenError/multipleDtd.xml | 20 +++
- .../qxmlstream/tokenError/wellFormed.xml | 15 ++
- .../qxmlstream/tst_qxmlstream.cpp | 39 +++++
- 6 files changed, 242 insertions(+), 8 deletions(-)
- create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
- create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
- create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
- diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp
- index a31bd16f1ce..ac5b291c278 100644
- --- a/src/corelib/serialization/qxmlstream.cpp
- +++ b/src/corelib/serialization/qxmlstream.cpp
- @@ -128,7 +128,7 @@ void reversed(const Range &&) = delete;
- addData() or by waiting for it to arrive on the device().
-
- \value UnexpectedElementError The parser encountered an element
- - that was different to those it expected.
- + or token that was different to those it expected.
-
- */
-
- @@ -265,13 +265,34 @@ QXmlStreamEntityResolver *QXmlStreamReader::entityResolver() const
-
- QXmlStreamReader is a well-formed XML 1.0 parser that does \e not
- include external parsed entities. As long as no error occurs, the
- - application code can thus be assured that the data provided by the
- - stream reader satisfies the W3C's criteria for well-formed XML. For
- - example, you can be certain that all tags are indeed nested and
- - closed properly, that references to internal entities have been
- - replaced with the correct replacement text, and that attributes have
- - been normalized or added according to the internal subset of the
- - DTD.
- + application code can thus be assured, that
- + \list
- + \li the data provided by the stream reader satisfies the W3C's
- + criteria for well-formed XML,
- + \li tokens are provided in a valid order.
- + \endlist
- +
- + Unless QXmlStreamReader raises an error, it guarantees the following:
- + \list
- + \li All tags are nested and closed properly.
- + \li References to internal entities have been replaced with the
- + correct replacement text.
- + \li Attributes have been normalized or added according to the
- + internal subset of the \l DTD.
- + \li Tokens of type \l StartDocument happen before all others,
- + aside from comments and processing instructions.
- + \li At most one DOCTYPE element (a token of type \l DTD) is present.
- + \li If present, the DOCTYPE appears before all other elements,
- + aside from StartDocument, comments and processing instructions.
- + \endlist
- +
- + In particular, once any token of type \l StartElement, \l EndElement,
- + \l Characters, \l EntityReference or \l EndDocument is seen, no
- + tokens of type StartDocument or DTD will be seen. If one is present in
- + the input stream, out of order, an error is raised.
- +
- + \note The token types \l Comment and \l ProcessingInstruction may appear
- + anywhere in the stream.
-
- If an error occurs while parsing, atEnd() and hasError() return
- true, and error() returns the error that occurred. The functions
- @@ -574,6 +595,7 @@ QXmlStreamReader::TokenType QXmlStreamReader::readNext()
- d->token = -1;
- return readNext();
- }
- + d->checkToken();
- return d->type;
- }
-
- @@ -658,6 +680,11 @@ static constexpr auto QXmlStreamReader_tokenTypeString = qOffsetStringArray(
- "ProcessingInstruction"
- );
-
- +static constexpr auto QXmlStreamReader_XmlContextString = qOffsetStringArray(
- + "Prolog",
- + "Body"
- +);
- +
- /*!
- \property QXmlStreamReader::namespaceProcessing
- \brief the namespace-processing flag of the stream reader.
- @@ -692,6 +719,14 @@ QString QXmlStreamReader::tokenString() const
- return QLatin1StringView(QXmlStreamReader_tokenTypeString.at(d->type));
- }
-
- +/*!
- + \internal
- + \return \param loc (Prolog/Body) as a string.
- + */
- +static constexpr QLatin1StringView contextString(QXmlStreamReaderPrivate::XmlContext ctxt)
- +{
- + return QLatin1StringView(QXmlStreamReader_XmlContextString.at(static_cast<int>(ctxt)));
- +}
- #endif // QT_NO_XMLSTREAMREADER
-
- QXmlStreamPrivateTagStack::QXmlStreamPrivateTagStack()
- @@ -778,6 +813,8 @@ void QXmlStreamReaderPrivate::init()
-
- type = QXmlStreamReader::NoToken;
- error = QXmlStreamReader::NoError;
- + currentContext = XmlContext::Prolog;
- + foundDTD = false;
- }
-
- /*
- @@ -3684,6 +3721,98 @@ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader)
- }
- }
-
- +static constexpr bool isTokenAllowedInContext(QXmlStreamReader::TokenType type,
- + QXmlStreamReaderPrivate::XmlContext loc)
- +{
- + switch (type) {
- + case QXmlStreamReader::StartDocument:
- + case QXmlStreamReader::DTD:
- + return loc == QXmlStreamReaderPrivate::XmlContext::Prolog;
- +
- + case QXmlStreamReader::StartElement:
- + case QXmlStreamReader::EndElement:
- + case QXmlStreamReader::Characters:
- + case QXmlStreamReader::EntityReference:
- + case QXmlStreamReader::EndDocument:
- + return loc == QXmlStreamReaderPrivate::XmlContext::Body;
- +
- + case QXmlStreamReader::Comment:
- + case QXmlStreamReader::ProcessingInstruction:
- + return true;
- +
- + case QXmlStreamReader::NoToken:
- + case QXmlStreamReader::Invalid:
- + return false;
- + }
- +
- + // GCC 8.x does not treat __builtin_unreachable() as constexpr
- +#if !defined(Q_CC_GNU_ONLY) || (Q_CC_GNU >= 900)
- + Q_UNREACHABLE();
- + return false;
- +#else
- + return false;
- +#endif
- +}
- +
- +/*!
- + \internal
- + \brief QXmlStreamReader::isValidToken
- + \return \c true if \param type is a valid token type.
- + \return \c false if \param type is an unexpected token,
- + which indicates a non-well-formed or invalid XML stream.
- + */
- +bool QXmlStreamReaderPrivate::isValidToken(QXmlStreamReader::TokenType type)
- +{
- + // Don't change currentContext, if Invalid or NoToken occur in the prolog
- + if (type == QXmlStreamReader::Invalid || type == QXmlStreamReader::NoToken)
- + return false;
- +
- + // If a token type gets rejected in the body, there is no recovery
- + const bool result = isTokenAllowedInContext(type, currentContext);
- + if (result || currentContext == XmlContext::Body)
- + return result;
- +
- + // First non-Prolog token observed => switch context to body and check again.
- + currentContext = XmlContext::Body;
- + return isTokenAllowedInContext(type, currentContext);
- +}
- +
- +/*!
- + \internal
- + Checks token type and raises an error, if it is invalid
- + in the current context (prolog/body).
- + */
- +void QXmlStreamReaderPrivate::checkToken()
- +{
- + Q_Q(QXmlStreamReader);
- +
- + // The token type must be consumed, to keep track if the body has been reached.
- + const XmlContext context = currentContext;
- + const bool ok = isValidToken(type);
- +
- + // Do nothing if an error has been raised already (going along with an unexpected token)
- + if (error != QXmlStreamReader::Error::NoError)
- + return;
- +
- + if (!ok) {
- + raiseError(QXmlStreamReader::UnexpectedElementError,
- + QObject::tr("Unexpected token type %1 in %2.")
- + .arg(q->tokenString(), contextString(context)));
- + return;
- + }
- +
- + if (type != QXmlStreamReader::DTD)
- + return;
- +
- + // Raise error on multiple DTD tokens
- + if (foundDTD) {
- + raiseError(QXmlStreamReader::UnexpectedElementError,
- + QObject::tr("Found second DTD token in %1.").arg(contextString(context)));
- + } else {
- + foundDTD = true;
- + }
- +}
- +
- /*!
- \fn bool QXmlStreamAttributes::hasAttribute(const QString &qualifiedName) const
- \since 4.5
- diff --git a/src/corelib/serialization/qxmlstream_p.h b/src/corelib/serialization/qxmlstream_p.h
- index 1fd69a2c1f8..f5059f8fcf9 100644
- --- a/src/corelib/serialization/qxmlstream_p.h
- +++ b/src/corelib/serialization/qxmlstream_p.h
- @@ -270,6 +270,17 @@ public:
- QStringDecoder decoder;
- bool atEnd;
-
- + enum class XmlContext
- + {
- + Prolog,
- + Body,
- + };
- +
- + XmlContext currentContext = XmlContext::Prolog;
- + bool foundDTD = false;
- + bool isValidToken(QXmlStreamReader::TokenType type);
- + void checkToken();
- +
- /*!
- \sa setType()
- */
- diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
- new file mode 100644
- index 00000000000..1c3ca4e2711
- --- /dev/null
- +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
- @@ -0,0 +1,20 @@
- +<!DOCTYPE TEST [
- + <!ELEMENT TESTATTRIBUTE (CASE+)>
- + <!ELEMENT CASE (CLASS, FUNCTION)>
- + <!ELEMENT CLASS (#PCDATA)>
- +
- + <!-- adding random ENTITY statement, as this is typical DTD content -->
- + <!ENTITY unite "∪">
- +
- + <!ATTLIST CASE CLASS CDATA #REQUIRED>
- +]>
- +<TEST>
- + <CASE>
- + <CLASS>tst_QXmlStream</CLASS>
- + </CASE>
- + <!-- invalid DTD in XML body follows -->
- + <!DOCTYPE DTDTEST [
- + <!ELEMENT RESULT (CASE+)>
- + <!ATTLIST RESULT OUTPUT CDATA #REQUIRED>
- + ]>
- +</TEST>
- diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
- new file mode 100644
- index 00000000000..cd398c0f9fd
- --- /dev/null
- +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
- @@ -0,0 +1,20 @@
- +<!DOCTYPE TEST [
- + <!ELEMENT TESTATTRIBUTE (CASE+)>
- + <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)>
- + <!ELEMENT CLASS (#PCDATA)>
- +
- + <!-- adding random ENTITY statements, as this is typical DTD content -->
- + <!ENTITY iff "⇔">
- +
- + <!ATTLIST CASE CLASS CDATA #REQUIRED>
- +]>
- +<!-- invalid second DTD follows -->
- +<!DOCTYPE SECOND [
- + <!ELEMENT SECONDATTRIBUTE (#PCDATA)>
- + <!ENTITY on "∘">
- +]>
- +<TEST>
- + <CASE>
- + <CLASS>tst_QXmlStream</CLASS>
- + </CASE>
- +</TEST>
- diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
- new file mode 100644
- index 00000000000..1b61a3f0622
- --- /dev/null
- +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
- @@ -0,0 +1,15 @@
- +<!DOCTYPE TEST [
- + <!ELEMENT TESTATTRIBUTE (CASE+)>
- + <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)>
- + <!ELEMENT CLASS (#PCDATA)>
- +
- + <!-- adding random ENTITY statements, as this is typical DTD content -->
- + <!ENTITY unite "∪">
- +
- + <!ATTLIST CASE CLASS CDATA #REQUIRED>
- +]>
- +<TEST>
- + <CASE>
- + <CLASS>tst_QXmlStream</CLASS>
- + </CASE>
- +</TEST>
- diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
- index c64088d4774..8d2738700ea 100644
- --- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
- +++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
- @@ -584,6 +584,9 @@ private slots:
-
- void entityExpansionLimit() const;
-
- + void tokenErrorHandling_data() const;
- + void tokenErrorHandling() const;
- +
- private:
- static QByteArray readFile(const QString &filename);
-
- @@ -1754,5 +1757,41 @@ void tst_QXmlStream::roundTrip() const
- QCOMPARE(out, in);
- }
-
- +void tst_QXmlStream::tokenErrorHandling_data() const
- +{
- + QTest::addColumn<QString>("fileName");
- + QTest::addColumn<QXmlStreamReader::Error>("expectedError");
- + QTest::addColumn<QString>("errorKeyWord");
- +
- + constexpr auto invalid = QXmlStreamReader::Error::UnexpectedElementError;
- + constexpr auto valid = QXmlStreamReader::Error::NoError;
- + QTest::newRow("DtdInBody") << "dtdInBody.xml" << invalid << "DTD";
- + QTest::newRow("multipleDTD") << "multipleDtd.xml" << invalid << "second DTD";
- + QTest::newRow("wellFormed") << "wellFormed.xml" << valid << "";
- +}
- +
- +void tst_QXmlStream::tokenErrorHandling() const
- +{
- + QFETCH(const QString, fileName);
- + QFETCH(const QXmlStreamReader::Error, expectedError);
- + QFETCH(const QString, errorKeyWord);
- +
- + const QDir dir(QFINDTESTDATA("tokenError"));
- + QFile file(dir.absoluteFilePath(fileName));
- +
- + // Cross-compiling: File will be on host only
- + if (!file.exists())
- + QSKIP("Testfile not found.");
- +
- + file.open(QIODevice::ReadOnly);
- + QXmlStreamReader reader(&file);
- + while (!reader.atEnd())
- + reader.readNext();
- +
- + QCOMPARE(reader.error(), expectedError);
- + if (expectedError != QXmlStreamReader::Error::NoError)
- + QVERIFY(reader.errorString().contains(errorKeyWord));
- +}
- +
- #include "tst_qxmlstream.moc"
- // vim: et:ts=4:sw=4:sts=4
- --
- 2.46.0
|