forked from pool/saxon6
4ab7455d44
- fix bnc#739498 - replace the GPL-covered com/icl/saxon/aelfred/XmlParser with a file under a more permissive license OBS-URL: https://build.opensuse.org/request/show/144389 OBS-URL: https://build.opensuse.org/package/show/Java:packages/saxon6?expand=0&rev=4
1073 lines
29 KiB
Diff
1073 lines
29 KiB
Diff
--- XmlParser.java.old 2012-12-04 12:58:09.544956168 +0100
|
|
+++ XmlParser.java 2012-12-04 15:28:20.798929872 +0100
|
|
@@ -62,10 +62,12 @@
|
|
// The modification over the original source are flagged by
|
|
// <struct/> tags.
|
|
|
|
-// <struct>
|
|
-// package org.brownell.xml.aelfred2;
|
|
-import org.brownell.xml.aelfred2.*;
|
|
-// </struct>
|
|
+// removed all <struct/> modifications and adapted the code to be able to replace
|
|
+// the GPL com/icl/saxon/aelfred/XmlParser.java, which is not compatible with
|
|
+// MPL license of the rest
|
|
+// mvyskocil@suse.com
|
|
+
|
|
+package com.icl.saxon.aelfred;
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.CharConversionException;
|
|
@@ -81,27 +83,20 @@
|
|
import java.util.Stack;
|
|
|
|
import org.xml.sax.SAXException;
|
|
-// <struct>
|
|
-import org.xml.sax.SAXNotRecognizedException;
|
|
-// </struct>
|
|
-
|
|
// $Id: XmlParser.java,v 1.20 2000/05/29 12:10:24 mojo Exp $
|
|
|
|
/**
|
|
* Parse XML documents and return parse events through call-backs.
|
|
- * Use the <code>StructSaxDriver</code> class as your entry point, as all
|
|
+ * Use the <code>SAXDriver</code> class as your entry point, as all
|
|
* internal parser interfaces are subject to change.
|
|
*
|
|
* @author Written by David Megginson <dmeggins@microstar.com>
|
|
* (version 1.2a with bugfixes)
|
|
* @author Updated by David Brownell <david-b@pacbell.net>
|
|
* @version $Date: 2000/05/29 12:10:24 $
|
|
- * @see StructSaxDriver
|
|
+ * @see SAXDriver
|
|
*/
|
|
-// <struct>
|
|
-// final class XmlParser
|
|
-final class StructXmlParser
|
|
-// </struct>
|
|
+final class XmlParser
|
|
{
|
|
// parse from buffer, avoiding slow per-character readCh()
|
|
private final static boolean USE_CHEATS = true;
|
|
@@ -121,10 +116,7 @@
|
|
* @see #parse
|
|
*/
|
|
// package private
|
|
-// <struct>
|
|
-// XmlParser ()
|
|
- StructXmlParser ()
|
|
-// </struct>
|
|
+ XmlParser ()
|
|
{
|
|
cleanupVariables ();
|
|
}
|
|
@@ -136,10 +128,7 @@
|
|
* @see #parse
|
|
*/
|
|
// package private
|
|
-// <struct>
|
|
-// final class XmlParser
|
|
-// void setHandler (SaxDriver handler)
|
|
- void setHandler (StructSaxDriver handler)
|
|
+ void setHandler (SAXDriver handler)
|
|
// </struct>
|
|
{
|
|
this.handler = handler;
|
|
@@ -194,7 +183,7 @@
|
|
handler.startDocument ();
|
|
|
|
pushURL ("[document]", basePublicId, baseURI,
|
|
- baseReader, baseInputStream, encoding);
|
|
+ baseReader, baseInputStream, encoding, false);
|
|
|
|
try {
|
|
parseDocument ();
|
|
@@ -461,6 +450,8 @@
|
|
private final static int LIT_DISABLE_EREF = 64;
|
|
// don't expand general entities, but make sure we _could_
|
|
private final static int LIT_ENTITY_CHECK = 128;
|
|
+ // literal is a public ID value
|
|
+ private final static int LIT_PUBID = 256;
|
|
|
|
|
|
//
|
|
@@ -480,7 +471,7 @@
|
|
* Report an error.
|
|
* @param message The error message.
|
|
* @param textFound The text that caused the error (or null).
|
|
- * @see StructSaxDriver#error
|
|
+ * @see SAXDriver#error
|
|
* @see #line
|
|
*/
|
|
private void error (String message, String textFound, String textExpected)
|
|
@@ -543,47 +534,13 @@
|
|
{
|
|
char c;
|
|
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the comments
|
|
- //
|
|
-
|
|
- handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
|
|
- handler.startElement ("str:document");
|
|
- }
|
|
-// </struct>
|
|
-
|
|
+ try {
|
|
parseProlog ();
|
|
-
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the document structure
|
|
- //
|
|
-
|
|
- handler.startElement ("str:body");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
-
|
|
require ('<');
|
|
parseElement ();
|
|
-
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the document structure
|
|
- //
|
|
-
|
|
- handler.endElement ("str:body");
|
|
- handler.startElement ("str:epilog");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
+ } catch (EOFException eofe) {
|
|
+ error("premature end of file");
|
|
+ }
|
|
|
|
try {
|
|
parseMisc (); //skip all white, PIs, and comments
|
|
@@ -593,19 +550,6 @@
|
|
return;
|
|
}
|
|
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the comments
|
|
- //
|
|
-
|
|
- handler.endElement ("str:epilog");
|
|
- handler.endElement ("str:document");
|
|
- }
|
|
-// </struct>
|
|
-
|
|
-
|
|
}
|
|
|
|
|
|
@@ -626,23 +570,7 @@
|
|
parseUntil ("--");
|
|
require ('>');
|
|
expandPE = saved;
|
|
-// <struct>
|
|
- if (! translateComments ) {
|
|
-// </struct>
|
|
- handler.comment (dataBuffer, 0, dataBufferPos);
|
|
-// <struct>
|
|
- } else {
|
|
- //
|
|
- // Struct: updated to show the comments
|
|
- //
|
|
- System.err.println("Comment : " );
|
|
- handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
|
|
- handler.startElement ("str:comment");
|
|
- handler.charData (dataBuffer, 0, dataBufferPos);
|
|
- handler.endElement ("str:comment");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
+ handler.comment (dataBuffer, 0, dataBufferPos);
|
|
dataBufferPos = 0;
|
|
}
|
|
|
|
@@ -711,45 +639,12 @@
|
|
private void parseProlog ()
|
|
throws Exception
|
|
{
|
|
-
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the prolog
|
|
- //
|
|
-
|
|
- handler.startElement ("str:prolog");
|
|
- if (version != null)
|
|
- handler.attribute ("str:version", version, false);
|
|
- if (encodingName != null)
|
|
- handler.attribute ("str:encoding", encodingName, false);
|
|
- if (standalone != null)
|
|
- handler.attribute ("str:standalone", standalone, false);
|
|
- handler.startElement ("str:X-M-L-Decl");
|
|
- handler.endElement ("str:X-M-L-Decl");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
-
|
|
parseMisc ();
|
|
|
|
if (tryRead ("<!DOCTYPE")) {
|
|
parseDoctypedecl ();
|
|
parseMisc ();
|
|
}
|
|
-
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the prolog
|
|
- //
|
|
-
|
|
- handler.endElement ("str:prolog");
|
|
- }
|
|
-// </struct>
|
|
-
|
|
}
|
|
|
|
|
|
@@ -775,12 +670,14 @@
|
|
throws SAXException, IOException
|
|
{
|
|
boolean white;
|
|
+ String encodingName = null;
|
|
+ String standalone;
|
|
int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
|
|
|
|
// Read the version.
|
|
require ("version");
|
|
parseEq ();
|
|
- version = readLiteral (flags);
|
|
+ String version = readLiteral (flags);
|
|
if (!version.equals ("1.0")) {
|
|
error ("unsupported XML version", version, "1.0");
|
|
}
|
|
@@ -905,10 +802,8 @@
|
|
encoding = ENCODING_UTF_8;
|
|
return;
|
|
} else if (encoding != ENCODING_EXTERNAL) {
|
|
- // fatal error
|
|
- error ("unsupported ASCII-derived encoding",
|
|
- encodingName,
|
|
- "UTF-8, US-ASCII, or ISO-8859-1");
|
|
+ // used to start with a new reader ...
|
|
+ throw new EncodingException(encodingName);
|
|
}
|
|
// else fallthrough ...
|
|
// it's ASCII-ish and something other than a builtin
|
|
@@ -962,7 +857,7 @@
|
|
|
|
reader = new InputStreamReader (is, encodingName);
|
|
sourceType = INPUT_READER;
|
|
- is = null;
|
|
+ //is = null;
|
|
}
|
|
|
|
|
|
@@ -1014,24 +909,6 @@
|
|
// report (a) declaration of name, (b) lexical info (ids)
|
|
handler.doctypeDecl (doctypeName, ids [0], ids [1]);
|
|
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the doctype
|
|
- //
|
|
-
|
|
- if (doctypeName != null)
|
|
- handler.attribute ("str:name", doctypeName, false);
|
|
- if (ids [0] != null)
|
|
- handler.attribute ("str:publicId", ids [0], false);
|
|
- if (ids [1] != null)
|
|
- handler.attribute ("str:systemId", ids [1], false);
|
|
- handler.startElement ("str:doctype");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
-
|
|
// Internal subset is parsed first, if present
|
|
skipWhitespace ();
|
|
if (tryRead ('[')) {
|
|
@@ -1054,7 +931,7 @@
|
|
|
|
// Read the external subset, if any
|
|
if (ids [1] != null) {
|
|
- pushURL ("[external subset]", ids [0], ids [1], null, null, null);
|
|
+ pushURL ("[external subset]", ids [0], ids [1], null, null, null, false);
|
|
|
|
// Loop until we end up back at '>'
|
|
while (true) {
|
|
@@ -1078,20 +955,6 @@
|
|
// done dtd
|
|
handler.endDoctype ();
|
|
expandPE = false;
|
|
-
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the doctype
|
|
- //
|
|
-
|
|
- handler.endElement ("str:doctype");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
-
|
|
-
|
|
}
|
|
|
|
|
|
@@ -1191,9 +1054,10 @@
|
|
}
|
|
}
|
|
// I guess not...
|
|
- handler.attribute (aname,
|
|
- getAttributeExpandedValue (gi, aname),
|
|
- false);
|
|
+ String foo = getAttributeExpandedValue (gi, aname);
|
|
+ if ( foo != null) {
|
|
+ handler.attribute (aname, foo, false);
|
|
+ }
|
|
}
|
|
}
|
|
|
|
@@ -1224,7 +1088,7 @@
|
|
* [41] Attribute ::= Name Eq AttValue
|
|
* </pre>
|
|
* @param name The name of the attribute's element.
|
|
- * @see StructSaxDriver#attribute
|
|
+ * @see SAXDriver#attribute
|
|
*/
|
|
private void parseAttribute (String name)
|
|
throws Exception
|
|
@@ -1321,16 +1185,7 @@
|
|
char c;
|
|
|
|
while (true) {
|
|
-
|
|
- switch (currentElementContent) {
|
|
- case CONTENT_ANY:
|
|
- case CONTENT_MIXED:
|
|
- parseCharData ();
|
|
- break;
|
|
- case CONTENT_ELEMENTS:
|
|
- parseWhitespace ();
|
|
- break;
|
|
- }
|
|
+ parseCharData ();
|
|
|
|
// Handle delimiters
|
|
c = readCh ();
|
|
@@ -1425,10 +1280,10 @@
|
|
throws Exception
|
|
{
|
|
if (tryRead ("EMPTY")) {
|
|
- setElement (name, CONTENT_EMPTY, null);
|
|
+ setElement (name, CONTENT_EMPTY, null, null);
|
|
return;
|
|
} else if (tryRead ("ANY")) {
|
|
- setElement (name, CONTENT_ANY, null);
|
|
+ setElement (name, CONTENT_ANY, null, null);
|
|
return;
|
|
} else {
|
|
require ('(');
|
|
@@ -1437,10 +1292,10 @@
|
|
if (tryRead ("#PCDATA")) {
|
|
dataBufferAppend ("#PCDATA");
|
|
parseMixed ();
|
|
- setElement (name, CONTENT_MIXED, dataBufferToString ());
|
|
+ setElement (name, CONTENT_MIXED, dataBufferToString (), null);
|
|
} else {
|
|
parseElements ();
|
|
- setElement (name, CONTENT_ELEMENTS, dataBufferToString ());
|
|
+ setElement (name, CONTENT_ELEMENTS, dataBufferToString (), null);
|
|
}
|
|
}
|
|
}
|
|
@@ -1624,7 +1479,7 @@
|
|
{
|
|
String name;
|
|
int type;
|
|
- String enum = null;
|
|
+ String enum2 = null;
|
|
|
|
// Read the attribute name.
|
|
name = readNmtoken (true);
|
|
@@ -1636,12 +1491,12 @@
|
|
// Get the string of enumerated values
|
|
// if necessary.
|
|
if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
|
|
- enum = dataBufferToString ();
|
|
+ enum2 = dataBufferToString ();
|
|
}
|
|
|
|
// Read the default value.
|
|
requireWhitespace ();
|
|
- parseDefault (elementName, name, type, enum);
|
|
+ parseDefault (elementName, name, type, enum2);
|
|
}
|
|
|
|
|
|
@@ -1739,12 +1594,14 @@
|
|
String elementName,
|
|
String name,
|
|
int type,
|
|
- String enum
|
|
+ String enum2
|
|
) throws Exception
|
|
{
|
|
int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
|
|
String value = null;
|
|
- int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK;
|
|
+ int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK | LIT_DISABLE_PE;
|
|
+ // ^^^^^^^^^^^^^^
|
|
+ // added MHK 20 Mar 2002
|
|
|
|
// Note: char refs not checked here, and input not normalized,
|
|
// since it's done correctly later when we actually expand any
|
|
@@ -1769,7 +1626,7 @@
|
|
}
|
|
} else
|
|
value = readLiteral (flags);
|
|
- setAttribute (elementName, name, type, enum, value, valueType);
|
|
+ setAttribute (elementName, name, type, enum2, value, valueType);
|
|
}
|
|
|
|
|
|
@@ -1946,51 +1803,35 @@
|
|
|
|
name = readNmtoken (true);
|
|
require (';');
|
|
-// <struct>
|
|
- if (! translateExternalParsedEntities) {
|
|
-// </struct>
|
|
- switch (getEntityType (name)) {
|
|
- case ENTITY_UNDECLARED:
|
|
- error ("reference to undeclared entity", name, null);
|
|
- break;
|
|
- case ENTITY_INTERNAL:
|
|
- System.err.println("Internal");
|
|
- pushString (name, getEntityValue (name));
|
|
- break;
|
|
- case ENTITY_TEXT:
|
|
- System.err.println("Text");
|
|
- if (externalAllowed) {
|
|
- pushURL (name, getEntityPublicId (name),
|
|
- getEntitySystemId (name),
|
|
- null, null, null);
|
|
- } else {
|
|
- error ("reference to external entity in attribute value.",
|
|
- name, null);
|
|
- }
|
|
- break;
|
|
- case ENTITY_NDATA:
|
|
- System.err.println("NDATA");
|
|
- if (externalAllowed) {
|
|
- error ("unparsed entity reference in content", name, null);
|
|
- } else {
|
|
- error ("reference to external entity in attribute value.",
|
|
- name, null);
|
|
- }
|
|
- break;
|
|
- }
|
|
-// <struct>
|
|
- } else {
|
|
- //
|
|
- // Struct: updated to show the entity call
|
|
- //
|
|
- System.err.println("Entity reference : " + name);
|
|
- handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
|
|
- handler.attribute ("str:name", name, false);
|
|
- handler.startElement ("str:entity");
|
|
- handler.endElement ("str:entity");
|
|
-
|
|
- }
|
|
- // </struct>
|
|
+ switch (getEntityType (name)) {
|
|
+ case ENTITY_UNDECLARED:
|
|
+ error ("reference to undeclared entity", name, null);
|
|
+ break;
|
|
+ case ENTITY_INTERNAL:
|
|
+ pushString (name, getEntityValue (name));
|
|
+ System.err.println("Internal");
|
|
+ break;
|
|
+ case ENTITY_TEXT:
|
|
+ System.err.println("Text");
|
|
+ if (externalAllowed) {
|
|
+ pushURL (name, getEntityPublicId (name),
|
|
+ getEntitySystemId (name),
|
|
+ null, null, null, true);
|
|
+ } else {
|
|
+ error ("reference to external entity in attribute value.",
|
|
+ name, null);
|
|
+ }
|
|
+ break;
|
|
+ case ENTITY_NDATA:
|
|
+ System.err.println("NDATA");
|
|
+ if (externalAllowed) {
|
|
+ error ("unparsed entity reference in content", name, null);
|
|
+ } else {
|
|
+ error ("reference to external entity in attribute value.",
|
|
+ name, null);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
}
|
|
|
|
|
|
@@ -2027,7 +1868,7 @@
|
|
pushString (null, " ");
|
|
pushURL (name, getEntityPublicId (name),
|
|
getEntitySystemId (name),
|
|
- null, null, null);
|
|
+ null, null, null, true);
|
|
if (!inLiteral)
|
|
pushString (null, " ");
|
|
break;
|
|
@@ -2096,24 +1937,6 @@
|
|
notationName = readNmtoken (true);
|
|
setExternalDataEntity (name, ids [0], ids [1], notationName);
|
|
} else {
|
|
-
|
|
-// <struct>
|
|
- if (showDocStructure) {
|
|
-
|
|
- //
|
|
- // Struct: updated to show the external entities definitions
|
|
- //
|
|
-
|
|
- handler.attribute ("str:name", name, false);
|
|
- handler.attribute ("str:type", ids [0], false);
|
|
- handler.attribute ("str:systemId", ids [1], false);
|
|
- handler.startElement ("str:externalEntityDefinition");
|
|
- handler.endElement ("str:externalEntityDefinition");
|
|
-
|
|
- }
|
|
-// </struct>
|
|
-
|
|
-
|
|
setExternalTextEntity (name, ids [0], ids [1]);
|
|
}
|
|
}
|
|
@@ -2222,6 +2045,7 @@
|
|
|
|
// OK, the cheat didn't work; start over
|
|
// and do it by the book.
|
|
+ int closeSquareBracketCount = 0;
|
|
while (true) {
|
|
c = readCh ();
|
|
switch (c) {
|
|
@@ -2229,8 +2053,19 @@
|
|
case '&':
|
|
unread (c);
|
|
return;
|
|
- // XXX "]]>" precluded ...
|
|
+ case ']':
|
|
+ closeSquareBracketCount++;
|
|
+ dataBufferAppend(c);
|
|
+ break;
|
|
+ case '>':
|
|
+ if (closeSquareBracketCount>=2) {
|
|
+ // we've hit ']]>'
|
|
+ error ("']]>' is not allowed here");
|
|
+ break;
|
|
+ }
|
|
+ // fall-through
|
|
default:
|
|
+ closeSquareBracketCount=0;
|
|
dataBufferAppend (c);
|
|
break;
|
|
}
|
|
@@ -2486,6 +2321,9 @@
|
|
// Can't escape this normalization for attributes
|
|
case '\n':
|
|
case '\r':
|
|
+ if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
|
|
+ c = ' ';
|
|
+ break;
|
|
case '\t':
|
|
if ((flags & LIT_ATTRIBUTE) != 0)
|
|
c = ' ';
|
|
@@ -2497,7 +2335,6 @@
|
|
if (c == '#') {
|
|
if ((flags & LIT_DISABLE_CREF) != 0) {
|
|
dataBufferAppend ('&');
|
|
- dataBufferAppend ('#');
|
|
continue;
|
|
}
|
|
parseCharRef ();
|
|
@@ -2578,7 +2415,7 @@
|
|
|
|
if (tryRead ("PUBLIC")) {
|
|
requireWhitespace ();
|
|
- ids [0] = readLiteral (LIT_NORMALIZE | flags);
|
|
+ ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags);
|
|
if (inNotation) {
|
|
skipWhitespace ();
|
|
c = readCh ();
|
|
@@ -2987,30 +2824,32 @@
|
|
* [2] attribute hash table
|
|
*/
|
|
private Object []
|
|
- setElement (String name, int contentType, String contentModel)
|
|
+ setElement (String name, int contentType, String contentModel, Hashtable attributes) throws Exception
|
|
{
|
|
- Object element [] = (Object []) elementInfo.get (name);
|
|
+ Object element[] = (Object []) elementInfo.get (name);
|
|
|
|
// first <!ELEMENT ...> or <!ATTLIST ...> for this type
|
|
if (element == null) {
|
|
element = new Object [3];
|
|
element [0] = new Integer (contentType);
|
|
element [1] = contentModel;
|
|
- element [2] = new Hashtable (DEFAULT_ATTR_COUNT);
|
|
+ element [2] = attributes;
|
|
elementInfo.put (name, element);
|
|
return element;
|
|
}
|
|
|
|
// multiple <!ELEMENT ...> declarations
|
|
+ if (contentType != CONTENT_UNDECLARED) {
|
|
if (((Integer) element [0]).intValue () != CONTENT_UNDECLARED) {
|
|
- // warn ("multiple declarations for element type", name, null);
|
|
- return element;
|
|
- }
|
|
-
|
|
// <!ELEMENT ...> after associated <!ATTLIST ...>
|
|
element [0] = new Integer (contentType);
|
|
element [1] = contentModel;
|
|
+ }
|
|
|
|
+ }
|
|
+ else if (attributes != null) {
|
|
+ element[2] = attributes;
|
|
+ }
|
|
return element;
|
|
}
|
|
|
|
@@ -3023,9 +2862,9 @@
|
|
{
|
|
Object element[] = (Object[]) elementInfo.get (name);
|
|
|
|
- if (element == null)
|
|
- element = setElement (name, CONTENT_UNDECLARED, null);
|
|
- return (Hashtable) element [2];
|
|
+ if (element == null) return null;
|
|
+
|
|
+ return (Hashtable) element [2];
|
|
}
|
|
|
|
|
|
@@ -3211,6 +3050,9 @@
|
|
|
|
// Create a new hashtable if necessary.
|
|
attlist = getElementAttributes (elName);
|
|
+ if (attlist == null) {
|
|
+ attlist = new Hashtable ();
|
|
+ }
|
|
|
|
// ignore multiple attribute declarations!
|
|
if (attlist.get (name) != null) {
|
|
@@ -3224,6 +3066,9 @@
|
|
attribute [3] = enumeration;
|
|
attribute [4] = null;
|
|
attlist.put (name, attribute);
|
|
+
|
|
+ // save; but don't overwrite any existing <!ELEMENT ...>
|
|
+ setElement (elName, CONTENT_UNDECLARED, null, attlist);
|
|
}
|
|
}
|
|
|
|
@@ -3321,8 +3166,18 @@
|
|
if (entity == null) {
|
|
return null;
|
|
} else {
|
|
- return (String) entity [2];
|
|
- }
|
|
+ try {
|
|
+ String relativeURI = (String)entity [2];
|
|
+ URL baseURI = (URL)entity [5];
|
|
+ if (baseURI==null) return relativeURI;
|
|
+ URL absoluteURI = new URL( baseURI, relativeURI );
|
|
+ return absoluteURI.toString();
|
|
+ } catch (IOException err) {
|
|
+ // ignore the exception, a user entity resolver may be able
|
|
+ // to do something; if not, the error will be caught later
|
|
+ return (String)entity [2];
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
|
|
@@ -3402,12 +3257,14 @@
|
|
Object entity[];
|
|
|
|
if (entityInfo.get (eName) == null) {
|
|
- entity = new Object [5];
|
|
+ entity = new Object [6];
|
|
entity [0] = new Integer (eClass);
|
|
entity [1] = pubid;
|
|
entity [2] = sysid;
|
|
entity [3] = value;
|
|
entity [4] = nName;
|
|
+ entity [5] = (externalEntity == null ? null : externalEntity.getURL());
|
|
+ // added MHK: provides base URI for resolution
|
|
|
|
entityInfo.put (eName, entity);
|
|
}
|
|
@@ -3554,7 +3411,6 @@
|
|
while (readBufferPos >= readBufferLength) {
|
|
switch (sourceType) {
|
|
case INPUT_READER:
|
|
- case INPUT_EXTERNAL:
|
|
case INPUT_STREAM:
|
|
readDataChunk ();
|
|
while (readBufferLength < 1) {
|
|
@@ -3578,9 +3434,9 @@
|
|
line++;
|
|
column = 0;
|
|
} else {
|
|
- if (c == '<')
|
|
+ if (c == '<') {
|
|
/* favorite return to parseContent () .. NOP */ ;
|
|
- else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
|
|
+ }else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
|
|
error ("illegal XML character U+"
|
|
+ Integer.toHexString (c));
|
|
|
|
@@ -3589,7 +3445,7 @@
|
|
// are also spots in the internal subset where PE refs are fatal
|
|
// errors, hence yet another flag.
|
|
else if (c == '%' && expandPE) {
|
|
- if (peIsError)
|
|
+ if (peIsError && entityStack.size()==1)
|
|
error ("PE reference within decl in internal subset.");
|
|
parsePEReference ();
|
|
return readCh ();
|
|
@@ -3669,7 +3525,7 @@
|
|
* request an encoding explicitly, and it should also look at the
|
|
* headers with an HTTP connection.
|
|
* @param url The java.net.URL object for the entity.
|
|
- * @see StructSaxDriver#resolveEntity
|
|
+ * @see SAXDriver#resolveEntity
|
|
* @see #pushString
|
|
* @see #sourceType
|
|
* @see #pushInput
|
|
@@ -3683,7 +3539,8 @@
|
|
String systemId,
|
|
Reader reader,
|
|
InputStream stream,
|
|
- String encoding
|
|
+ String encoding,
|
|
+ boolean isAbsolute
|
|
) throws SAXException, IOException
|
|
{
|
|
URL url;
|
|
@@ -3700,28 +3557,43 @@
|
|
readBufferOverflow = -1;
|
|
is = null;
|
|
line = 1;
|
|
+ column = 0;
|
|
|
|
currentByteCount = 0;
|
|
|
|
+ if (!isAbsolute) {
|
|
// Make any system ID (URI/URL) absolute. There's one case
|
|
// where it may be null: parser was invoked without providing
|
|
// one, e.g. since the XML data came from a memory buffer.
|
|
|
|
+ try {
|
|
if (systemId != null && externalEntity != null) {
|
|
systemId = new URL (externalEntity.getURL (), systemId).toString ();
|
|
} else if (baseURI != null) {
|
|
systemId = new URL (new URL (baseURI), systemId).toString ();
|
|
// throws IOException if couldn't create new URL
|
|
}
|
|
+ } catch(java.io.IOException ioe) {
|
|
+ popInput();
|
|
+ error("Invalid URL " + systemId + "\n" + ioe.getMessage());
|
|
+ }
|
|
+ }
|
|
|
|
// See if the application wants to
|
|
// redirect the system ID and/or
|
|
// supply its own character stream.
|
|
if (reader == null && stream == null && systemId != null) {
|
|
- Object input = handler.resolveEntity (publicId, systemId);
|
|
+ Object input = null;
|
|
+ try {
|
|
+ input = handler.resolveEntity (publicId, systemId);
|
|
+ } catch (java.io.IOException ioe){
|
|
+ popInput();
|
|
+ error("Failure resolving entity " + systemId + "\n" + ioe.getMessage());
|
|
+ }
|
|
if (input != null) {
|
|
if (input instanceof String) {
|
|
systemId = (String) input;
|
|
+ isAbsolute = true;
|
|
} else if (input instanceof InputStream) {
|
|
stream = (InputStream) input;
|
|
} else if (input instanceof Reader) {
|
|
@@ -3748,20 +3620,23 @@
|
|
|
|
// Else we handle the conversion, and need to ensure
|
|
// it's done right.
|
|
+ sourceType = INPUT_STREAM;
|
|
if (stream != null) {
|
|
- sourceType = INPUT_STREAM;
|
|
is = stream;
|
|
url = null;
|
|
} else {
|
|
// We have to open our own stream to the URL.
|
|
|
|
- // Set the new status
|
|
- sourceType = INPUT_EXTERNAL;
|
|
url = new URL (systemId);
|
|
|
|
+ try {
|
|
externalEntity = url.openConnection ();
|
|
externalEntity.connect ();
|
|
is = externalEntity.getInputStream ();
|
|
+ } catch (java.io.IOException ioe){
|
|
+ popInput();
|
|
+ error("Cannot read from " + systemId + "\n" + ioe.getMessage());
|
|
+ }
|
|
}
|
|
|
|
// If we get to here, there must be
|
|
@@ -3781,7 +3656,7 @@
|
|
// application/xml;charset=something;otherAttr=...
|
|
// ... with many variants on 'something'
|
|
encoding = externalEntity.getContentType ();
|
|
- temp = encoding.indexOf ("charset");
|
|
+ temp = (encoding != null) ? encoding.indexOf ("charset") : -1;
|
|
|
|
// RFC 2376 sez MIME text defaults to ASCII, but since the
|
|
// JDK will create a MIME type out of thin air, we always
|
|
@@ -3790,7 +3665,7 @@
|
|
encoding = null; // autodetect
|
|
else {
|
|
temp = encoding.indexOf ('=', temp + 7);
|
|
- encoding = encoding.substring (temp);
|
|
+ encoding = encoding.substring (temp+1);
|
|
if ((temp = encoding.indexOf (';')) > 0)
|
|
encoding = encoding.substring (0, temp);
|
|
|
|
@@ -3817,9 +3692,39 @@
|
|
detectEncoding ();
|
|
ignoreEncoding = false;
|
|
}
|
|
+ is.mark(100);
|
|
|
|
// Read any XML or text declaration.
|
|
- tryEncodingDecl (ignoreEncoding);
|
|
+ try {
|
|
+ tryEncodingDecl (ignoreEncoding);
|
|
+ } catch (EncodingException x) {
|
|
+ encoding = x.getMessage ();
|
|
+
|
|
+ // if we don't handle the declared encoding,
|
|
+ // try letting a JVM InputStreamReader do it
|
|
+ try {
|
|
+ if (sourceType != INPUT_STREAM)
|
|
+ throw x;
|
|
+
|
|
+ is.reset ();
|
|
+ readBufferPos = 0;
|
|
+ readBufferLength = 0;
|
|
+ readBufferOverflow = -1;
|
|
+ line = 1;
|
|
+ currentByteCount = column = 0;
|
|
+
|
|
+ sourceType = INPUT_READER;
|
|
+ this.reader = new InputStreamReader (is, encoding);
|
|
+ is = null;
|
|
+
|
|
+ tryEncodingDecl (true);
|
|
+
|
|
+ } catch (IOException e) {
|
|
+ error ("unsupported text encoding",
|
|
+ encoding,
|
|
+ null);
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
|
|
@@ -3916,6 +3821,14 @@
|
|
// ff fe 00 00 UCS_4_4321 (with BOM)
|
|
}
|
|
|
|
+ // SECOND: three byte signature:
|
|
+ // look for UTF-8 byte order mark EF BB BF, allowed by XML 1.0 2nd edition
|
|
+
|
|
+ else if (tryEncoding (signature, (byte)0xef, (byte)0xbb, (byte)0xbf)) {
|
|
+ encoding = ENCODING_UTF_8;
|
|
+ is.read(); is.read(); is.read();
|
|
+ }
|
|
+
|
|
//
|
|
// SECOND: two byte encodings
|
|
// note ... with 1/14/2000 errata the XML spec identifies some
|
|
@@ -4002,6 +3915,20 @@
|
|
return ((sig [0] == b1) && (sig [1] == b2));
|
|
}
|
|
|
|
+ /**
|
|
+ * Check for a three-byte signature.
|
|
+ * <p>Looks for a UTF-8 byte-order mark.
|
|
+ * <p>Utility routine for detectEncoding ().
|
|
+ * @param sig The first four bytes read.
|
|
+ * @param b1 The first byte of the signature
|
|
+ * @param b2 The second byte of the signature
|
|
+ * @param b3 The third byte of the signature
|
|
+ * @see #detectEncoding
|
|
+ */
|
|
+ private static boolean tryEncoding (byte sig[], byte b1, byte b2, byte b3)
|
|
+ {
|
|
+ return ((sig [0] == b1) && (sig [1] == b2) && (sig [2] == b3));
|
|
+ }
|
|
|
|
/**
|
|
* This method pushes a string back onto input.
|
|
@@ -4131,25 +4058,24 @@
|
|
private void popInput ()
|
|
throws SAXException, IOException
|
|
{
|
|
+ String uri;
|
|
Object input[];
|
|
|
|
+ if (externalEntity != null)
|
|
+ uri = externalEntity.getURL ().toString ();
|
|
+ else
|
|
+ uri = baseURI;
|
|
|
|
switch (sourceType) {
|
|
|
|
- case INPUT_EXTERNAL:
|
|
- if (externalEntity != null) {
|
|
- handler.endExternalEntity (
|
|
- externalEntity.getURL ().toString ());
|
|
- }
|
|
- break;
|
|
case INPUT_STREAM:
|
|
- if (baseURI != null) {
|
|
+ if (is != null) {
|
|
handler.endExternalEntity (baseURI);
|
|
}
|
|
is.close ();
|
|
break;
|
|
case INPUT_READER:
|
|
- if (baseURI != null) {
|
|
+ if (reader != null && uri != null) {
|
|
handler.endExternalEntity (baseURI);
|
|
}
|
|
reader.close ();
|
|
@@ -4166,6 +4092,9 @@
|
|
s = (String) entityStack.pop ();
|
|
}
|
|
|
|
+ input = (Object[]) inputStack.pop ();
|
|
+ entityStack.pop ();
|
|
+
|
|
sourceType = ((Integer) input [0]).intValue ();
|
|
externalEntity = (URLConnection) input [1];
|
|
readBuffer = (char[]) input [2];
|
|
@@ -4807,40 +4736,6 @@
|
|
inCDATA = false;
|
|
|
|
symbolTable = new Object [SYMBOL_TABLE_LENGTH][];
|
|
-
|
|
-// <struct>
|
|
- showDocStructure = false;
|
|
- translateExternalParsedEntities = false;
|
|
- translateComments = false;
|
|
-
|
|
- try {
|
|
- showDocStructure = ((String)handler.getProperty("http://4xt.org/inclusions/showDocStructure")).equals("yes");
|
|
- } catch (SAXNotRecognizedException e) {
|
|
- showDocStructure = false;
|
|
-// System.err.println(e.toString());
|
|
- }
|
|
-
|
|
- if (showDocStructure) {
|
|
- translateExternalParsedEntities=true;
|
|
- translateComments=true;
|
|
- } else {
|
|
- try {
|
|
- translateExternalParsedEntities = ((String)handler.getProperty("http://4xt.org/inclusions/translateExternalParsedEntities")).equals("yes");
|
|
- } catch (SAXNotRecognizedException e) {
|
|
- translateExternalParsedEntities = false;
|
|
- }
|
|
- try {
|
|
- translateComments = ((String)handler.getProperty("http://4xt.org/inclusions/translateComments")).equals("yes");
|
|
- } catch (SAXNotRecognizedException e) {
|
|
- translateComments = false;
|
|
- }
|
|
- }
|
|
-
|
|
- System.err.println("showDocStructure : "+showDocStructure);
|
|
- System.err.println("translateExternalParsedEntities : "+translateExternalParsedEntities);
|
|
- System.err.println("translateComments : "+translateComments);
|
|
-// </struct>
|
|
-
|
|
}
|
|
|
|
|
|
@@ -4868,10 +4763,16 @@
|
|
symbolTable = null;
|
|
}
|
|
|
|
+ /* used to restart reading with some InputStreamReader */
|
|
+ static class EncodingException extends IOException
|
|
+ {
|
|
+ EncodingException (String encoding) { super (encoding); }
|
|
+ }
|
|
+
|
|
//
|
|
// The current XML handler interface.
|
|
//
|
|
- private StructSaxDriver handler;
|
|
+ private SAXDriver handler;
|
|
|
|
//
|
|
// I/O information.
|
|
@@ -4979,19 +4880,4 @@
|
|
// Utility flag: are we in CDATA? If so, whitespace isn't ignorable.
|
|
//
|
|
private boolean inCDATA;
|
|
-// <struct>
|
|
- //
|
|
- // Flags to define if we should show the document structure and if
|
|
- // we should expend the external parsed entities
|
|
- //
|
|
- private boolean showDocStructure;
|
|
- private boolean translateExternalParsedEntities;
|
|
- private boolean translateComments;
|
|
-
|
|
- String version=null;
|
|
- String encodingName = null;
|
|
- String standalone = null;
|
|
-// </struct>
|
|
-
|
|
-
|
|
}
|