saxon6/saxon-add-fixes-from-com-isl-saxon-aelfred.patch
Michal Vyskocil 4ab7455d44 Accepting request 144389 from home:mvyskocil:branches:Java:packages
- fix bnc#739498 - remove GPL covered com/icl/saxon/aelfred/XmlParser
  by a file under a more permissive license

OBS-URL: https://build.opensuse.org/request/show/144389
OBS-URL: https://build.opensuse.org/package/show/Java:packages/saxon6?expand=0&rev=4
2012-12-06 10:24:13 +00:00

1073 lines
29 KiB
Diff

--- XmlParser.java.old 2012-12-04 12:58:09.544956168 +0100
+++ XmlParser.java 2012-12-04 15:28:20.798929872 +0100
@@ -62,10 +62,12 @@
// The modification over the original source are flagged by
// <struct/> tags.
-// <struct>
-// package org.brownell.xml.aelfred2;
-import org.brownell.xml.aelfred2.*;
-// </struct>
+// Removed all <struct/> modifications and adapted the code so it can replace
+// the GPL com/icl/saxon/aelfred/XmlParser.java, which is not compatible with
+// the MPL license of the rest of the sources.
+// mvyskocil@suse.com
+
+package com.icl.saxon.aelfred;
import java.io.BufferedInputStream;
import java.io.CharConversionException;
@@ -81,27 +83,20 @@
import java.util.Stack;
import org.xml.sax.SAXException;
-// <struct>
-import org.xml.sax.SAXNotRecognizedException;
-// </struct>
-
// $Id: XmlParser.java,v 1.20 2000/05/29 12:10:24 mojo Exp $
/**
* Parse XML documents and return parse events through call-backs.
- * Use the <code>StructSaxDriver</code> class as your entry point, as all
+ * Use the <code>SAXDriver</code> class as your entry point, as all
* internal parser interfaces are subject to change.
*
* @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
* (version 1.2a with bugfixes)
* @author Updated by David Brownell &lt;david-b@pacbell.net&gt;
* @version $Date: 2000/05/29 12:10:24 $
- * @see StructSaxDriver
+ * @see SAXDriver
*/
-// <struct>
-// final class XmlParser
-final class StructXmlParser
-// </struct>
+final class XmlParser
{
// parse from buffer, avoiding slow per-character readCh()
private final static boolean USE_CHEATS = true;
@@ -121,10 +116,7 @@
* @see #parse
*/
// package private
-// <struct>
-// XmlParser ()
- StructXmlParser ()
-// </struct>
+ XmlParser ()
{
cleanupVariables ();
}
@@ -136,10 +128,7 @@
* @see #parse
*/
// package private
-// <struct>
-// final class XmlParser
-// void setHandler (SaxDriver handler)
- void setHandler (StructSaxDriver handler)
+ void setHandler (SAXDriver handler)
// </struct>
{
this.handler = handler;
@@ -194,7 +183,7 @@
handler.startDocument ();
pushURL ("[document]", basePublicId, baseURI,
- baseReader, baseInputStream, encoding);
+ baseReader, baseInputStream, encoding, false);
try {
parseDocument ();
@@ -461,6 +450,8 @@
private final static int LIT_DISABLE_EREF = 64;
// don't expand general entities, but make sure we _could_
private final static int LIT_ENTITY_CHECK = 128;
+ // literal is a public ID value
+ private final static int LIT_PUBID = 256;
//
@@ -480,7 +471,7 @@
* Report an error.
* @param message The error message.
* @param textFound The text that caused the error (or null).
- * @see StructSaxDriver#error
+ * @see SAXDriver#error
* @see #line
*/
private void error (String message, String textFound, String textExpected)
@@ -543,47 +534,13 @@
{
char c;
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the comments
- //
-
- handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
- handler.startElement ("str:document");
- }
-// </struct>
-
+ try {
parseProlog ();
-
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the document structure
- //
-
- handler.startElement ("str:body");
-
- }
-// </struct>
-
require ('<');
parseElement ();
-
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the document structure
- //
-
- handler.endElement ("str:body");
- handler.startElement ("str:epilog");
-
- }
-// </struct>
+ } catch (EOFException eofe) {
+ error("premature end of file");
+ }
try {
parseMisc (); //skip all white, PIs, and comments
@@ -593,19 +550,6 @@
return;
}
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the comments
- //
-
- handler.endElement ("str:epilog");
- handler.endElement ("str:document");
- }
-// </struct>
-
-
}
@@ -626,23 +570,7 @@
parseUntil ("--");
require ('>');
expandPE = saved;
-// <struct>
- if (! translateComments ) {
-// </struct>
- handler.comment (dataBuffer, 0, dataBufferPos);
-// <struct>
- } else {
- //
- // Struct: updated to show the comments
- //
- System.err.println("Comment : " );
- handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
- handler.startElement ("str:comment");
- handler.charData (dataBuffer, 0, dataBufferPos);
- handler.endElement ("str:comment");
-
- }
-// </struct>
+ handler.comment (dataBuffer, 0, dataBufferPos);
dataBufferPos = 0;
}
@@ -711,45 +639,12 @@
private void parseProlog ()
throws Exception
{
-
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the prolog
- //
-
- handler.startElement ("str:prolog");
- if (version != null)
- handler.attribute ("str:version", version, false);
- if (encodingName != null)
- handler.attribute ("str:encoding", encodingName, false);
- if (standalone != null)
- handler.attribute ("str:standalone", standalone, false);
- handler.startElement ("str:X-M-L-Decl");
- handler.endElement ("str:X-M-L-Decl");
-
- }
-// </struct>
-
parseMisc ();
if (tryRead ("<!DOCTYPE")) {
parseDoctypedecl ();
parseMisc ();
}
-
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the prolog
- //
-
- handler.endElement ("str:prolog");
- }
-// </struct>
-
}
@@ -775,12 +670,14 @@
throws SAXException, IOException
{
boolean white;
+ String encodingName = null;
+ String standalone;
int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
// Read the version.
require ("version");
parseEq ();
- version = readLiteral (flags);
+ String version = readLiteral (flags);
if (!version.equals ("1.0")) {
error ("unsupported XML version", version, "1.0");
}
@@ -905,10 +802,8 @@
encoding = ENCODING_UTF_8;
return;
} else if (encoding != ENCODING_EXTERNAL) {
- // fatal error
- error ("unsupported ASCII-derived encoding",
- encodingName,
- "UTF-8, US-ASCII, or ISO-8859-1");
+ // used to start with a new reader ...
+ throw new EncodingException(encodingName);
}
// else fallthrough ...
// it's ASCII-ish and something other than a builtin
@@ -962,7 +857,7 @@
reader = new InputStreamReader (is, encodingName);
sourceType = INPUT_READER;
- is = null;
+ //is = null;
}
@@ -1014,24 +909,6 @@
// report (a) declaration of name, (b) lexical info (ids)
handler.doctypeDecl (doctypeName, ids [0], ids [1]);
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the doctype
- //
-
- if (doctypeName != null)
- handler.attribute ("str:name", doctypeName, false);
- if (ids [0] != null)
- handler.attribute ("str:publicId", ids [0], false);
- if (ids [1] != null)
- handler.attribute ("str:systemId", ids [1], false);
- handler.startElement ("str:doctype");
-
- }
-// </struct>
-
// Internal subset is parsed first, if present
skipWhitespace ();
if (tryRead ('[')) {
@@ -1054,7 +931,7 @@
// Read the external subset, if any
if (ids [1] != null) {
- pushURL ("[external subset]", ids [0], ids [1], null, null, null);
+ pushURL ("[external subset]", ids [0], ids [1], null, null, null, false);
// Loop until we end up back at '>'
while (true) {
@@ -1078,20 +955,6 @@
// done dtd
handler.endDoctype ();
expandPE = false;
-
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the doctype
- //
-
- handler.endElement ("str:doctype");
-
- }
-// </struct>
-
-
}
@@ -1191,9 +1054,10 @@
}
}
// I guess not...
- handler.attribute (aname,
- getAttributeExpandedValue (gi, aname),
- false);
+ String foo = getAttributeExpandedValue (gi, aname);
+ if ( foo != null) {
+ handler.attribute (aname, foo, false);
+ }
}
}
@@ -1224,7 +1088,7 @@
* [41] Attribute ::= Name Eq AttValue
* </pre>
* @param name The name of the attribute's element.
- * @see StructSaxDriver#attribute
+ * @see SAXDriver#attribute
*/
private void parseAttribute (String name)
throws Exception
@@ -1321,16 +1185,7 @@
char c;
while (true) {
-
- switch (currentElementContent) {
- case CONTENT_ANY:
- case CONTENT_MIXED:
- parseCharData ();
- break;
- case CONTENT_ELEMENTS:
- parseWhitespace ();
- break;
- }
+ parseCharData ();
// Handle delimiters
c = readCh ();
@@ -1425,10 +1280,10 @@
throws Exception
{
if (tryRead ("EMPTY")) {
- setElement (name, CONTENT_EMPTY, null);
+ setElement (name, CONTENT_EMPTY, null, null);
return;
} else if (tryRead ("ANY")) {
- setElement (name, CONTENT_ANY, null);
+ setElement (name, CONTENT_ANY, null, null);
return;
} else {
require ('(');
@@ -1437,10 +1292,10 @@
if (tryRead ("#PCDATA")) {
dataBufferAppend ("#PCDATA");
parseMixed ();
- setElement (name, CONTENT_MIXED, dataBufferToString ());
+ setElement (name, CONTENT_MIXED, dataBufferToString (), null);
} else {
parseElements ();
- setElement (name, CONTENT_ELEMENTS, dataBufferToString ());
+ setElement (name, CONTENT_ELEMENTS, dataBufferToString (), null);
}
}
}
@@ -1624,7 +1479,7 @@
{
String name;
int type;
- String enum = null;
+ String enum2 = null;
// Read the attribute name.
name = readNmtoken (true);
@@ -1636,12 +1491,12 @@
// Get the string of enumerated values
// if necessary.
if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
- enum = dataBufferToString ();
+ enum2 = dataBufferToString ();
}
// Read the default value.
requireWhitespace ();
- parseDefault (elementName, name, type, enum);
+ parseDefault (elementName, name, type, enum2);
}
@@ -1739,12 +1594,14 @@
String elementName,
String name,
int type,
- String enum
+ String enum2
) throws Exception
{
int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
String value = null;
- int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK;
+ int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK | LIT_DISABLE_PE;
+ // ^^^^^^^^^^^^^^
+ // added MHK 20 Mar 2002
// Note: char refs not checked here, and input not normalized,
// since it's done correctly later when we actually expand any
@@ -1769,7 +1626,7 @@
}
} else
value = readLiteral (flags);
- setAttribute (elementName, name, type, enum, value, valueType);
+ setAttribute (elementName, name, type, enum2, value, valueType);
}
@@ -1946,51 +1803,35 @@
name = readNmtoken (true);
require (';');
-// <struct>
- if (! translateExternalParsedEntities) {
-// </struct>
- switch (getEntityType (name)) {
- case ENTITY_UNDECLARED:
- error ("reference to undeclared entity", name, null);
- break;
- case ENTITY_INTERNAL:
- System.err.println("Internal");
- pushString (name, getEntityValue (name));
- break;
- case ENTITY_TEXT:
- System.err.println("Text");
- if (externalAllowed) {
- pushURL (name, getEntityPublicId (name),
- getEntitySystemId (name),
- null, null, null);
- } else {
- error ("reference to external entity in attribute value.",
- name, null);
- }
- break;
- case ENTITY_NDATA:
- System.err.println("NDATA");
- if (externalAllowed) {
- error ("unparsed entity reference in content", name, null);
- } else {
- error ("reference to external entity in attribute value.",
- name, null);
- }
- break;
- }
-// <struct>
- } else {
- //
- // Struct: updated to show the entity call
- //
- System.err.println("Entity reference : " + name);
- handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
- handler.attribute ("str:name", name, false);
- handler.startElement ("str:entity");
- handler.endElement ("str:entity");
-
- }
- // </struct>
+ switch (getEntityType (name)) {
+ case ENTITY_UNDECLARED:
+ error ("reference to undeclared entity", name, null);
+ break;
+ case ENTITY_INTERNAL:
+ pushString (name, getEntityValue (name));
+ System.err.println("Internal");
+ break;
+ case ENTITY_TEXT:
+ System.err.println("Text");
+ if (externalAllowed) {
+ pushURL (name, getEntityPublicId (name),
+ getEntitySystemId (name),
+ null, null, null, true);
+ } else {
+ error ("reference to external entity in attribute value.",
+ name, null);
+ }
+ break;
+ case ENTITY_NDATA:
+ System.err.println("NDATA");
+ if (externalAllowed) {
+ error ("unparsed entity reference in content", name, null);
+ } else {
+ error ("reference to external entity in attribute value.",
+ name, null);
+ }
+ break;
+ }
}
@@ -2027,7 +1868,7 @@
pushString (null, " ");
pushURL (name, getEntityPublicId (name),
getEntitySystemId (name),
- null, null, null);
+ null, null, null, true);
if (!inLiteral)
pushString (null, " ");
break;
@@ -2096,24 +1937,6 @@
notationName = readNmtoken (true);
setExternalDataEntity (name, ids [0], ids [1], notationName);
} else {
-
-// <struct>
- if (showDocStructure) {
-
- //
- // Struct: updated to show the external entities definitions
- //
-
- handler.attribute ("str:name", name, false);
- handler.attribute ("str:type", ids [0], false);
- handler.attribute ("str:systemId", ids [1], false);
- handler.startElement ("str:externalEntityDefinition");
- handler.endElement ("str:externalEntityDefinition");
-
- }
-// </struct>
-
-
setExternalTextEntity (name, ids [0], ids [1]);
}
}
@@ -2222,6 +2045,7 @@
// OK, the cheat didn't work; start over
// and do it by the book.
+ int closeSquareBracketCount = 0;
while (true) {
c = readCh ();
switch (c) {
@@ -2229,8 +2053,19 @@
case '&':
unread (c);
return;
- // XXX "]]>" precluded ...
+ case ']':
+ closeSquareBracketCount++;
+ dataBufferAppend(c);
+ break;
+ case '>':
+ if (closeSquareBracketCount>=2) {
+ // we've hit ']]>'
+ error ("']]>' is not allowed here");
+ break;
+ }
+ // fall-through
default:
+ closeSquareBracketCount=0;
dataBufferAppend (c);
break;
}
@@ -2486,6 +2321,9 @@
// Can't escape this normalization for attributes
case '\n':
case '\r':
+ if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
+ c = ' ';
+ break;
case '\t':
if ((flags & LIT_ATTRIBUTE) != 0)
c = ' ';
@@ -2497,7 +2335,6 @@
if (c == '#') {
if ((flags & LIT_DISABLE_CREF) != 0) {
dataBufferAppend ('&');
- dataBufferAppend ('#');
continue;
}
parseCharRef ();
@@ -2578,7 +2415,7 @@
if (tryRead ("PUBLIC")) {
requireWhitespace ();
- ids [0] = readLiteral (LIT_NORMALIZE | flags);
+ ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags);
if (inNotation) {
skipWhitespace ();
c = readCh ();
@@ -2987,30 +2824,32 @@
* [2] attribute hash table
*/
private Object []
- setElement (String name, int contentType, String contentModel)
+ setElement (String name, int contentType, String contentModel, Hashtable attributes) throws Exception
{
- Object element [] = (Object []) elementInfo.get (name);
+ Object element[] = (Object []) elementInfo.get (name);
// first <!ELEMENT ...> or <!ATTLIST ...> for this type
if (element == null) {
element = new Object [3];
element [0] = new Integer (contentType);
element [1] = contentModel;
- element [2] = new Hashtable (DEFAULT_ATTR_COUNT);
+ element [2] = attributes;
elementInfo.put (name, element);
return element;
}
// multiple <!ELEMENT ...> declarations
+ if (contentType != CONTENT_UNDECLARED) {
if (((Integer) element [0]).intValue () != CONTENT_UNDECLARED) {
- // warn ("multiple declarations for element type", name, null);
- return element;
- }
-
// <!ELEMENT ...> after associated <!ATTLIST ...>
element [0] = new Integer (contentType);
element [1] = contentModel;
+ }
+ }
+ else if (attributes != null) {
+ element[2] = attributes;
+ }
return element;
}
@@ -3023,9 +2862,9 @@
{
Object element[] = (Object[]) elementInfo.get (name);
- if (element == null)
- element = setElement (name, CONTENT_UNDECLARED, null);
- return (Hashtable) element [2];
+ if (element == null) return null;
+
+ return (Hashtable) element [2];
}
@@ -3211,6 +3050,9 @@
// Create a new hashtable if necessary.
attlist = getElementAttributes (elName);
+ if (attlist == null) {
+ attlist = new Hashtable ();
+ }
// ignore multiple attribute declarations!
if (attlist.get (name) != null) {
@@ -3224,6 +3066,9 @@
attribute [3] = enumeration;
attribute [4] = null;
attlist.put (name, attribute);
+
+ // save; but don't overwrite any existing <!ELEMENT ...>
+ setElement (elName, CONTENT_UNDECLARED, null, attlist);
}
}
@@ -3321,8 +3166,18 @@
if (entity == null) {
return null;
} else {
- return (String) entity [2];
- }
+ try {
+ String relativeURI = (String)entity [2];
+ URL baseURI = (URL)entity [5];
+ if (baseURI==null) return relativeURI;
+ URL absoluteURI = new URL( baseURI, relativeURI );
+ return absoluteURI.toString();
+ } catch (IOException err) {
+ // ignore the exception, a user entity resolver may be able
+ // to do something; if not, the error will be caught later
+ return (String)entity [2];
+ }
+ }
}
@@ -3402,12 +3257,14 @@
Object entity[];
if (entityInfo.get (eName) == null) {
- entity = new Object [5];
+ entity = new Object [6];
entity [0] = new Integer (eClass);
entity [1] = pubid;
entity [2] = sysid;
entity [3] = value;
entity [4] = nName;
+ entity [5] = (externalEntity == null ? null : externalEntity.getURL());
+ // added MHK: provides base URI for resolution
entityInfo.put (eName, entity);
}
@@ -3554,7 +3411,6 @@
while (readBufferPos >= readBufferLength) {
switch (sourceType) {
case INPUT_READER:
- case INPUT_EXTERNAL:
case INPUT_STREAM:
readDataChunk ();
while (readBufferLength < 1) {
@@ -3578,9 +3434,9 @@
line++;
column = 0;
} else {
- if (c == '<')
+ if (c == '<') {
/* favorite return to parseContent () .. NOP */ ;
- else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
+ }else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
error ("illegal XML character U+"
+ Integer.toHexString (c));
@@ -3589,7 +3445,7 @@
// are also spots in the internal subset where PE refs are fatal
// errors, hence yet another flag.
else if (c == '%' && expandPE) {
- if (peIsError)
+ if (peIsError && entityStack.size()==1)
error ("PE reference within decl in internal subset.");
parsePEReference ();
return readCh ();
@@ -3669,7 +3525,7 @@
* request an encoding explicitly, and it should also look at the
* headers with an HTTP connection.
* @param url The java.net.URL object for the entity.
- * @see StructSaxDriver#resolveEntity
+ * @see SAXDriver#resolveEntity
* @see #pushString
* @see #sourceType
* @see #pushInput
@@ -3683,7 +3539,8 @@
String systemId,
Reader reader,
InputStream stream,
- String encoding
+ String encoding,
+ boolean isAbsolute
) throws SAXException, IOException
{
URL url;
@@ -3700,28 +3557,43 @@
readBufferOverflow = -1;
is = null;
line = 1;
+ column = 0;
currentByteCount = 0;
+ if (!isAbsolute) {
// Make any system ID (URI/URL) absolute. There's one case
// where it may be null: parser was invoked without providing
// one, e.g. since the XML data came from a memory buffer.
+ try {
if (systemId != null && externalEntity != null) {
systemId = new URL (externalEntity.getURL (), systemId).toString ();
} else if (baseURI != null) {
systemId = new URL (new URL (baseURI), systemId).toString ();
// throws IOException if couldn't create new URL
}
+ } catch(java.io.IOException ioe) {
+ popInput();
+ error("Invalid URL " + systemId + "\n" + ioe.getMessage());
+ }
+ }
// See if the application wants to
// redirect the system ID and/or
// supply its own character stream.
if (reader == null && stream == null && systemId != null) {
- Object input = handler.resolveEntity (publicId, systemId);
+ Object input = null;
+ try {
+ input = handler.resolveEntity (publicId, systemId);
+ } catch (java.io.IOException ioe){
+ popInput();
+ error("Failure resolving entity " + systemId + "\n" + ioe.getMessage());
+ }
if (input != null) {
if (input instanceof String) {
systemId = (String) input;
+ isAbsolute = true;
} else if (input instanceof InputStream) {
stream = (InputStream) input;
} else if (input instanceof Reader) {
@@ -3748,20 +3620,23 @@
// Else we handle the conversion, and need to ensure
// it's done right.
+ sourceType = INPUT_STREAM;
if (stream != null) {
- sourceType = INPUT_STREAM;
is = stream;
url = null;
} else {
// We have to open our own stream to the URL.
- // Set the new status
- sourceType = INPUT_EXTERNAL;
url = new URL (systemId);
+ try {
externalEntity = url.openConnection ();
externalEntity.connect ();
is = externalEntity.getInputStream ();
+ } catch (java.io.IOException ioe){
+ popInput();
+ error("Cannot read from " + systemId + "\n" + ioe.getMessage());
+ }
}
// If we get to here, there must be
@@ -3781,7 +3656,7 @@
// application/xml;charset=something;otherAttr=...
// ... with many variants on 'something'
encoding = externalEntity.getContentType ();
- temp = encoding.indexOf ("charset");
+ temp = (encoding != null) ? encoding.indexOf ("charset") : -1;
// RFC 2376 sez MIME text defaults to ASCII, but since the
// JDK will create a MIME type out of thin air, we always
@@ -3790,7 +3665,7 @@
encoding = null; // autodetect
else {
temp = encoding.indexOf ('=', temp + 7);
- encoding = encoding.substring (temp);
+ encoding = encoding.substring (temp+1);
if ((temp = encoding.indexOf (';')) > 0)
encoding = encoding.substring (0, temp);
@@ -3817,9 +3692,39 @@
detectEncoding ();
ignoreEncoding = false;
}
+ is.mark(100);
// Read any XML or text declaration.
- tryEncodingDecl (ignoreEncoding);
+ try {
+ tryEncodingDecl (ignoreEncoding);
+ } catch (EncodingException x) {
+ encoding = x.getMessage ();
+
+ // if we don't handle the declared encoding,
+ // try letting a JVM InputStreamReader do it
+ try {
+ if (sourceType != INPUT_STREAM)
+ throw x;
+
+ is.reset ();
+ readBufferPos = 0;
+ readBufferLength = 0;
+ readBufferOverflow = -1;
+ line = 1;
+ currentByteCount = column = 0;
+
+ sourceType = INPUT_READER;
+ this.reader = new InputStreamReader (is, encoding);
+ is = null;
+
+ tryEncodingDecl (true);
+
+ } catch (IOException e) {
+ error ("unsupported text encoding",
+ encoding,
+ null);
+ }
+ }
}
@@ -3916,6 +3821,14 @@
// ff fe 00 00 UCS_4_4321 (with BOM)
}
+ // SECOND: three byte signature:
+ // look for UTF-8 byte order mark EF BB BF, allowed by XML 1.0 2nd edition
+
+ else if (tryEncoding (signature, (byte)0xef, (byte)0xbb, (byte)0xbf)) {
+ encoding = ENCODING_UTF_8;
+ is.read(); is.read(); is.read();
+ }
+
//
// SECOND: two byte encodings
// note ... with 1/14/2000 errata the XML spec identifies some
@@ -4002,6 +3915,20 @@
return ((sig [0] == b1) && (sig [1] == b2));
}
+ /**
+ * Check for a three-byte signature.
+ * <p>Looks for a UTF-8 byte-order mark.
+ * <p>Utility routine for detectEncoding ().
+ * @param sig The first four bytes read.
+ * @param b1 The first byte of the signature
+ * @param b2 The second byte of the signature
+ * @param b3 The third byte of the signature
+ * @see #detectEncoding
+ */
+ private static boolean tryEncoding (byte sig[], byte b1, byte b2, byte b3)
+ {
+ return ((sig [0] == b1) && (sig [1] == b2) && (sig [2] == b3));
+ }
/**
* This method pushes a string back onto input.
@@ -4131,25 +4058,24 @@
private void popInput ()
throws SAXException, IOException
{
+ String uri;
Object input[];
+ if (externalEntity != null)
+ uri = externalEntity.getURL ().toString ();
+ else
+ uri = baseURI;
switch (sourceType) {
- case INPUT_EXTERNAL:
- if (externalEntity != null) {
- handler.endExternalEntity (
- externalEntity.getURL ().toString ());
- }
- break;
case INPUT_STREAM:
- if (baseURI != null) {
+ if (is != null) {
handler.endExternalEntity (baseURI);
}
is.close ();
break;
case INPUT_READER:
- if (baseURI != null) {
+ if (reader != null && uri != null) {
handler.endExternalEntity (baseURI);
}
reader.close ();
@@ -4166,6 +4092,9 @@
s = (String) entityStack.pop ();
}
+ input = (Object[]) inputStack.pop ();
+ entityStack.pop ();
+
sourceType = ((Integer) input [0]).intValue ();
externalEntity = (URLConnection) input [1];
readBuffer = (char[]) input [2];
@@ -4807,40 +4736,6 @@
inCDATA = false;
symbolTable = new Object [SYMBOL_TABLE_LENGTH][];
-
-// <struct>
- showDocStructure = false;
- translateExternalParsedEntities = false;
- translateComments = false;
-
- try {
- showDocStructure = ((String)handler.getProperty("http://4xt.org/inclusions/showDocStructure")).equals("yes");
- } catch (SAXNotRecognizedException e) {
- showDocStructure = false;
-// System.err.println(e.toString());
- }
-
- if (showDocStructure) {
- translateExternalParsedEntities=true;
- translateComments=true;
- } else {
- try {
- translateExternalParsedEntities = ((String)handler.getProperty("http://4xt.org/inclusions/translateExternalParsedEntities")).equals("yes");
- } catch (SAXNotRecognizedException e) {
- translateExternalParsedEntities = false;
- }
- try {
- translateComments = ((String)handler.getProperty("http://4xt.org/inclusions/translateComments")).equals("yes");
- } catch (SAXNotRecognizedException e) {
- translateComments = false;
- }
- }
-
- System.err.println("showDocStructure : "+showDocStructure);
- System.err.println("translateExternalParsedEntities : "+translateExternalParsedEntities);
- System.err.println("translateComments : "+translateComments);
-// </struct>
-
}
@@ -4868,10 +4763,16 @@
symbolTable = null;
}
+ /* used to restart reading with some InputStreamReader */
+ static class EncodingException extends IOException
+ {
+ EncodingException (String encoding) { super (encoding); }
+ }
+
//
// The current XML handler interface.
//
- private StructSaxDriver handler;
+ private SAXDriver handler;
//
// I/O information.
@@ -4979,19 +4880,4 @@
// Utility flag: are we in CDATA? If so, whitespace isn't ignorable.
//
private boolean inCDATA;
-// <struct>
- //
- // Flags to define if we should show the document structure and if
- // we should expend the external parsed entities
- //
- private boolean showDocStructure;
- private boolean translateExternalParsedEntities;
- private boolean translateComments;
-
- String version=null;
- String encodingName = null;
- String standalone = null;
-// </struct>
-
-
}