diff options
Diffstat (limited to 'src/com/softsynth/util/XMLReader.java')
-rw-r--r-- | src/com/softsynth/util/XMLReader.java | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/src/com/softsynth/util/XMLReader.java b/src/com/softsynth/util/XMLReader.java new file mode 100644 index 0000000..181097c --- /dev/null +++ b/src/com/softsynth/util/XMLReader.java @@ -0,0 +1,328 @@ +/* + * Copyright 1997 Phil Burk, Mobileer Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.softsynth.util; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.io.StreamCorruptedException; +import java.util.Hashtable; + +/** + * Parse an XML stream using a simple State Machine XMLReader does not buffer the input stream so + * you may want to do that yourself using a BufferedInputStream. + * + * @author (C) 1997 Phil Burk + * @see XMLListener + * @see XMLPrinter + */ + +public class XMLReader extends PushbackInputStream { + XMLListener listener; + final static int IDLE = 0; + final static int INTAG = 0; + static int depth = 0; + + final static int STATE_TOP = 0; + final static int STATE_TAG_NAME = 1; + final static int STATE_TAG_FIND_ANGLE = 2; + final static int STATE_TAG_ATTR_NAME = 3; + final static int STATE_TAG_FIND_EQUAL = 4; + final static int STATE_TAG_FIND_QUOTE = 5; + final static int STATE_TAG_ATTR_VALUE = 6; + final static int STATE_CONTENT = 7; + final static int STATE_CHECK_END = 8; + final static int STATE_TAG_SKIP = 9; + + public void setXMLListener(XMLListener listener) { + this.listener = listener; + } + + public XMLReader(InputStream stream) { + super(stream); + } + + /** + * Read a unicode character from a UTF8 stream. + * + * @throws IOException + */ + private int readChar() throws IOException { + int c = read(); + if (c < 0) + return c; // EOF + else if (c < 128) + return c; // regular ASCII char + // We are probably starting a multi-byte character. + { + byte[] bar = null; + if (c < 0xE0) + bar = new byte[2]; + else if (c < 0xF0) + bar = new byte[3]; + else if (c < 0xF8) + bar = new byte[4]; + else if (c < 0xFC) + bar = new byte[5]; + else if (c < 0xFE) + bar = new byte[6]; + bar[0] = (byte) c; + // Gather the rest of the bytes used to encode this character. + for (int i = 1; i < bar.length; i++) { + c = read(); + if ((c & 0xc0) != 0x80) + throw new IOException("invalid UTF8 continuation " + Integer.toHexString(c)); + bar[i] = (byte) c; + } + return new String(bar, "UTF8").charAt(0); + } + } + + /***************************************************************** + */ + public void parse() throws IOException { + int i; + char c; + + while (true) { + i = readChar(); + if (i < 0) + break; // got end of file + c = (char) i; + + if (c == '<') { + parseElement(); + } else if (!Character.isWhitespace(c)) { + throw new StreamCorruptedException( + "Unexpected character. This doesn't look like an XML file!"); + } + } + } + + String charToString(char c) { + String s; + switch (c) { + case '\r': + s = "\\r"; + break; + case '\n': + s = "\\n"; + break; + case '\t': + s = "\\t"; + break; + default: + if (Character.isWhitespace(c)) + s = " "; + else + s = "" + c; + break; + } + return s; + } + + boolean isStringWhite(String s) { + if (s == null) + return true; + int len = s.length(); + for (int i = 0; i < len; i++) { + if (!Character.isWhitespace(s.charAt(i))) { + return false; + } + } + return true; + } + + /***************************************************************** + */ + void parseElement() throws IOException { + int state = STATE_TAG_NAME; + int i; + char c; + String tagName = ""; + String name = null, value = null; + boolean ifEmpty = false; + boolean endTag = false; + boolean done = false; + boolean skipWhiteSpace = true; + char endQuote = '"'; // may also be single quote + String content = null; + Hashtable attributes = new Hashtable(); + + // System.out.println("\nparseElement() ---------- " + depth++); + while (!done) { + do { + i = readChar(); + if (i < 0) + throw new EOFException("EOF inside element!"); + c = (char) i; + } while (skipWhiteSpace && Character.isWhitespace(c)); + skipWhiteSpace = false; + + // System.out.print("(" + charToString(c) + "," + state + ")" ); + + switch (state) { + + case STATE_TAG_NAME: + if (Character.isWhitespace(c)) { + skipWhiteSpace = true; + state = STATE_TAG_FIND_ANGLE; + } else if (c == '/') // this tag has no matching end tag + { + ifEmpty = true; + state = STATE_TAG_FIND_ANGLE; + } else if (c == '>') // end of tag + { + if (endTag) { + listener.endElement(tagName); + done = true; + } else { + listener.beginElement(tagName, attributes, ifEmpty); + state = STATE_CONTENT; + } + } else if (c == '?') { + state = STATE_TAG_SKIP; // got version stuff so skip to end + } else if (c == '!') // FIXME - parse for "--" + { + state = STATE_TAG_SKIP; // got comment + } else { + tagName += c; + } + break; + + case STATE_TAG_SKIP: + if (c == '>') { + done = true; + } + break; + + case STATE_TAG_FIND_ANGLE: + if (c == '/') // this tag has no matching end tag + { + ifEmpty = true; + } else if (c == '>') { + if (endTag) { + listener.endElement(tagName); + done = true; + } else { + listener.beginElement(tagName, attributes, ifEmpty); + state = STATE_CONTENT; + done = ifEmpty; + } + } else { + state = STATE_TAG_ATTR_NAME; + name = "" + c; + } + break; + + case STATE_TAG_ATTR_NAME: + if (Character.isWhitespace(c)) { + skipWhiteSpace = true; + state = STATE_TAG_FIND_EQUAL; + } else if (c == '=') { + skipWhiteSpace = true; + state = STATE_TAG_FIND_QUOTE; + } else { + name += c; + } + break; + + case STATE_TAG_FIND_EQUAL: + if (c == '=') { + skipWhiteSpace = true; + state = STATE_TAG_FIND_QUOTE; + } else { + throw new StreamCorruptedException("Found " + charToString(c) + + ", expected =."); + } + break; + + case STATE_TAG_FIND_QUOTE: + if (c == '"') { + state = STATE_TAG_ATTR_VALUE; + value = ""; + endQuote = '"'; + } else if (c == '\'') { + state = STATE_TAG_ATTR_VALUE; + value = ""; + endQuote = '\''; + } else { + throw new StreamCorruptedException("Found " + charToString(c) + + ", expected '\"'."); + } + break; + + case STATE_TAG_ATTR_VALUE: + if (c == endQuote) { + attributes.put(name, value); + // System.out.println("\ngot " + name + " = " + value ); + skipWhiteSpace = true; + state = STATE_TAG_FIND_ANGLE; + } else { + value += c; + } + break; + + case STATE_CONTENT: + if (c == '<') { + state = STATE_CHECK_END; + if (!isStringWhite(content)) { + String unescaped = com.softsynth.util.XMLTools.unescapeText(content); + listener.foundContent(unescaped); + } + content = null; + } else { + if (content == null) + content = ""; + content += c; + } + break; + + case STATE_CHECK_END: + if (c == '/') { + endTag = true; + state = STATE_TAG_NAME; + tagName = ""; + } else { + unread(c); + parseElement(); + state = STATE_CONTENT; + } + break; + } + } + // System.out.println("\nparseElement: returns, " + --depth ); + } + + /** + * Get a single attribute from the Hashtable. Use the default if not found. + */ + public static int getAttribute(Hashtable attributes, String key, int defaultValue) { + String s = (String) attributes.get(key); + return (s == null) ? defaultValue : Integer.parseInt(s); + } + + /** + * Get a single attribute from the Hashtable. Use the default if not found. + */ + public static double getAttribute(Hashtable attributes, String key, double defaultValue) { + String s = (String) attributes.get(key); + return (s == null) ? defaultValue : Double.valueOf(s).doubleValue(); + } + +} |