/*
* Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
*
* This software is open source.
* See the bottom of this file for the licence.
*/
package org.dom4j.io;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.dom4j.Attribute;
import org.dom4j.CDATA;
import org.dom4j.Comment;
import org.dom4j.Document;
import org.dom4j.DocumentType;
import org.dom4j.Element;
import org.dom4j.Entity;
import org.dom4j.Namespace;
import org.dom4j.Node;
import org.dom4j.ProcessingInstruction;
import org.dom4j.Text;
import org.dom4j.tree.NamespaceStack;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.XMLFilterImpl;
import static org.dom4j.util.StringUtils.endsWithWhitespace;
import static org.dom4j.util.StringUtils.startsWithWhitespace;
/**
* <p>
* <code>XMLWriter</code> takes a DOM4J tree and formats it to a stream as
* XML. It can also take SAX events too so can be used by SAX clients as this
* object implements the {@link org.xml.sax.ContentHandler}and {@link
* LexicalHandler} interfaces. as well. This formatter performs typical document
* formatting. The XML declaration and processing instructions are always on
* their own lines. An {@link OutputFormat}object can be used to define how
* whitespace is handled when printing and allows various configuration options,
* such as to allow suppression of the XML declaration, the encoding declaration
* or whether empty documents are collapsed.
* </p>
*
* <p>
* There are <code>write(...)</code> methods to print any of the standard
* DOM4J classes, including <code>Document</code> and <code>Element</code>,
* to either a <code>Writer</code> or an <code>OutputStream</code>.
* Warning: using your own <code>Writer</code> may cause the writer's
* preferred character encoding to be ignored. If you use encodings other than
* UTF8, we recommend using the method that takes an OutputStream instead.
* </p>
*
* @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
* @author Joseph Bowbeer
* @version $Revision: 1.83 $
*/
public class XMLWriter extends XMLFilterImpl implements LexicalHandler {
private static final String PAD_TEXT = " ";
protected static final String[] LEXICAL_HANDLER_NAMES = {
"http://xml.org/sax/properties/lexical-handler",
"http://xml.org/sax/handlers/LexicalHandler"};
protected static final OutputFormat DEFAULT_FORMAT = new OutputFormat();
/** Should entityRefs by resolved when writing ? */
private boolean resolveEntityRefs = true;
/**
* Stores the last type of node written so algorithms can refer to the
* previous node type
*/
protected int lastOutputNodeType;
/**
* Stores if the last written element node was a closing tag or an opening
* tag.
*/
private boolean lastElementClosed = false;
/** Stores the xml:space attribute value of preserve for whitespace flag */
protected boolean preserve = false;
/** The Writer used to output to */
protected Writer writer;
/** The Stack of namespaceStack written so far */
private NamespaceStack namespaceStack = new NamespaceStack();
/** The format used by this writer */
private OutputFormat format;
/** whether we should escape text */
private boolean escapeText = true;
/**
* The initial number of indentations (so you can print a whole document
* indented, if you like)
*/
private int indentLevel = 0;
/** buffer used when escaping strings */
private StringBuffer buffer = new StringBuffer();
/**
* whether we have added characters before from the same chunk of characters
*/
private boolean charsAdded = false;
private char lastChar;
/** Whether a flush should occur after writing a document */
private boolean autoFlush;
/** Lexical handler we should delegate to */
private LexicalHandler lexicalHandler;
/**
* Whether comments should appear inside DTD declarations - defaults to
* false
*/
private boolean showCommentsInDTDs;
/** Is the writer curerntly inside a DTD definition? */
private boolean inDTD;
/** The namespaces used for the current element when consuming SAX events */
private Map<String, String> namespacesMap;
/**
* what is the maximum allowed character code such as 127 in US-ASCII (7
* bit) or 255 in ISO- (8 bit) or -1 to not escape any characters (other
* than the special XML characters like < > &)
*/
private int maximumAllowedCharacter;
public XMLWriter(Writer writer) {
this(writer, DEFAULT_FORMAT);
}
public XMLWriter(Writer writer, OutputFormat format) {
this.writer = writer;
this.format = format;
namespaceStack.push(Namespace.NO_NAMESPACE);
}
public XMLWriter() {
this.format = DEFAULT_FORMAT;
this.writer = new BufferedWriter(new OutputStreamWriter(System.out));
this.autoFlush = true;
namespaceStack.push(Namespace.NO_NAMESPACE);
}
public XMLWriter(OutputStream out) throws UnsupportedEncodingException {
this.format = DEFAULT_FORMAT;
this.writer = createWriter(out, format.getEncoding());
this.autoFlush = true;
namespaceStack.push(Namespace.NO_NAMESPACE);
}
public XMLWriter(OutputStream out, OutputFormat format)
throws UnsupportedEncodingException {
this.format = format;
this.writer = createWriter(out, format.getEncoding());
this.autoFlush = true;
namespaceStack.push(Namespace.NO_NAMESPACE);
}
public XMLWriter(OutputFormat format) throws UnsupportedEncodingException {
this.format = format;
this.writer = createWriter(System.out, format.getEncoding());
this.autoFlush = true;
namespaceStack.push(Namespace.NO_NAMESPACE);
}
public void setWriter(Writer writer) {
this.writer = writer;
this.autoFlush = false;
}
public void setOutputStream(OutputStream out)
throws UnsupportedEncodingException {
this.writer = createWriter(out, format.getEncoding());
this.autoFlush = true;
}
/**
* DOCUMENT ME!
*
* @return true if text thats output should be escaped. This is enabled by
* default. It could be disabled if the output format is textual,
* like in XSLT where we can have xml, html or text output.
*/
public boolean isEscapeText() {
return escapeText;
}
/**
* Sets whether text output should be escaped or not. This is enabled by
* default. It could be disabled if the output format is textual, like in
* XSLT where we can have xml, html or text output.
*
* @param escapeText
* DOCUMENT ME!
*/
public void setEscapeText(boolean escapeText) {
this.escapeText = escapeText;
}
/**
* Set the initial indentation level. This can be used to output a document
* (or, more likely, an element) starting at a given indent level, so it's
* not always flush against the left margin. Default: 0
*
* @param indentLevel
* the number of indents to start with
*/
public void setIndentLevel(int indentLevel) {
this.indentLevel = indentLevel;
}
/**
* Returns the maximum allowed character code that should be allowed
* unescaped which defaults to 127 in US-A