/*
* xmlsave.c: Implementation of the document serializer
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
#define IN_LIBXML
#include "libxml.h"
#include <string.h>
#include <libxml/xmlmemory.h>
#include <libxml/parserInternals.h>
#include <libxml/tree.h>
#include <libxml/xmlsave.h>
/* Sizes the indent[] buffer of struct _xmlSaveCtxt, declared with MAX_INDENT + 1 bytes. */
#define MAX_INDENT 60
#include <libxml/HTMLtree.h>
#include "buf.h"
#include "enc.h"
#include "save.h"
/************************************************************************
* *
* XHTML detection *
* *
************************************************************************/
/*
 * Public and system identifiers of the XHTML 1.0 Strict, Frameset and
 * Transitional DTDs; these are the six strings xmlIsXHTML() compares its
 * arguments against. XHTML_NS_NAME holds the XHTML namespace URI.
 */
#define XHTML_STRICT_PUBLIC_ID BAD_CAST \
"-//W3C//DTD XHTML 1.0 Strict//EN"
#define XHTML_STRICT_SYSTEM_ID BAD_CAST \
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
#define XHTML_FRAME_PUBLIC_ID BAD_CAST \
"-//W3C//DTD XHTML 1.0 Frameset//EN"
#define XHTML_FRAME_SYSTEM_ID BAD_CAST \
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"
#define XHTML_TRANS_PUBLIC_ID BAD_CAST \
"-//W3C//DTD XHTML 1.0 Transitional//EN"
#define XHTML_TRANS_SYSTEM_ID BAD_CAST \
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
#define XHTML_NS_NAME BAD_CAST "http://www.w3.org/1999/xhtml"
/**
 * xmlIsXHTML:
 * @systemID: the system identifier, may be NULL
 * @publicID: the public identifier, may be NULL
 *
 * Check whether a pair of DTD identifiers designates one of the XHTML 1.0
 * DTDs (Strict, Frameset or Transitional). The public identifier is
 * examined first, then the system identifier; a match on either one is
 * sufficient.
 *
 * Returns 1 if one of the identifiers matches an XHTML 1.0 DTD, 0 if
 * neither matches, and -1 if both identifiers are NULL
 */
int
xmlIsXHTML(const xmlChar *systemID, const xmlChar *publicID) {
    /* Nothing to look at: reported as an error rather than "not XHTML". */
    if ((systemID == NULL) && (publicID == NULL))
        return(-1);

    if ((publicID != NULL) &&
        (xmlStrEqual(publicID, XHTML_STRICT_PUBLIC_ID) ||
         xmlStrEqual(publicID, XHTML_FRAME_PUBLIC_ID) ||
         xmlStrEqual(publicID, XHTML_TRANS_PUBLIC_ID)))
        return(1);

    if ((systemID != NULL) &&
        (xmlStrEqual(systemID, XHTML_STRICT_SYSTEM_ID) ||
         xmlStrEqual(systemID, XHTML_FRAME_SYSTEM_ID) ||
         xmlStrEqual(systemID, XHTML_TRANS_SYSTEM_ID)))
        return(1);

    return(0);
}
#ifdef LIBXML_OUTPUT_ENABLED
/*
 * TODO: marker for unimplemented code paths. Expands to a call that
 * reports "Unimplemented block" together with the current file and line
 * through xmlGenericError. The expansion already ends with a semicolon,
 * so a use of TODO is a complete statement by itself.
 */
#define TODO \
xmlGenericError(xmlGenericErrorContext, \
"Unimplemented block at %s:%d\n", \
__FILE__, __LINE__);
/*
 * State carried through a save operation.
 * NOTE(review): apart from the fields that already carried a comment, the
 * descriptions below are inferred from field names and types only --
 * confirm against the code that creates and uses the context.
 */
struct _xmlSaveCtxt {
void *_private;
int type;
int fd;
const xmlChar *filename;
const xmlChar *encoding; /* presumably the name of the output encoding -- confirm */
xmlCharEncodingHandlerPtr handler; /* presumably the converter for @encoding -- confirm */
xmlOutputBufferPtr buf; /* presumably where serialized output is written -- confirm */
xmlDocPtr doc;
int options; /* presumably a set of save option flags -- confirm */
int level; /* presumably the current nesting depth -- confirm */
int format;
char indent[MAX_INDENT + 1]; /* array for indenting output */
int indent_nr;
int indent_size;
xmlCharEncodingOutputFunc escape; /* used for element content */
xmlCharEncodingOutputFunc escapeAttr;/* used for attribute content */
};
/************************************************************************
* *
* Output error handlers *
* *
************************************************************************/
/**
 * xmlSaveErrMemory:
 * @extra: extra information
 *
 * Handle an out of memory condition: raise XML_ERR_NO_MEMORY in the
 * XML_FROM_OUTPUT domain through __xmlSimpleError(), passing NULL for the
 * node and message arguments and forwarding @extra unchanged.
 */
static void
xmlSaveErrMemory(const char *extra)
{
__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
/**
 * xmlSaveErr:
 * @code: the error number
 * @node: the location of the error.
 * @extra: extra information
 *
 * Report a serialization error: select the message template that matches
 * @code and pass it, together with @node and @extra, to
 * __xmlSimpleError() in the XML_FROM_OUTPUT domain. An unrecognized
 * @code is reported with a generic "unexpected error number" message.
 */
static void
xmlSaveErr(int code, xmlNodePtr node, const char *extra)
{
    const char *template;

    if (code == XML_SAVE_NOT_UTF8) {
        template = "string is not in UTF-8\n";
    } else if (code == XML_SAVE_CHAR_INVALID) {
        template = "invalid character value\n";
    } else if (code == XML_SAVE_UNKNOWN_ENCODING) {
        /* the only template with a conversion; @extra fills it in */
        template = "unknown encoding %s\n";
    } else if (code == XML_SAVE_NO_DOCTYPE) {
        template = "document has no DOCTYPE\n";
    } else {
        template = "unexpected error number\n";
    }

    __xmlSimpleError(XML_FROM_OUTPUT, code, node, template, extra);
}
/************************************************************************
* *
* Special escaping routines *
* *
************************************************************************/
/**
 * xmlSerializeHexCharRef:
 * @out: destination buffer, with room for at least 11 bytes
 * @val: the character value to serialize
 *
 * Write @val as a hexadecimal character reference ("&#x" followed by one
 * to six upper-case hex digits and ";") into @out and terminate it with a
 * NUL byte. The longest output is "&#x" + 6 digits + ";" + NUL, i.e.
 * 11 bytes.
 *
 * At least one digit is always written, so a @val of 0 yields "&#x0;".
 * Negative values are not character values and are serialized as 0.
 * At most six digits are written: for a @val above 0xFFFFFF only the six
 * least significant hex digits are emitted, which keeps the function
 * within the 11 bytes callers reserve and never writes in front of @out.
 *
 * Returns a pointer to the terminating NUL byte, where further output
 * can be appended
 */
static unsigned char *
xmlSerializeHexCharRef(unsigned char *out, int val) {
    static const char hexDigits[] = "0123456789ABCDEF";
    unsigned int code;
    unsigned char *ptr;
    int ndigits;
    int i;

    code = (val > 0) ? (unsigned int) val : 0;

    *out++ = '&';
    *out++ = '#';
    *out++ = 'x';

    /* Count the significant hex digits, between one and six. */
    ndigits = 1;
    while ((ndigits < 6) && ((code >> (4 * ndigits)) != 0))
        ndigits++;

    /* Fill the digits right to left, starting from the slot of ';'. */
    ptr = out + ndigits;
    out = ptr;
    for (i = 0; i < ndigits; i++) {
        *--ptr = (unsigned char) hexDigits[code & 0xF];
        code >>= 4;
    }

    *out++ = ';';
    *out = 0;
    return(out);
}
/**
* xmlEscapeEntities:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
* @in: a pointer to an array of unescaped UTF-8 bytes
* @inlen: the length of @in
*
* Take a block of UTF-8 chars in and escape them. Used when there is no
* encoding specified.
*
* Returns 0 if success, or -1 otherwise
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets consumed.
*/
static int
xmlEscapeEntities(unsigned char* out, int *outlen,
const xmlChar* in, int *inlen) {
unsigned char* outstart = out;
const unsigned char* base = in;
unsigned char* outend = out + *outlen;
const unsigned char* inend;
int val;
inend = in + (*inlen);
while ((in < inend) && (out < outend)) {
if (*in == '<') {
if (outend - out < 4) break;
*out++ = '&';
*out++ = 'l';
*out++ = 't';
*out++ = ';';
in++;
continue;
} else if (*in == '>') {
if (outend - out < 4) break;
*out++ = '&';
*out++ = 'g';
*out++ = 't';
*out++ = ';';
in++;
continue;
} else if (*in == '&') {
if (outend - out < 5) break;
*out++ = '&';
*out++ = 'a';
*out++ = 'm';
*out++ = 'p';
*out++ = ';';
in++;
continue;
} else if (((*in >= 0x20) && (*in < 0x80)) ||
(*in == '\n') || (*in == '\t')) {
/*
* default case, just copy !
*/
*out++ = *in++;
continue;
} else if (*in >= 0x80) {
/*
* We assume we have UTF-8 input.
*/
if (outend - out < 11) break;
if (*in < 0xC0) {
xmlSaveErr(XML_SAVE_NOT_UTF8, NULL, NULL);
in++;
goto error;
} else if (*in < 0xE0) {
if (inend - in < 2) break;
val = (in[0]) & 0x1F;
val <<= 6;
val |= (in[1]) & 0x3F;
in += 2;
} else if (*in < 0xF0) {
if (inend - in < 3) break;
val = (in[0]) & 0x0F;
val <<= 6;
val |= (in[1]) & 0x3F;
val <<= 6;
val |= (in[2]) & 0x3F;
in += 3;
} else if (*in < 0xF8) {
if (inend - in < 4) break;
val = (in[0]) & 0x07;
val <<= 6;
val |= (in[1]) & 0x3F;
val <<= 6;
val |= (in[2]) & 0x3F;
val <<= 6;
val |= (in[3]) & 0x3F;
in += 4;
} else {
xmlSaveErr(XML_SAVE_CHAR_INVALID, NULL, NULL);
in++;
goto error;
}
if (!IS_CHAR(val)) {
xmlSaveErr(XML_SAVE_CHAR_INVALID, NULL, NULL);
in++;
goto error;
}
/*
* We could do multiple things here. Just save as a char ref
*/
out = xmlSerializeHexCharRef(out, val);
} else if (IS_BYTE_CHAR(*in)) {
if (outend - out < 6) break;
out = xmlSerializeHexCharRef(out, *in++);
} else {
xmlGenericError(xmlGenericErrorContext,