392 lines
11 KiB
C
392 lines
11 KiB
C
|
// Copyright (C) 2004-2022 Artifex Software, Inc.
|
||
|
//
|
||
|
// This file is part of MuPDF.
|
||
|
//
|
||
|
// MuPDF is free software: you can redistribute it and/or modify it under the
|
||
|
// terms of the GNU Affero General Public License as published by the Free
|
||
|
// Software Foundation, either version 3 of the License, or (at your option)
|
||
|
// any later version.
|
||
|
//
|
||
|
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||
|
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
||
|
// details.
|
||
|
//
|
||
|
// You should have received a copy of the GNU Affero General Public License
|
||
|
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
|
||
|
//
|
||
|
// Alternative licensing terms are available from the licensor.
|
||
|
// For commercial licensing, see <https://www.artifex.com/> or contact
|
||
|
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
|
||
|
// CA 94129, USA, for further information.
|
||
|
|
||
|
#ifndef MUPDF_FITZ_XML_H
|
||
|
#define MUPDF_FITZ_XML_H
|
||
|
|
||
|
#include "mupdf/fitz/system.h"
|
||
|
#include "mupdf/fitz/context.h"
|
||
|
#include "mupdf/fitz/buffer.h"
|
||
|
#include "mupdf/fitz/pool.h"
|
||
|
#include "mupdf/fitz/archive.h"
|
||
|
|
||
|
/**
XML document model

fz_xml is an opaque node type: this header only forward-declares
the struct, so callers handle nodes purely through fz_xml pointers
and the functions declared below.
*/
|
||
|
|
||
|
typedef struct fz_xml fz_xml;
|
||
|
|
||
|
/* fz_xml_doc is an alias of fz_xml, kept for backwards compatibility. */
|
||
|
typedef fz_xml fz_xml_doc;
|
||
|
|
||
|
/**
|
||
|
Parse the contents of a buffer into a tree of xml nodes.
|
||
|
|
||
|
preserve_white: whether to keep or delete all-whitespace nodes.
|
||
|
*/
|
||
|
fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
|
||
|
|
||
|
/**
Parse the contents of a stream into a tree of xml nodes.

stream: the stream to read the XML from.

preserve_white: whether to keep or delete all-whitespace nodes.
*/
|
||
|
fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white);
|
||
|
|
||
|
/**
|
||
|
Parse the contents of an archive entry into a tree of xml nodes.
|
||
|
|
||
|
preserve_white: whether to keep or delete all-whitespace nodes.
|
||
|
*/
|
||
|
fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *arch, const char *filename, int preserve_white);
|
||
|
|
||
|
/**
|
||
|
Try to parse the contents of an archive entry into a tree of xml nodes.
|
||
|
|
||
|
preserve_white: whether to keep or delete all-whitespace nodes.
|
||
|
|
||
|
Will return NULL if the archive entry can't be found. Otherwise behaves
|
||
|
the same as fz_parse_xml_archive_entry. May throw exceptions.
|
||
|
*/
|
||
|
fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *arch, const char *filename, int preserve_white);
|
||
|
|
||
|
/**
|
||
|
Parse the contents of a buffer into a tree of XML nodes,
|
||
|
using the HTML5 parsing algorithm.
|
||
|
*/
|
||
|
fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf);
|
||
|
|
||
|
/**
|
||
|
Add a reference to the XML.
|
||
|
*/
|
||
|
fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml);
|
||
|
|
||
|
/**
|
||
|
Drop a reference to the XML. When the last reference is
|
||
|
dropped, the node and all its children and siblings will
|
||
|
be freed.
|
||
|
*/
|
||
|
void fz_drop_xml(fz_context *ctx, fz_xml *xml);
|
||
|
|
||
|
/**
|
||
|
Detach a node from the tree, unlinking it from its parent,
|
||
|
and setting the document root to the node.
|
||
|
*/
|
||
|
void fz_detach_xml(fz_context *ctx, fz_xml *node);
|
||
|
|
||
|
/**
|
||
|
Return the topmost XML node of a document.
|
||
|
*/
|
||
|
fz_xml *fz_xml_root(fz_xml_doc *xml);
|
||
|
|
||
|
/**
|
||
|
Return previous sibling of XML node.
|
||
|
*/
|
||
|
fz_xml *fz_xml_prev(fz_xml *item);
|
||
|
|
||
|
/**
|
||
|
Return next sibling of XML node.
|
||
|
*/
|
||
|
fz_xml *fz_xml_next(fz_xml *item);
|
||
|
|
||
|
/**
|
||
|
Return parent of XML node.
|
||
|
*/
|
||
|
fz_xml *fz_xml_up(fz_xml *item);
|
||
|
|
||
|
/**
|
||
|
Return first child of XML node.
|
||
|
*/
|
||
|
fz_xml *fz_xml_down(fz_xml *item);
|
||
|
|
||
|
/**
|
||
|
Return true if the tag name matches.
|
||
|
*/
|
||
|
int fz_xml_is_tag(fz_xml *item, const char *name);
|
||
|
|
||
|
/**
|
||
|
Return tag of XML node. Return NULL for text nodes.
|
||
|
*/
|
||
|
char *fz_xml_tag(fz_xml *item);
|
||
|
|
||
|
/**
|
||
|
Return the value of an attribute of an XML node.
|
||
|
NULL if the attribute doesn't exist.
|
||
|
*/
|
||
|
char *fz_xml_att(fz_xml *item, const char *att);
|
||
|
|
||
|
/**
|
||
|
Return the value of an attribute of an XML node.
|
||
|
If the first attribute doesn't exist, try the second.
|
||
|
NULL if neither attribute exists.
|
||
|
*/
|
||
|
char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two);
|
||
|
|
||
|
/**
|
||
|
Check for a matching attribute on an XML node.
|
||
|
|
||
|
If the node has the requested attribute (name), and the value
|
||
|
matches (match) then return 1. Otherwise, 0.
|
||
|
*/
|
||
|
int fz_xml_att_eq(fz_xml *item, const char *name, const char *match);
|
||
|
|
||
|
/**
|
||
|
Add an attribute to an XML node.
|
||
|
*/
|
||
|
void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val);
|
||
|
|
||
|
/**
|
||
|
Return the text content of an XML node.
|
||
|
Return NULL if the node is a tag.
|
||
|
*/
|
||
|
char *fz_xml_text(fz_xml *item);
|
||
|
|
||
|
/**
|
||
|
Pretty-print an XML tree to stdout.
|
||
|
*/
|
||
|
void fz_debug_xml(fz_xml *item, int level);
|
||
|
|
||
|
/**
|
||
|
Search the siblings of XML nodes starting with item looking for
|
||
|
the first with the given tag.
|
||
|
|
||
|
Return NULL if none found.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find(fz_xml *item, const char *tag);
|
||
|
|
||
|
/**
|
||
|
Search the siblings of XML nodes starting with the next sibling
|
||
|
of item looking for the first with the given tag.
|
||
|
|
||
|
Return NULL if none found.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_next(fz_xml *item, const char *tag);
|
||
|
|
||
|
/**
|
||
|
Search the siblings of XML nodes starting with the first child
|
||
|
of item looking for the first with the given tag.
|
||
|
|
||
|
Return NULL if none found.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_down(fz_xml *item, const char *tag);
|
||
|
|
||
|
/**
|
||
|
Search the siblings of XML nodes starting with item looking for
|
||
|
the first with the given tag (or any tag if tag is NULL), and
|
||
|
with a matching attribute.
|
||
|
|
||
|
Return NULL if none found.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Search the siblings of XML nodes starting with the next sibling
|
||
|
of item looking for the first with the given tag (or any tag if tag
|
||
|
is NULL), and with a matching attribute.
|
||
|
|
||
|
Return NULL if none found.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Search the siblings of XML nodes starting with the first child
|
||
|
of item looking for the first with the given tag (or any tag if
|
||
|
tag is NULL), and with a matching attribute.
|
||
|
|
||
|
Return NULL if none found.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Perform a depth first search from item, returning the first
|
||
|
child that matches the given tag (or any tag if tag is NULL),
|
||
|
with the given attribute (if att is non NULL), that matches
|
||
|
match (if match is non NULL).
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Perform a depth first search from item, returning the first
|
||
|
child that matches the given tag (or any tag if tag is NULL),
|
||
|
with the given attribute (if att is non NULL), that matches
|
||
|
match (if match is non NULL). The search stops if it ever
|
||
|
reaches the top of the tree, or the declared 'top' item.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
|
||
|
|
||
|
/**
|
||
|
Perform a depth first search onwards from item, returning the first
|
||
|
child that matches the given tag (or any tag if tag is NULL),
|
||
|
with the given attribute (if att is non NULL), that matches
|
||
|
match (if match is non NULL).
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Perform a depth first search onwards from item, returning the first
|
||
|
child that matches the given tag (or any tag if tag is NULL),
|
||
|
with the given attribute (if att is non NULL), that matches
|
||
|
match (if match is non NULL). The search stops if it ever reaches
|
||
|
the top of the tree, or the declared 'top' item.
|
||
|
*/
|
||
|
fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
|
||
|
|
||
|
/**
|
||
|
DOM-like functions for html in xml.
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
Return a borrowed reference for the 'body' element of
|
||
|
the given DOM.
|
||
|
*/
|
||
|
fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom);
|
||
|
|
||
|
/**
|
||
|
Return a borrowed reference for the document (the top
|
||
|
level element) of the DOM.
|
||
|
*/
|
||
|
fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom);
|
||
|
|
||
|
/**
|
||
|
Create an element of a given tag type for the given DOM.
|
||
|
|
||
|
The element is not linked into the DOM yet.
|
||
|
*/
|
||
|
fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag);
|
||
|
|
||
|
/**
Create a text node for the given DOM.

The text node is not linked into the DOM yet.
*/
|
||
|
fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text);
|
||
|
|
||
|
/**
|
||
|
Find the first element matching the requirements in a depth first traversal from elt.
|
||
|
|
||
|
The tagname must match tag, unless tag is NULL, when all tag names are considered to match.
|
||
|
|
||
|
If att is NULL, then attributes are not considered, and every element that passed the tag check matches.
|
||
|
Otherwise:
|
||
|
If match is NULL, then only nodes that have an att attribute match.
|
||
|
If match is non-NULL, then only nodes that have an att attribute whose value matches the match string are accepted.
|
||
|
|
||
|
Returns NULL (if no match found), or a borrowed reference to the first matching element.
|
||
|
*/
|
||
|
fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Find the next element matching the requirements.
|
||
|
*/
|
||
|
fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
|
||
|
|
||
|
/**
|
||
|
Insert an element as the last child of a parent, unlinking the
|
||
|
child from its current position if required.
|
||
|
*/
|
||
|
void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child);
|
||
|
|
||
|
/**
|
||
|
Insert an element (new_elt), before another element (node),
|
||
|
unlinking the new_elt from its current position if required.
|
||
|
*/
|
||
|
void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
|
||
|
|
||
|
/**
|
||
|
Insert an element (new_elt), after another element (node),
|
||
|
unlinking the new_elt from its current position if required.
|
||
|
*/
|
||
|
void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
|
||
|
|
||
|
/**
|
||
|
Remove an element from the DOM. The element can be added back elsewhere
|
||
|
if required.
|
||
|
|
||
|
No reference counting changes for the element.
|
||
|
*/
|
||
|
void fz_dom_remove(fz_context *ctx, fz_xml *elt);
|
||
|
|
||
|
/**
|
||
|
Clone an element (and its children).
|
||
|
|
||
|
A borrowed reference to the clone is returned. The clone is not
|
||
|
yet linked into the DOM.
|
||
|
*/
|
||
|
fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt);
|
||
|
|
||
|
/**
|
||
|
Return a borrowed reference to the first child of a node,
|
||
|
or NULL if there isn't one.
|
||
|
*/
|
||
|
fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt);
|
||
|
|
||
|
/**
|
||
|
Return a borrowed reference to the parent of a node,
|
||
|
or NULL if there isn't one.
|
||
|
*/
|
||
|
fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt);
|
||
|
|
||
|
/**
|
||
|
Return a borrowed reference to the next sibling of a node,
|
||
|
or NULL if there isn't one.
|
||
|
*/
|
||
|
fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt);
|
||
|
|
||
|
/**
|
||
|
Return a borrowed reference to the previous sibling of a node,
|
||
|
or NULL if there isn't one.
|
||
|
*/
|
||
|
fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt);
|
||
|
|
||
|
/**
|
||
|
Add an attribute to an element.
|
||
|
|
||
|
Ownership of att and value remain with the caller.
|
||
|
*/
|
||
|
void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value);
|
||
|
|
||
|
/**
|
||
|
Remove an attribute from an element.
|
||
|
*/
|
||
|
void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att);
|
||
|
|
||
|
/**
|
||
|
Retrieve the value of a given attribute from a given element.
|
||
|
|
||
|
Returns a borrowed pointer to the value or NULL if not found.
|
||
|
*/
|
||
|
const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att);
|
||
|
|
||
|
/**
|
||
|
Enumerate through the attributes of an element.
|
||
|
|
||
|
Call with i=0,1,2,3... to enumerate attributes.
|
||
|
|
||
|
On return *att and the return value will be NULL if there are not
|
||
|
that many attributes to read. Otherwise, *att will be filled in
|
||
|
with a borrowed pointer to the attribute name, and the return
|
||
|
value will be a borrowed pointer to the value.
|
||
|
*/
|
||
|
const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att);
|
||
|
|
||
|
#endif
|