go-fitz/include/mupdf/pdf/interpret.h

453 lines
18 KiB
C
Raw Permalink Normal View History

2023-10-17 15:51:53 +00:00
// Copyright (C) 2004-2023 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.
#ifndef PDF_INTERPRET_H
#define PDF_INTERPRET_H
#include "mupdf/pdf/font.h"
#include "mupdf/pdf/resource.h"
#include "mupdf/pdf/document.h"
/* Opaque forward declaration; the struct body is not defined in this header. */
typedef struct pdf_gstate pdf_gstate;
/* Forward declaration, so that the prototypes below can take pdf_processor
 * pointers before the struct itself is defined further down. */
typedef struct pdf_processor pdf_processor;
/*
Processor lifetime management.
pdf_new_processor: returns an untyped pointer. Presumably allocates
'size' bytes for a structure that begins with a pdf_processor
(NOTE(review): confirm against the implementation).
pdf_keep_processor / pdf_drop_processor: presumably adjust the 'refs'
count held in struct pdf_processor (TODO confirm).
pdf_close_processor: see the close_processor callback in
struct pdf_processor.
*/
void *pdf_new_processor(fz_context *ctx, int size);
pdf_processor *pdf_keep_processor(fz_context *ctx, pdf_processor *proc);
void pdf_close_processor(fz_context *ctx, pdf_processor *proc);
void pdf_drop_processor(fz_context *ctx, pdf_processor *proc);
/*
A pdf_processor is a table of callbacks: lifetime hooks, resource-stack
hooks, and one op_* hook per content stream operator (grouped by
category below). Processors can be chained; see the notes on
close_processor, push_resources and pop_resources.
The op_* field names appear to follow the PDF operator names, with
operators that are not valid C identifiers spelled out (e.g. fstar,
squote, dquote) -- NOTE(review): confirm against the PDF operator table.
Every callback receives the fz_context and the processor itself as its
first two arguments.
*/
struct pdf_processor
{
/* Reference count; presumably managed by pdf_keep_processor and
 * pdf_drop_processor (TODO confirm). */
int refs;
/* close the processor. Also closes any chained processors. */
void (*close_processor)(fz_context *ctx, pdf_processor *proc);
/* NOTE(review): presumably releases the processor's own storage once
 * the last reference has gone; confirm against pdf_drop_processor. */
void (*drop_processor)(fz_context *ctx, pdf_processor *proc);
/* At any stage, we can have one set of resources in place.
 * This function gives us a set of resources to use. We remember
 * any previous set on a stack, so we can pop back to it later.
 * Our responsibility (as well as remembering it for our own use)
 * is to pass either it, or a filtered version of it, on to any
 * chained processor. */
void (*push_resources)(fz_context *ctx, pdf_processor *proc, pdf_obj *res);
/* Pop the resources stack. This must be passed on to any chained
 * processors. This returns a pointer to the resource dict just
 * popped by the deepest filter. The caller inherits this reference. */
pdf_obj *(*pop_resources)(fz_context *ctx, pdf_processor *proc);
/* general graphics state */
void (*op_w)(fz_context *ctx, pdf_processor *proc, float linewidth);
void (*op_j)(fz_context *ctx, pdf_processor *proc, int linejoin);
void (*op_J)(fz_context *ctx, pdf_processor *proc, int linecap);
void (*op_M)(fz_context *ctx, pdf_processor *proc, float miterlimit);
void (*op_d)(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase);
void (*op_ri)(fz_context *ctx, pdf_processor *proc, const char *intent);
void (*op_i)(fz_context *ctx, pdf_processor *proc, float flatness);
/* NOTE(review): op_gs_begin and op_gs_end presumably bracket the op_gs_*
 * calls generated for a single ExtGState (see also the "Virtual ops for
 * ExtGState entries" group near the end of this struct) -- TODO confirm. */
void (*op_gs_begin)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate);
void (*op_gs_BM)(fz_context *ctx, pdf_processor *proc, const char *blendmode);
void (*op_gs_ca)(fz_context *ctx, pdf_processor *proc, float alpha);
void (*op_gs_CA)(fz_context *ctx, pdf_processor *proc, float alpha);
void (*op_gs_SMask)(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, float *bc, int luminosity);
void (*op_gs_end)(fz_context *ctx, pdf_processor *proc);
/* special graphics state */
void (*op_q)(fz_context *ctx, pdf_processor *proc);
void (*op_Q)(fz_context *ctx, pdf_processor *proc);
void (*op_cm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f);
/* path construction */
void (*op_m)(fz_context *ctx, pdf_processor *proc, float x, float y);
void (*op_l)(fz_context *ctx, pdf_processor *proc, float x, float y);
void (*op_c)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3);
void (*op_v)(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3);
void (*op_y)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3);
void (*op_h)(fz_context *ctx, pdf_processor *proc);
void (*op_re)(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h);
/* path painting */
void (*op_S)(fz_context *ctx, pdf_processor *proc);
void (*op_s)(fz_context *ctx, pdf_processor *proc);
void (*op_F)(fz_context *ctx, pdf_processor *proc);
void (*op_f)(fz_context *ctx, pdf_processor *proc);
void (*op_fstar)(fz_context *ctx, pdf_processor *proc);
void (*op_B)(fz_context *ctx, pdf_processor *proc);
void (*op_Bstar)(fz_context *ctx, pdf_processor *proc);
void (*op_b)(fz_context *ctx, pdf_processor *proc);
void (*op_bstar)(fz_context *ctx, pdf_processor *proc);
void (*op_n)(fz_context *ctx, pdf_processor *proc);
/* clipping paths */
void (*op_W)(fz_context *ctx, pdf_processor *proc);
void (*op_Wstar)(fz_context *ctx, pdf_processor *proc);
/* text objects */
void (*op_BT)(fz_context *ctx, pdf_processor *proc);
void (*op_ET)(fz_context *ctx, pdf_processor *proc);
/* text state */
void (*op_Tc)(fz_context *ctx, pdf_processor *proc, float charspace);
void (*op_Tw)(fz_context *ctx, pdf_processor *proc, float wordspace);
void (*op_Tz)(fz_context *ctx, pdf_processor *proc, float scale);
void (*op_TL)(fz_context *ctx, pdf_processor *proc, float leading);
void (*op_Tf)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size);
void (*op_Tr)(fz_context *ctx, pdf_processor *proc, int render);
void (*op_Ts)(fz_context *ctx, pdf_processor *proc, float rise);
/* text positioning */
void (*op_Td)(fz_context *ctx, pdf_processor *proc, float tx, float ty);
void (*op_TD)(fz_context *ctx, pdf_processor *proc, float tx, float ty);
void (*op_Tm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f);
void (*op_Tstar)(fz_context *ctx, pdf_processor *proc);
/* text showing; strings are passed as a pointer plus an explicit length */
void (*op_TJ)(fz_context *ctx, pdf_processor *proc, pdf_obj *array);
void (*op_Tj)(fz_context *ctx, pdf_processor *proc, char *str, size_t len);
void (*op_squote)(fz_context *ctx, pdf_processor *proc, char *str, size_t len);
void (*op_dquote)(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len);
/* type 3 fonts */
void (*op_d0)(fz_context *ctx, pdf_processor *proc, float wx, float wy);
void (*op_d1)(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury);
/* color */
void (*op_CS)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs);
void (*op_cs)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs);
void (*op_SC_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color);
void (*op_sc_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color);
void (*op_SC_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
void (*op_sc_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
void (*op_SC_color)(fz_context *ctx, pdf_processor *proc, int n, float *color);
void (*op_sc_color)(fz_context *ctx, pdf_processor *proc, int n, float *color);
void (*op_G)(fz_context *ctx, pdf_processor *proc, float g);
void (*op_g)(fz_context *ctx, pdf_processor *proc, float g);
void (*op_RG)(fz_context *ctx, pdf_processor *proc, float r, float g, float b);
void (*op_rg)(fz_context *ctx, pdf_processor *proc, float r, float g, float b);
void (*op_K)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k);
void (*op_k)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k);
/* shadings, images, xobjects */
void (*op_BI)(fz_context *ctx, pdf_processor *proc, fz_image *image, const char *colorspace_name);
void (*op_sh)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
void (*op_Do_image)(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image);
void (*op_Do_form)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *form);
/* marked content */
void (*op_MP)(fz_context *ctx, pdf_processor *proc, const char *tag);
void (*op_DP)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked);
void (*op_BMC)(fz_context *ctx, pdf_processor *proc, const char *tag);
void (*op_BDC)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked);
void (*op_EMC)(fz_context *ctx, pdf_processor *proc);
/* compatibility */
void (*op_BX)(fz_context *ctx, pdf_processor *proc);
void (*op_EX)(fz_context *ctx, pdf_processor *proc);
/* Virtual ops for ExtGState entries */
void (*op_gs_OP)(fz_context *ctx, pdf_processor *proc, int b);
void (*op_gs_op)(fz_context *ctx, pdf_processor *proc, int b);
void (*op_gs_OPM)(fz_context *ctx, pdf_processor *proc, int i);
void (*op_gs_UseBlackPtComp)(fz_context *ctx, pdf_processor *proc, pdf_obj *name);
/* END is used to signify end of stream (finalise and close down) */
void (*op_END)(fz_context *ctx, pdf_processor *proc);
/* interpreter state that persists across content streams */
const char *usage;
int hidden;
};
/*
State of one content stream interpretation run, grouped below into
inputs, running state and the operand stack.
NOTE(review): "csi" presumably stands for "content stream interpreter";
confirm against the implementation.
*/
typedef struct
{
/* input */
pdf_document *doc;
pdf_obj *rdb;
pdf_lexbuf *buf;
fz_cookie *cookie;
/* state */
int gstate;
int xbalance;
int in_text;
fz_rect d1_rect;
/* stack */
pdf_obj *obj;
char name[256];
char string[256];
/* presumably the number of valid bytes in 'string' -- TODO confirm */
size_t string_len;
/* presumably the number of entries in use in 'stack' -- TODO confirm */
int top;
float stack[32];
} pdf_csi;
/* Functions to set up pdf_processor structures */
/*
Create a run processor.
NOTE(review): the parameters are not documented in this header. Judging
by the signature this processor is given an fz_device (dev), a
transformation matrix (ctm) and an optional starting graphics state
(gstate); confirm the exact semantics against the implementation.
*/
pdf_processor *pdf_new_run_processor(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_matrix ctm, int struct_parent, const char *usage, pdf_gstate *gstate, fz_default_colorspaces *default_cs, fz_cookie *cookie);
/*
Create a buffer processor.
This collects the incoming PDF operator stream into an fz_buffer.
buffer: The (possibly empty) buffer to which operators will be
appended.
ahxencode: If 0, then image streams will be sent as binary,
otherwise they will be ASCII-hex encoded.
*/
pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode);
/*
Create an output processor.
This sends the incoming PDF operator stream to an fz_output stream.
out: The output stream to which operators will be sent.
ahxencode: If 0, then image streams will be sent as binary,
otherwise they will be ASCII-hex encoded.
*/
pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode);
/* Forward declaration; the struct itself is defined further down. */
typedef struct pdf_filter_options pdf_filter_options;
/*
A filter factory function: creates a filter processor. The filter
processes the PDF operators it is fed, and passes them down (with some
changes) to the child processor.
chain: The child processor to which the filtered operators
will be fed.
options: The pdf_filter_options for the filtering operation.
factory_options: Pointer to a structure holding the options specific
to this filter (see the 'options' field of pdf_filter_factory).
NOTE(review): doc, struct_parents and transform are not described in
this header; check the implementation before relying on them.
*/
typedef pdf_processor *(pdf_filter_factory_fn)(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *factory_options);
/*
A pdf_filter_factory is a pdf_filter_factory_fn, plus the options
needed to instantiate it.
filter: The factory function.
options: Untyped pointer to the filter specific options; it matches
the final 'void *' parameter of pdf_filter_factory_fn (presumably it
is passed there unchanged -- TODO confirm).
*/
typedef struct
{
pdf_filter_factory_fn *filter;
void *options;
} pdf_filter_factory;
/*
Options shared by all the filters in a filter chain.
recurse: Filter resources recursively.
instance_forms: Always recurse on XObject Form resources, but will
create a new instance of each XObject Form that is used, filtered
individually.
ascii: If true, escape all binary data in the output.
no_update: If true, do not update the document at the end.
opaque: Opaque value that is passed to the complete function.
complete: A function called at the end of processing.
This allows the caller to insert some extra content after
all other content.
filters: Pointer to an array of filter factory/options.
The array is terminated by an entry with a NULL factory pointer.
Operators will be fed into the filter generated from the first
factory function in the list, and from there go to the filter
generated from the second factory in the list etc.
*/
struct pdf_filter_options
{
int recurse;
int instance_forms;
int ascii;
int no_update;
void *opaque;
void (*complete)(fz_context *ctx, fz_buffer *buffer, void *arg);
pdf_filter_factory *filters;
};
/*
The kind of object being offered to a culler callback (see the
'culler' member of pdf_sanitize_filter_options).
The numeric values are spelled out so that the numbering stays
visible; they are the same values the compiler assigns implicitly.
*/
typedef enum
{
	FZ_CULL_PATH_FILL = 0,
	FZ_CULL_PATH_STROKE = 1,
	FZ_CULL_PATH_FILL_STROKE = 2,
	FZ_CULL_CLIP_PATH = 3,
	FZ_CULL_GLYPH = 4,
	FZ_CULL_IMAGE = 5,
	FZ_CULL_SHADING = 6
} fz_cull_type;
/*
Options for the sanitize filter.
opaque: Opaque value that is passed to all the callbacks below.
image_filter: A function called to assess whether a given
image should be removed or not.
text_filter: A function called to assess whether a given
character should be removed or not.
after_text_object: A function called after each text object.
This allows the caller to insert some extra content if
desired.
culler: A function called to see whether each object should
be culled or not.
NOTE(review): this header does not say which return value means
"remove"/"cull", nor what image_filter must return to keep, replace
or drop an image; check the implementation before writing a callback.
*/
typedef struct
{
void *opaque;
fz_image *(*image_filter)(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image);
int (*text_filter)(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox);
void (*after_text_object)(fz_context *ctx, void *opaque, pdf_document *doc, pdf_processor *chain, fz_matrix ctm);
int (*culler)(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type);
}
pdf_sanitize_filter_options;
/*
A sanitize filter factory. Its parameter list matches
pdf_filter_factory_fn.
sopts = pointer to pdf_sanitize_filter_options.
The changes made by a filter generated from this are:
* No operations are allowed to change the top level gstate.
Additional q/Q operators are inserted to prevent this.
* Repeated/unnecessary colour operators are removed (so,
for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to
"0.5 g")
The intention of these changes is to provide a simpler,
but equivalent stream, repairing problems with mismatched
operators, maintaining structure (such as BMC, EMC calls)
and leaving the graphics state in a known (default) state
so that subsequent operations (such as synthesising new
operators to be appended to the stream) are easier.
The net graphical effect of the filtered operator stream
should be identical to the incoming operator stream.
*/
pdf_processor *pdf_new_sanitize_filter(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *sopts);
/*
NOTE(review): pdf_filter_xobject_instance is not documented in this
header. From its name and signature it presumably filters one XObject
using the given filter options and returns the resulting object;
confirm the behaviour and the ownership of the returned pdf_obj
against the implementation.
*/
pdf_obj *pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix ctm, pdf_filter_options *options, pdf_cycle_list *cycle_up);
/*
Push/pop the resources stack of a processor. Presumably these call
the push_resources / pop_resources callbacks of struct pdf_processor
(TODO confirm); see the comments on those callbacks for the contract,
including ownership of the popped resource dict.
*/
void pdf_processor_push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res);
pdf_obj *pdf_processor_pop_resources(fz_context *ctx, pdf_processor *proc);
/*
Options for the color filter.
opaque: Opaque value that is passed to all the filter functions.
color_rewrite: function pointer called to rewrite a color.
On entry:
*cs = reference to a pdf object representing the colorspace.
*n = number of color components.
color = *n color values.
On exit:
*cs either the same (for no change in colorspace) or
updated to be a new one. Reference must be dropped, and
a new kept reference returned!
*n = number of color components (maybe updated).
color = *n color values (maybe updated).
image_rewrite: function pointer called to rewrite an image.
On entry:
*image = reference to an fz_image.
On exit:
*image either the same (for no change) or updated
to be a new one. Reference must be dropped, and a
new kept reference returned.
shade_rewrite: a pdf_shade_recolorer (type declared outside this
header); presumably used to rewrite shadings -- NOTE(review): confirm.
*/
typedef struct
{
void *opaque;
void (*color_rewrite)(fz_context *ctx, void *opaque, pdf_obj **cs, int *n, float color[FZ_MAX_COLORS]);
void (*image_rewrite)(fz_context *ctx, void *opaque, fz_image **image);
pdf_shade_recolorer *shade_rewrite;
} pdf_color_filter_options;
/*
A color filter factory. Its parameter list matches
pdf_filter_factory_fn.
copts: presumably a pointer to pdf_color_filter_options, by analogy
with the sopts argument of pdf_new_sanitize_filter (TODO confirm).
*/
pdf_processor *
pdf_new_color_filter(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *copts);
/*
Functions to actually process annotations, glyphs and general stream objects.
Each one takes the processor to drive as its 'proc' argument.
NOTE(review): the meaning and ownership of 'out_res' in
pdf_process_contents is not documented in this header; check the
implementation before using it.
*/
void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *obj, pdf_obj *res, fz_cookie *cookie, pdf_obj **out_res);
void pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_annot *annot, fz_cookie *cookie);
void pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *resources, fz_buffer *contents);
/*
Function to process a contents stream without handling the resources.
The caller is responsible for pushing/popping the resources
(see pdf_processor_push_resources / pdf_processor_pop_resources).
*/
void pdf_process_raw_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie);
/* Text handling helper functions */
/*
Text state. The fields parallel the parameters of the "text state"
callbacks in struct pdf_processor: op_Tc (charspace), op_Tw
(wordspace), op_Tz (scale), op_TL (leading), op_Tf (font, size),
op_Tr (render) and op_Ts (rise).
*/
typedef struct
{
float char_space;
float word_space;
float scale;
float leading;
pdf_font_desc *font;
float size;
int render;
float rise;
} pdf_text_state;
/*
State of the text object currently being processed; this is the 'tos'
argument of the pdf_tos_* helpers.
NOTE(review): the fields are not documented in this header. 'tlm' and
'tm' are presumably the text line matrix and text matrix of the PDF
text model, and cid/gid/char_bbox/char_tx/char_ty presumably describe
the most recently handled character -- confirm against the
implementation before relying on this.
*/
typedef struct
{
fz_text *text;
fz_rect text_bbox;
fz_matrix tlm;
fz_matrix tm;
int text_mode;
int cid;
int gid;
fz_rect char_bbox;
pdf_font_desc *fontdesc;
float char_tx;
float char_ty;
} pdf_text_object_state;
/*
Helpers operating on a pdf_text_object_state ('tos').
pdf_tos_save / pdf_tos_restore: exchange state with a caller-provided
array of two matrices (presumably tlm and tm -- TODO confirm).
pdf_tos_get_text: returns an fz_text (NOTE(review): ownership of the
returned object is not stated here; confirm).
pdf_tos_make_trm: writes a matrix through 'trm' for the given cid;
the meaning of the int return value is not documented here.
pdf_tos_translate, pdf_tos_set_matrix and pdf_tos_newline do not take
an fz_context. Their arguments parallel those of the op_Td (tx, ty)
and op_Tm (a..f) callbacks and the 'leading' text state value.
*/
void pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]);
void pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]);
fz_text *pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos);
void pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render);
int pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm);
void pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos);
void pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty);
void pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f);
void pdf_tos_newline(pdf_text_object_state *tos, float leading);
#endif