Cogs.Core
|
#include <stdbool.h>
#include <stddef.h>
#include "tag_enum.h"
Go to the source code of this file.
Classes | |
struct | GumboSourcePosition |
struct | GumboStringPiece |
struct | GumboVector |
struct | GumboAttribute |
struct | GumboDocument |
struct | GumboText |
struct | GumboElement |
struct | GumboInternalNode |
struct | GumboInternalOptions |
struct | GumboInternalOutput |
Typedefs | |
typedef struct GumboInternalNode | GumboNode |
typedef void *(* | GumboAllocatorFunction) (void *userdata, size_t size) |
typedef void(* | GumboDeallocatorFunction) (void *userdata, void *ptr) |
typedef struct GumboInternalOptions | GumboOptions |
typedef struct GumboInternalOutput | GumboOutput |
Functions | |
bool | gumbo_string_equals (const GumboStringPiece *str1, const GumboStringPiece *str2) |
bool | gumbo_string_equals_ignore_case (const GumboStringPiece *str1, const GumboStringPiece *str2) |
int | gumbo_vector_index_of (GumboVector *vector, const void *element) |
const char * | gumbo_normalized_tagname (GumboTag tag) |
void | gumbo_tag_from_original_text (GumboStringPiece *text) |
const char * | gumbo_normalize_svg_tagname (const GumboStringPiece *tagname) |
GumboTag | gumbo_tag_enum (const char *tagname) |
GumboTag | gumbo_tagn_enum (const char *tagname, unsigned int length) |
GumboAttribute * | gumbo_get_attribute (const GumboVector *attrs, const char *name) |
GumboOutput * | gumbo_parse (const char *buffer) |
GumboOutput * | gumbo_parse_with_options (const GumboOptions *options, const char *buffer, size_t buffer_length) |
void | gumbo_destroy_output (const GumboOptions *options, GumboOutput *output) |
Variables | |
const GumboSourcePosition | kGumboEmptySourcePosition |
const GumboStringPiece | kGumboEmptyString |
const GumboVector | kGumboEmptyVector |
const GumboOptions | kGumboDefaultOptions |
typedef void *(* GumboAllocatorFunction) (void *userdata, size_t size) |
The type for an allocator function. Takes the 'userdata' member of the GumboParser struct as its first argument. Semantics should be the same as malloc, i.e. return a block of `size` bytes on success or NULL on failure. Allocating a block of 0 bytes behaves as per malloc.
typedef void(* GumboDeallocatorFunction) (void *userdata, void *ptr) |
typedef struct GumboInternalNode GumboNode |
Forward declaration of GumboNode so it can be used recursively in GumboNode.parent.
typedef struct GumboInternalOptions GumboOptions |
Input struct containing configuration options for the parser. These let you specify alternate memory managers, provide different error handling, etc. Use kGumboDefaultOptions for sensible defaults, and only set what you need.
typedef struct GumboInternalOutput GumboOutput |
The output struct containing the results of the parse.
enum GumboNamespaceEnum |
Namespaces. Unlike in X(HT)ML, namespaces in HTML5 are not denoted by a prefix. Rather, anything inside an <svg> tag is in the SVG namespace, anything inside the <math> tag is in the MathML namespace, and anything else is inside the HTML namespace. No other namespaces are supported, so this can be an enum only.
enum GumboNodeType |
Enum denoting the type of node. This determines the type of the node.v union.
Enumerator | |
---|---|
GUMBO_NODE_DOCUMENT | Document node. v will be a GumboDocument. |
GUMBO_NODE_ELEMENT | Element node. v will be a GumboElement. |
GUMBO_NODE_TEXT | Text node. v will be a GumboText. |
GUMBO_NODE_CDATA | CDATA node. v will be a GumboText. |
GUMBO_NODE_COMMENT | Comment node. v will be a GumboText, excluding comment delimiters. |
GUMBO_NODE_WHITESPACE | Text node whose contents are entirely whitespace. v will be a GumboText. |
GUMBO_NODE_TEMPLATE | Template node. This is separate from GUMBO_NODE_ELEMENT because many client libraries will want to ignore the contents of template nodes, as the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing here, while clients that want to include template contents should also check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. |
enum GumboParseFlags |
Parse flags. We track the reasons for parser insertion of nodes and store them in a bitvector in the node itself. This lets client code optimize out nodes that are implied by the HTML structure of the document, or flag constructs that may not be allowed by a style guide, or track the prevalence of incorrect or tricky HTML code.
enum GumboQuirksModeEnum |
enum GumboTag |
An enum for all the tags defined in the HTML5 standard. These correspond to the tag names themselves. Enum constants exist only for tags which appear in the spec itself (or for tags with special handling in the SVG and MathML namespaces); any other tags appear as GUMBO_TAG_UNKNOWN and the actual tag name can be obtained through original_tag.
This is mostly for API convenience, so that clients of this library don't need to perform a strcasecmp to find the normalized tag name. It also has efficiency benefits, by letting the parser work with enums instead of strings.
void gumbo_destroy_output | ( | const GumboOptions * | options, |
GumboOutput * | output | ||
) |
Release the memory used for the parse tree & parse errors.
GumboAttribute * gumbo_get_attribute | ( | const GumboVector * | attrs, |
const char * | name | ||
) |
Given a vector of GumboAttributes, look up the one with the specified name and return it, or NULL if no such attribute exists. This uses a case-insensitive match, as HTML is case-insensitive.
const char * gumbo_normalize_svg_tagname | ( | const GumboStringPiece * | tagname | ) |
Fixes the case of SVG elements that are not all lowercase (see http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inforeign ). This is not done at parse time because there's no place to store a mutated tag name. tag_name is an enum (which will be TAG_UNKNOWN for most SVG tags without special handling), while original_tag_name is a pointer into the original buffer. Instead, we provide this helper function that clients can use to rename SVG tags as appropriate. Returns the case-normalized SVG tagname if a replacement is found, or NULL if no normalization is called for. The return value is static data and owned by the library.
const char * gumbo_normalized_tagname | ( | GumboTag | tag | ) |
Returns the normalized (usually all-lowercased, except for foreign content) tag name for a GumboTag enum. Return value is static data owned by the library.
GumboOutput * gumbo_parse | ( | const char * | buffer | ) |
Parses a buffer of UTF-8 text into a GumboNode parse tree. The buffer must live at least as long as the parse tree, as some fields (e.g. original_text) point directly into the original buffer.
This doesn't support buffers longer than 4 gigabytes.
GumboOutput * gumbo_parse_with_options | ( | const GumboOptions * | options, |
const char * | buffer, | ||
size_t | buffer_length | ||
) |
Extended version of gumbo_parse that takes an explicit options structure, buffer, and length.
bool gumbo_string_equals | ( | const GumboStringPiece * | str1, |
const GumboStringPiece * | str2 | ||
) |
Compares two GumboStringPieces, and returns true if they're equal or false otherwise.
bool gumbo_string_equals_ignore_case | ( | const GumboStringPiece * | str1, |
const GumboStringPiece * | str2 | ||
) |
Compares two GumboStringPieces ignoring case, and returns true if they're equal or false otherwise.
GumboTag gumbo_tag_enum | ( | const char * | tagname | ) |
Converts a tag name string (which may be in upper or mixed case) to a tag enum. This version (gumbo_tag_enum) expects tagname to be null-terminated; gumbo_tagn_enum instead takes an explicit length.
void gumbo_tag_from_original_text | ( | GumboStringPiece * | text | ) |
Extracts the tag name from the original_text field of an element or token by stripping off </> characters and attributes and adjusting the passed-in GumboStringPiece appropriately. The tag name is in the original case and shares a buffer with the original text, to simplify memory management. Behavior is undefined if a string-piece that doesn't represent an HTML tag (<tagname> or </tagname>) is passed in. If the string piece is completely empty (NULL data pointer), then this function will exit successfully as a no-op.
int gumbo_vector_index_of | ( | GumboVector * | vector, |
const void * | element | ||
) |
Returns the first index at which an element appears in this vector (testing by pointer equality), or -1 if it never does.
|
extern const GumboOptions kGumboDefaultOptions |
Default options struct; use this with gumbo_parse_with_options.
|
extern const GumboSourcePosition kGumboEmptySourcePosition |
A SourcePosition used for elements that have no source position, i.e. parser-inserted elements.
|
extern const GumboStringPiece kGumboEmptyString |
A constant to represent a 0-length null string.
|
extern const GumboVector kGumboEmptyVector |
An empty (0-length, 0-capacity) GumboVector.