htmlparser2

  • Version 9.1.0
  • Published
  • 254 kB
  • 4 dependencies
  • MIT license

Install

npm i htmlparser2
yarn add htmlparser2
pnpm add htmlparser2

Overview

Fast & forgiving HTML/XML parser

Index

Functions

function createDocumentStream

createDocumentStream: (
callback: (error: Error | null, document: Document) => void,
options?: Options,
elementCallback?: (element: Element) => void
) => Parser;
  • Creates a parser instance, with an attached DOM handler.

    Parameter callback

    A callback that will be called once parsing has been completed, with the resulting document.

    Parameter options

    Optional options for the parser and DOM handler.

    Parameter elementCallback

    An optional callback that will be called every time a tag has been completed inside of the DOM.

function createDomStream

createDomStream: (
callback: (error: Error | null, dom: ChildNode[]) => void,
options?: Options,
elementCallback?: (element: Element) => void
) => Parser;
  • Creates a parser instance, with an attached DOM handler.

    Parameter callback

    A callback that will be called once parsing has been completed, with an array of root nodes.

    Parameter options

    Optional options for the parser and DOM handler.

    Parameter elementCallback

    An optional callback that will be called every time a tag has been completed inside of the DOM.

    Deprecated

    Use createDocumentStream instead.

function parseDocument

parseDocument: (data: string, options?: Options) => Document;
  • Parses the data, returns the resulting document.

    Parameter data

    The data that should be parsed.

    Parameter options

    Optional options for the parser and DOM handler.

function parseDOM

parseDOM: (data: string, options?: Options) => ChildNode[];
  • Parses data, returns an array of the root nodes.

    Note that the root nodes still have a Document node as their parent. Use parseDocument to get the Document node instead.

    Parameter data

    The data that should be parsed.

    Parameter options

    Optional options for the parser and DOM handler.

    Deprecated

    Use parseDocument instead.

function parseFeed

parseFeed: (feed: string, options?: Options) => Feed | null;
  • Parse a feed.

    Parameter feed

    The feed that should be parsed, as a string.

    Parameter options

    Optional options for parsing. When providing options, you should set xmlMode to true.

Classes

class Parser

class Parser implements Callbacks {}

    constructor

    constructor(cbs?: Partial<Handler>, options?: ParserOptions);

      property endIndex

      endIndex: number;
      • The end index of the last event.

      property startIndex

      startIndex: number;
      • The start index of the last event.

      method done

      done: (chunk?: string) => void;
      • Alias of end, for backwards compatibility.

        Parameter chunk

        Optional final chunk to parse.

        Deprecated

      method end

      end: (chunk?: string) => void;
      • Parses the end of the buffer and clears the stack, calls onend.

        Parameter chunk

        Optional final chunk to parse.

      method isVoidElement

      protected isVoidElement: (name: string) => boolean;
      • Checks if the current tag is a void element. Override this if you want to specify your own additional void elements.

      method parseChunk

      parseChunk: (chunk: string) => void;
      • Alias of write, for backwards compatibility.

        Parameter chunk

        Chunk to parse.

        Deprecated

      method parseComplete

      parseComplete: (data: string) => void;
      • Resets the parser, then parses a complete document and pushes it to the handler.

        Parameter data

        Document to parse.

      method pause

      pause: () => void;
      • Pauses parsing. The parser won't emit events until resume is called.

      method reset

      reset: () => void;
      • Resets the parser to a blank state, ready to parse a new HTML document.

      method resume

      resume: () => void;
      • Resumes parsing after pause was called.

      method write

      write: (chunk: string) => void;
      • Parses a chunk of data and calls the corresponding callbacks.

        Parameter chunk

        Chunk to parse.

      class Tokenizer

      class Tokenizer {}

        constructor

        constructor(
        { xmlMode, decodeEntities }: { xmlMode?: boolean; decodeEntities?: boolean },
        cbs: Callbacks
        );

          property running

          running: boolean;
          • Indicates whether the tokenizer is running, i.e. whether it has not been paused.

          method end

          end: () => void;

            method pause

            pause: () => void;

              method reset

              reset: () => void;

                method resume

                resume: () => void;

                  method write

                  write: (chunk: string) => void;

                    Interfaces

                    interface Handler

                    interface Handler {}

                      method onattribute

                      onattribute: (
                      name: string,
                      value: string,
                      quote?: string | undefined | null
                      ) => void;
                      • Parameter name

                        Name of the attribute.

                        Parameter value

                        Value of the attribute.

                        Parameter quote

                        Quotes used around the attribute. null if the attribute has no quotes around the value, undefined if the attribute has no value.

                      method oncdataend

                      oncdataend: () => void;

                        method oncdatastart

                        oncdatastart: () => void;

                          method onclosetag

                          onclosetag: (name: string, isImplied: boolean) => void;

                            method oncomment

                            oncomment: (data: string) => void;

                              method oncommentend

                              oncommentend: () => void;

                                method onend

                                onend: () => void;
                                • Signals the handler that parsing is done.

                                method onerror

                                onerror: (error: Error) => void;

                                  method onopentag

                                  onopentag: (
                                  name: string,
                                  attribs: { [s: string]: string },
                                  isImplied: boolean
                                  ) => void;

                                    method onopentagname

                                    onopentagname: (name: string) => void;

                                      method onparserinit

                                      onparserinit: (parser: Parser) => void;

                                        method onprocessinginstruction

                                        onprocessinginstruction: (name: string, data: string) => void;

                                          method onreset

                                          onreset: () => void;
                                          • Resets the handler back to starting state.

                                          method ontext

                                          ontext: (data: string) => void;

                                            interface ParserOptions

                                            interface ParserOptions {}

                                              property decodeEntities

                                              decodeEntities?: boolean;
                                              • Decode entities within the document.

                                                Default: true

                                              property lowerCaseAttributeNames

                                              lowerCaseAttributeNames?: boolean;
                                              • If set to true, all attribute names will be lowercased. This has a noticeable impact on speed.

                                                Default: !xmlMode

                                              property lowerCaseTags

                                              lowerCaseTags?: boolean;
                                              • If set to true, all tags will be lowercased.

                                                Default: !xmlMode

                                              property recognizeCDATA

                                              recognizeCDATA?: boolean;
                                              • If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled. NOTE: If xmlMode is set to true then CDATA sections will always be recognized as text.

                                                Default: xmlMode

                                              property recognizeSelfClosing

                                              recognizeSelfClosing?: boolean;
                                              • If set to true, self-closing tags will trigger the onclosetag event even if xmlMode is not set to true. NOTE: If xmlMode is set to true then self-closing tags will always be recognized.

                                                Default: xmlMode

                                              property Tokenizer

                                              Tokenizer?: typeof Tokenizer;
                                              • Allows the default tokenizer to be overwritten.

                                              property xmlMode

                                              xmlMode?: boolean;
                                              • Indicates whether special tags (<script>, <style>, and <title>) should get special treatment and if "empty" tags (e.g. <br>) can have children. If false, the content of special tags will be text only. For feeds and other XML content (documents that don't consist of HTML), set this to true.

                                                Default: false

                                              interface TokenizerCallbacks

                                              interface Callbacks {}

                                                method onattribdata

                                                onattribdata: (start: number, endIndex: number) => void;

                                                  method onattribend

                                                  onattribend: (quote: QuoteType, endIndex: number) => void;

                                                    method onattribentity

                                                    onattribentity: (codepoint: number) => void;

                                                      method onattribname

                                                      onattribname: (start: number, endIndex: number) => void;

                                                        method oncdata

                                                        oncdata: (start: number, endIndex: number, endOffset: number) => void;

                                                          method onclosetag

                                                          onclosetag: (start: number, endIndex: number) => void;

                                                            method oncomment

                                                            oncomment: (start: number, endIndex: number, endOffset: number) => void;

                                                              method ondeclaration

                                                              ondeclaration: (start: number, endIndex: number) => void;

                                                                method onend

                                                                onend: () => void;

                                                                  method onopentagend

                                                                  onopentagend: (endIndex: number) => void;

                                                                    method onopentagname

                                                                    onopentagname: (start: number, endIndex: number) => void;

                                                                      method onprocessinginstruction

                                                                      onprocessinginstruction: (start: number, endIndex: number) => void;

                                                                        method onselfclosingtag

                                                                        onselfclosingtag: (endIndex: number) => void;

                                                                          method ontext

                                                                          ontext: (start: number, endIndex: number) => void;

                                                                            method ontextentity

                                                                            ontextentity: (codepoint: number, endIndex: number) => void;

                                                                              Enums

                                                                              enum QuoteType

                                                                              enum QuoteType {
                                                                              NoValue = 0,
                                                                              Unquoted = 1,
                                                                              Single = 2,
                                                                              Double = 3,
                                                                              }

                                                                                member Double

                                                                                Double = 3

                                                                                  member NoValue

                                                                                  NoValue = 0

                                                                                    member Single

                                                                                    Single = 2

                                                                                      member Unquoted

                                                                                      Unquoted = 1

                                                                                        Type Aliases

                                                                                        type Options

                                                                                        type Options = ParserOptions & DomHandlerOptions;

                                                                                          Package Files (3)

                                                                                          Dependencies (4)

                                                                                          Dev Dependencies (12)

                                                                                          Peer Dependencies (0)

                                                                                          No peer dependencies.

                                                                                          Badge

                                                                                          To add a badge like this one (jsDocs.io badge) to your package's README, use the codes available below.

                                                                                          You may also use Shields.io to create a custom badge linking to https://www.jsdocs.io/package/htmlparser2.

                                                                                          • Markdown
                                                                                            [![jsDocs.io](https://img.shields.io/badge/jsDocs.io-reference-blue)](https://www.jsdocs.io/package/htmlparser2)
                                                                                          • HTML
                                                                                            <a href="https://www.jsdocs.io/package/htmlparser2"><img src="https://img.shields.io/badge/jsDocs.io-reference-blue" alt="jsDocs.io"></a>