Skip to content

crawlHTML

CrawlHTMLDetailTargetConfig

ts
/**
 * Detailed configuration for a single crawlHTML target.
 *
 * Inherits every field of CrawlCommonConfig (declared elsewhere). Only `url`
 * is required; every other field declared here is optional.
 */
export interface CrawlHTMLDetailTargetConfig extends CrawlCommonConfig {
  /** Required string. NOTE(review): presumably the address of the page to crawl — confirm. */
  url: string
  /** Optional headers object; the type also accepts `null`. NOTE(review): presumably request headers for this target — confirm. */
  headers?: Object | null
  /** Optional numeric priority. NOTE(review): ordering semantics (higher-first vs. lower-first) are not shown here — confirm. */
  priority?: number
  /** Optional fingerprint for this target (type declared elsewhere); the type also accepts `null`. */
  fingerprint?: DetailTargetFingerprintCommon | null
}

Default values

  • url: undefined (required; has no default)
  • headers: undefined
  • priority: undefined
  • fingerprint: undefined

CrawlHTMLAdvancedConfig

ts
/**
 * Advanced configuration for crawlHTML, describing a list of targets.
 *
 * Inherits every field of CrawlCommonConfig (declared elsewhere). Only
 * `targets` is required; every other field declared here is optional.
 */
export interface CrawlHTMLAdvancedConfig extends CrawlCommonConfig {
  /** Required. Each entry is either a plain string or a CrawlHTMLDetailTargetConfig object. */
  targets: (string | CrawlHTMLDetailTargetConfig)[]
  /** Optional IntervalTime value (type declared elsewhere). NOTE(review): presumably the delay between targets — confirm. */
  intervalTime?: IntervalTime
  /** Optional list of fingerprints. NOTE(review): how an entry is assigned to a target is not shown here — confirm. */
  fingerprints?: DetailTargetFingerprintCommon[]

  /** Optional headers object; unlike the per-target `headers`, this type does not accept `null`. NOTE(review): presumably shared by all targets — confirm. */
  headers?: Object

  /** Optional callback that takes one CrawlHTMLSingleResult; its return type is `void`. NOTE(review): presumably invoked once per finished target — confirm. */
  onCrawlItemComplete?: (crawlDataSingleResult: CrawlHTMLSingleResult) => void
}

Default values

  • targets: undefined (required; has no default)
  • intervalTime: undefined
  • fingerprints: undefined
  • headers: undefined
  • onCrawlItemComplete: undefined

CrawlHTMLSingleResult

ts
/**
 * Result for a single crawlHTML target.
 *
 * Inherits every field of CrawlCommonResult (declared elsewhere) and adds
 * the `data` payload declared below.
 */
export interface CrawlHTMLSingleResult extends CrawlCommonResult {
  /** Response payload, or `null`. NOTE(review): presumably `null` when the crawl did not succeed — confirm. */
  data: {
    /** Numeric status code; the type allows `undefined`. */
    statusCode: number | undefined
    headers: IncomingHttpHeaders // IncomingHttpHeaders comes from node:http
    /** HTML as a string. NOTE(review): presumably the raw response body — confirm. */
    html: string
  } | null
}

External type

  • IncomingHttpHeaders: exported by the Node.js `node:http` module

Released under the MIT license