Skip to content

#CrawlOtherConfig

CrawlCommonConfig

ts
/**
 * Common crawl configuration.
 *
 * Every field is optional; each top-level field also accepts `null`.
 */
export interface CrawlCommonConfig {
  /**
   * Timeout value. Default: 10000.
   * NOTE(review): presumably milliseconds — confirm against the implementation.
   */
  timeout?: null | number

  /**
   * Proxy settings. Default: undefined.
   * NOTE(review): the `switchBy*` fields presumably decide when to move on to
   * the next entry in `urls` — confirm against the implementation.
   */
  proxy?: null | {
    urls: Array<string>
    switchByHttpStatus?: Array<number>
    switchByErrorCount?: number
  }

  /** Maximum number of retries. Default: 0. */
  maxRetry?: null | number
}

Default values

  • timeout: 10000
  • proxy: undefined
  • maxRetry: 0

DetailTargetFingerprintCommon

ts
/**
 * Common fingerprint fields for a detail target.
 *
 * Every field is optional and defaults to `undefined`.
 */
export interface DetailTargetFingerprintCommon {
  /** Default: undefined. */
  ua?: string

  /** One of '?0', '?1' or 'random'. Default: undefined. */
  mobile?: '?0' | '?1' | 'random'

  /** One of the `Platform` names. Default: undefined. */
  platform?: Platform

  /** Default: undefined. */
  platformVersion?: string

  /** Default: undefined. */
  acceptLanguage?: string

  /**
   * User-agent description: a required `value` plus an optional list of
   * named version ranges. Default: undefined.
   * NOTE(review): how `versions` is applied to `value` is not visible here —
   * confirm against the implementation.
   */
  userAgent?: {
    value: string
    versions?: Array<{
      name: string
      maxMajorVersion?: number
      minMajorVersion?: number
      maxMinorVersion?: number
      minMinorVersion?: number
      maxPatchVersion?: number
      minPatchVersion?: number
    }>
  }
}

Default values

  • ua: undefined

  • mobile: undefined
  • platform: undefined
  • platformVersion: undefined
  • acceptLanguage: undefined
  • userAgent: undefined

Mobile

ts
/**
 * The two literal values allowed for a "mobile" flag.
 * NOTE(review): these look like the boolean encoding used by the
 * `Sec-CH-UA-Mobile` client-hint header — confirm.
 */
export type Mobile =
  | '?0'
  | '?1'

Platform

ts
/** The platform names accepted wherever a `Platform` is expected. */
export type Platform =
  | 'Android' | 'Chrome OS' | 'Chromium OS' | 'iOS'
  | 'Linux' | 'macOS' | 'Windows' | 'Unknown'

PageCookies

ts
/**
 * Cookie input for a page: a plain string, a single cookie parameter object,
 * or an array of cookie parameter objects.
 *
 * `Protocol` is an external type that comes from puppeteer.
 */
export type PageCookies =
  | string
  | Protocol.Network.CookieParam
  | Array<Protocol.Network.CookieParam>

External type

  • Protocol: from puppeteer

Method

ts
/**
 * Accepted request method names.
 *
 * Each method is listed in both its lowercase and uppercase spelling;
 * mixed-case spellings are not part of the type.
 */
export type Method =
  | 'get' | 'GET'
  | 'delete' | 'DELETE'
  | 'head' | 'HEAD'
  | 'options' | 'OPTIONS'
  | 'post' | 'POST'
  | 'put' | 'PUT'
  | 'patch' | 'PATCH'
  | 'purge' | 'PURGE'
  | 'link' | 'LINK'
  | 'unlink' | 'UNLINK'

IntervalTime

ts
/**
 * An interval given either as a single number or as a range object with a
 * required `max` and an optional `min`.
 * NOTE(review): the unit and how a range is resolved to a concrete value are
 * not visible here — confirm against the implementation.
 */
export type IntervalTime =
  | number
  | {
      max: number
      min?: number
    }

CrawlCommonResult

ts
/** Fields common to the result of every crawl target. */
export interface CrawlCommonResult {
  /**
   * Generated according to the order of the crawl targets; if a priority is
   * set, generated according to the priority instead.
   */
  id: number

  /** Whether the crawl was successful. */
  isSuccess: boolean

  /** The maximum number of retries for this crawl target. */
  maxRetry: number

  /** The number of times this crawl target has been retried. */
  retryCount: number

  /** Record of the proxy status. */
  proxyDetails: ProxyDetails

  /** Collection of the errors reported for this crawl target. */
  crawlErrorQueue: Array<Error>
}
  • id: generated according to the order of the crawl targets. If a priority is set, it is generated according to the priority instead.
  • isSuccess: whether the crawl was successful
  • maxRetry: the maximum number of retries for this crawl target
  • retryCount: the number of times this crawl target has been retried
  • proxyDetails: a record of the proxy status
  • crawlErrorQueue: the collection of errors reported for this crawl target

Released under the MIT license