Skip to content

CrawlOtherConfig

CrawlCommonConfig

ts
export interface CrawlCommonConfig {
  timeout?: number | null
  proxy?: {
    urls: string[]
    switchByHttpStatus?: number[]
    switchByErrorCount?: number
  } | null
  maxRetry?: number | null
}

默认值

  • timeout: 10000
  • proxy: undefined
  • maxRetry: 0

DetailTargetFingerprintCommon

ts
export interface DetailTargetFingerprintCommon {
  ua?: string
  mobile?: '?0' | '?1' | 'random'
  platform?: Platform
  platformVersion?: string
  acceptLanguage?: string
  userAgent?: {
    value: string
    versions?: {
      name: string
      maxMajorVersion?: number
      minMajorVersion?: number
      maxMinorVersion?: number
      minMinorVersion?: number
      maxPatchVersion?: number
      minPatchVersion?: number
    }[]
  }
}

默认值

  • ua: undefined
  • mobile: undefined
  • platform: undefined
  • platformVersion: undefined
  • acceptLanguage: undefined
  • userAgent: undefined

Mobile

ts
export type Mobile = '?0' | '?1'

Platform

ts
export type Platform =
  | 'Android'
  | 'Chrome OS'
  | 'Chromium OS'
  | 'iOS'
  | 'Linux'
  | 'macOS'
  | 'Windows'
  | 'Unknown'

PageCookies

ts
export type PageCookies =
  | string
  | Protocol.Network.CookieParam // Protocol 来自于 puppeteer
  | Protocol.Network.CookieParam[] // Protocol 来自于 puppeteer

外部类型

  • Protocol:来自于 puppeteer

Method

ts
export type Method =
  | 'get'
  | 'GET'
  | 'delete'
  | 'DELETE'
  | 'head'
  | 'HEAD'
  | 'options'
  | 'OPTIONS'
  | 'post'
  | 'POST'
  | 'put'
  | 'PUT'
  | 'patch'
  | 'PATCH'
  | 'purge'
  | 'PURGE'
  | 'link'
  | 'LINK'
  | 'unlink'
  | 'UNLINK'

IntervalTime

ts
export type IntervalTime = number | { max: number; min?: number }

CrawlCommonResult

ts
export interface CrawlCommonResult {
  id: number
  isSuccess: boolean
  maxRetry: number
  retryCount: number
  proxyDetails: ProxyDetails
  crawlErrorQueue: Error[]
}
  • id:根据爬取目标的顺序生成的,如果有优先级,则会根据优先级生成
  • isSuccess:是否成功爬取
  • maxRetry:该次爬取目标的最大重试次数
  • retryCount:该次爬取目标已经重试的次数
  • proxyDetails:记录代理情况
  • crawlErrorQueue:该次爬取目标的报错收集

基于 MIT 许可发布