Skip to content

crawlFile

CrawlFileDetailTargetConfig

ts
/**
 * Detailed configuration object for one file-crawl target.
 *
 * It can be used as an entry of `CrawlFileAdvancedConfig.targets` in place of
 * a plain string. Extends `CrawlCommonConfig` (declared elsewhere), so every
 * option of that interface is accepted here as well.
 */
export interface CrawlFileDetailTargetConfig extends CrawlCommonConfig {
  /** The only required property. Presumably the address of the file to fetch — confirm. */
  url: string
  /** Optional; may also be `null`. Presumably request headers for this target — confirm. Listed default: undefined. */
  headers?: Object | null
  /** Optional numeric priority. NOTE(review): ordering semantics are not shown on this page. Listed default: undefined. */
  priority?: number
  /** Optional; may also be `null`. Listed default: `__dirname`. Presumably the directory the file is stored in — confirm. */
  storeDir?: string | null
  /**
   * Optional; may also be `null`.
   * NOTE(review): the default-value list shows `string` here rather than a concrete value — confirm the actual default.
   */
  fileName?: string | null
  /**
   * Optional; may also be `null`.
   * NOTE(review): the default-value list shows `string` here rather than a concrete value — confirm the actual default.
   */
  extension?: string | null
  /** Optional; may also be `null`. Typed by `DetailTargetFingerprintCommon` (declared elsewhere). Listed default: undefined. */
  fingerprint?: DetailTargetFingerprintCommon | null
}

Default values

  • url: undefined
  • headers: undefined
  • priority: undefined
  • storeDir: __dirname
  • fileName: string
  • extension: string
  • fingerprint: undefined

CrawlFileAdvancedConfig

ts
/**
 * Advanced configuration for crawling several file targets in one call.
 *
 * Extends `CrawlCommonConfig` (declared elsewhere), so every option of that
 * interface is accepted here as well.
 */
export interface CrawlFileAdvancedConfig extends CrawlCommonConfig {
  /** The only required property. Each entry is either a plain string or a `CrawlFileDetailTargetConfig` object. */
  targets: (string | CrawlFileDetailTargetConfig)[]
  /** Optional. Typed by `IntervalTime` (declared elsewhere). Listed default: undefined. */
  intervalTime?: IntervalTime
  /** Optional list of `DetailTargetFingerprintCommon` values (declared elsewhere). Listed default: undefined. */
  fingerprints?: DetailTargetFingerprintCommon[]
  /** Optional. Either a single string or an array whose entries may be `null`. Listed default: `__dirname`. */
  storeDirs?: string | (string | null)[]
  /**
   * Optional. Either a single string or an array whose entries may be `null`.
   * NOTE(review): the default-value list shows `string` here rather than a concrete value — confirm the actual default.
   */
  extensions?: string | (string | null)[]
  /** Optional. Array form only (no single-string variant); entries may be `null`. Listed default: undefined. */
  fileNames?: (string | null)[]

  /** Optional. Unlike the per-target `headers`, `null` is not part of this type. Listed default: undefined. */
  headers?: Object

  /**
   * Optional callback that receives one `CrawlFileSingleResult`; its return value is ignored (`void`).
   * Presumably invoked once per finished target — confirm.
   */
  onCrawlItemComplete?: (crawlFileSingleResult: CrawlFileSingleResult) => void
  /**
   * Optional callback that receives an info object (`id`, `fileName`, `filePath`, and the file
   * contents as a `Buffer`). It may return a `Buffer` or nothing, either directly or via a Promise.
   * Presumably a returned `Buffer` replaces the data that gets written — confirm.
   */
  onBeforeSaveItemFile?: (info: {
    id: number
    fileName: string
    filePath: string
    data: Buffer
  }) => Promise<Buffer | void> | Buffer | void
}

Default values

  • targets: undefined
  • intervalTime: undefined
  • fingerprints: undefined
  • storeDirs: __dirname
  • extensions: string
  • fileNames: undefined
  • headers: undefined
  • onCrawlItemComplete: undefined
  • onBeforeSaveItemFile: undefined

CrawlFileSingleResult

ts
/**
 * Result object for a single file-crawl target.
 *
 * Extends `CrawlCommonResult` (declared elsewhere), so it also carries every
 * field of that interface.
 */
export interface CrawlFileSingleResult extends CrawlCommonResult {
  /** Response and file details, or `null`. Presumably `null` when the crawl failed — confirm. */
  data: {
    /** Always present as a key, but its value may be `undefined`. */
    statusCode: number | undefined
    headers: IncomingHttpHeaders // IncomingHttpHeaders comes from node:http
    /** Nested details about the file itself. */
    data: {
      /** NOTE(review): whether this reflects the download, the save, or both is not shown on this page. */
      isSuccess: boolean
      fileName: string
      fileExtension: string
      mimeType: string
      /** Numeric size. Presumably in bytes — confirm. */
      size: number
      filePath: string
    }
  } | null
}

External type

  • IncomingHttpHeaders: imported from the Node.js `node:http` module

Released under the MIT license