crawlPage
CrawlPageDetailTargetConfig
ts
/**
 * Detailed configuration for one `crawlPage` target.
 *
 * NOTE(review): this is a documentation excerpt of the library's type;
 * confirm any change against the actual declaration before editing.
 */
export interface CrawlPageDetailTargetConfig extends CrawlCommonConfig {
// URL of the target.
url: string
// Request headers.
headers?: Object | null
// Cookies.
cookies?: PageCookies | null
// Priority of this target.
priority?: number
// Viewport size; the value is passed directly to page.setViewport.
viewport?: Viewport | null // Viewport comes from puppeteer
// Device fingerprint.
fingerprint?:
| (DetailTargetFingerprintCommon & {
maxWidth?: number
minWidth?: number
maxHeight?: number
// NOTE(review): `minHidth` looks like a typo of `minHeight` — confirm against the published type before renaming.
minHidth?: number
})
| null
}| 参数 | 类型 | 默认值 | 描述 |
|---|---|---|---|
| url | string | - | url |
| headers | Object \| null | - | 请求头 |
| cookies | PageCookies \| null | - | cookies |
| priority | number | - | 优先级 |
| viewport | Viewport \| null | - | 设置视口大小 |
| fingerprint | (DetailTargetFingerprintCommon & { maxWidth?: number; minWidth?: number; maxHeight?: number; minHidth?: number }) \| null | - | 设备指纹 |
外部类型
- Viewport:来自于 puppeteer ,viewport 会直接传给 page.setViewport 用于设置页面大小
CrawlPageAdvancedConfig
ts
/**
 * Advanced configuration for `crawlPage`: a list of targets plus optional
 * settings.
 *
 * NOTE(review): this is a documentation excerpt of the library's type;
 * confirm any change against the actual declaration before editing.
 */
export interface CrawlPageAdvancedConfig extends CrawlCommonConfig {
// Targets; each entry is either a string (presumably a URL — confirm) or a CrawlPageDetailTargetConfig.
targets: (string | CrawlPageDetailTargetConfig)[]
// Interval time.
intervalTime?: IntervalTime
// Device fingerprints.
fingerprints?: (DetailTargetFingerprintCommon & {
maxWidth?: number
minWidth?: number
maxHeight?: number
// NOTE(review): `minHidth` looks like a typo of `minHeight` — confirm against the published type before renaming.
minHidth?: number
})[]
// Request headers.
headers?: Object
// Cookies.
cookies?: PageCookies
// Viewport size; the value is passed directly to page.setViewport.
viewport?: Viewport // Viewport comes from puppeteer
// Lifecycle hook receiving a CrawlPageSingleResult; by its name, presumably called when one crawl item completes — confirm.
onCrawlItemComplete?: (crawlPageSingleResult: CrawlPageSingleResult) => void
}| 参数 | 类型 | 默认值 | 描述 |
|---|---|---|---|
| targets | (string \| CrawlPageDetailTargetConfig)[] | - | 目标 |
| intervalTime | IntervalTime | - | 间隔时间 |
| fingerprints | (DetailTargetFingerprintCommon & { maxWidth?: number; minWidth?: number; maxHeight?: number; minHidth?: number })[] | - | 设备指纹 |
| headers | Object | - | 请求头 |
| cookies | PageCookies | - | cookies |
| viewport | Viewport | - | 设置视口大小 |
| onCrawlItemComplete | (crawlPageSingleResult: CrawlPageSingleResult) => void | - | 生命周期 |
外部类型
- Viewport:来自于 puppeteer ,viewport 会直接传给 page.setViewport 用于设置页面大小
CrawlPageSingleResult
ts
/**
 * Shape of a single `crawlPage` result (the type received by
 * `onCrawlItemComplete`).
 */
export interface CrawlPageSingleResult extends CrawlCommonResult {
// Puppeteer objects associated with this result.
data: {
browser: Browser // Browser comes from puppeteer
response: HTTPResponse | null // HTTPResponse comes from puppeteer
page: Page // Page comes from puppeteer
}
}外部类型
- Browser:来自于 puppeteer ,https://pptr.dev/api/puppeteer.browser
- HTTPResponse:来自于 puppeteer ,https://pptr.dev/api/puppeteer.httpresponse
- Page:来自于 puppeteer ,https://pptr.dev/api/puppeteer.page