資料內容:
步驟1:定義數據結構(items.py)
import scrapy
def _format_usd(value):
    """Serialize a price as a dollar string with two decimals, e.g. "$12.50".

    ``None`` is passed through unchanged and string values are converted
    with ``float`` first, so an unset or text-valued price does not abort
    serialization with a format error. Other numeric types are formatted
    directly, exactly as the ``:.2f`` format spec handles them.

    Raises:
        ValueError: if a string value cannot be parsed as a number.
    """
    if value is None:
        return None
    if isinstance(value, str):
        value = float(value)
    return f"${value:.2f}"


class GlobalProductItem(scrapy.Item):
    """Scrapy item declaring the fields stored for one product record."""

    # Basic information
    name = scrapy.Field()
    sku = scrapy.Field()
    price = scrapy.Field()
    currency = scrapy.Field()
    source_site = scrapy.Field()

    # Timestamp
    crawl_time = scrapy.Field()

    # Post-processed field.
    # NOTE(review): the "$" prefix is fixed regardless of the `currency`
    # field; a field serializer presumably only sees this field's value --
    # confirm whether that is intended.
    normalized_price = scrapy.Field(serializer=_format_usd)

    # Location information
    ship_from_country = scrapy.Field()
    ship_to_countries = scrapy.Field()

    # Classification dimensions
    category = scrapy.Field()
    subcategory = scrapy.Field()

    # Flag fields
    discount_tag = scrapy.Field()
    is_out_of_stock = scrapy.Field()

    # Detail metadata (original comment partly garbled; presumably
    # "detail-page metadata" -- TODO confirm)
    product_url = scrapy.Field()
    image_urls = scrapy.Field()