
API Design Principles: From Theory to Practice
- Automatic proxy IP rotation
- JavaScript rendering and CAPTCHA bypass
- A single, unified REST interface
- Multi-region marketplace support
- High reliability and scalability

These advantages make the Amazon Scraper API a natural first choice for building product price monitoring and dynamic pricing.
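The capabilities above are all exposed through query parameters on one REST endpoint. The sketch below illustrates the idea using only the parameters that appear in the examples later in this article (api_key, url, render, country_code); the key and ASIN are placeholders.

import requests

# Each capability is toggled via query parameters on a single GET endpoint;
# proxy rotation and CAPTCHA handling happen on the service side.
params = {
    "api_key": "YOUR_SCRAPER_API_KEY",              # authentication
    "url": "https://www.amazon.com/dp/B08N5WRWNW",  # target product page
    "render": "true",                               # enable JS rendering
    "country_code": "us",                           # geotargeting for multi-region support
}
resp = requests.get("https://api.scraperapi.com", params=params, timeout=60)
print(resp.status_code, len(resp.text))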
[Scheduler] → [Scraper API client] → [Data parser] → [Time-series database]
                                                               ↓
                                                   [Dynamic pricing engine]
                                                               ↓
                                                 [Amazon SP-API price update]
pip install requests beautifulsoup4 lxml aiohttp backoff influxdb-client pandas scikit-learn schedule boto3
- requests: basic HTTP calls.
- beautifulsoup4, lxml: HTML parsing.
- aiohttp, asyncio: asynchronous, high-concurrency scraping.
- backoff: exponential backoff and retries.
- influxdb-client: writing to the time-series database.
- pandas, scikit-learn: data processing and machine learning.
- schedule: simple job scheduling.
- boto3: calling AWS services, if you combine this with AWS Lambda or S3 storage.

import requests
from bs4 import BeautifulSoup
API_ENDPOINT = "https://api.scraperapi.com"
API_KEY = "YOUR_SCRAPER_API_KEY"
def fetch_price(asin, region="us"):
    url = f"https://www.amazon.com/dp/{asin}"
    params = {
        "api_key": API_KEY,
        "url": url,
        "render": "true",
        "country_code": region,
    }
    resp = requests.get(API_ENDPOINT, params=params, timeout=60)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")
    price_tag = soup.select_one(".a-price .a-offscreen")
    if price_tag is None:
        raise ValueError(f"Price element not found for ASIN {asin}")
    price = price_tag.get_text(strip=True)
    return float(price.replace("$", "").replace(",", ""))

if __name__ == "__main__":
    print(fetch_price("B08N5WRWNW"))
import asyncio
import aiohttp
import backoff
from bs4 import BeautifulSoup

SEM = asyncio.Semaphore(20)  # limit concurrent requests

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
async def fetch(session, asin):
    async with SEM:
        params = {
            "api_key": API_KEY,
            "url": f"https://www.amazon.com/dp/{asin}",
            "render": "true",
            "country_code": "us",
        }
        async with session.get(API_ENDPOINT, params=params,
                               timeout=aiohttp.ClientTimeout(total=60)) as resp:
            resp.raise_for_status()
            html = await resp.text()
        soup = BeautifulSoup(html, "lxml")
        price_text = soup.select_one(".a-price .a-offscreen").get_text(strip=True)
        return asin, float(price_text.replace("$", "").replace(",", ""))

async def batch_fetch(asins):
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, a) for a in asins]
        return await asyncio.gather(*tasks, return_exceptions=True)

# Usage example
# asins = ["B08N5WRWNW", "B09XYZ123"]
# results = asyncio.run(batch_fetch(asins))
from influxdb_client import InfluxDBClient, Point
from influxdb_client.client.write_api import SYNCHRONOUS

client = InfluxDBClient(url="http://localhost:8086", token="TOKEN", org="ORG")
write_api = client.write_api(write_options=SYNCHRONOUS)  # synchronous writes avoid needing an explicit flush

def write_to_influx(asin, price, ts):
    point = (
        Point("amazon_price")
        .tag("asin", asin)
        .field("price", price)
        .time(ts)
    )
    write_api.write(bucket="prices", record=point)
import pandas as pd

# Query the historical prices from InfluxDB (a query sketch follows this block)
# or load a CSV export; either way, assume the resulting DataFrame contains
# ['time', 'asin', 'price'].
df = pd.read_csv("historical_prices.csv", parse_dates=["time"])
df["hour"] = df["time"].dt.hour
df["weekday"] = df["time"].dt.weekday
# More features (e.g. the competitor price difference used below) can be added here.
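As an alternative to the CSV export, the price history can be pulled straight from InfluxDB. The sketch below is one way to do it with influxdb-client's query API, assuming the bucket ("prices") and measurement ("amazon_price") from the write step; the 30-day range is arbitrary.

import pandas as pd
from influxdb_client import InfluxDBClient

client = InfluxDBClient(url="http://localhost:8086", token="TOKEN", org="ORG")
flux = '''
from(bucket: "prices")
  |> range(start: -30d)
  |> filter(fn: (r) => r._measurement == "amazon_price" and r._field == "price")
  |> keep(columns: ["_time", "asin", "_value"])
'''
# query_data_frame returns a pandas DataFrame, or a list of them for multiple tables
frames = client.query_api().query_data_frame(flux)
if isinstance(frames, list):
    frames = pd.concat(frames, ignore_index=True)
df = frames.rename(columns={"_time": "time", "_value": "price"})[["time", "asin", "price"]]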
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# "competitor_diff" is assumed to have been added as a feature in the previous step.
features = ["hour", "weekday", "competitor_diff"]
X = df[features]
y = df["price"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
def dynamic_price(current, predicted):
    # Ignore predicted moves smaller than ±5% and cap any single
    # adjustment at ±10% of the current price.
    if predicted > current * 1.05:
        return min(predicted, current * 1.10)
    elif predicted < current * 0.95:
        return max(predicted, current * 0.90)
    return current
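Tying the two pieces together: build one feature row for the current moment, let the model predict a target price, and let dynamic_price cap the adjustment. The current price and competitor_diff values below are placeholders; in practice they would come from the latest scrape.

from datetime import datetime
import pandas as pd

current_price = 29.99            # placeholder: latest price from fetch_price()
now = datetime.now()
row = pd.DataFrame([{
    "hour": now.hour,
    "weekday": now.weekday(),
    "competitor_diff": -0.50,    # placeholder: our price minus the competitor's
}])
predicted = model.predict(row)[0]                   # model trained above
new_price = dynamic_price(current_price, predicted)
print(f"current={current_price:.2f} predicted={predicted:.2f} new={new_price:.2f}")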
import boto3

# Pseudo-example only: boto3's "pricing" client is the AWS Price List service,
# not Amazon's seller APIs. Real price updates go through the Amazon SP-API
# (e.g. the Listings Items or Feeds API) via an SP-API SDK.
client = boto3.client("pricing")

def update_price(asin, new_price):
    # Call the SP-API here to push the new price for the given ASIN/SKU.
    pass
Use the schedule package or Celery to run the scraping and repricing jobs on a fixed cadence (a minimal scheduling sketch is given below).

Centered on "using the Amazon Scraper API for price monitoring and dynamic pricing", this article has walked through the complete engineering workflow, from data scraping, parsing, and storage through the prediction model to automatic repricing and monitoring. With this solution, you can:
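The scheduling sketch below uses the schedule package. It assumes the functions from the earlier examples (fetch_price, write_to_influx) are importable from a local module; the module name pricing_pipeline, the watch list, and the 30-minute cadence are placeholders.

import time
from datetime import datetime, timezone

import schedule

from pricing_pipeline import fetch_price, write_to_influx  # hypothetical local module

WATCH_LIST = ["B08N5WRWNW"]  # ASINs to monitor

def monitor_job():
    # Scrape the current price of each watched ASIN and persist it.
    for asin in WATCH_LIST:
        try:
            price = fetch_price(asin)
            write_to_influx(asin, price, datetime.now(timezone.utc))
        except Exception as exc:
            print(f"{asin}: fetch failed: {exc}")

# Prediction and repricing can be scheduled the same way, e.g. hourly.
schedule.every(30).minutes.do(monitor_job)

while True:
    schedule.run_pending()
    time.sleep(1)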
Original source: YouTube video https://www.youtube.com/watch?app=desktop&v=pDjZ-1CmZAM