聚美优品作为国内知名的美妆护肤、生活消费电商平台,其商品详情数据(如价格、规格、库存、优惠信息等)是比价工具、导购平台、数据分析系统的核心数据源。由于聚美优品未公开官方开放 API,开发者需通过合规的页面解析或第三方服务实现商品详情(item_get)的获取。本文将系统讲解接口对接逻辑、技术实现、反爬应对及最佳实践,帮助开发者构建稳定的商品详情获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与工具)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import random
import re
import time
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class JumeiItemApi:
    """Fetch a Jumei product page and merge it with the SKU endpoint's data.

    The client downloads the item HTML page, extracts basic fields with CSS
    selectors, then calls a separate JSON endpoint for per-SKU information.
    The selectors and the JSON field names are assumptions about the site's
    markup; verify them against a live page before relying on the output.
    """

    # Placeholder cookie used when the caller does not supply one. Replace it
    # with a real cookie copied from a browser session.
    DEFAULT_COOKIE = "uid=anonymous; sid=xxx; _jme_tuid=xxx"

    def __init__(
        self,
        proxy_pool: Optional[List[str]] = None,
        cookie: Optional[str] = None,
    ):
        """Create a client.

        :param proxy_pool: proxy URLs such as ``["http://ip:port", ...]``;
            ``None`` or an empty list means requests are sent directly.
        :param cookie: value for the ``Cookie`` request header; defaults to
            ``DEFAULT_COOKIE`` when omitted.
        """
        self.base_url = "https://www.jumei.com/item/{item_id}.html"
        # Endpoint queried for SKU / stock data.
        self.sku_api = "https://www.jumei.com/ajax/item_sku?item_id={item_id}"
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.cookie = cookie if cookie is not None else self.DEFAULT_COOKIE

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a randomly chosen User-Agent."""
        return {
            "User-Agent": self.ua.random,
            "Referer": "https://www.jumei.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Cookie": self.cookie,
        }

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Pick a random proxy from the pool, or ``None`` if the pool is empty."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    @staticmethod
    def _text_of(soup, selector: str) -> str:
        """Return the stripped text of the first match, or ``""`` if none."""
        node = soup.select_one(selector)
        # Compare against None explicitly: a matched tag must count as found
        # even when it has no children.
        return node.text.strip() if node is not None else ""

    def _parse_static_data(self, html: str) -> Dict:
        """Extract title, brand, prices, image URLs and detail HTML from a page.

        Missing elements yield ``""`` (text fields), ``0.0`` (prices) or an
        empty list (images) rather than raising.
        """
        soup = BeautifulSoup(html, "lxml")
        detail_node = soup.select_one("div.product-detail-content")
        return {
            "title": self._text_of(soup, "h1.product-name"),
            "brand": self._text_of(soup, "a.brand-name"),
            "original_price": self._clean_price(self._text_of(soup, "span.original-price")),
            "current_price": self._clean_price(self._text_of(soup, "span.current-price")),
            "images": [
                img.get("src")
                for img in soup.select("div.product-img img")
                if img.get("src")
            ],
            # Raw HTML of the product detail section.
            "detail_html": str(detail_node) if detail_node is not None else "",
        }

    def _clean_price(self, price_str: str) -> float:
        """Convert a price string such as ``"¥1,299.00"`` to a float.

        Every character except digits and ``.`` is dropped. If what remains
        is not a valid number (empty, ``"."``, ``"1.2.3"``) the result is
        ``0.0`` instead of an exception.
        """
        if not price_str:
            return 0.0
        digits = re.sub(r"[^\d.]", "", price_str)
        try:
            return float(digits)
        except ValueError:
            return 0.0

    def _fetch_dynamic_data(
        self,
        item_id: str,
        headers: Dict[str, str],
        proxy: Optional[Dict[str, str]],
        timeout: int = 10,
    ) -> Dict:
        """Call the SKU endpoint and return its JSON payload.

        This step is best-effort: any failure is printed and ``{}`` is
        returned so the caller can still return the static page data.
        A payload that is not a JSON object is also treated as ``{}``.
        """
        try:
            url = self.sku_api.format(item_id=item_id)
            response = requests.get(url, headers=headers, proxies=proxy, timeout=timeout)
            response.raise_for_status()
            payload = response.json()
        except Exception as e:  # deliberate best-effort; see docstring
            print(f"动态接口获取失败: {str(e)}")
            return {}
        return payload if isinstance(payload, dict) else {}

    def _parse_dynamic_data(self, dynamic_data: Dict) -> Dict:
        """Normalise the SKU payload into ``specs``, ``total_stock``, ``total_sales``.

        Tolerates a non-dict payload, a missing or null ``sku_list``, and
        non-dict entries inside the list (those entries are skipped).
        """
        if not isinstance(dynamic_data, dict):
            dynamic_data = {}
        sku_list = dynamic_data.get("sku_list") or []
        parsed_specs = [
            {
                "sku_id": sku.get("sku_id"),
                "spec": sku.get("spec"),
                "price": sku.get("price"),
                "stock": sku.get("stock"),
                "sales": sku.get("sales"),
            }
            for sku in sku_list
            if isinstance(sku, dict)
        ]
        return {
            "specs": parsed_specs,
            "total_stock": dynamic_data.get("total_stock", 0),
            "total_sales": dynamic_data.get("total_sales", 0),
        }

    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """Fetch the details of one product.

        :param item_id: product id, e.g. ``"1000001234"``
        :param timeout: timeout in seconds applied to both HTTP requests
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": ...}``, with a ``code`` key for
            HTTP and unexpected errors
        """
        try:
            # 1. Build the URL and request the page.
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            # Random pause between requests to reduce the chance of being blocked.
            time.sleep(random.uniform(1, 3))
            response = requests.get(
                url=url,
                headers=headers,
                proxies=proxy,
                timeout=timeout,
            )
            response.raise_for_status()

            # 2. Parse the static page; an empty title is treated as "not found".
            static_data = self._parse_static_data(response.text)
            if not static_data["title"]:
                return {"success": False, "error_msg": "未找到商品信息,可能item_id错误或商品已下架"}

            # 3. Fetch and parse the SKU data (best-effort).
            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy, timeout)
            dynamic_parsed = self._parse_dynamic_data(dynamic_data)

            # 4. Merge everything into one record.
            return {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **static_data,
                    **dynamic_parsed,
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S"),
                },
            }
        except requests.exceptions.HTTPError as e:
            # Read the status from the exception's response. The exception
            # text includes the URL, so searching it for "403" would also
            # match an item_id that contains those digits.
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"获取失败: {str(e)}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Proxy pool (replace with working proxies).
    proxy_list = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080",
    ]

    # Create the API client.
    client = JumeiItemApi(proxy_pool=proxy_list)

    # Fetch the details of a sample item; replace the id with a real one.
    sample_item_id = "1000001234"
    outcome = client.item_get(sample_item_id)

    if not outcome["success"]:
        print(f"获取失败: {outcome['error_msg']}(错误码: {outcome.get('code')})")
    else:
        item = outcome["data"]
        print(f"商品标题: {item['title']}")
        print(f"品牌: {item['brand']}")
        print(f"价格: 原价 {item['original_price']}元 → 折扣价 {item['current_price']}元")
        print(f"总库存: {item['total_stock']}件 | 总销量: {item['total_sales']}件")
        print(f"主图数量: {len(item['images'])}张")
        print("\n规格信息:")
        # One line per SKU returned by the client.
        for sku in item["specs"]:
            print(f" {sku['spec']} → 价格: {sku['price']}元 | 库存: {sku['stock']}件 | 已售: {sku['sales']}件")