一号店(现已并入京东,保留独立品牌运营)作为国内早期知名综合电商平台,聚焦超市生鲜、家居等快消品类,其商品详情数据(如产地溯源、保质期、会员价、配送时效等)对电商比价、供应链分析、消费趋势研究等场景仍有重要价值。由于平台无公开官方 API,开发者需通过页面解析实现商品详情(item_get)的获取。本文系统讲解接口对接逻辑、技术实现、反爬应对及数据解析要点,帮助开发者构建稳定的商品详情获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import random
import re
import time
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class YhdItemApi:
    """Scrape product details (``item_get``) from Yhd (yhd.com) item pages.

    Static fields (title, images, prices, origin, specs, ...) are parsed out
    of the item page HTML; stock, member price and sales figures are read
    from the page's AJAX endpoints. The CSS selectors and endpoint URLs used
    here are tied to the page layout this client was written against.
    """

    def __init__(self, proxy_pool: Optional[List[str]] = None, cookie: str = ""):
        """Create a client.

        :param proxy_pool: proxy URLs such as ``["http://ip:port", ...]``;
            ``None`` or an empty list means requests go out directly.
        :param cookie: logged-in ``Cookie`` header value; the member price
            endpoint is only queried when this is non-empty.
        """
        self.base_url = "https://item.yhd.com/item/{item_id}"
        # Stock endpoint
        self.stock_api = "https://item.yhd.com/ajax/getProductStock?productId={item_id}"
        # Member price endpoint
        self.member_price_api = "https://item.yhd.com/ajax/getMemberPrice?productId={item_id}"
        # Sales / comment count endpoint
        self.sales_api = "https://item.yhd.com/ajax/getSalesCount?productId={item_id}"
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.cookie = cookie

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent (plus Cookie if set)."""
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://www.yhd.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Pick a random proxy from the pool, or return ``None`` if there is none."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _clean_price(self, price_str: str) -> float:
        """Convert a price label such as ``"¥1,299.50"`` to a float.

        Everything except digits and dots is stripped first. If what remains
        is still not a valid number (e.g. a range like ``"12.5-15.0"``), the
        first number found in the text is used instead. Returns ``0.0`` when
        the text contains no number at all.
        """
        if not price_str:
            return 0.0
        digits = re.sub(r"[^\d.]", "", price_str)
        if not digits:
            return 0.0
        try:
            return float(digits)
        except ValueError:
            # Several numbers were squashed together; fall back to the first
            # standalone number, ignoring thousands separators.
            match = re.search(r"\d+(?:\.\d+)?", re.sub(r"[,,]", "", price_str))
            return float(match.group()) if match else 0.0

    def _parse_specs(self, spec_elements) -> Dict[str, str]:
        """Turn spec list items of the form ``key:value`` into a dict.

        Only items containing a full-width colon are kept; the text is split
        on the first one so values may themselves contain colons.
        """
        specs = {}
        for elem in spec_elements:
            text = elem.text.strip()
            if ":" in text:
                key, value = text.split(":", 1)
                specs[key.strip()] = value.strip()
        return specs

    @staticmethod
    def _select_text(soup, selector: str, label: str = "") -> str:
        """Return the stripped text of the first node matching *selector*.

        :param soup: parsed document (anything exposing ``select_one``).
        :param selector: CSS selector to look up.
        :param label: optional field label (e.g. ``"产地:"``) to remove.
        :return: the text, or ``""`` when no node matches.
        """
        node = soup.select_one(selector)
        if node is None:
            return ""
        text = node.text.strip()
        if label:
            text = text.replace(label, "").strip()
        return text

    def _parse_static_data(self, html: str) -> Dict:
        """Extract the fields that are present in the item page HTML itself.

        Missing nodes yield empty strings / ``0.0`` rather than raising.
        The ``url`` entry is a placeholder that the caller overwrites.
        """
        soup = BeautifulSoup(html, "lxml")
        text_of = self._select_text
        image_sources = (img.get("src") for img in soup.select("div.product-img img"))
        return {
            "title": text_of(soup, "h1.product-title"),
            "images": [src for src in image_sources if src],
            "price": {
                "current": self._clean_price(text_of(soup, "div.price-current")),
                "original": self._clean_price(text_of(soup, "div.price-original")),
            },
            "product": {
                "origin": text_of(soup, "div.origin-info", "产地:"),
                "shelf_life": text_of(soup, "div.shelf-life", "保质期:"),
                "specs": self._parse_specs(soup.select("div.spec-list li")),
                # Ingredient list (food items)
                "ingredients": text_of(soup, "div.ingredients-info", "成分表:"),
            },
            "brand": text_of(soup, "div.brand-name a"),
            "service": {
                "delivery_time": text_of(soup, "div.delivery-time"),
                "after_sale": text_of(soup, "div.after-sale"),
            },
            "url": self.base_url.format(item_id=""),
        }

    def _get_json(self, url: str, headers: Dict[str, str],
                  proxy: Optional[Dict[str, str]], timeout: int) -> Dict:
        """GET *url* and return its JSON object, or ``{}`` on any failure.

        Failures are reported on stdout and swallowed so that one broken
        endpoint does not discard data from the others.
        """
        try:
            resp = requests.get(url, headers=headers, proxies=proxy, timeout=timeout)
            resp.raise_for_status()
            payload = resp.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"动态数据获取失败: {str(e)}")
            return {}
        return payload if isinstance(payload, dict) else {}

    def _fetch_dynamic_data(self, item_id: str, headers: Dict[str, str],
                            proxy: Optional[Dict[str, str]], timeout: int = 10) -> Dict:
        """Query the AJAX endpoints for stock, member price and sales.

        Each endpoint is queried independently (best effort). Every key is
        always present in the result; fields that could not be fetched keep
        their defaults (``0`` / ``0.0`` / ``""``).

        :param item_id: product ID.
        :param headers: request headers to reuse.
        :param proxy: ``requests``-style proxies mapping, or ``None``.
        :param timeout: per-request timeout in seconds.
        """
        dynamic_data = {
            "stock": 0,
            "member_price": 0.0,
            "discount": "",
            "monthly_sales": 0,
            "comment_count": 0,
        }

        stock_data = self._get_json(
            self.stock_api.format(item_id=item_id), headers, proxy, timeout
        )
        dynamic_data["stock"] = stock_data.get("stockNum", 0)

        # Member price is only queried when a logged-in cookie was supplied.
        if self.cookie:
            member_data = self._get_json(
                self.member_price_api.format(item_id=item_id), headers, proxy, timeout
            )
            dynamic_data["member_price"] = member_data.get("memberPrice", 0.0)
            dynamic_data["discount"] = member_data.get("discount", "")

        sales_data = self._get_json(
            self.sales_api.format(item_id=item_id), headers, proxy, timeout
        )
        dynamic_data["monthly_sales"] = sales_data.get("monthlySales", 0)
        dynamic_data["comment_count"] = sales_data.get("commentCount", 0)
        return dynamic_data

    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """Fetch and normalise the details of one product.

        :param item_id: product ID (e.g. ``"12345678"``).
        :param timeout: timeout in seconds, applied to every HTTP request.
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": ...}`` with a ``code`` for
            HTTP errors (the status code) and unexpected failures (``-1``).
            This method never raises.
        """
        try:
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()

            # Random pause so consecutive calls are less likely to be throttled.
            time.sleep(random.uniform(1.5, 3))
            response = requests.get(url=url, headers=headers, proxies=proxy, timeout=timeout)
            response.raise_for_status()

            static_data = self._parse_static_data(response.text)
            static_data["url"] = url
            if not static_data["title"]:
                return {"success": False, "error_msg": "未找到商品信息,可能item_id错误或商品已下架"}

            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy, timeout)

            data = {"item_id": item_id, **static_data}
            data["price"] = {
                **static_data["price"],
                "member": dynamic_data["member_price"],
                "discount": dynamic_data["discount"],
            }
            data["trade"] = {
                "monthly_sales": dynamic_data["monthly_sales"],
                "comment_count": dynamic_data["comment_count"],
                "stock": dynamic_data["stock"],
            }
            data["update_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
            return {"success": True, "data": data}
        except requests.exceptions.HTTPError as e:
            # Use the real status code; matching "403" in the message would
            # also hit URLs / item IDs that merely contain those digits.
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            # Boundary handler: callers expect an error dict, not an exception.
            return {"success": False, "error_msg": f"获取失败: {str(e)}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Proxy pool (replace these with working proxies)
    PROXIES = ["http://123.45.67.89:8888", "http://98.76.54.32:8080"]
    # Logged-in cookie copied from a browser session (needed for member prices)
    COOKIE = "uuid=xxx; user_key=xxx; JSESSIONID=xxx"

    # Build the client and look up one product (replace with a real product ID)
    api = YhdItemApi(proxy_pool=PROXIES, cookie=COOKIE)
    item_id = "12345678"
    result = api.item_get(item_id)

    if not result["success"]:
        print(f"获取失败: {result['error_msg']}(错误码: {result.get('code')})")
    else:
        data = result["data"]
        print(f"商品标题: {data['title']}")
        print(f"品牌: {data['brand']} | 产地: {data['product']['origin']} | 保质期: {data['product']['shelf_life']}")
        print(f"价格: ¥{data['price']['current']} | 会员价: ¥{data['price']['member']}({data['price']['discount']}) | 原价: ¥{data['price']['original']}")
        print(f"库存: {data['trade']['stock']}件 | 月销: {data['trade']['monthly_sales']}件 | 评价: {data['trade']['comment_count']}条")
        print("核心规格:")
        # Show at most the first five spec entries
        leading_specs = list(data['product']['specs'].items())[:5]
        for key, value in leading_specs:
            print(f" {key}: {value}")
        print(f"配送时效: {data['service']['delivery_time']} | 售后服务: {data['service']['after_sale']}")