SMZDM (什么值得买) is a well-known Chinese consumer decision platform built around deal alerts, product reviews, and buying guides. Its product detail data (historical price curves, genuine user reviews, promotions, category recommendations) is valuable for price comparison, purchase decisions, and market analysis. Because the platform exposes no public official API, developers have to obtain product details (item_get) by parsing its pages. This article covers the integration logic, the technical implementation, anti-scraping countermeasures, and how to parse the consumer-decision-specific fields, helping developers build a stable product-detail retrieval system.
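As a preview of the page-parsing approach, here is a minimal sketch that fetches a product page by item ID and extracts one field with BeautifulSoup. The URL pattern and the h1.title selector mirror the ones used in the full implementation in section 4; both are assumptions that may need adjusting against the live page structure.

import requests
from bs4 import BeautifulSoup

def fetch_item_title(item_id: str) -> str:
    # Product detail pages follow the pattern https://www.smzdm.com/p/{item_id}/
    url = f"https://www.smzdm.com/p/{item_id}/"
    resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")
    title_el = soup.select_one("h1.title")  # selector taken from the full example below; verify on the live page
    return title_el.text.strip() if title_el else ""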
1. Interface Basics (Core Functions and Scenarios)
2. Pre-Integration Preparation (Environment and URL Structure)
3. Interface Call Flow (Based on Page Parsing)
4. Code Implementation Example (Python)
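The example below assumes Python 3.8+ and a handful of third-party packages: requests for HTTP, BeautifulSoup with the lxml parser for HTML parsing, and fake-useragent for randomized User-Agent strings. They can be installed with: pip install requests beautifulsoup4 lxml fake-useragent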
import requests
import time
import random
import re
import json
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from typing import Dict, List, Optional
class SmzdmItemApi:
    def __init__(self, proxy_pool: Optional[List[str]] = None, cookie: str = ""):
        self.base_url = "https://www.smzdm.com/p/{item_id}/"
        self.history_price_api = "https://www.smzdm.com/ajax/pc_product_history_price/"  # historical price endpoint
        self.qa_api = "https://www.smzdm.com/ajax/pc_ask_list/"  # Q&A endpoint
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool  # list of proxies
        self.cookie = cookie  # logged-in Cookie (required for the full price history)

    def _get_headers(self) -> Dict[str, str]:
        """Build randomized request headers."""
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://www.smzdm.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Pick a random proxy from the pool, if one is configured."""
        if self.proxy_pool:
            proxy = random.choice(self.proxy_pool)
            return {"http": proxy, "https": proxy}
        return None

    @staticmethod
    def _select_text(root, selector: str, default: str = "") -> str:
        """Return the stripped text of the first node matching a CSS selector, or the default."""
        el = root.select_one(selector)
        return el.text.strip() if el else default

    @staticmethod
    def _select_attr(root, selector: str, attr: str, default: str = "") -> str:
        """Return an attribute of the first node matching a CSS selector, or the default."""
        el = root.select_one(selector)
        return el.get(attr, default) if el else default

    def _clean_price(self, price_str: str) -> float:
        """Normalize a price string (strip ¥, commas, etc.)."""
        if not price_str:
            return 0.0
        price_str = re.sub(r"[^\d.]", "", price_str)
        return float(price_str) if price_str else 0.0

    def _parse_specs(self, spec_html: str) -> Dict[str, str]:
        """Parse the specification table into a key/value dict."""
        specs: Dict[str, str] = {}
        if not spec_html:
            return specs
        spec_soup = BeautifulSoup(spec_html, "lxml")
        for row in spec_soup.select("tr"):
            th = row.select_one("th")
            td = row.select_one("td")
            if th and td:
                specs[th.text.strip()] = td.text.strip()
        return specs
    def _parse_main_page(self, html: str) -> Dict:
        """Parse the basic information on the main product page."""
        soup = BeautifulSoup(html, "lxml")
        # Pros and cons extracted from the review summary
        advantages = [li.text.strip() for li in soup.select("div.advantage-list li")]
        disadvantages = [li.text.strip() for li in soup.select("div.disadvantage-list li")]
        # Specification table may be absent on some pages
        spec_el = soup.select_one("div.parameter-table")
        # Extract the digits from the comment-count text
        count_match = re.search(r"\d+", self._select_text(soup, "div.comment-tab .count", "0"))
        return {
            "title": self._select_text(soup, "h1.title"),
            "images": [img.get("src") for img in soup.select("div.main-img img") if img.get("src")],
            "price": {
                "current": self._clean_price(self._select_text(soup, "div.price-box .price")),
                "original": self._clean_price(self._select_text(soup, "div.price-box .origin-price")),
                "discount": self._select_text(soup, "div.coupon-info")
            },
            "product": {
                "brand": self._select_text(soup, "div.brand a"),
                "specs": self._parse_specs(str(spec_el) if spec_el else ""),
                "advantages": advantages,
                "disadvantages": disadvantages
            },
            "user_feedback": {
                "score": float(self._select_text(soup, "div.score-box .score", "0") or "0"),
                "comment_count": int(count_match.group()) if count_match else 0
            },
            "platform": {
                "source": self._select_text(soup, "div.buy-link a"),
                "buy_url": self._select_attr(soup, "div.buy-link a", "href")
            },
            "related": {
                "similar_items": [
                    {
                        "title": a.text.strip(),
                        "url": f"https://www.smzdm.com{a.get('href')}"
                    } for a in soup.select("div.similar-goods a")[:3]
                ]
            }
        }
    def _parse_comments(self, html: str) -> List[Dict]:
        """Parse the user comments embedded in the main page."""
        soup = BeautifulSoup(html, "lxml")
        comments = []
        for item in soup.select("div.comment-item")[:5]:  # first 5 comments
            comments.append({
                "user": self._select_text(item, "div.user-name", "Anonymous"),
                "rating": self._select_attr(item, "div.star", "data-score", "0"),
                "content": self._select_text(item, "div.comment-content")
            })
        return comments
    def _fetch_dynamic_data(self, item_id: str, headers: Dict[str, str],
                            proxy: Optional[Dict[str, str]]) -> Dict:
        """Call the dynamic endpoints for price history and Q&A."""
        dynamic_data = {
            "price_trend": [],
            "history_lowest": {"price": 0, "date": ""},
            "qa_list": []
        }
        try:
            # 1. Historical price
            price_params = {"item_id": item_id}
            price_resp = requests.get(
                self.history_price_api,
                params=price_params,
                headers=headers,
                proxies=proxy,
                timeout=10
            )
            price_data = price_resp.json()
            if price_data.get("error_code") == 0 and "data" in price_data:
                dynamic_data["price_trend"] = price_data["data"].get("price_list", [])
                dynamic_data["history_lowest"] = {
                    "price": price_data["data"].get("lowest_price", 0),
                    "date": price_data["data"].get("lowest_date", "")
                }
            # 2. User Q&A (first 3 entries)
            qa_params = {"item_id": item_id, "page": 1, "size": 3}
            qa_resp = requests.get(
                self.qa_api,
                params=qa_params,
                headers=headers,
                proxies=proxy,
                timeout=10
            )
            qa_data = qa_resp.json()
            if qa_data.get("error_code") == 0 and "data" in qa_data:
                dynamic_data["qa_list"] = [
                    {
                        "question": qa.get("ask_content", ""),
                        "answer": qa.get("answer_content", "")
                    } for qa in qa_data["data"].get("list", [])
                ]
        except Exception as e:
            print(f"Failed to fetch dynamic data: {e}")
        return dynamic_data
    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """
        Fetch an SMZDM product detail.
        :param item_id: product ID (e.g. 1234567)
        :param timeout: request timeout in seconds
        :return: normalized product data
        """
        try:
            # 1. Request the main page
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            # Random delay to reduce the chance of triggering anti-scraping
            time.sleep(random.uniform(1.5, 3))
            response = requests.get(
                url=url,
                headers=headers,
                proxies=proxy,
                timeout=timeout
            )
            response.raise_for_status()
            main_html = response.text
            # 2. Parse the main page
            main_data = self._parse_main_page(main_html)
            if not main_data["title"]:
                return {"success": False, "error_msg": "Product not found or delisted"}
            # 3. Parse user comments (partially embedded in the main page)
            comments = self._parse_comments(main_html)
            # 4. Fetch dynamic data (price history, Q&A)
            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy)
            # 5. Assemble the result
            result = {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **main_data,
                    "price": {
                        **main_data["price"],
                        "history_lowest": dynamic_data["history_lowest"],
                        "price_trend": dynamic_data["price_trend"]
                    },
                    "user_feedback": {
                        **main_data["user_feedback"],
                        "comments": comments,
                        "qa_list": dynamic_data["qa_list"]
                    },
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S")
                }
            }
            return result
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "Anti-scraping triggered; rotate the proxy or refresh the Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP error: {e}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"Fetch failed: {e}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Proxy pool (replace with working proxies)
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080"
    ]
    # Logged-in Cookie (copy from the browser; required for the full price history)
    COOKIE = "sess=xxx; user_id=xxx; device_id=xxx"
    # Initialize the API client
    api = SmzdmItemApi(proxy_pool=PROXIES, cookie=COOKIE)
    # Fetch a product detail (sample item_id)
    item_id = "1234567"  # replace with a real product ID
    result = api.item_get(item_id)
    if result["success"]:
        data = result["data"]
        print(f"Title: {data['title']}")
        print(f"Price: now ¥{data['price']['current']} | original ¥{data['price']['original']} | "
              f"all-time low ¥{data['price']['history_lowest']['price']} ({data['price']['history_lowest']['date']})")
        print(f"Source platform: {data['platform']['source']} | Buy link: {data['platform']['buy_url'][:50]}...")
        print(f"User score: {data['user_feedback']['score']} | Comments: {data['user_feedback']['comment_count']}")
        print(f"Top pros: {', '.join(data['product']['advantages'][:3])}")
        print(f"Top cons: {', '.join(data['product']['disadvantages'][:3])}")
        if data["user_feedback"]["comments"]:
            print(f"Featured comment: {data['user_feedback']['comments'][0]['content'][:50]}...")
        if data["user_feedback"]["qa_list"]:
            print(f"Top Q&A: {data['user_feedback']['qa_list'][0]['question'][:30]}? "
                  f"{data['user_feedback']['qa_list'][0]['answer'][:50]}...")
    else:
        print(f"Fetch failed: {result['error_msg']} (code: {result.get('code')})")
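In monitoring scenarios, item IDs are usually processed in batches. The sketch below is a minimal wrapper around the SmzdmItemApi class defined above; it adds a simple retry on 403 responses and a randomized pause between items. The retry count and delay ranges are illustrative values, not tuned recommendations.

def batch_item_get(api: SmzdmItemApi, item_ids: List[str], max_retries: int = 2) -> Dict[str, Dict]:
    """Fetch multiple items with basic retry and pacing to stay under anti-scraping thresholds."""
    results = {}
    for item_id in item_ids:
        for attempt in range(max_retries + 1):
            result = api.item_get(item_id)
            # Retry only when anti-scraping (403) is reported; other failures are returned as-is
            if result.get("success") or result.get("code") != 403:
                break
            time.sleep(random.uniform(5, 10))  # back off before retrying
        results[item_id] = result
        time.sleep(random.uniform(2, 4))  # pause between items
    return results

# Example: batch_item_get(api, ["1234567", "2345678"])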