义乌购(聚焦小商品批发的 B2B 电商平台)的商品详情数据(如批发价、起订量、供应商资质、物流信息等)对批发采购、供应链分析、市场调研等场景具有重要价值。由于平台无公开官方 API,开发者需通过页面解析实现商品详情(item_get)的获取。本文系统讲解接口对接逻辑、技术实现、批发场景适配及反爬应对,帮助开发者构建稳定的义乌购商品详情获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import json
import random
import re
import time
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class YiwugouItemApi:
    """Scrape product details from Yiwugou product pages.

    The main product page is parsed for static fields (title, images,
    prices, trade rules, supplier, logistics, spec options). Two AJAX
    endpoints are then queried for tiered prices and stock, and the
    results are merged into one dictionary by :meth:`item_get`.
    """

    def __init__(self, proxy_pool: Optional[List[str]] = None, cookie: str = ""):
        """Create a client.

        :param proxy_pool: optional list of proxy URLs; one is chosen at
            random per ``item_get`` call.
        :param cookie: optional raw ``Cookie`` header value sent with
            every request (a logged-in session cookie).
        """
        self.base_url = "https://www.yiwugou.com/product/detail/{item_id}.html"
        self.price_api = "https://www.yiwugou.com/ajax/product/price"  # tiered-price endpoint
        self.stock_api = "https://www.yiwugou.com/ajax/product/stock"  # stock endpoint
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.cookie = cookie

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent.

        The ``Cookie`` header is only included when a cookie was supplied.
        """
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://www.yiwugou.com/category/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Return a ``requests``-style proxies mapping, or ``None``.

        ``None`` is returned when no proxy pool was configured (or it is
        empty), which makes ``requests`` connect directly.
        """
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _clean_price(self, price_str: str) -> float:
        """Extract the first numeric price from a decorated price string.

        Currency symbols and unit suffixes are ignored and thousands
        separators are dropped. The first number is used, so a range such
        as ``"¥1.5-2.0"`` yields ``1.5`` instead of failing on the
        concatenated digits ``"1.52.0"``.

        :return: the parsed price, or ``0.0`` when no number is present.
        """
        if not price_str:
            return 0.0
        match = re.search(r"\d+(?:\.\d+)?|\.\d+", price_str.replace(",", ""))
        return float(match.group()) if match else 0.0

    def _clean_moq(self, moq_str: str) -> int:
        """Extract the minimum order quantity (first integer in the text).

        :return: the parsed quantity, or ``1`` when no digits are present.
        """
        if not moq_str:
            return 1
        match = re.search(r"\d+", moq_str)
        return int(match.group()) if match else 1

    def _parse_static_data(self, html: str) -> Dict:
        """Parse the static fields out of the product page HTML.

        Every selector lookup is None-safe: a missing element yields an
        empty string, an empty list, or the cleaner's default value.

        :param html: HTML text of the product detail page.
        :return: dict with ``title``, ``images``, ``price``, ``trade``,
            ``supplier``, ``logistics`` and ``specs`` sections.
        """
        soup = BeautifulSoup(html, "lxml")

        def text_of(selector: str) -> str:
            # Stripped text of the first match, "" when nothing matches.
            tag = soup.select_one(selector)
            return tag.text.strip() if tag is not None else ""

        def image_urls(selector: str) -> List[str]:
            # ``src`` of every matched <img> that actually has one.
            return [img.get("src") for img in soup.select(selector) if img.get("src")]

        # Spec options: prefer the data attribute, fall back to visible text.
        spec_list = [
            spec_item.get("data-spec") or spec_item.text.strip()
            for spec_item in soup.select("div.spec-select .spec-item")
        ]

        # Supplier contact details; keys are only set when the element exists.
        contact_info = {}
        phone_tag = soup.select_one("div.contact-phone")
        if phone_tag is not None:
            contact_info["phone"] = re.sub(r"\D", "", phone_tag.text.strip())
        wechat_tag = soup.select_one("div.contact-wechat")
        if wechat_tag is not None:
            # Drop the label whether it uses an ASCII or a full-width colon.
            wechat_text = wechat_tag.text.strip()
            contact_info["wechat"] = wechat_text.replace("微信:", "").replace("微信：", "")

        return {
            "title": text_of("h1.product-title"),
            "images": {
                "main": image_urls("div.main-gallery img"),
                "detail": image_urls("div.detail-images img"),
                "package": image_urls("div.package-images img"),
            },
            "price": {
                "wholesale_basic": self._clean_price(text_of("div.price-wholesale")),
                "retail": self._clean_price(text_of("div.price-retail")),
            },
            "trade": {
                "moq": self._clean_moq(text_of("div.moq")),
                "mix_batch": text_of("div.mix-batch"),
                "delivery_time": text_of("div.delivery-time"),
            },
            "supplier": {
                "name": text_of("div.shop-name a"),
                "location": text_of("div.shop-location"),
                "credit": text_of("div.credit-level"),
                "contact": contact_info,
            },
            "logistics": {
                "default_express": text_of("div.default-express"),
                "freight_rule": text_of("div.freight-rule"),
            },
            "specs": {
                "options": spec_list,
            },
        }

    def _fetch_api_payload(
        self,
        api_url: str,
        item_id: str,
        headers: Dict[str, str],
        proxy: Optional[Dict[str, str]],
        timeout: int,
    ) -> Dict:
        """GET one AJAX endpoint and return its ``data`` payload.

        Best-effort: any failure (network, non-JSON body, unexpected
        shape) is printed and an empty dict is returned, so the caller can
        fall back to defaults.
        """
        try:
            resp = requests.get(
                api_url,
                params={"item_id": item_id},
                headers=headers,
                proxies=proxy,
                timeout=timeout,
            )
            body = resp.json()
            if body.get("status") == 1 and isinstance(body.get("data"), dict):
                return body["data"]
        except Exception as e:
            print(f"动态数据获取失败: {str(e)}")
        return {}

    def _fetch_dynamic_data(
        self,
        item_id: str,
        headers: Dict[str, str],
        proxy: Dict[str, str],
        timeout: int = 10,
    ) -> Dict:
        """Fetch tiered prices and stock from the AJAX endpoints.

        The two requests are independent: a failure of the price request
        no longer prevents the stock request from being made.

        :param item_id: product id sent as the ``item_id`` query parameter.
        :param headers: headers to send with both requests.
        :param proxy: ``requests`` proxies mapping, or ``None``.
        :param timeout: per-request timeout in seconds.
        :return: dict with ``price_ladder`` (list), ``stock`` (total) and
            ``spec_stock`` (mapping of spec name to stock); defaults are
            ``[]``, ``0`` and ``{}`` when a request fails.
        """
        price_payload = self._fetch_api_payload(self.price_api, item_id, headers, proxy, timeout)
        stock_payload = self._fetch_api_payload(self.stock_api, item_id, headers, proxy, timeout)
        return {
            "price_ladder": price_payload.get("price_ladder") or [],
            "stock": stock_payload.get("total_stock", 0),
            "spec_stock": stock_payload.get("spec_stock") or {},
        }

    def _merge_specs_and_price(
        self, static_specs: List[str], price_ladder: List[Dict], spec_stock: Dict
    ) -> List[Dict]:
        """Attach stock and tiered prices to each spec option.

        If the ladder contains tiers tagged with this spec (``"spec"``
        key), only those tiers are used; otherwise the spec shares the
        whole ladder. ``ladder_price`` is always a list, so consumers can
        slice and iterate it uniformly.

        :return: one ``{"name", "stock", "ladder_price"}`` dict per spec.
        """
        price_ladder = price_ladder or []
        spec_stock = spec_stock or {}
        merged_specs = []
        for spec in static_specs:
            own_tiers = [
                tier
                for tier in price_ladder
                if isinstance(tier, dict) and tier.get("spec") == spec
            ]
            merged_specs.append({
                "name": spec,
                "stock": spec_stock.get(spec, 0),
                # Copy the shared ladder so specs do not alias one list.
                "ladder_price": own_tiers or list(price_ladder),
            })
        return merged_specs

    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """Fetch and normalise the details of one product.

        :param item_id: product id (e.g. ``"12345678"``).
        :param timeout: timeout in seconds for each HTTP request.
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": ...}`` with a ``code`` for
            HTTP (status code) and unexpected (``-1``) failures.
        """
        try:
            # 1. Request the main page.
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            # Random delay between requests to reduce the chance of being blocked.
            time.sleep(random.uniform(2, 4))
            response = requests.get(
                url=url,
                headers=headers,
                proxies=proxy,
                timeout=timeout,
            )
            response.raise_for_status()

            # 2. Parse static data; an empty title is treated as "no such item".
            static_data = self._parse_static_data(response.text)
            if not static_data["title"]:
                return {"success": False, "error_msg": "商品不存在或已下架"}

            # 3. Dynamic data (tiered prices, stock).
            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy, timeout)

            # 4. Combine spec options with prices and stock.
            merged_specs = self._merge_specs_and_price(
                static_data["specs"]["options"],
                dynamic_data["price_ladder"],
                dynamic_data["spec_stock"],
            )

            # 5. Assemble the result; later keys override the static ones.
            return {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **static_data,
                    "price": {
                        **static_data["price"],
                        "ladder": dynamic_data["price_ladder"],
                    },
                    "specs": merged_specs,
                    "total_stock": dynamic_data["stock"],
                    "url": url,
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S"),
                },
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: the exception text contains the URL,
            # so a substring test for "403" can match the item id instead.
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"获取失败: {str(e)}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Proxy pool (replace with working proxies)
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080",
    ]
    # Logged-in session cookie (copied from a browser; used for full prices)
    COOKIE = "PHPSESSID=xxx; user_id=xxx"
    # Create the API client
    api = YiwugouItemApi(proxy_pool=PROXIES, cookie=COOKIE)
    # Fetch product details (sample item_id; replace with a real product id)
    item_id = "12345678"
    result = api.item_get(item_id)
    if result["success"]:
        data = result["data"]
        print(f"商品标题: {data['title']}")
        print(f"价格信息: 基础批发价¥{data['price']['wholesale_basic']} | 市场价¥{data['price']['retail']}")
        print("阶梯价:")
        for ladder in data['price']['ladder'][:3]:
            print(f" {ladder['quantity']}件及以上: ¥{ladder['price']}/{ladder['unit']}")
        print(f"交易规则: 起订量{data['trade']['moq']}件 | {data['trade']['mix_batch']} | 发货时效: {data['trade']['delivery_time']}")
        print(f"供应商: {data['supplier']['name']} | 商位: {data['supplier']['location']} | 诚信等级: {data['supplier']['credit']}")
        print(f"联系方式: 电话{data['supplier']['contact'].get('phone', '未知')} | 微信{data['supplier']['contact'].get('wechat', '未知')}")
        print("规格与库存:")
        for spec in data['specs'][:3]:
            # Built separately: reusing the outer quote character inside a
            # nested f-string is a syntax error before Python 3.12.
            ladder_summary = [
                f"{tier['quantity']}件¥{tier['price']}" for tier in spec['ladder_price'][:2]
            ]
            print(f" {spec['name']}: 库存{spec['stock']}件 | 阶梯价: {ladder_summary}")
        print(f"物流: 默认快递{data['logistics']['default_express']} | 运费规则: {data['logistics']['freight_rule']}")
    else:
        print(f"获取失败: {result['error_msg']}(错误码: {result.get('code')})")