服装网(聚焦服饰、鞋包、配饰等时尚品类的电商平台)的商品详情数据(如尺码表、材质成分、穿搭建议、库存颜色等)对电商选品、比价工具、时尚趋势分析等场景具有重要价值。由于平台无公开官方 API,开发者需通过页面解析实现商品详情(item_get)的获取。本文系统讲解接口对接逻辑、技术实现、反爬应对及时尚品类特有字段解析,帮助开发者构建稳定的服装类商品数据获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import json
import random
import re
import time
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class FuzhuangItemApi:
    """Scrape product details (item_get) from fuzhuang.com product pages.

    The main product page is fetched and parsed with BeautifulSoup, then two
    AJAX endpoints (stock and size table) are queried and merged into one
    normalized result dictionary.
    """

    # First decimal number in a string, e.g. "199.00" in "¥199.00-299.00".
    _NUMBER_RE = re.compile(r"\d+(?:\.\d+)?|\.\d+")

    def __init__(self, proxy_pool: Optional[List[str]] = None, cookie: str = ""):
        """Create a client.

        :param proxy_pool: optional list of proxy URLs; one is picked at
            random per ``item_get`` call.
        :param cookie: optional raw Cookie header value sent with every
            request (login state).
        """
        self.base_url = "https://www.fuzhuang.com/item/{item_id}.html"
        self.stock_api = "https://www.fuzhuang.com/ajax/sku/stock"  # stock endpoint
        self.size_table_api = "https://www.fuzhuang.com/ajax/size/table"  # size-table endpoint
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.cookie = cookie

    # ------------------------------------------------------------------
    # Small parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _select_text(root, selector: str) -> str:
        """Return the stripped text of the first node matching *selector*.

        Returns an empty string when nothing matches, so callers never have
        to deal with ``None`` from ``select_one``.
        """
        node = root.select_one(selector)
        return node.text.strip() if node is not None else ""

    @classmethod
    def _first_number(cls, text: str) -> float:
        """Return the first decimal number found in *text*, or 0.0."""
        match = cls._NUMBER_RE.search(text or "")
        return float(match.group()) if match else 0.0

    @staticmethod
    def _as_int(value, default: int = 0) -> int:
        """Coerce *value* to int, returning *default* when that is impossible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    # ------------------------------------------------------------------
    # Request helpers
    # ------------------------------------------------------------------
    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent (and Cookie if set)."""
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://www.fuzhuang.com/category/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Pick a random proxy from the pool, or return None if the pool is empty."""
        if self.proxy_pool:
            proxy = random.choice(self.proxy_pool)
            return {"http": proxy, "https": proxy}
        return None

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------
    def _clean_price(self, price_str: str) -> float:
        """Convert a displayed price such as "¥1,299.00" to a float.

        Thousands separators are dropped and the first number in the string
        is used, so a range like "¥199.00-299.00" yields 199.0 instead of
        raising. Returns 0.0 when the string holds no number.
        """
        if not price_str:
            return 0.0
        without_separators = price_str.replace(",", "").replace(",", "")
        return self._first_number(without_separators)

    def _parse_size_table(self, size_data: Dict) -> List[Dict]:
        """Turn the size-table payload into a list of row dictionaries.

        ``size_data["data"]["table"]`` is read as a list of rows whose first
        row is the header; every following row is zipped with that header,
        giving e.g. ``[{"尺码": "S", "肩宽": "36cm"}, ...]``. Any missing or
        unexpectedly shaped level yields an empty list.
        """
        if not isinstance(size_data, dict):
            return []
        data = size_data.get("data")
        if not isinstance(data, dict):
            return []
        table = data.get("table") or []
        if not table:
            return []
        header = table[0]
        return [dict(zip(header, row)) for row in table[1:]]

    def _parse_static_data(self, html: str) -> Dict:
        """Parse the fields embedded in the main product page HTML.

        :param html: markup of the product page.
        :return: dict with title, images, price, brand, material, style tags,
            specs (colors and size names) and user feedback summary. Missing
            nodes produce empty strings / lists / zeros rather than errors.
        """
        soup = BeautifulSoup(html, "lxml")

        # Color options, each with its swatch image.
        color_list = []
        for color_item in soup.select("div.color-select .color-item"):
            swatch = color_item.select_one("img")
            color_list.append({
                "name": color_item.get("data-color") or "",
                "image": (swatch.get("src") if swatch is not None else "") or "",
            })

        # Size option names, in page order.
        size_names = [
            size.get("data-size")
            for size in soup.select("div.size-select .size-item")
            if size.get("data-size")
        ]

        # Material text with runs of whitespace collapsed to single spaces.
        material = re.sub(r"\s+", " ", self._select_text(soup, "div.material"))

        # Comment count: first run of digits, ignoring thousands separators.
        count_text = self._select_text(soup, "div.comment-count").replace(",", "")
        count_match = re.search(r"\d+", count_text)
        comment_count = int(count_match.group()) if count_match else 0

        def image_sources(selector: str) -> List[str]:
            return [img.get("src") for img in soup.select(selector) if img.get("src")]

        return {
            "title": self._select_text(soup, "h1.goods-title"),
            "images": {
                "main": image_sources("div.main-gallery img"),
                "detail": image_sources("div.detail-images img"),
                "style": image_sources("div.style-show img"),
            },
            "price": {
                "current": self._clean_price(self._select_text(soup, "div.price-current")),
                "original": self._clean_price(self._select_text(soup, "div.price-original")),
                "discount": self._select_text(soup, "div.discount-tag"),
                "member_price": self._clean_price(self._select_text(soup, "div.member-price")),
            },
            "brand": self._select_text(soup, "div.brand-name a"),
            "material": material,
            "style_tags": [tag.text.strip() for tag in soup.select("div.style-tags span")],
            "specs": {
                "colors": color_list,
                "size_names": size_names,
            },
            "user_feedback": {
                # Tolerates decorated text such as "4.8分"; 0.0 if absent.
                "rating": self._first_number(self._select_text(soup, "div.average-rating")),
                "comment_count": comment_count,
            },
        }

    def _parse_comments(self, html: str) -> List[Dict]:
        """Parse up to the first five user reviews found in the page HTML.

        Each entry holds the user info text, a body type such as
        "165cm/50kg" if present in that text, the purchased spec, the review
        content, a size hint extracted from the content, and image URLs.
        """
        soup = BeautifulSoup(html, "lxml")
        comments = []
        for item in soup.select("div.comment-item")[:5]:
            user_info = self._select_text(item, "div.user-info") or "匿名用户"

            # Body type, e.g. "165cm/50kg".
            body_match = re.search(r"\d+cm/\d+kg", user_info)
            body_type = body_match.group() if body_match else ""

            buy_spec = self._select_text(item, "div.buy-spec")
            content = self._select_text(item, "div.comment-content")

            # Size hint, e.g. "偏大一码". The pattern itself requires "码",
            # so no separate substring check is needed.
            suggest_match = re.search(r"(偏大|偏小|正常).*?码", content)
            size_suggest = suggest_match.group() if suggest_match else ""

            images = [
                img.get("src")
                for img in item.select("div.comment-images img")
                if img.get("src")
            ]
            comments.append({
                "user": user_info,
                "body_type": body_type,
                "buy_spec": buy_spec,
                "content": content,
                "size_suggest": size_suggest,
                "images": images,
            })
        return comments

    # ------------------------------------------------------------------
    # Dynamic endpoints
    # ------------------------------------------------------------------
    def _fetch_dynamic_data(
        self,
        item_id: str,
        headers: Dict[str, str],
        proxy: Optional[Dict[str, str]],
        timeout: int = 10,
    ) -> Dict:
        """Fetch stock and size-table data from the AJAX endpoints.

        Both requests are best-effort and independent: a failure of one is
        printed and leaves that part at its empty default without preventing
        the other from being fetched.

        :return: ``{"stock": {...}, "size_table": [...]}``.
        """
        dynamic_data = {
            "stock": {"skus": [], "total_stock": 0},
            "size_table": [],
        }
        params = {"item_id": item_id}

        # 1. Stock. Broad catch is deliberate: this data is optional.
        try:
            stock_resp = requests.get(
                self.stock_api,
                params=params,
                headers=headers,
                proxies=proxy,
                timeout=timeout,
            )
            stock_data = stock_resp.json()
            if (
                isinstance(stock_data, dict)
                and stock_data.get("code") == 0
                and isinstance(stock_data.get("data"), dict)
            ):
                dynamic_data["stock"] = stock_data["data"]
        except Exception as e:
            print(f"动态数据获取失败: {str(e)}")

        # 2. Size table, fetched even if the stock request failed.
        try:
            size_resp = requests.get(
                self.size_table_api,
                params=params,
                headers=headers,
                proxies=proxy,
                timeout=timeout,
            )
            dynamic_data["size_table"] = self._parse_size_table(size_resp.json())
        except Exception as e:
            print(f"动态数据获取失败: {str(e)}")

        return dynamic_data

    def _merge_specs_and_stock(self, static_specs: Dict, stock_data: Dict) -> List[Dict]:
        """Combine page colors/sizes with per-SKU stock.

        :param static_specs: ``{"colors": [{"name", "image"}], "size_names": [...]}``.
        :param stock_data: dict whose ``"skus"`` list holds entries with
            ``color``, ``size``, ``stock`` and ``sku_id`` keys.
        :return: one entry per color, each with a ``sizes`` list in page
            order; combinations without a SKU are reported as out of stock.
        """
        colors = static_specs.get("colors") or []
        size_names = static_specs.get("size_names") or []
        skus = (stock_data.get("skus") if isinstance(stock_data, dict) else None) or []

        # Index SKUs by (color, size); setdefault keeps the first occurrence.
        sku_index = {}
        for sku in skus:
            if isinstance(sku, dict):
                sku_index.setdefault((sku.get("color"), sku.get("size")), sku)

        merged_colors = []
        for color in colors:
            color_name = color["name"]
            sizes = []
            for size_name in size_names:
                sku = sku_index.get((color_name, size_name))
                if sku is None:
                    sizes.append({
                        "name": size_name,
                        "stock": 0,
                        "sku_id": "",
                        "available": False,
                    })
                    continue
                stock = sku.get("stock", 0)
                sizes.append({
                    "name": size_name,
                    "stock": stock,
                    "sku_id": sku.get("sku_id", ""),
                    # Coerce so a string stock such as "5" does not raise.
                    "available": self._as_int(stock) > 0,
                })
            merged_colors.append({
                "name": color_name,
                "image": color["image"],
                "sizes": sizes,
            })
        return merged_colors

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """Fetch and normalize the details of one product.

        :param item_id: product ID (e.g. "123456", "CLOTH202409").
        :param timeout: per-request timeout in seconds.
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": ..., ...}``. Errors are reported
            in the return value; this method does not raise.
        """
        try:
            # 1. Request the main page.
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            # Random delay before the request to reduce the chance of blocking.
            time.sleep(random.uniform(1.5, 3))
            response = requests.get(
                url=url,
                headers=headers,
                proxies=proxy,
                timeout=timeout,
            )
            response.raise_for_status()
            main_html = response.text

            # 2. Parse the static page data.
            static_data = self._parse_static_data(main_html)
            if not static_data["title"]:
                return {"success": False, "error_msg": "商品不存在或已下架"}

            # 3. Parse reviews and collect the non-empty size hints.
            comments = self._parse_comments(main_html)
            static_data["user_feedback"]["comments"] = comments
            static_data["user_feedback"]["size_suggestions"] = [
                c["size_suggest"] for c in comments if c["size_suggest"]
            ]

            # 4. Stock and size table from the AJAX endpoints.
            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy, timeout)

            # 5. Merge page specs with stock.
            merged_colors = self._merge_specs_and_stock(
                static_data["specs"],
                dynamic_data["stock"],
            )

            # 6. Assemble the result; "specs" below replaces the static one.
            return {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **static_data,
                    "specs": {
                        "colors": merged_colors,
                        "total_stock": dynamic_data["stock"].get("total_stock", 0),
                    },
                    "size_table": dynamic_data["size_table"],
                    "url": url,
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S"),
                },
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: matching "403" in the message would
            # also fire when the URL (item ID) merely contains those digits.
            # Compare with None explicitly; a Response is falsy for 4xx/5xx.
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"获取失败: {str(e)}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Proxy pool (replace with working proxies).
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080"
    ]
    # Login-state Cookie copied from a browser session.
    COOKIE = "session_id=xxx; user_id=xxx"
    # Create the API client.
    api = FuzhuangItemApi(proxy_pool=PROXIES, cookie=COOKIE)
    # Fetch product details (sample item_id; replace with a real one).
    item_id = "123456"
    result = api.item_get(item_id)
    if result["success"]:
        data = result["data"]
        print(f"商品标题: {data['title']}")
        print(f"价格: 当前¥{data['price']['current']} | 原价¥{data['price']['original']} | 优惠: {data['price']['discount']}")
        print(f"品牌: {data['brand']} | 材质: {data['material']} | 风格: {', '.join(data['style_tags'])}")
        print(f"用户评分: {data['user_feedback']['rating']}分 | 评价数: {data['user_feedback']['comment_count']}条")
        colors = data['specs']['colors']
        print(f"颜色选项: {', '.join([c['name'] for c in colors])}")
        # Only show the per-size sample when at least one color was parsed;
        # indexing [0] on an empty list would raise IndexError.
        if colors:
            print(f"可购规格示例({colors[0]['name']}):")
            for size in colors[0]['sizes'][:3]:
                status = "有货" if size['available'] else "无货"
                print(f" 尺码{size['name']}: {size['stock']}件({status})")
        print("核心尺码表:")
        # Print the first three rows; .get avoids a KeyError when a table
        # has no "尺码" column.
        for row in data['size_table'][:3]:
            print(f" {row.get('尺码', '')}: 肩宽{row.get('肩宽', '')} | 胸围{row.get('胸围', '')} | 衣长{row.get('衣长', '')}")
        print(f"用户尺码建议: {', '.join(data['user_feedback']['size_suggestions'][:3])}")
    else:
        print(f"获取失败: {result['error_msg']}(错误码: {result.get('code')})")