Souhaohuo (搜好货) is a well-known Chinese B2B e-commerce platform for industrial goods, focused on categories such as machinery, raw materials, and hardware tools. Its product detail data (wholesale prices, specification parameters, supplier qualifications, minimum order quantities, and so on) is the core input that industrial buyers rely on for price comparison, supplier screening, and supply-chain analysis. Because Souhaohuo has no public official API, developers have to obtain product details (item_get) through page parsing or third-party services. This article walks through the integration logic, the technical implementation, anti-scraping countermeasures, and best practices, to help developers build a stable system for retrieving industrial product detail data.
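For orientation, the sketch below shows the normalized structure that the item_get implementation in Section 4 assembles. The field names come from that code; the values here are illustrative placeholders only, and a few secondary fields (images, payment_methods, detail_html) are omitted for brevity.

# Illustrative shape of a normalized item_get result (placeholder values only)
example_result = {
    "success": True,
    "data": {
        "item_id": "1234567",
        "title": "...",
        "price": {"wholesale": 0.0, "market": 0.0, "gradient": [], "min_order": ""},
        "specs": {},          # specification table: parameter name -> value
        "supplier": {"name": "", "location": "", "capital": "", "established": "", "certifications": []},
        "trade": {"total_sold": 0, "records": []},
        "update_time": "2025-01-01 00:00:00"
    }
}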
1. Interface Basics (Core Features and Scenarios)
2. Preparation Before Integration (Environment and Tools)
3. Call Flow (Based on Page Parsing)
4. Code Example (Python)
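The client below wraps the full flow: it requests the static detail page with randomized headers, an optional proxy, and a random delay; parses the title, tiered prices, specification table, and supplier information from the HTML; pulls transaction records from the platform's dynamic endpoint; and merges everything into the normalized structure shown above.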
import requests
import time
import random
import re
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from typing import Dict, List, Optional
class SouhaohuoItemApi:
    def __init__(self, proxy_pool: Optional[List[str]] = None):
        self.base_url = "https://www.912688.com/chanpin/{item_id}.html"
        self.trade_api = "https://www.912688.com/ajax/trade_records?item_id={item_id}"  # transaction-records endpoint
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool  # proxy pool, e.g. ["http://ip:port", ...]

    def _get_headers(self) -> Dict[str, str]:
        """Build randomized request headers."""
        return {
            "User-Agent": self.ua.random,
            "Referer": "https://www.912688.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Cookie": "PHPSESSID=xxx; user_id=anonymous; Hm_lvt_xxx=xxx"  # replace with a real Cookie
        }

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Pick a random proxy from the pool, or None when no pool is configured."""
        if self.proxy_pool:
            proxy = random.choice(self.proxy_pool)
            return {"http": proxy, "https": proxy}
        return None

    def _parse_price_gradient(self, gradient_texts: List[str]) -> List[Dict]:
        """Parse tiered prices such as "1-10台:¥12000"."""
        gradient_list = []
        for text in gradient_texts:
            # Match the quantity range and price; accept both full-width and ASCII colons
            match = re.match(r"(\d+)(?:-(\d+))?[台个件][::]¥?(\d+(?:\.\d+)?)", text)
            if match:
                min_qty = int(match.group(1))
                max_qty = int(match.group(2)) if match.group(2) else None
                price = float(match.group(3))
                gradient_list.append({
                    "min_quantity": min_qty,
                    "max_quantity": max_qty,
                    "price": price
                })
        return gradient_list

    def _parse_specs(self, spec_table) -> Dict[str, str]:
        """Parse the specification table (the key fields for industrial goods)."""
        specs = {}
        if not spec_table:
            return specs
        for row in spec_table.select("tr"):
            th = row.select_one("th")
            td = row.select_one("td")
            if th and td:
                key = th.text.strip()
                value = td.text.strip()
                if key and value:
                    specs[key] = value
        return specs

    def _parse_static_data(self, html: str) -> Dict:
        """Parse the basic product information embedded in the static HTML."""
        soup = BeautifulSoup(html, "lxml")

        def text_of(selector: str) -> str:
            # Stripped text of the first matching node, or "" when the node is missing
            node = soup.select_one(selector)
            return node.text.strip() if node else ""

        def price_of(selector: str) -> float:
            # Numeric price extracted from the matching node, defaulting to 0
            cleaned = re.sub(r"[^\d.]", "", text_of(selector))
            return float(cleaned) if cleaned else 0.0

        # Price gradient (tiered wholesale prices)
        gradient_texts = [li.text.strip() for li in soup.select("div.price-gradients li")]
        gradient_list = self._parse_price_gradient(gradient_texts)
        # Specification table
        spec_table = soup.select_one("table.spec-table")
        specs = self._parse_specs(spec_table)
        # Supplier certifications
        certifications = [img.get("alt", "").strip() for img in soup.select("div.certifications img") if img.get("alt")]
        return {
            "title": text_of("h1.product-title"),
            "images": [img.get("src") for img in soup.select("div.product-gallery img") if img.get("src")],
            "price": {
                "wholesale": price_of("div.price-box .wholesale-price"),
                "market": price_of("div.price-box .market-price"),
                "gradient": gradient_list,
                "min_order": text_of("div.min-order")
            },
            "specs": specs,
            "supplier": {
                "name": text_of("div.company-name a"),
                "location": text_of("div.company-location"),
                "capital": text_of("div.registered-capital"),
                "established": text_of("div.established-year"),
                "certifications": certifications
            },
            "payment_methods": [span.text.strip() for span in soup.select("div.payment-methods span")],
            "detail_html": str(soup.select_one("div.product-detail") or "")
        }

    def _fetch_trade_data(self, item_id: str, headers: Dict[str, str], proxy: Optional[Dict[str, str]]) -> Dict:
        """Call the dynamic endpoint that returns transaction records."""
        try:
            url = self.trade_api.format(item_id=item_id)
            response = requests.get(url, headers=headers, proxies=proxy, timeout=10)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Failed to fetch transaction records: {e}")
            return {"records": [], "total_sold": 0}

    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """
        Fetch Souhaohuo product details.
        :param item_id: product ID (e.g. 1234567)
        :param timeout: request timeout in seconds
        :return: normalized product data
        """
        try:
            # 1. Build the URL and send the request
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            # Random delay to reduce the chance of triggering anti-scraping checks
            time.sleep(random.uniform(2, 4))
            response = requests.get(
                url=url,
                headers=headers,
                proxies=proxy,
                timeout=timeout
            )
            response.raise_for_status()
            html = response.text
            # 2. Parse the static page data
            static_data = self._parse_static_data(html)
            if not static_data["title"]:
                return {"success": False, "error_msg": "Product not found; the item_id may be wrong or the listing removed"}
            # 3. Fetch and parse the transaction records
            trade_data = self._fetch_trade_data(item_id, headers, proxy)
            # 4. Merge the results
            result = {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **static_data,
                    "trade": {
                        "total_sold": trade_data.get("total_sold", 0),
                        "records": trade_data.get("records", [])
                    },
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S")
                }
            }
            return result
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "Anti-scraping check triggered; switch proxy or refresh the Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP error: {e}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"Fetch failed: {e}", "code": -1}

# Usage example
if __name__ == "__main__":
    # Proxy pool (replace with working proxies)
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080"
    ]
    # Initialize the API client
    api = SouhaohuoItemApi(proxy_pool=PROXIES)
    # Fetch product details (sample item_id)
    item_id = "1234567"  # replace with a real product ID
    result = api.item_get(item_id)
    if result["success"]:
        data = result["data"]
        print(f"Title: {data['title']}")
        print(f"Supplier: {data['supplier']['name']} | Location: {data['supplier']['location']}")
        print(f"Certifications: {','.join(data['supplier']['certifications'])} | Registered capital: {data['supplier']['capital']}")
        print(f"Wholesale price: {data['price']['wholesale']} yuan | Market price: {data['price']['market']} yuan | MOQ: {data['price']['min_order']}")
        print("Price gradient:")
        for grad in data['price']['gradient']:
            max_qty = grad['max_quantity'] if grad['max_quantity'] else "∞"
            print(f"  {grad['min_quantity']}-{max_qty} units: {grad['price']} yuan/unit")
        print("\nKey specifications:")
        # Print the first 5 specification entries (industrial goods usually have many)
        for key, value in list(data['specs'].items())[:5]:
            print(f"  {key}: {value}")
        print(f"\nTransactions: {data['trade']['total_sold']} units sold | Payment methods: {','.join(data['payment_methods'])}")
    else:
        print(f"Fetch failed: {result['error_msg']} (code: {result.get('code')})")