洋码头作为国内知名跨境电商平台,聚焦海外正品直购,商品覆盖美妆护肤、奢侈品、母婴用品等品类,其商品详情数据(如海外直邮信息、关税说明、采购地溯源、版本差异等)对跨境电商比价、正品验证、消费趋势分析等场景具有核心价值。由于平台无公开官方 API,开发者需通过页面解析实现商品详情(item_get)的获取。本文系统讲解接口对接逻辑、技术实现、反爬应对及跨境特有字段解析,帮助开发者构建稳定的跨境商品数据获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import requests
import time
import random
import re
import json
import hashlib
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from typing import Dict, List
class YangmatouItemApi:
    """Page-scraping client that assembles product details for one item.

    The client requests the product main page, a separate detail page holding
    the spec table, and two signed JSON endpoints (stock and freight), then
    merges everything into a single normalized dictionary (see ``item_get``).

    NOTE(review): all URLs, CSS selectors and the signing scheme are the
    values written in this file; they have to be confirmed against the live
    site before use.
    """

    def __init__(self, proxy_pool: List[str] = None, cookie: str = "", secret_key: str = "xxx"):
        """Store endpoints, proxy pool, login cookie and signing key.

        :param proxy_pool: optional list of proxy URLs; one is picked at
            random per ``item_get`` call. ``None``/empty means no proxy.
        :param cookie: raw ``Cookie`` header value; omitted when empty.
        :param secret_key: key appended to the parameter string before MD5
            signing. Defaults to the original placeholder ``"xxx"`` and must
            be replaced with the real key for signed calls to succeed.
        """
        self.base_url = "https://www.yangmatou.com/goods/{item_id}.html"
        self.detail_iframe_url = "https://detail.yangmatou.com/{item_id}.html"  # spec/parameter iframe
        self.stock_api = "https://api.yangmatou.com/goods/stock"  # stock endpoint
        self.freight_api = "https://api.yangmatou.com/goods/freight"  # freight endpoint
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool  # list of proxy URLs
        self.cookie = cookie  # logged-in session cookie
        self.secret_key = secret_key  # signing key (placeholder unless supplied)

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent.

        The ``Cookie`` header is only added when a cookie was configured.
        """
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://www.yangmatou.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "X-Requested-With": "XMLHttpRequest"  # sent for the dynamic endpoints
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Dict[str, str]:
        """Return a ``requests``-style proxies mapping, or ``None``.

        A random entry of the pool is used for both ``http`` and ``https``.
        ``None`` is returned when the pool is unset or empty.
        """
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _generate_sign(self, params: Dict[str, str]) -> str:
        """Sign ``params``: sort by key, join as ``k=v&...``, append the key, MD5.

        NOTE(review): this is the example scheme from the original code; the
        real algorithm has to be confirmed against the site's JavaScript.
        """
        sorted_params = sorted(params.items(), key=lambda x: x[0])
        sign_str = "&".join([f"{k}={v}" for k, v in sorted_params]) + self.secret_key
        return hashlib.md5(sign_str.encode()).hexdigest()

    def _clean_price(self, price_str: str) -> float:
        """Convert a displayed price string to ``float``.

        Everything except digits and dots is removed first. If the remainder
        is not a valid number (e.g. a lone ``"."`` or a price range whose two
        numbers got glued together), the first numeric token of the input is
        used instead; ``0.0`` is returned when there is none.
        """
        if not price_str:
            return 0.0
        cleaned = re.sub(r"[^\d.]", "", price_str)
        if not cleaned:
            return 0.0
        try:
            return float(cleaned)
        except ValueError:
            # Previously this propagated; fall back to the first number.
            match = re.search(r"\d+(?:\.\d+)?", price_str.replace(",", ""))
            return float(match.group()) if match else 0.0

    @staticmethod
    def _select_text(soup, selector: str) -> str:
        """Return the stripped text of the first ``selector`` match, or ``""``.

        Replaces the ``?.`` optional-chaining syntax, which is not Python.
        """
        node = soup.select_one(selector)
        return node.text.strip() if node is not None else ""

    @staticmethod
    def _resolve_version(specs: Dict[str, str], page_text: str) -> str:
        """Pick the product version: spec-table value first, page text second.

        The spec entry ``版本`` wins whenever it is non-empty; otherwise the
        first ``[美欧日中]版`` occurrence in ``page_text`` is used, or ``""``.
        """
        declared = specs.get("版本", "")
        if declared:
            return declared
        match = re.search(r"[美欧日中]版", page_text)
        return match.group() if match else ""

    @staticmethod
    def _is_tax_included(html: str) -> bool:
        """Tell whether the page mentions ``含税`` other than inside ``不含税``."""
        return re.search(r"(?<!不)含税", html or "") is not None

    def _parse_main_page(self, html: str) -> Dict:
        """Extract title, images, prices, cross-border and seller info.

        Missing elements yield empty strings (``0.0`` for prices) so the
        returned structure always has the same keys.
        """
        soup = BeautifulSoup(html, "lxml")
        text = self._select_text
        return {
            "title": text(soup, "h1.goods-title"),
            "images": [img.get("src") for img in soup.select("div.gallery-main img") if img.get("src")],
            "price": {
                "current": self._clean_price(text(soup, "div.price-main .price")),
                "original": self._clean_price(text(soup, "div.price-origin"))
            },
            "cross_border": {
                "purchase_location": text(soup, "div.purchase-location"),
                "logistics_type": text(soup, "div.logistics-type"),
                "delivery_time": text(soup, "div.delivery-time")
            },
            "seller": {
                "name": text(soup, "div.seller-name a"),
                "location": text(soup, "div.seller-location"),
                "level": text(soup, "div.seller-level")
            }
        }

    def _parse_iframe_detail(self, item_id: str, headers: Dict[str, str], proxy: Dict[str, str],
                             timeout: int = 10) -> Dict:
        """Fetch the detail page and extract specs, version, expiry, customs info.

        Best-effort: any failure (network, HTTP status, parsing) is printed
        and an all-empty result is returned instead of raising.
        """
        try:
            url = self.detail_iframe_url.format(item_id=item_id)
            response = requests.get(url, headers=headers, proxies=proxy, timeout=timeout)
            # Do not parse an error page: its text could yield a false version.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "lxml")
            # Collect the spec table as {header cell: value cell}.
            specs = {}
            spec_table = soup.select_one("table.spec-table")
            if spec_table is not None:
                for row in spec_table.select("tr"):
                    th = row.select_one("th")
                    td = row.select_one("td")
                    if th is not None and td is not None:
                        specs[th.text.strip()] = td.text.strip()
            return {
                "specs": specs,
                "version": self._resolve_version(specs, soup.text),
                "expiry_date": specs.get("保质期", ""),
                "customs_info": self._select_text(soup, "div.customs-info")  # customs declaration info
            }
        except Exception as e:
            print(f"iframe详情解析失败: {str(e)}")
            return {"specs": {}, "version": "", "expiry_date": "", "customs_info": ""}

    def _signed_get(self, url: str, params: Dict[str, str], headers: Dict[str, str],
                    proxy: Dict[str, str], timeout: int) -> Dict:
        """GET a signed JSON endpoint and return its ``data`` object.

        A ``sign`` entry computed by ``_generate_sign`` is added to a copy of
        ``params``. Returns ``{}`` when the response ``code`` is not 200 or
        ``data`` is missing/null. Network and JSON errors propagate.
        """
        signed = dict(params)
        signed["sign"] = self._generate_sign(signed)
        resp = requests.get(url, params=signed, headers=headers, proxies=proxy, timeout=timeout)
        payload = resp.json()
        if payload.get("code") != 200:
            return {}
        return payload.get("data") or {}

    def _fetch_dynamic_data(self, item_id: str, headers: Dict[str, str], proxy: Dict[str, str],
                            timeout: int = 10) -> Dict:
        """Query the stock and freight endpoints (both signed).

        Best-effort: each call is isolated, so a failing stock request no
        longer prevents the freight lookup. Fields that could not be fetched
        keep their zero defaults.
        """
        dynamic_data = {
            "stock": 0, "available": 0, "sales_count": 0,
            "comment_count": 0, "freight": 0.0
        }
        timestamp = str(int(time.time() * 1000))
        # 1. Stock and sales figures.
        try:
            stock = self._signed_get(
                self.stock_api,
                {"goodsId": item_id, "t": timestamp, "platform": "pc"},
                headers, proxy, timeout
            )
            dynamic_data["stock"] = stock.get("stock", 0)
            dynamic_data["available"] = stock.get("available", 0)
            dynamic_data["sales_count"] = stock.get("salesCount", 0)
            dynamic_data["comment_count"] = stock.get("commentCount", 0)
        except Exception as e:
            print(f"动态数据获取失败: {str(e)}")
        # 2. Freight; addressId is the sample value used by the original code.
        try:
            freight = self._signed_get(
                self.freight_api,
                {"goodsId": item_id, "t": timestamp, "addressId": "110000"},
                headers, proxy, timeout
            )
            dynamic_data["freight"] = freight.get("amount", 0.0)
        except Exception as e:
            print(f"动态数据获取失败: {str(e)}")
        return dynamic_data

    def item_get(self, item_id: str, timeout: int = 10) -> Dict:
        """Fetch and normalize the details of one product.

        :param item_id: product id (e.g. ``"1234567"``)
        :param timeout: per-request timeout in seconds, applied to the main
            page and to the detail/stock/freight requests
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": ...}`` with a ``code`` for
            HTTP errors (the status code) and other exceptions (``-1``).
            This method does not raise.
        """
        try:
            # 1. Main page request, after a random 2-4 s delay.
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            time.sleep(random.uniform(2, 4))
            response = requests.get(
                url=url,
                headers=headers,
                proxies=proxy,
                timeout=timeout
            )
            response.raise_for_status()
            main_html = response.text
            # 2. Parse the main page; an empty title is treated as "not found".
            main_data = self._parse_main_page(main_html)
            if not main_data["title"]:
                return {"success": False, "error_msg": "商品不存在或已下架"}
            # 3. Detail page (specs, version, customs info).
            iframe_data = self._parse_iframe_detail(item_id, headers, proxy, timeout)
            # 4. Dynamic data (stock, sales, freight).
            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy, timeout)
            # 5. Merge; the later "cross_border"/"price" keys intentionally
            #    override the ones spread in from main_data.
            result = {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **main_data,
                    "product": {
                        "specs": iframe_data["specs"],
                        "version": iframe_data["version"],
                        "expiry_date": iframe_data["expiry_date"]
                    },
                    "cross_border": {
                        **main_data["cross_border"],
                        "customs_info": iframe_data["customs_info"]
                    },
                    "price": {
                        **main_data["price"],
                        "tax_included": self._is_tax_included(main_html),
                        "freight": dynamic_data["freight"]
                    },
                    "trade": {
                        "sales_count": dynamic_data["sales_count"],
                        "comment_count": dynamic_data["comment_count"],
                        "stock": dynamic_data["available"]
                    },
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S")
                }
            }
            return result
        except requests.exceptions.HTTPError as e:
            # Use the real status code; searching "403" in the message also
            # matched item ids/URLs that merely contain those digits.
            status = e.response.status_code if e.response is not None else -1
            if status == 403:
                return {"success": False, "error_msg": "IP被封或签名失效,建议更换代理并重试", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"获取失败: {str(e)}", "code": -1}
# Usage example: fetch one product and print a summary.
if __name__ == "__main__":
    # Proxy pool (replace with working proxies).
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080"
    ]
    # Logged-in cookie (copy it from the browser).
    COOKIE = "user_id=xxx; token=xxx; session_id=xxx"
    # Create the client (the signing secret_key still has to be supplied;
    # it is obtained by reverse-engineering the site's JavaScript).
    api = YangmatouItemApi(proxy_pool=PROXIES, cookie=COOKIE)
    # Fetch the product details (sample item_id; replace with a real one).
    item_id = "1234567"
    result = api.item_get(item_id)
    if result["success"]:
        data = result["data"]
        print(f"商品标题: {data['title']}")
        print(f"价格: ¥{data['price']['current']}({'含税' if data['price']['tax_included'] else '不含税'}) | 运费: ¥{data['price']['freight']} | 原价: ¥{data['price']['original']}")
        print(f"跨境信息: {data['cross_border']['logistics_type']} | 采购地: {data['cross_border']['purchase_location']} | 时效: {data['cross_border']['delivery_time']}")
        # Only the first three spec entries are shown.
        print(f"规格版本: {data['product']['version']} | {', '.join([f'{k}:{v}' for k, v in list(data['product']['specs'].items())[:3]])}")
        print(f"卖家信息: {data['seller']['name']}({data['seller']['location']} | 等级: {data['seller']['level']})")
        print(f"交易数据: 销量{data['trade']['sales_count']}件 | 评价{data['trade']['comment_count']}条 | 库存{data['trade']['stock']}件")
    else:
        # "code" is absent for the "not found" result, hence .get().
        print(f"获取失败: {result['error_msg']}(错误码: {result.get('code')})")