蘑菇街作为聚焦年轻女性用户的时尚电商平台,其商品搜索功能(对应item_search接口,非官方命名)是获取服饰、美妆、家居等品类商品列表的核心工具,广泛应用于电商选品、竞品分析、趋势监控等场景。由于蘑菇街官方 API 对个人开发者限制严格,多数场景需通过页面解析或第三方服务实现搜索对接。本文将系统讲解item_search接口的对接逻辑、参数解析、加密处理及反爬应对,帮助开发者构建稳定的商品搜索数据获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与参数解析)
三、接口调用流程(基于动态接口)
四、代码实现示例(Python)
import hashlib
import json
import random
import time
from typing import Dict, List, Optional

import requests
from fake_useragent import UserAgent
class MogujieSearchApi:
    """Client for the (unofficial) Mogujie product search endpoint.

    The client signs each request with a token taken from the ``_m_h5_tk``
    cookie, pages through the search results and returns them as plain
    dictionaries. Public methods never raise for request failures; they
    return a ``{"success": False, ...}`` dictionary instead.
    """

    # Number of items requested per page; a shorter page marks the last page.
    PAGE_SIZE = 20

    def __init__(self, cookie: str, proxy_pool: Optional[List[str]] = None):
        """Store configuration and extract the signing token from the cookie.

        :param cookie: raw ``Cookie`` header value; must contain ``_m_h5_tk``
        :param proxy_pool: optional list of proxy URLs to pick from at random
        :raises ValueError: if the cookie has no ``_m_h5_tk`` entry
        """
        self.search_api = "https://gateway.mogujie.com/api/search/search"
        self.app_key = "12574478"
        self.cookie = cookie
        self.cookie_tk = self._extract_cookie_tk()  # value of _m_h5_tk
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool

    def _extract_cookie_tk(self) -> str:
        """Return the value of the ``_m_h5_tk`` cookie.

        The cookie name is compared exactly, so ``_m_h5_tk_enc`` is never
        mistaken for ``_m_h5_tk`` regardless of cookie order, and the value is
        split off at the first ``=`` only, so values containing ``=`` survive.

        :raises ValueError: if no ``_m_h5_tk`` cookie is present
        """
        for pair in self.cookie.split(";"):
            name, sep, value = pair.strip().partition("=")
            if sep and name.strip() == "_m_h5_tk":
                return value.strip()
        raise ValueError("Cookie中未找到_m_h5_tk,请检查Cookie有效性")

    def _generate_sign(self, data_str: str) -> tuple:
        """Build the signed ``_m_h5_tk`` parameter for a request body.

        The signature is the MD5 hex digest of
        ``token&timestamp&app_key&data_str`` where ``token`` is the part of
        the cookie value before the first underscore.

        :param data_str: serialized request body that will be sent
        :return: ``("<token>_<md5>", "<timestamp in ms>")``
        """
        token = self.cookie_tk.split("_")[0]
        timestamp = str(int(time.time() * 1000))
        sign_str = f"{token}&{timestamp}&{self.app_key}&{data_str}"
        sign = hashlib.md5(sign_str.encode()).hexdigest()
        return f"{token}_{sign}", timestamp

    def _get_headers(self) -> Dict[str, str]:
        """Return the HTTP headers sent with every search request."""
        return {
            # Mobile UA: per the original author, anti-scraping checks are
            # looser for mobile clients.
            "User-Agent": self.ua.mobile,
            "Referer": "https://m.mogujie.com/search",
            "Origin": "https://m.mogujie.com",
            "Cookie": self.cookie,
            "Content-Type": "application/json",
        }

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Return a randomly chosen proxy mapping, or ``None`` without a pool."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    @staticmethod
    def _discount_label(price, original_price) -> str:
        """Format the discount as ``"<x>折"`` (price / original * 10).

        Returns an empty string when either value is missing or not numeric,
        or when the original price is not positive, so one malformed item
        cannot abort parsing of the whole page.
        """
        try:
            current = float(price)
            original = float(original_price)
        except (TypeError, ValueError):
            return ""
        if original <= 0:
            return ""
        return f"{round(current / original * 10, 1)}折"

    def _parse_items(self, response_data: Dict) -> List[Dict]:
        """Convert a raw search response into a list of normalized items.

        Items are read from ``response_data["data"]["result"]["list"]``; any
        of those levels being absent or ``None`` yields an empty list.
        """
        data = response_data.get("data") or {}
        result = data.get("result") or {}
        raw_items = result.get("list") or []

        items = []
        for item in raw_items:
            item_id = item.get("itemId", "")
            price = item.get("price", 0)
            original_price = item.get("originalPrice", 0)
            items.append({
                "item_id": item_id,
                "title": item.get("title", ""),
                "main_image": item.get("img", ""),
                "url": f"https://item.mogujie.com/detail/{item_id}.htm",
                "price": {
                    "current": price,
                    "original": original_price,
                    "discount": self._discount_label(price, original_price),
                },
                "sales": {
                    "monthly": item.get("saleCount", 0),
                    "comment_count": item.get("commentCount", 0),
                },
                "shop": {
                    "name": item.get("shopName", ""),
                    "location": item.get("location", ""),
                },
                "tags": item.get("tags", []),
            })
        return items

    def _build_payload(self, keyword, cate_id, price_min, price_max, sort,
                       location, page) -> Dict:
        """Assemble the JSON body for one result page; optional filters are
        included only when they were supplied."""
        payload = {
            "keyword": keyword,
            "cateId": cate_id,
            "sort": sort,
            "page": page,
            "pageSize": self.PAGE_SIZE,
        }
        if price_min is not None:
            payload["priceMin"] = price_min
        if price_max is not None:
            payload["priceMax"] = price_max
        if location:
            payload["location"] = location
        return payload

    @staticmethod
    def _dedupe_items(items: List[Dict]) -> List[Dict]:
        """Drop items whose ``item_id`` was already seen, keeping first
        occurrences in order."""
        seen_ids = set()
        unique_items = []
        for item in items:
            if item["item_id"] not in seen_ids:
                seen_ids.add(item["item_id"])
                unique_items.append(item)
        return unique_items

    def item_search(self,
                    keyword: str,
                    cate_id: str = "",
                    price_min: int = None,
                    price_max: int = None,
                    sort: str = "default",
                    location: str = "",
                    page_limit: int = 5) -> Dict:
        """Search Mogujie products and return de-duplicated results.

        :param keyword: search keyword
        :param cate_id: category id (optional)
        :param price_min: minimum price filter (optional)
        :param price_max: maximum price filter (optional)
        :param sort: sort order (e.g. default / sale / priceAsc)
        :param location: shipping location filter (optional)
        :param page_limit: maximum number of pages to fetch (default 5)
        :return: on success ``{"success": True, "total", "page_processed",
                 "items"}`` where ``page_processed`` counts the pages that
                 returned items; on failure ``{"success": False,
                 "error_msg", "code"}``. Request errors are reported through
                 the return value rather than raised.
        """
        try:
            all_items = []
            pages_processed = 0
            for page in range(1, page_limit + 1):
                # 1. Build and serialize the request body.
                payload = self._build_payload(
                    keyword, cate_id, price_min, price_max, sort, location, page
                )
                data_str = json.dumps(payload, ensure_ascii=False)

                # 2. Sign the exact string that will be sent.
                m_h5_tk, timestamp = self._generate_sign(data_str)
                params = {
                    "_m_h5_tk": m_h5_tk,
                    "_m_h5_tk_enc": "",
                    "timestamp": timestamp,
                    "appKey": self.app_key,
                }

                # 3. Send the request after a random delay to throttle calls.
                time.sleep(random.uniform(2, 4))
                response = requests.post(
                    url=self.search_api,
                    params=params,
                    # Encode explicitly: a str body with non-ASCII characters
                    # (e.g. Chinese keywords) is otherwise encoded by the HTTP
                    # stack, which may use Latin-1 and fail. UTF-8 also matches
                    # the bytes that were signed.
                    data=data_str.encode("utf-8"),
                    headers=self._get_headers(),
                    proxies=self._get_proxy(),
                    timeout=10,
                )
                response.raise_for_status()
                response_data = response.json()

                # 4. Check the API-level status flag.
                if not response_data.get("success"):
                    error_code = response_data.get("code", -1)
                    # -1001 is reported to the caller as an expired token.
                    if error_code == -1001:
                        return {"success": False, "error_msg": "_m_h5_tk失效,请更新Cookie", "code": -1001}
                    return {"success": False, "error_msg": f"接口返回失败: {response_data.get('msg')}", "code": error_code}

                # 5. Parse items and decide whether to continue paging.
                items = self._parse_items(response_data)
                if not items:
                    break  # empty page: nothing more to fetch
                all_items.extend(items)
                # Count the page here so that a short final page is included.
                pages_processed += 1
                if len(items) < self.PAGE_SIZE:
                    break  # short page: this was the last one

            unique_items = self._dedupe_items(all_items)
            return {
                "success": True,
                "total": len(unique_items),
                "page_processed": pages_processed,
                "items": unique_items,
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: the exception text contains the URL
            # (including the timestamp parameter), so searching it for "403"
            # can match by accident.
            status = getattr(e.response, "status_code", None)
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            # Best-effort contract: report any other failure in the result.
            return {"success": False, "error_msg": f"搜索失败: {str(e)}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Cookie copied from the browser (must contain _m_h5_tk); replace with a real one.
    COOKIE = "_m_h5_tk=xxx; _m_h5_tk_enc=xxx; ..."
    # Proxy pool; replace with working proxies.
    PROXIES = ["http://123.45.67.89:8888", "http://98.76.54.32:8080"]

    # Create the API client.
    client = MogujieSearchApi(cookie=COOKIE, proxy_pool=PROXIES)

    # Search for "夏季连衣裙" priced 100-300, sorted by sales, at most 3 pages.
    outcome = client.item_search(
        keyword="夏季连衣裙",
        price_min=100,
        price_max=300,
        sort="sale",
        page_limit=3,
    )

    if not outcome["success"]:
        print(f"搜索失败:{outcome['error_msg']}(错误码:{outcome.get('code')})")
    else:
        print(f"搜索成功:共找到 {outcome['total']} 件商品,处理 {outcome['page_processed']} 页")
        # Show only the first five results.
        for position, product in enumerate(outcome["items"][:5], start=1):
            price = product["price"]
            sales = product["sales"]
            shop = product["shop"]
            print(f"\n商品 {position}:")
            print(f"标题:{product['title'][:50]}...")
            print(f"价格:现价{price['current']}元 | 原价{price['original']}元 | {price['discount']}")
            print(f"销量:月销{sales['monthly']}件 | 评价{sales['comment_count']}条")
            print(f"店铺:{shop['name']}({shop['location']})")
            print(f"标签:{','.join(product['tags'])} | 详情页:{product['url']}")