建材网(含淘宝建材类目,淘宝作为综合电商平台的建材垂直板块)的item_search接口(非官方命名)是按关键字 / 筛选条件获取建材商品列表的核心入口,数据覆盖建材规格、价格、供应商、工程适配性等关键信息,对工程采购选品、供应链比价、线上建材批发等场景具有核心价值。由于淘宝无公开官方建材专用 API,需通过页面解析 + 淘宝开放平台(可选)结合实现对接。本文聚焦淘宝建材类目特性,系统讲解接口逻辑、参数设计、技术实现及场景适配,助你构建稳定的建材商品搜索系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于动态接口解析)
四、代码实现示例(Python)
import json
import random
import re
import time
import urllib.parse
from typing import Dict, List, Optional, Tuple

import execjs
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class TaobaoJcItemSearchApi:
    """Search client for Taobao building-material listings.

    Requests go to the JSONP search endpoint at ``https://s.taobao.com/api``.
    Every page request carries a ``_ksTS``/``sign`` pair; the ``sign`` value is
    produced by the ``generateSign`` function of the JavaScript file
    ``taobao_sign.js``, which must exist in the current working directory.
    Each product in the response is normalized into a plain dict by
    :meth:`_parse_product`.
    """

    # Hard upper bound on the number of result pages one search may walk.
    MAX_PAGES = 100

    # DOTALL lets the payload span several lines; the greedy body runs up to
    # the last "})" in the response.
    _JSONP_RE = re.compile(r"jQuery\d+_\d+\((\{.*\})\)", re.DOTALL)

    # First (optionally decimal) number in a sales string plus an optional
    # "万" (x10,000) suffix, e.g. "230+", "1.2万+", "500人付款".
    _SELL_NUM_RE = re.compile(r"(\d+(?:\.\d+)?)\s*(万)?")

    # Shop-type code -> display label.
    _SELLER_TYPES = {"B": "企业店", "C": "淘宝店", "T": "天猫店"}

    def __init__(self, proxy_pool: Optional[List[str]] = None, cookie: str = "", tb_token: str = ""):
        """Store the connection settings and compile the signing script.

        :param proxy_pool: proxy URLs; one is picked at random per request.
        :param cookie: raw ``Cookie`` header value of a logged-in session.
        :param tb_token: token passed to the JS ``generateSign`` function.
        :raises OSError: if ``taobao_sign.js`` cannot be opened.
        """
        self.base_api_url = "https://s.taobao.com/api"
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.cookie = cookie
        self.tb_token = tb_token
        # Category name -> category id (simplified; extend as needed).
        self.category_map = {
            "钢筋-螺纹钢": "50010815",
            "管材-PE给水管": "50010820",
            "防水卷材-SBS": "50010830",
            "商品混凝土": "50010840",
        }
        # Province name -> area code (simplified).
        self.area_map = {
            "河北": "130000",
            "山东": "370000",
            "浙江": "330000",
            "广东": "440000",
        }
        # The signing logic lives in an external JS file and is run via execjs.
        with open("taobao_sign.js", "r", encoding="utf-8") as f:
            self.js_code = f.read()
        self.ctx = execjs.compile(self.js_code)

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent (and the Cookie, if set)."""
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://s.taobao.com/",
            "Accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01",
            "X-Requested-With": "XMLHttpRequest",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Return a randomly chosen proxy mapping, or ``None`` when no pool is configured."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _generate_sign_params(self, keyword: str = "", page: int = 1) -> Tuple[str, str]:
        """Generate the ``_ksTS`` and ``sign`` request parameters.

        :param keyword: raw (un-encoded) search keyword handed to the JS signer.
        :param page: page number handed to the JS signer.
        :return: ``(ksTS, sign)``; ``ksTS`` is a millisecond timestamp followed
            by ``_`` and three random digits.

        The defaults (``""`` and ``1``) reproduce the previous behaviour for
        callers that pass no arguments.
        """
        ts = int(time.time() * 1000)
        ksTS = f"{ts}_{random.randint(100, 999)}"
        sign = self.ctx.call("generateSign", keyword, page, ksTS, self.tb_token)
        return ksTS, sign

    def _parse_jsonp(self, jsonp_str: str) -> Dict:
        """Extract and decode the JSON object wrapped in a ``jQuery<n>_<n>(...)`` callback.

        :return: the decoded dict, or ``{}`` when the text is empty, has no
            such wrapper, or the payload is not valid JSON.
        """
        if not jsonp_str:
            print("JSONP解析失败: empty response")
            return {}
        match = self._JSONP_RE.search(jsonp_str)
        if match is None:
            print("JSONP解析失败: no jQuery callback wrapper found")
            return {}
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError as e:
            print(f"JSONP解析失败: {str(e)}")
            return {}

    def _clean_sell_num(self, sell_num_str: str) -> int:
        """Convert a sales string such as "230+" or "1.2万" into an integer.

        The first number in the text is used; a trailing "万" multiplies it by
        10,000. Empty input or text without digits yields ``0``.
        """
        if not sell_num_str:
            return 0
        match = self._SELL_NUM_RE.search(str(sell_num_str))
        if match is None:
            return 0
        multiplier = 10_000 if match.group(2) else 1
        # round() guards against float artefacts such as 1.15 * 10000.
        return int(round(float(match.group(1)) * multiplier))

    @staticmethod
    def _to_float(value, default: float = 0.0) -> float:
        """Return ``float(value)``, or *default* when the value is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _to_int(value, default: int = 0) -> int:
        """Return *value* truncated to ``int``, or *default* when it is not numeric."""
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _abs_url(url) -> str:
        """Prefix ``https:`` to a URL unless it is empty or already has an http(s) scheme."""
        if not url:
            return ""
        url = str(url)
        if url.startswith(("http://", "https://")):
            return url
        return f"https:{url}"

    def _parse_product(self, product: Dict) -> Dict:
        """Normalize one raw product entry from the search response.

        Missing or malformed fields fall back to defaults instead of raising,
        so a single bad entry cannot abort the whole search.
        """
        labels = product.get("shop_card_labels") or []
        qualification_tags = [label.get("text", "") for label in labels if label.get("text")]

        # Only the specs present in the list response; both name and value
        # must be non-empty to be kept.
        specs = {}
        for prop in product.get("props") or []:
            prop_name = prop.get("name", "")
            prop_value = prop.get("value", "")
            if prop_name and prop_value:
                specs[prop_name] = prop_value

        title = str(product.get("title") or "")
        return {
            "item_id": str(product.get("id", "")),
            "title": title.strip(),
            "main_image": self._abs_url(product.get("pic_url")),
            "url": self._abs_url(product.get("detail_url")),
            "price": {
                "single": self._to_float(product.get("price", 0), 0.0),
                "unit": product.get("unit", "元/件"),
                "is_postage": product.get("is_postage", 0) == 1,
            },
            "specs": specs,
            "purchase": {
                "min_order": self._to_int(product.get("min_num", 1), 1),
                "delivery_time": product.get("delivery_time", "48小时内发货"),
                # Tax-inclusive if the title mentions it or a tag equals it.
                "support_tax": "含税" in title or "含税" in qualification_tags,
            },
            "seller": {
                "name": product.get("nick", ""),
                "type": self._SELLER_TYPES.get(product.get("seller_type", ""), "未知"),
                "province": product.get("province", ""),
                "qualifications": qualification_tags,
            },
            "sales": {
                "sell_num": self._clean_sell_num(product.get("sell_num", 0)),
                "sell_num_str": product.get("sell_num", ""),
                "good_rate": product.get("positive_rate", 0),
            },
        }

    def _build_params(self, *, ks_ts: str, sign: str, keyword: str, cat_id: str,
                      area_code: str, seller_type: str, min_order: int, sort: str,
                      page: int, price_from: Optional[float] = None,
                      price_to: Optional[float] = None) -> Dict:
        """Assemble the query parameters for one page request.

        ``keyword`` must be the raw text: ``urlencode`` percent-encodes it when
        the URL is built, so encoding it here as well would double-encode it.
        """
        params = {
            "_ksTS": ks_ts,
            "callback": f"jQuery11240{random.randint(1000000000000, 9999999999999)}_{int(time.time() * 1000)}",
            "sign": sign,
            "q": keyword,
            "cat": cat_id,
            "page": page,
            "sort": sort,
            "minQuantity": min_order,
            "seller_type": seller_type,
            "imgfile": "",
            "commend": "all",
            "ssid": "s5-e",
            "search_type": "item",
            "sourceId": "tb.index",
            "spm": "a21bo.jiancai.201856-taobao-item.1",
            "ie": "utf8",
        }
        # Optional filters are only sent when the caller supplied them.
        if price_from is not None:
            params["lowPrice"] = price_from
        if price_to is not None:
            params["highPrice"] = price_to
        if area_code:
            params["area"] = area_code
        return params

    def item_search(self,
                    keyword: str = "",
                    category: str = "",
                    price_from: Optional[float] = None,
                    price_to: Optional[float] = None,
                    area: str = "",
                    seller_type: str = "",
                    min_order: int = 1,
                    sort: str = "sale-desc",
                    page_limit: int = 5) -> Dict:
        """Search building-material products and return de-duplicated results.

        :param keyword: search keyword (e.g. "16mm螺纹钢 HRB400E").
        :param category: a name from ``category_map`` or a raw category id.
        :param price_from: minimum unit price (omitted from the request if ``None``).
        :param price_to: maximum unit price (omitted from the request if ``None``).
        :param area: a name from ``area_map`` or a raw area code.
        :param seller_type: shop type code (B / C / T).
        :param min_order: minimum order quantity.
        :param sort: sort order (sale-desc / price-asc / credit-desc).
        :param page_limit: maximum pages to fetch (capped at ``MAX_PAGES``).
        :return: on success ``{"success": True, "total", "total_pages",
            "page_processed", "products"}``; on failure ``{"success": False,
            "error_msg", "code"}``. Failures are reported in the dict rather
            than raised.
        """
        try:
            if not keyword and not category:
                return {"success": False, "error_msg": "关键词和分类至少需提供一个"}

            # Names are mapped to ids/codes; unknown values pass through as-is.
            cat_id = self.category_map.get(category, category) if category else ""
            area_code = self.area_map.get(area, area) if area else ""
            raw_keyword = keyword or ""
            max_pages = min(page_limit, self.MAX_PAGES)

            all_products = []
            pages_processed = 0  # pages that actually yielded products
            total_pages = 1
            current_page = 1
            while current_page <= max_pages:
                ks_ts, sign = self._generate_sign_params(raw_keyword, current_page)
                params = self._build_params(
                    ks_ts=ks_ts, sign=sign, keyword=raw_keyword, cat_id=cat_id,
                    area_code=area_code, seller_type=seller_type,
                    min_order=min_order, sort=sort, page=current_page,
                    price_from=price_from, price_to=price_to,
                )

                # Random delay before every request to keep the rate low.
                time.sleep(random.uniform(5, 8))
                request_url = f"{self.base_api_url}?{urllib.parse.urlencode(params)}"
                response = requests.get(
                    url=request_url,
                    headers=self._get_headers(),
                    proxies=self._get_proxy(),
                    timeout=15,
                )
                response.raise_for_status()

                data = self._parse_jsonp(response.text)
                if not data:
                    print(f"第{current_page}页无有效数据,终止分页")
                    break
                result = data.get("result") or {}
                product_list = result.get("list") or []
                if not product_list:
                    print(f"第{current_page}页无商品,终止分页")
                    break

                all_products.extend(self._parse_product(p) for p in product_list)
                pages_processed += 1

                # The total page count is only read from the first page.
                if current_page == 1:
                    total_pages = min(self._to_int(result.get("totalPage", 1), 1), max_pages)
                    print(f"共{total_pages}页商品,开始遍历...")
                if current_page >= total_pages:
                    break
                current_page += 1

            # De-duplicate by item_id, keeping the first occurrence.
            unique_by_id = {}
            for product in all_products:
                unique_by_id.setdefault(product["item_id"], product)
            unique_products = list(unique_by_id.values())

            return {
                "success": True,
                "total": len(unique_products),
                "total_pages": total_pages,
                "page_processed": pages_processed,
                "products": unique_products,
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: the exception text contains the request
            # URL, whose random digits could accidentally contain "403"/"401".
            status = getattr(e.response, "status_code", None)
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理、Cookie或降低请求频率", "code": 403}
            if status == 401:
                return {"success": False, "error_msg": "Cookie失效,请重新登录获取", "code": 401}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except execjs.Error as e:
            # execjs.Error is the base class of PyExecJS exceptions.
            return {"success": False, "error_msg": f"签名生成失败(JS执行错误): {str(e)}", "code": -2}
        except Exception as e:
            return {"success": False, "error_msg": f"搜索失败: {str(e)}", "code": -1}
# 配套的taobao_sign.js示例(需从淘宝前端提取完整逻辑,此处为简化版)
"""
taobao_sign.js内容:
function md5(str) {
// 此处需填入完整MD5加密函数(从淘宝JS中提取)
return str; // 示例占位,实际需替换为真实MD5实现
}
function generateSign(q, page, ksTS, tbToken) {
// 真实签名逻辑需结合淘宝当前JS,此处仅为格式示例
var signStr = q + page + ksTS + tbToken + "taobao_jiancai_search";
return md5(signStr).toUpperCase();
}
"""
# 使用示例
if __name__ == "__main__":
    # Demo configuration: every value below is a placeholder to be replaced.
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080",
    ]  # replace with working high-anonymity proxies
    COOKIE = "tb_token=xxx; cookie2=xxx; t=xxx; _tb_token_=xxx; ..."  # logged-in Taobao cookie
    TB_TOKEN = "xxx"  # the tb_token field taken from the cookie

    client = TaobaoJcItemSearchApi(proxy_pool=PROXIES, cookie=COOKIE, tb_token=TB_TOKEN)

    # Example query: 16mm HRB400E rebar, Hebei origin, enterprise shops,
    # minimum order 10, price 4500-5000, first 3 pages sorted by sales.
    query = {
        "keyword": "16mm螺纹钢 HRB400E",
        "category": "钢筋-螺纹钢",
        "price_from": 4500,
        "price_to": 5000,
        "area": "河北",
        "seller_type": "B",
        "min_order": 10,
        "sort": "sale-desc",
        "page_limit": 3,
    }
    outcome = client.item_search(**query)

    if not outcome["success"]:
        print(f"搜索失败:{outcome['error_msg']}(错误码:{outcome.get('code')})")
    else:
        print(f"搜索成功:共找到 {outcome['total']} 件商品,遍历 {outcome['page_processed']}/{outcome['total_pages']} 页")
        # Only the first five products are printed.
        for position, item in enumerate(outcome["products"][:5], start=1):
            price = item["price"]
            purchase = item["purchase"]
            seller = item["seller"]
            sales = item["sales"]

            postage_text = "包邮" if price["is_postage"] else "不包邮"
            tax_text = "支持含税" if purchase["support_tax"] else "不含税"
            spec_text = "; ".join([f"{k}:{v}" for k, v in item["specs"].items()])
            tag_text = ", ".join(seller["qualifications"]) or "无"

            print(f"\n商品 {position}:")
            print(f"标题:{item['title'][:60]}...")
            print(f"价格:¥{price['single']}/{price['unit']} | {postage_text}")
            print(f"规格参数:{spec_text}")
            print(f"采购信息:起订{purchase['min_order']}{price['unit']} | {purchase['delivery_time']} | {tax_text}")
            print(f"供应商:{seller['name']}({seller['type']}) | 产地:{seller['province']}")
            print(f"资质标签:{tag_text}")
            print(f"销量口碑:30天成交{sales['sell_num']}件 | 好评率{sales['good_rate']}%")
            print(f"详情页:{item['url']}")