化工网(聚焦化工原料、精细化工、塑料橡胶等垂直领域的 B2B 电商平台,如化工网、盖德化工网、摩贝网等)的item_search接口(非官方命名)是按关键词 / 筛选条件批量获取化工商品列表的核心入口。数据覆盖化工产品的纯度、CAS 号、供应规格、批量报价、供应商资质等关键信息,对化工采购比价、供应链选型、市场行情监控等场景具有核心价值。由于化工网多为垂直领域站点,无统一公开 API,需通过页面解析 + 动态接口逆向实现对接。本文聚焦化工行业特性,系统讲解接口逻辑、参数设计、技术实现及反爬应对,助你构建稳定的化工商品搜索系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于页面解析 + 动态接口)
四、代码实现示例(Python)
import json
import random
import re
import time
import urllib.parse
from typing import Any, Dict, List, Optional, Tuple

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class ChemicalItemSearchApi:
    """Keyword/filter search client for chemical B2B marketplaces.

    Two platforms are configured: ``guidechem`` (HTML result pages parsed
    with BeautifulSoup) and ``molbase`` (JSON endpoint called with POST).
    Only guidechem has a product parser; items from other platforms are
    skipped until a parser is added to ``_parse_product``.

    NOTE(review): the CSS selectors, category ids, endpoint URLs and the
    molbase signature below are example values from the original article —
    verify them against the live sites before relying on the results.
    """

    # Absolute cap on the number of result pages crawled by one search.
    MAX_PAGES = 50

    # First decimal number in a string, e.g. "99.9" in "≥99.9%".
    _NUMBER_RE = re.compile(r"\d+(?:\.\d+)?")
    # "<currency letters>/<measure letters>", digits excluded, e.g. "元/吨".
    _PRICE_UNIT_RE = re.compile(r"([^\W\d_]*)\s*/\s*([^\W\d_]+)")
    # Integer quantity followed by an optional unit, e.g. "100吨".
    _STOCK_RE = re.compile(r"(\d+)(?:\.\d+)?\s*([^\W\d_]*)")

    def __init__(self, platform: str = "guidechem", proxy_pool: Optional[List[str]] = None,
                 cookie: str = "", request_delay: Tuple[float, float] = (4.0, 6.0)):
        """Configure the client.

        :param platform: "guidechem" or "molbase" (case-insensitive). Any
            other value falls back to "guidechem".
        :param proxy_pool: proxy URLs such as ["http://ip:port", ...]; one is
            picked at random per request. ``None``/empty disables proxies.
        :param cookie: raw ``Cookie`` header value of a logged-in session.
        :param request_delay: (min, max) seconds slept before every request.
        """
        self.proxy_pool = proxy_pool
        self.cookie = cookie
        self.request_delay = request_delay
        self.ua = UserAgent()
        # Per-platform settings (extend here to support more sites).
        self.platform_config = {
            "guidechem": {
                "base_url": "https://www.guidechem.com/search/",
                "item_selector": ".product-item",  # CSS selector of one result card
                "category_map": {  # category name -> id (simplified)
                    "有机化工-醇类": "289",
                    "无机化工-酸类": "301",
                    "塑料-聚丙烯": "402",
                    "危化品-易燃液体": "505",
                },
            },
            "molbase": {
                "base_url": "https://www.molbase.com/api/v1/products/search",
                "category_map": {},  # molbase category ids must be resolved separately
            },
        }
        self.platform = platform.lower()
        if self.platform not in self.platform_config:
            # Keep the platform name and the config in sync; otherwise the
            # request-building branches in item_search match nothing.
            self.platform = "guidechem"
        self.config = self.platform_config[self.platform]

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent and optional Cookie."""
        headers = {
            "User-Agent": self.ua.random,
            "Referer": self.config["base_url"].split("/search")[0],
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "X-Requested-With": "XMLHttpRequest",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Return a random proxy mapping for ``requests``, or None without a pool."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _generate_sign(self, params: Dict) -> str:
        """Return the request signature for molbase, "" for other platforms.

        NOTE: placeholder only. The secret and the algorithm are stand-ins;
        the real ones have to be recovered from the platform's JavaScript.
        """
        if self.platform != "molbase":
            return ""
        timestamp = params.get("timestamp", "")
        keyword = params.get("keyword", "")
        secret = "molbase_secret_key"  # placeholder, replace with the real key
        sign_str = f"{keyword}{timestamp}{secret}"
        return re.sub(r"[^\w]", "", sign_str).upper()

    def _clean_price(self, price_str: str) -> Tuple[float, str]:
        """Split a price label into (amount, unit).

        "¥7,500/吨" -> (7500.0, "元/吨"); "7200元/桶" -> (7200.0, "元/桶").
        Returns (0.0, "") for empty input; missing parts default to 0.0 / "".
        """
        if not price_str:
            return 0.0, ""
        # Drop thousands separators so "7,500" is not read as 7.
        text = price_str.replace(",", "").replace(",", "")
        number = self._NUMBER_RE.search(text)
        price = float(number.group()) if number else 0.0

        unit = ""
        unit_match = self._PRICE_UNIT_RE.search(text)
        if unit_match:
            currency, measure = unit_match.group(1), unit_match.group(2)
            if not currency and ("¥" in text or "¥" in text):
                currency = "元"  # yen/yuan sign used instead of the word
            unit = f"{currency}/{measure}" if currency else measure
        return price, unit

    def _clean_purity(self, purity_str: str) -> float:
        """Extract the numeric purity, e.g. "99.9%" -> 99.9 (0.0 if absent)."""
        if not purity_str:
            return 0.0
        number = self._NUMBER_RE.search(purity_str)
        return float(number.group()) if number else 0.0

    def _clean_stock(self, stock_str: str) -> Tuple[int, str]:
        """Split a stock label into (quantity, unit), e.g. "现货100吨" -> (100, "吨").

        The quantity is the integer part of the first number found; labels
        without a number yield (0, "").
        """
        if not stock_str:
            return 0, ""
        text = stock_str.replace(",", "").replace(",", "")
        found = self._STOCK_RE.search(text)
        if not found:
            return 0, ""
        return int(found.group(1)), found.group(2)

    @staticmethod
    def _first_int(value: Any, default: int) -> int:
        """Return the first run of digits in ``value`` as int, else ``default``."""
        if value is None:
            return default
        digits = re.search(r"\d+", str(value))
        return int(digits.group()) if digits else default

    @staticmethod
    def _select_text(node: Any, selector: str, default: str = "") -> str:
        """Stripped text of the first element matching ``selector``, else ``default``."""
        found = node.select_one(selector)
        if found is None:
            return default
        return found.text.strip() or default

    def _parse_guidechem_product(self, item_soup) -> Dict:
        """Convert one guidechem result card (a BeautifulSoup tag) into a dict."""
        text = self._select_text

        qualification_tags = [tag.text.strip() for tag in item_soup.select(".qualification-tags span")]

        price_str = text(item_soup, ".price-box .current-price")
        price, price_unit = self._clean_price(price_str)

        stock_str = text(item_soup, ".stock-tag")
        stock, stock_unit = self._clean_stock(stock_str)

        purity_str = text(item_soup, ".purity-tag")

        title_link = item_soup.select_one(".product-title a")
        title = title_link.text.strip() if title_link is not None else ""
        href = (title_link.get("href") or "") if title_link is not None else ""
        # Resolve relative links against the search URL; absolute ones pass through.
        url = urllib.parse.urljoin(self.config["base_url"], href) if href else ""

        tax_text = text(item_soup, ".tax-tag")

        return {
            "product_id": item_soup.get("data-id") or "",
            "title": title,
            "url": url,
            "specs": {
                "purity": self._clean_purity(purity_str),
                "purity_str": purity_str,
                "cas": text(item_soup, ".cas-code"),
                "package": text(item_soup, ".package-spec"),
                "appearance": text(item_soup, ".appearance"),
            },
            "price": {
                "single": price,
                "single_str": price_str,
                "unit": price_unit,
                # "不含税" (tax excluded) contains "含税", so rule it out explicitly.
                "tax": "含税" in tax_text and "不含税" not in tax_text,
            },
            "supply": {
                "stock": stock,
                "stock_str": stock_str,
                "stock_unit": stock_unit,
                "min_order": text(item_soup, ".min-order", "1吨起订"),
                "supply_type": "现货" if "现货" in stock_str else "期货",
                "delivery_time": text(item_soup, ".delivery-time", "7天内"),
            },
            "seller": {
                "name": text(item_soup, ".seller-name a"),
                "type": "工厂" if "源头工厂" in qualification_tags else "贸易商",
                "area": text(item_soup, ".seller-area"),
                "qualifications": qualification_tags,
                "capacity": text(item_soup, ".capacity"),
            },
            "safety": {
                "hazard_class": text(item_soup, ".hazard-class"),
                "msds": "有" if item_soup.select_one(".msds-tag") is not None else "无",
            },
        }

    def _parse_product(self, item) -> Dict:
        """Dispatch to the platform parser; returns {} when none exists."""
        if self.platform == "guidechem":
            return self._parse_guidechem_product(item)
        # Parsers for molbase and other platforms can be added here.
        return {}

    def _build_request(self, filters: Dict[str, Any], page: int) -> Tuple[str, Dict[str, Any]]:
        """Return (url, params) for one result page.

        guidechem: the filters are encoded once into the URL query string,
        with None/"" values omitted. molbase: the URL is the bare endpoint
        and the params (including timestamp and sign) are the POST payload.
        """
        if self.platform == "guidechem":
            query = {
                "keyword": filters["keyword"],
                "catId": filters["cat_id"],
                "cas": filters["cas"],
                "priceFrom": filters["price_from"],
                "priceTo": filters["price_to"],
                "area": filters["area"],
                "supplyType": filters["supply_type"],
                "sellerType": filters["seller_type"],
                "minOrder": filters["min_order"],
                "sort": filters["sort"],
                "page": page,
            }
            query = {k: v for k, v in query.items() if v is not None and v != ""}
            # urlencode does the percent-encoding; the values must be raw text,
            # otherwise they end up double-encoded.
            return f"{self.config['base_url']}?{urllib.parse.urlencode(query)}", query

        timestamp = int(time.time() * 1000)
        payload = {
            "keyword": filters["keyword"],
            "cas": filters["cas"],
            "page": page,
            "supplyType": filters["supply_type"],
            "sellerType": filters["seller_type"],
            "timestamp": timestamp,
            "sign": self._generate_sign({"keyword": filters["keyword"], "timestamp": timestamp}),
        }
        return self.config["base_url"], payload

    def _fetch_page(self, url: str, payload: Dict[str, Any]):
        """Sleep for the configured delay, send the request, raise on HTTP errors."""
        low, high = self.request_delay
        time.sleep(max(0.0, random.uniform(low, high)))  # throttle to avoid anti-bot blocks
        headers = self._get_headers()
        proxy = self._get_proxy()
        if self.platform == "guidechem":
            response = requests.get(url=url, headers=headers, proxies=proxy, timeout=15)
        else:  # molbase is called with a JSON POST
            response = requests.post(url=url, json=payload, headers=headers, proxies=proxy, timeout=15)
        response.raise_for_status()
        return response

    def _read_page(self, response, page: int) -> Tuple[Optional[List[Any]], int]:
        """Return (raw items, reported total pages) for one response.

        The items are ``None`` when pagination must stop: an empty guidechem
        page, or a molbase body whose "success" flag is false.
        """
        if self.platform == "guidechem":
            soup = BeautifulSoup(response.text, "lxml")
            items = soup.select(self.config["item_selector"])
            if not items:
                print(f"第{page}页无商品,终止分页")
                return None, 1
            return items, self._first_int(self._select_text(soup, ".total-page"), 1)

        data = json.loads(response.text)
        if not data.get("success", True):
            print(f"第{page}页请求失败:{data.get('msg')}")
            return None, 1
        body = data.get("data") or {}  # tolerate "data": null
        return body.get("list") or [], self._first_int(body.get("totalPage"), 1)

    @staticmethod
    def _dedupe(products: List[Dict]) -> List[Dict]:
        """Drop repeats, keyed by product_id or, failing that, CAS + seller name.

        Products with neither key are all kept: there is nothing to compare.
        """
        seen = set()
        unique = []
        for product in products:
            key = product.get("product_id")
            if not key:
                cas = (product.get("specs") or {}).get("cas", "")
                seller = (product.get("seller") or {}).get("name", "")
                key = f"{cas}_{seller}" if (cas or seller) else None
            if key is not None:
                if key in seen:
                    continue
                seen.add(key)
            unique.append(product)
        return unique

    def item_search(self,
                    keyword: str = "",
                    category: str = "",
                    cas: str = "",
                    price_from: Optional[float] = None,
                    price_to: Optional[float] = None,
                    area: str = "",
                    supply_type: int = 1,  # 1 = spot, 2 = futures
                    seller_type: int = 1,  # 1 = factory, 2 = trader
                    min_order: str = "1",
                    sort: str = "price-asc",
                    page_limit: int = 5) -> Dict:
        """Search products page by page and return a normalized result dict.

        :param keyword: search keyword (e.g. "无水乙醇")
        :param category: category name found in the platform's category_map,
            or a raw category id
        :param cas: CAS number (e.g. "64-17-5")
        :param price_from: lowest unit price
        :param price_to: highest unit price
        :param area: place of origin (e.g. "山东")
        :param supply_type: 1 = spot, 2 = futures
        :param seller_type: 1 = factory, 2 = trader
        :param min_order: minimum order quantity (e.g. "10")
        :param sort: price-asc / price-desc / sales-desc
        :param page_limit: maximum pages to crawl (capped at MAX_PAGES)
        :return: on success ``{"success": True, "total", "total_pages",
            "page_processed", "products", "platform"}``; on failure
            ``{"success": False, "error_msg", ...}`` with "code" set to 403,
            401, another HTTP status, or -1. No exception propagates.
        """
        if not keyword and not cas:
            return {"success": False, "error_msg": "关键词(keyword)和CAS号(cas)至少需提供一个"}
        try:
            filters = {
                "keyword": keyword,
                # Map a category name to its id; unknown names are sent as-is.
                "cat_id": self.config["category_map"].get(category, category) if category else "",
                "cas": cas,
                "price_from": price_from,
                "price_to": price_to,
                "area": area,
                "supply_type": supply_type,
                "seller_type": seller_type,
                "min_order": min_order,
                "sort": sort,
            }
            all_products: List[Dict] = []
            total_pages = 1
            pages_processed = 0  # pages that actually yielded a product list
            page = 1
            while page <= page_limit and page <= self.MAX_PAGES:
                url, payload = self._build_request(filters, page)
                response = self._fetch_page(url, payload)
                raw_items, reported_total = self._read_page(response, page)
                if raw_items is None:
                    break

                parsed = [p for p in (self._parse_product(item) for item in raw_items) if p]
                skipped = len(raw_items) - len(parsed)
                if skipped:
                    # Happens for platforms without a parser (e.g. molbase).
                    print(f"第{page}页有{skipped}条商品无法解析(平台{self.platform}),已跳过")
                all_products.extend(parsed)
                pages_processed += 1

                if page == 1:
                    # The page count is read once, from the first page only.
                    total_pages = min(reported_total, page_limit, self.MAX_PAGES)
                    print(f"共{total_pages}页商品,开始遍历...")
                if page >= total_pages:
                    break
                page += 1

            unique_products = self._dedupe(all_products)
            return {
                "success": True,
                "total": len(unique_products),
                "total_pages": total_pages,
                "page_processed": pages_processed,
                "products": unique_products,
                "platform": self.platform,
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: str(e) contains the URL, whose query
            # string may itself contain "403" or "401".
            failed = getattr(e, "response", None)
            status = failed.status_code if failed is not None else None
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理、Cookie或降低请求频率", "code": 403}
            if status == 401:
                return {"success": False, "error_msg": "Cookie失效,请重新登录获取", "code": 401}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}",
                    "code": status if status is not None else -1}
        except Exception as e:
            return {"success": False, "error_msg": f"搜索失败: {str(e)}", "code": -1}
# Usage example: one guidechem search, printing the first five results.
if __name__ == "__main__":
    # Configuration
    PROXIES = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080",
    ]  # replace with working high-anonymity proxies
    COOKIE = "userid=xxx; sessionId=xxx; enterpriseId=xxx"  # replace with a real login cookie

    # Create the client (guidechem is the default platform).
    search_api = ChemicalItemSearchApi(
        platform="guidechem",
        proxy_pool=PROXIES,
        cookie=COOKIE,
    )

    # Query: 99.9% anhydrous ethanol (CAS 64-17-5), origin Shandong,
    # factory spot goods, minimum order 10.
    result = search_api.item_search(
        keyword="无水乙醇 99.9%",
        category="有机化工-醇类",
        cas="64-17-5",
        price_from=7000,
        price_to=8000,
        area="山东",
        supply_type=1,  # spot
        seller_type=1,  # factory
        min_order="10",
        sort="price-asc",  # ascending price
        page_limit=3,
    )

    # Report
    if result["success"]:
        print(f"搜索成功:共找到 {result['total']} 件商品,遍历 {result['page_processed']}/{result['total_pages']} 页")
        for i, product in enumerate(result["products"][:5]):  # first five only
            specs = product["specs"]
            supply = product["supply"]
            seller = product["seller"]
            print(f"\n商品 {i+1}:")
            print(f"标题:{product['title'][:60]}...")
            print(f"核心规格:纯度{specs['purity']}% | CAS:{specs['cas']} | 包装:{specs['package']}")
            # Print the raw price label: composing "¥<number>/<unit>" would
            # repeat the currency and slash already contained in the unit.
            print(f"价格信息:{product['price']['single_str']} | {'含税' if product['price']['tax'] else '不含税'}")
            print(f"供应信息:{supply['stock_str']} | {supply['min_order']} | {supply['delivery_time']}交货")
            print(f"供应商:{seller['name']}({seller['type']}) | 产地:{seller['area']}")
            print(f"资质合规:{', '.join(seller['qualifications']) or '无'} | MSDS:{product['safety']['msds']} | 危险类别:{product['safety']['hazard_class']}")
            print(f"详情页:{product['url']}")
    else:
        print(f"搜索失败:{result['error_msg']}(错误码:{result.get('code')})")