化工网(如盖德化工网、摩贝网、中国化工网等垂直 B2B 平台)的 item_get 接口(非官方命名)是获取单款化工产品完整详情的核心入口,数据覆盖纯度、CAS 号、批量阶梯价、MSDS 报告、供应商资质、危化品合规证明等关键信息,对化工采购决策、供应链审核、产品合规校验等场景具有不可替代的价值。由于化工网无统一公开 API,需通过详情页解析 + 动态接口逆向实现对接,且需重点适配化工行业的强合规性、规格标准化等特性。本文系统讲解接口逻辑、技术实现、行业痛点解决方案,助你构建稳定的化工产品详情获取系统。
一、接口基础认知(核心功能与场景)
1. 核心功能
2. 典型应用场景
3. 接口特性
4. 主流化工网详情页特性对比
二、对接前置准备(环境与 URL 结构)
1. 开发环境
2. 详情页 URL 结构
3. 前置准备要点
4. 页面结构分析(以盖德化工网为例)
三、接口调用流程(静态 HTML + 动态 API)
1. URL 构建与请求头伪装
2. 静态数据解析(基础信息 + 规格参数)
3. 动态接口补充(完整阶梯价 + 库存)
4. 合规信息提取(资质 + MSDS + 危险类别)
5. 数据整合(标准化输出)
四、代码实现示例(Python)
import hashlib
import json
import random
import re
import time
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class ChemicalItemGetApi:
    """Fetch and normalise one product-detail record from a chemical B2B site.

    The record is assembled from two sources: the product detail page (parsed
    with BeautifulSoup) and a JSON endpoint that supplies tier prices, stock
    and a few supply fields.

    NOTE(review): the CSS selectors, endpoint URLs, JSON field names and the
    molbase signing secret used below have not been verified against the live
    sites from within this file -- confirm them before relying on the output.
    """

    # Platform used when the caller passes an unknown platform name.
    DEFAULT_PLATFORM = "guidechem"
    # Upper bound reported for a price tier that carries no explicit maximum.
    UNBOUNDED_QUANTITY = 99999
    # Timeout in seconds applied to every HTTP request.
    REQUEST_TIMEOUT = 15

    # Per platform: (tier-list key, min-quantity key, max-quantity key, default unit).
    # A max-quantity key of None means the payload has no upper bound per tier.
    _LADDER_FIELDS = {
        "guidechem": ("ladderPrices", "minNum", "maxNum", "元/吨"),
        "molbase": ("priceLadder", "quantity", None, "元/kg"),
    }

    # CAS Registry Number layout: 2-7 digits, 2 digits, 1 check digit.
    _CAS_PATTERN = re.compile(r"\d{2,7}-\d{2}-\d")
    # Dash look-alikes that show up in scraped text, mapped to ASCII '-'.
    _DASH_TABLE = str.maketrans({c: "-" for c in "\u2010\u2011\u2012\u2013\u2014\u2212\uff0d"})

    def __init__(self, platform: str = "guidechem", proxy_pool: Optional[List[str]] = None,
                 cookie: str = "", request_delay: Tuple[float, float] = (5, 8)):
        """Configure the client.

        :param platform: "guidechem" or "molbase" (case-insensitive). Any other
            value falls back to ``DEFAULT_PLATFORM`` for *both* the config and
            ``self.platform`` so every later platform check stays consistent.
        :param proxy_pool: optional list of proxy URLs such as "http://ip:port";
            one is picked at random per ``item_get`` call.
        :param cookie: raw ``Cookie`` header value of a logged-in session.
        :param request_delay: ``(low, high)`` bounds in seconds of the random
            pause taken before the detail page is requested.
        """
        self.proxy_pool = proxy_pool
        self.cookie = cookie
        self.request_delay = request_delay
        self.ua = UserAgent()
        # Per-platform URLs and selectors; extend this mapping to add a platform.
        self.platform_config = {
            "guidechem": {
                "detail_url": "https://www.guidechem.com/product/{product_id}.html",
                "api_url": "https://www.guidechem.com/api/product/detail",
                "spec_table_selector": "table.spec-table",
                "qualification_selector": "div.qualification-list .qual-item",
            },
            "molbase": {
                "detail_url": "https://www.molbase.com/product/{product_id}/detail.html",
                "api_url": "https://www.molbase.com/api/v1/product/detail",
                "spec_table_selector": ".spec-item-list",
                "qualification_selector": ".cert-list .cert-item",
            },
        }
        requested = (platform or "").lower()
        if requested not in self.platform_config:
            requested = self.DEFAULT_PLATFORM
        self.platform = requested
        self.config = self.platform_config[self.platform]

    # ------------------------------------------------------------------
    # Small, side-effect-free helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _text(node: Any, selector: str) -> str:
        """Return the stripped text of the first match of *selector*, or ""."""
        match = node.select_one(selector)
        if match is None:
            return ""
        return (match.text or "").strip()

    @staticmethod
    def _attr(node: Any, selector: str, attribute: str) -> str:
        """Return *attribute* of the first match of *selector*, or ""."""
        match = node.select_one(selector)
        if match is None:
            return ""
        return match.get(attribute) or ""

    @staticmethod
    def _to_int(value: Any, default: int = 0) -> int:
        """Convert *value* to int, returning *default* for None/garbage."""
        if isinstance(value, str):
            value = value.replace(",", "").strip()
        try:
            return int(value)
        except (TypeError, ValueError):
            pass
        try:
            # Handles numeric strings such as "12.0".
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _to_float(value: Any, default: float = 0.0) -> float:
        """Convert *value* to float, returning *default* for None/garbage."""
        if isinstance(value, str):
            value = value.replace(",", "").strip()
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _first_number(text: str, pattern: str = r"\d+\.?\d*") -> str:
        """Return the first substring of *text* matching *pattern*, or ""."""
        found = re.search(pattern, text or "")
        return found.group() if found else ""

    @staticmethod
    def _payload(api_data: Any) -> Dict:
        """Return the ``data`` object of an API response, or {} if absent/null."""
        if not isinstance(api_data, dict):
            return {}
        inner = api_data.get("data")
        return inner if isinstance(inner, dict) else {}

    @classmethod
    def _normalize_cas(cls, raw: str) -> str:
        """Return *raw* as a canonical, hyphenated CAS number where possible.

        Whitespace is removed and dash look-alikes are mapped to '-'. A bare
        digit string is re-hyphenated as ``<prefix>-NN-N``. Text that does not
        look like a CAS number is returned with whitespace removed.
        """
        compact = "".join((raw or "").translate(cls._DASH_TABLE).split())
        found = cls._CAS_PATTERN.search(compact)
        if found:
            return found.group()
        if compact.isdigit() and 5 <= len(compact) <= 10:
            return f"{compact[:-3]}-{compact[-3:-1]}-{compact[-1]}"
        return compact

    @staticmethod
    def _is_tax_included(tag_text: str, api_flag: Any) -> bool:
        """Decide whether the price includes tax.

        "不含税" (tax excluded) contains "含税" (tax included) as a substring, so
        the negative form has to be checked first.
        """
        tag_text = tag_text or ""
        if "不含税" in tag_text:
            return False
        if "含税" in tag_text:
            return True
        return bool(api_flag)

    @staticmethod
    def _http_error_result(status_code: int, detail: str) -> Dict:
        """Build the failure dict for an HTTP error with the given status code."""
        known = {
            403: "触发反爬,建议更换代理、Cookie或降低访问频率",
            401: "Cookie失效,请重新登录企业账号",
            404: "商品不存在",
        }
        message = known.get(status_code, f"HTTP错误:{detail}")
        return {"success": False, "error_msg": message, "code": status_code}

    def _origin(self) -> str:
        """Return ``scheme://host`` of the active platform's detail URL."""
        parts = urlsplit(self.config["detail_url"])
        return f"{parts.scheme}://{parts.netloc}"

    # ------------------------------------------------------------------
    # Request plumbing
    # ------------------------------------------------------------------
    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a random User-Agent and optional Cookie."""
        headers = {
            "User-Agent": self.ua.random,
            # Same-origin referer for the active platform.
            # NOTE(review): the "/search/" path is carried over from the
            # guidechem value; confirm it exists on other platforms.
            "Referer": f"{self._origin()}/search/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "X-Requested-With": "XMLHttpRequest",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-Mode": "navigate",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Pick a random proxy mapping for ``requests``, or None without a pool."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _fetch_api_data(self, product_id: str, headers: Dict, proxy: Optional[Dict]) -> Dict:
        """Fetch the JSON detail payload (tier prices, stock); {} on any failure.

        This call is deliberately best-effort: a failure is reported on stdout
        and the caller continues with page data only.
        """
        timestamp = int(time.time() * 1000)
        if self.platform == "guidechem":
            params = {"pid": product_id, "t": timestamp}
        elif self.platform == "molbase":
            # molbase expects a signed request (simplified; see _generate_molbase_sign).
            params = {
                "productId": product_id,
                "timestamp": timestamp,
                "sign": self._generate_molbase_sign(product_id, timestamp),
            }
        else:
            return {}
        try:
            response = requests.get(
                self.config["api_url"],
                params=params,
                headers=headers,
                proxies=proxy,
                timeout=self.REQUEST_TIMEOUT,
            )
            body = response.json()
        except Exception as e:  # best-effort: never let the optional call abort item_get
            print(f"动态接口请求失败:{str(e)}")
            return {}
        return body if isinstance(body, dict) else {}

    def _generate_molbase_sign(self, product_id: str, timestamp: int) -> str:
        """Return the upper-case MD5 hex digest of ``product_id + timestamp + secret``.

        Simplified scheme: the secret below is a placeholder and the real value
        has to be extracted from the site's JavaScript.
        """
        secret = "molbase_product_detail_secret"
        sign_str = f"{product_id}{timestamp}{secret}"
        return hashlib.md5(sign_str.encode()).hexdigest().upper()

    # ------------------------------------------------------------------
    # Page / payload parsers
    # ------------------------------------------------------------------
    def _parse_specs(self, soup: Any) -> Dict:
        """Parse the specification table (purity, CAS, standard, formula, ...).

        Every value is a string; missing fields stay "". ``purity`` holds the
        first number found in the purity cell, ``purity_str`` the raw cell.
        """
        specs = {
            "purity": "", "purity_str": "", "cas": "", "standard": "",
            "molecular_formula": "", "appearance": "", "package": "",
        }
        spec_table = soup.select_one(self.config["spec_table_selector"])
        if spec_table is None:
            return specs
        for row in spec_table.select("tr"):
            label = self._text(row, "th").lower()
            value = self._text(row, "td")
            if not label or not value:
                continue
            # Platforms label the same field differently (e.g. 含量 == 纯度).
            if "纯度" in label or "含量" in label:
                specs["purity_str"] = value
                specs["purity"] = self._first_number(value)
            elif "cas" in label:
                specs["cas"] = self._normalize_cas(value)
            elif "标准" in label:
                specs["standard"] = value
            elif "分子式" in label:
                specs["molecular_formula"] = value
            elif "外观" in label:
                specs["appearance"] = value
            elif "包装" in label or "规格" in label:
                specs["package"] = value
        return specs

    def _parse_ladder_prices(self, api_data: Dict) -> List[Dict]:
        """Return the tier prices from *api_data*, sorted by minimum quantity.

        Each entry has ``min_quantity``, ``max_quantity``, ``price`` and
        ``unit``. Missing or null numeric fields fall back to 0 (or
        ``UNBOUNDED_QUANTITY`` for the maximum) instead of raising.
        """
        fields = self._LADDER_FIELDS.get(self.platform)
        if fields is None:
            return []
        list_key, min_key, max_key, default_unit = fields
        tiers = self._payload(api_data).get(list_key) or []
        ladder_prices = []
        for tier in tiers:
            if not isinstance(tier, dict):
                continue
            if max_key is None:
                upper = self.UNBOUNDED_QUANTITY
            else:
                upper = self._to_int(tier.get(max_key), self.UNBOUNDED_QUANTITY)
            ladder_prices.append({
                "min_quantity": self._to_int(tier.get(min_key), 0),
                "max_quantity": upper,
                "price": self._to_float(tier.get("price"), 0.0),
                "unit": tier.get("unit") or default_unit,
            })
        ladder_prices.sort(key=lambda entry: entry["min_quantity"])
        return ladder_prices

    def _parse_qualifications(self, soup: Any) -> List[Dict]:
        """Parse supplier certificates into name / license_no / expire_date dicts.

        Items without a name are skipped.
        """
        qualifications = []
        for item in soup.select(self.config["qualification_selector"]):
            name = self._text(item, ".qual-name, .cert-name")
            if not name:
                continue
            qualifications.append({
                "name": name,
                "license_no": self._text(item, ".qual-no, .cert-no"),
                "expire_date": self._text(item, ".qual-expire, .cert-expire"),
            })
        return qualifications

    def _parse_safety_compliance(self, soup: Any) -> Dict:
        """Parse hazard class, UN number, MSDS link and safety notice."""
        safety_info = {
            "hazard_class": self._text(soup, "div.hazard-class, .hazard-type"),
            "un_number": self._text(soup, "div.un-number, .un-code"),
            "msds": {"has_msds": False, "download_url": ""},
            "safety_notice": self._text(soup, "div.safety-notice, .safety-desc"),
        }
        href = self._attr(soup, "a.msds-download, .msds-link", "href")
        if href:
            # urljoin resolves "/path", "path" and "//host/path" against the
            # platform origin and leaves absolute URLs untouched.
            safety_info["msds"] = {
                "has_msds": True,
                "download_url": urljoin(f"{self._origin()}/", href),
            }
        return safety_info

    # ------------------------------------------------------------------
    # Record assembly
    # ------------------------------------------------------------------
    def _build_stock_info(self, payload: Dict, specs: Dict) -> Dict:
        """Derive total stock, unit and availability from the API payload."""
        stock_info = {"total_stock": 0, "stock_unit": "吨", "is_in_stock": False}
        if self.platform == "guidechem":
            stock_info["total_stock"] = self._to_int(payload.get("stock"), 0)
            stock_info["is_in_stock"] = bool(payload.get("isInStock", False))
            # The unit is inferred from the package description.
            stock_info["stock_unit"] = "吨" if "吨" in (specs.get("package") or "") else "kg"
        elif self.platform == "molbase":
            stock_info["total_stock"] = self._to_int(payload.get("stockQuantity"), 0)
            stock_info["is_in_stock"] = stock_info["total_stock"] > 0
            stock_info["stock_unit"] = "kg"
        return stock_info

    def _build_price_info(self, soup: Any, payload: Dict, stock_unit: str,
                          ladder_prices: List[Dict]) -> Dict:
        """Combine base price, tier prices and the tax flag."""
        base_price = self._to_float(payload.get("basePrice"), 0.0)
        if not base_price:
            # Fall back to the first number shown in the page's price element.
            base_price = self._to_float(self._first_number(self._text(soup, ".base-price")), 0.0)
        return {
            "base_price": base_price,
            "base_unit": "元/吨" if stock_unit == "吨" else "元/kg",
            "ladder_prices": ladder_prices,
            "is_tax_included": self._is_tax_included(
                self._text(soup, ".tax-tag"), payload.get("isTaxIncluded", False)
            ),
        }

    def _build_supply_info(self, soup: Any, payload: Dict, stock_info: Dict) -> Dict:
        """Combine stock, minimum order, delivery and transport fields."""
        # Prefer the API value, then the page, then a minimum order of 1.
        min_order = self._to_int(payload.get("minOrder"), 0)
        if min_order <= 0:
            min_order = self._to_int(
                self._first_number(self._text(soup, ".min-order"), r"\d+"), 0
            )
        if min_order <= 0:
            min_order = 1
        return {
            "stock_info": stock_info,
            "min_order": min_order,
            "delivery_time": (payload.get("deliveryTime")
                              or self._text(soup, ".delivery-time")
                              or "7天内"),
            "transport_method": (self._text(soup, "div.transport-method, .transport-type")
                                 or "危化品专用车"),
            "supply_range": self._text(soup, "div.supply-range") or "全国配送",
        }

    def item_get(self, product_id: str) -> Dict:
        """Fetch the normalised detail record of one product.

        :param product_id: platform product id (e.g. "1234567" on guidechem).
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": str, "code": int}`` where ``code``
            is the HTTP status, 404 when the page has no title, or -1 for any
            other failure. This method does not raise.
        """
        try:
            detail_url = self.config["detail_url"].format(product_id=product_id)
            headers = self._get_headers()
            proxy = self._get_proxy()

            # Random pause before the request to keep the access rate low.
            low, high = self.request_delay
            time.sleep(max(0.0, random.uniform(low, high)))

            response = requests.get(
                url=detail_url,
                headers=headers,
                proxies=proxy,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "lxml")

            # A page without a title is treated as a missing/delisted product.
            title = self._text(soup, "h1.product-title")
            if not title:
                return {"success": False, "error_msg": "商品不存在或已下架", "code": 404}

            base_info = {
                "product_id": product_id,
                "title": title,
                "url": detail_url,
                "main_image": self._attr(soup, "div.main-img img", "src"),
                "category": self._text(soup, "div.breadcrumb a:last-of-type"),
                # Time of this fetch, not the listing's own update time.
                "update_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            }
            specs = self._parse_specs(soup)

            seller_tag = self._text(soup, "div.seller-tag")
            is_factory = any(tag in seller_tag for tag in ("源头工厂", "生产厂家"))
            seller_info = {
                "name": self._text(soup, "div.seller-name, .merchant-name"),
                "area": self._text(soup, "div.seller-area, .merchant-location"),
                "type": "工厂" if is_factory else "贸易商",
                "qualifications": self._parse_qualifications(soup),
                "capacity": self._text(soup, "div.capacity, .production-capacity"),
            }
            safety_compliance = self._parse_safety_compliance(soup)

            # Dynamic data (tier prices, stock); {} when the call failed.
            api_data = self._fetch_api_data(product_id, headers, proxy)
            payload = self._payload(api_data)
            ladder_prices = self._parse_ladder_prices(api_data)
            stock_info = self._build_stock_info(payload, specs)
            price_info = self._build_price_info(
                soup, payload, stock_info["stock_unit"], ladder_prices
            )
            supply_info = self._build_supply_info(soup, payload, stock_info)

            return {
                "success": True,
                "data": {
                    "base_info": base_info,
                    "specs": specs,
                    "price_info": price_info,
                    "supply_info": supply_info,
                    "seller_info": seller_info,
                    "safety_compliance": safety_compliance,
                },
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: the exception text embeds the URL, so
            # substring checks such as "403" can match a product id.
            status = e.response.status_code if e.response is not None else -1
            return self._http_error_result(status, str(e))
        except Exception as e:
            return {"success": False, "error_msg": f"详情获取失败:{str(e)}", "code": -1}
# Usage example
def print_item_report(data: Dict) -> None:
    """Print a human-readable summary of a successful ``item_get`` payload.

    :param data: the ``"data"`` dict of an ``item_get`` result, containing the
        keys ``base_info``, ``specs``, ``price_info``, ``supply_info``,
        ``seller_info`` and ``safety_compliance``.
    """
    base = data["base_info"]
    specs = data["specs"]
    price = data["price_info"]
    supply = data["supply_info"]
    stock = supply["stock_info"]
    seller = data["seller_info"]
    safety = data["safety_compliance"]
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print(f"商品标题:{base['title']}")
    print(f"商品ID:{base['product_id']} | 类目:{base['category']}")
    print(f"详情页:{base['url']}")
    print(light_rule)

    print("核心规格:")
    print(f" CAS号:{specs['cas']} | 纯度:{specs['purity']}% | 执行标准:{specs['standard']}")
    print(f" 分子式:{specs['molecular_formula']} | 外观:{specs['appearance']}")
    print(f" 包装规格:{specs['package']}")
    print(light_rule)

    print("价格信息:")
    tax_label = "含税" if price["is_tax_included"] else "不含税"
    print(f" 基础单价:¥{price['base_price']}/{price['base_unit']} | {tax_label}")
    if price["ladder_prices"]:
        print(" 批量阶梯价:")
        for ladder in price["ladder_prices"]:
            # 99999 is the sentinel the API client uses for "no upper bound".
            max_qty = "∞" if ladder["max_quantity"] == 99999 else ladder["max_quantity"]
            qty_unit = ladder["unit"].split("/")[-1]
            print(f" {ladder['min_quantity']}-{max_qty}{qty_unit}:¥{ladder['price']}/{ladder['unit']}")
    print(light_rule)

    print("供应信息:")
    stock_label = "现货" if stock["is_in_stock"] else "期货"
    print(f" 库存:{stock_label} | 总量:{stock['total_stock']}{stock['stock_unit']}")
    order_unit = price["base_unit"].split("/")[-1]
    print(f" 起订量:{supply['min_order']}{order_unit} | 交货期:{supply['delivery_time']}")
    print(f" 运输方式:{supply['transport_method']} | 供应范围:{supply['supply_range']}")
    print(light_rule)

    print("供应商信息:")
    print(f" 名称:{seller['name']} | 类型:{seller['type']} | 产地:{seller['area']}")
    print(f" 产能:{seller['capacity']}")
    if seller["qualifications"]:
        print(" 资质:")
        for qual in seller["qualifications"][:3]:  # show at most three certificates
            print(f" - {qual['name']}(许可证号:{qual['license_no'] or '无'})")
    print(light_rule)

    print("合规安全信息:")
    print(f" 危险类别:{safety['hazard_class']} | UN编号:{safety['un_number']}")
    print(f" MSDS报告:{'有' if safety['msds']['has_msds'] else '无'}")
    if safety["msds"]["has_msds"]:
        print(f" MSDS下载:{safety['msds']['download_url']}")
    notice = safety["safety_notice"]
    # Only mark the notice as truncated when it actually was.
    shown_notice = notice[:100] + "..." if len(notice) > 100 else notice
    print(f" 安全说明:{shown_notice}")
    print(heavy_rule)


def main() -> None:
    """Fetch one guidechem product with placeholder settings and print it."""
    # Placeholder settings -- replace with real values before running.
    proxies = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080",
    ]  # high-anonymity rotating proxy pool
    cookie = "userid=xxx; sessionId=xxx; enterpriseId=xxx; loginType=enterprise; ..."  # enterprise-account cookie
    product_id = "1234567"  # guidechem product id (99.9% anhydrous ethanol)

    item_api = ChemicalItemGetApi(
        platform="guidechem",
        proxy_pool=proxies,
        cookie=cookie,
    )
    result = item_api.item_get(product_id=product_id)

    if result["success"]:
        print_item_report(result["data"])
    else:
        print(f"获取失败:{result['error_msg']}(错误码:{result.get('code')})")


if __name__ == "__main__":
    main()