南网商城(隶属于南方电网,专注电力行业物资采购)是电力系统内重要的 B2B 电商平台,聚焦电力设备、电工材料、安防用品等工业品类。其商品详情数据(如型号规格、技术参数、供应商资质、交货周期等)对电力行业供应链分析、比价采购、供应商评估等场景具有极高价值。由于南网商城无公开官方 API,开发者需通过页面解析实现商品详情(item_get)的获取。本文系统讲解接口对接逻辑、技术实现、反爬应对及数据解析要点,帮助开发者构建稳定的电力行业商品数据获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与 URL 结构)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import random
import re
import time
from typing import Any, Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class NwmallItemApi:
    """Fetch product details from nwmall.com by scraping the product page.

    Static fields (title, price, technical parameters, supplier, ...) are
    parsed from the product HTML; stock figures come from a separate AJAX
    endpoint. The CSS selectors and the stock JSON keys used here describe
    an assumed page layout -- NOTE(review): verify them against the live
    site before relying on the parsed values.
    """

    # Lower/upper bound (seconds) of the random pause before a page request.
    REQUEST_DELAY_RANGE = (2, 4)

    # First integer in a text, allowing thousands separators ("1,200").
    _INT_RE = re.compile(r"\d{1,3}(?:,\d{3})+|\d+")
    # First decimal number in a text ("1,234.50", "99", ".5").
    _PRICE_RE = re.compile(
        r"\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+"
    )

    def __init__(self, proxy_pool: Optional[List[str]] = None, cookie: str = ""):
        """Create a client.

        :param proxy_pool: proxy URLs such as ``["http://ip:port", ...]``;
            ``None`` or an empty list means requests go out directly.
        :param cookie: raw ``Cookie`` header value carrying a login session
            (some pages only show prices when logged in).
        """
        self.base_url = "https://www.nwmall.com/product/{item_id}.html"
        # Stock endpoint.
        self.stock_api = "https://www.nwmall.com/ajax/product/{item_id}/stock"
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.cookie = cookie

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers with a randomly chosen User-Agent."""
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://www.nwmall.com/category/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }
        if self.cookie:
            headers["Cookie"] = self.cookie
        return headers

    def _get_proxy(self) -> Optional[Dict[str, str]]:
        """Return a ``requests``-style proxies mapping, or ``None`` if no pool."""
        if not self.proxy_pool:
            return None
        proxy = random.choice(self.proxy_pool)
        return {"http": proxy, "https": proxy}

    def _clean_price(self, price_str: str) -> float:
        """Extract the first number from a price string.

        Currency symbols, tax labels and thousands separators are ignored.
        Only the first number is used, so a range such as "¥10.00-¥20.00"
        yields 10.0 instead of an unparseable concatenation of all digits.

        :return: the parsed price, or ``0.0`` when no number is present.
        """
        if not price_str:
            return 0.0
        match = self._PRICE_RE.search(price_str)
        if match is None:
            return 0.0
        return float(match.group().replace(",", ""))

    @classmethod
    def _first_int(cls, text: str, default: int) -> int:
        """Return the first integer found in *text*.

        *default* is returned when *text* is empty, contains no digits, or
        the parsed number is 0 (the ``or default`` fallback).
        """
        match = cls._INT_RE.search(text or "")
        if match is None:
            return default
        return int(match.group().replace(",", "")) or default

    @staticmethod
    def _select_text(soup, selector: str) -> str:
        """Return the stripped text of the first node matching *selector*.

        Returns ``""`` when nothing matches, so a missing element never
        raises while the result dict is being built.
        """
        node = soup.select_one(selector)
        return node.text.strip() if node is not None else ""

    def _parse_technical_params(self, table_soup) -> Dict[str, str]:
        """Turn a ``<th>``/``<td>`` parameter table into a name -> value dict.

        Rows lacking either cell are skipped; ``None`` yields an empty dict.
        """
        params: Dict[str, str] = {}
        if table_soup is None:
            return params
        for row in table_soup.select("tr"):
            th = row.select_one("th")
            td = row.select_one("td")
            if th is not None and td is not None:
                params[th.text.strip()] = td.text.strip()
        return params

    def _parse_static_data(self, html: str) -> Dict[str, Any]:
        """Parse the product page HTML into the static part of the result.

        Every field falls back to an empty/zero value when its element is
        missing; an empty ``title`` is what ``item_get`` uses to detect a
        page without product information.
        """
        soup = BeautifulSoup(html, "lxml")
        text = self._select_text

        technical_params = self._parse_technical_params(
            soup.select_one("table.param-table")
        )

        # The execution standard is one of the technical parameters; take
        # the first label that is present.
        execution_standard = ""
        for key in ("执行标准", "标准编号", "依据标准"):
            if key in technical_params:
                execution_standard = technical_params[key]
                break

        canonical = soup.select_one("link[rel='canonical']")
        url = (canonical.get("href") or "") if canonical is not None else ""

        return {
            "title": text(soup, "h1.product-title"),
            "images": [
                img.get("src")
                for img in soup.select("div.product-img img")
                if img.get("src")
            ],
            "price": {
                "tax_included": self._clean_price(text(soup, "div.price-current")),
                "original": self._clean_price(text(soup, "div.price-original")),
                "min_order": self._first_int(text(soup, "div.min-order"), 1),
                # Bulk discount text, kept verbatim.
                "batch_discount": text(soup, "div.batch-discount"),
            },
            "technical_params": technical_params,
            "execution_standard": execution_standard,
            "supplier": {
                "name": text(soup, "div.supplier-name a"),
                "qualifications": [
                    tag.text.strip()
                    for tag in soup.select("div.qualification-tags span")
                ],
                "location": text(soup, "div.supplier-location"),
            },
            "trade": {
                "sales": self._first_int(text(soup, "div.sales-count"), 0),
                "delivery_cycle": text(soup, "div.delivery-cycle"),
            },
            "service": {
                "warranty": text(soup, "div.warranty-period"),
                "after_sale": text(soup, "div.after-sale-policy"),
            },
            "url": url,
        }

    def _fetch_dynamic_data(
        self,
        item_id: str,
        headers: Dict[str, str],
        proxy: Optional[Dict[str, str]],
        timeout: int = 10,
    ) -> Dict[str, Any]:
        """Query the stock endpoint; best effort.

        Any request or decoding failure is reported on stdout and the
        zeroed defaults are returned, so a stock outage never fails the
        whole product lookup.

        :return: ``{"stock": ..., "available": ...}``.
        """
        dynamic_data: Dict[str, Any] = {"stock": 0, "available": 0}
        try:
            stock_url = self.stock_api.format(item_id=item_id)
            resp = requests.get(
                stock_url, headers=headers, proxies=proxy, timeout=timeout
            )
            resp.raise_for_status()
            stock_data = resp.json()
            if not isinstance(stock_data, dict):
                raise ValueError("unexpected stock payload")
            dynamic_data["stock"] = stock_data.get("stock", 0)
            dynamic_data["available"] = stock_data.get("available", 0)
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"动态库存获取失败: {str(e)}")
        return dynamic_data

    def item_get(self, item_id: str, timeout: int = 10) -> Dict[str, Any]:
        """Fetch and normalise the details of one product.

        :param item_id: product ID (e.g. ``P123456``).
        :param timeout: per-request timeout in seconds, applied to both the
            page request and the stock request.
        :return: ``{"success": True, "data": {...}}`` on success, otherwise
            ``{"success": False, "error_msg": ..., "code": ...}`` (the
            "product not found" result carries no ``code``). This method
            does not raise; failures are reported through the result dict.
        """
        try:
            # 1. Build the URL and request the page.
            url = self.base_url.format(item_id=item_id)
            headers = self._get_headers()
            proxy = self._get_proxy()
            # Random pause to reduce the chance of tripping anti-bot rules.
            time.sleep(random.uniform(*self.REQUEST_DELAY_RANGE))
            response = requests.get(
                url=url, headers=headers, proxies=proxy, timeout=timeout
            )
            response.raise_for_status()

            # 2. Parse the static part.
            static_data = self._parse_static_data(response.text)
            if not static_data["title"]:
                return {"success": False, "error_msg": "未找到商品信息,可能item_id错误或商品已下架"}

            # 3. Merge the dynamic part: the sellable quantity is exposed
            #    as trade.stock.
            dynamic_data = self._fetch_dynamic_data(item_id, headers, proxy, timeout)
            static_data["trade"]["stock"] = dynamic_data["available"]

            # 4. Assemble the result.
            return {
                "success": True,
                "data": {
                    "item_id": item_id,
                    **static_data,
                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S"),
                },
            }
        except requests.exceptions.HTTPError as e:
            # Use the real status code: the exception text embeds the URL,
            # so substring checks misfire on IDs such as "P403123".
            status = e.response.status_code if e.response is not None else None
            if status == 403:
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            if status == 401:
                return {"success": False, "error_msg": "需要登录,请提供有效Cookie", "code": 401}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            # Top-level boundary: callers get an error dict, never a raise.
            return {"success": False, "error_msg": f"获取失败: {str(e)}", "code": -1}
# Usage example
def main() -> None:
    """Fetch one sample product and print a human-readable summary."""
    # Proxy pool (replace with working proxies).
    proxies = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080",
    ]
    # Login-session cookie copied from a browser (some pages require login).
    cookie = "SESSION=xxx; user_token=xxx"

    # Initialise the API client.
    api = NwmallItemApi(proxy_pool=proxies, cookie=cookie)

    # Fetch the product details (sample item_id; replace with a real one).
    item_id = "P123456"
    result = api.item_get(item_id)

    if not result["success"]:
        print(f"获取失败: {result['error_msg']}(错误码: {result.get('code')})")
        return

    data = result["data"]
    print(f"商品标题: {data['title']}")
    print(f"含税单价: ¥{data['price']['tax_included']} | 起订量: {data['price']['min_order']}件 | 批量折扣: {data['price']['batch_discount']}")
    print(f"执行标准: {data['execution_standard']}")
    print(f"供应商: {data['supplier']['name']}({data['supplier']['location']})")
    print(f"资质认证: {','.join(data['supplier']['qualifications'])}")
    print(f"交易数据: 累计销量{data['trade']['sales']}件 | 可售库存{data['trade']['stock']}件 | 交货周期{data['trade']['delivery_cycle']}")
    print("核心技术参数:")
    # Print only these key parameters, and only those the product has.
    key_params = ["规格型号", "额定电压", "材质", "适用场景", "防护等级"]
    for key in key_params:
        if key in data["technical_params"]:
            print(f" {key}: {data['technical_params'][key]}")
    print(f"售后服务: 质保期{data['service']['warranty']} | {data['service']['after_sale']}")


if __name__ == "__main__":
    main()