1688 的 item_review 接口(官方标准方法名:alibaba.item.review.get)是阿里巴巴开放平台提供的核心接口之一,用于获取指定商品的买家评论数据,包括文本评价、评分、晒图、采购属性(如采购数量、用途)等信息。该接口是分析商品质量、供应商信誉、买家需求的关键数据来源,广泛应用于供应链风控、竞品分析、商品优化等场景。
一、接口核心特性深度分析
1. 接口定位与核心价值
2. 认证与请求机制
3. 核心参数与响应结构
(1)关键请求参数
(2)响应核心字段
二、Python 脚本实现(含评论分析功能)
以下实现 item_review 接口的完整调用逻辑,支持评论获取、数据格式化、基础分析(好评率计算、关键词提取),需先在 1688 开放平台申请 appkey 和 appsecret 并开通接口权限。
import hashlib
import json
import logging
import re
import time
from collections import Counter
from typing import Dict, List, Optional, Tuple

import requests
from requests.exceptions import RequestException
from wordcloud import WordCloud  # extra dependency: pip install wordcloud

# Logging configuration
# Root logger: INFO and above, rendered as "<timestamp> - <level> - <message>".
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
# Stop-word list: high-frequency tokens filtered out as meaningless.
STOP_WORDS = {
    "的", "了", "是", "我", "在", "有", "和", "就", "不", "人", "都", "一", "一个", "上", "也",
    "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好", "自己", "这", "件", "个",
}
class AlibabaItemReviewAPI:
def __init__(self, appkey: str, appsecret: str):
    """Initialise the 1688 ``item_review`` API client.

    The method was pasted as ``init`` (the double underscores were lost),
    which Python never calls on construction; it is restored to
    ``__init__`` so that ``AlibabaItemReviewAPI(appkey, appsecret)`` works.

    :param appkey: appkey issued by the 1688 open platform
    :param appsecret: appsecret issued by the 1688 open platform; it is
        appended to the signing string in ``_generate_sign``
    """
    self.appkey = appkey
    self.appsecret = appsecret
    # NOTE(review): endpoint copied verbatim from the original; confirm it is
    # the correct gateway URL for alibaba.item.review.get.
    self.base_url = "https://gw.open.1688.com/openapi/http/1/system.oauth2"
    # One shared session so consecutive requests reuse the same connection pool.
    self.session = requests.Session()
def _generate_sign(self, params: Dict) -> str:
    """Build the request signature: an upper-case MD5 hex digest.

    The digest is computed over the parameters sorted by name, joined as
    ``key=value&key=value``, with ``self.appsecret`` appended at the end.

    NOTE(review): this reproduces the scheme used by the original script;
    confirm against the platform's signing documentation before relying on it.

    :param params: request parameters to sign (the ``sign`` key must not be
        present yet)
    :return: 32-character upper-case hexadecimal MD5 digest
    """
    # 1. Sort by parameter name (ascending code-point order).
    sorted_params = sorted(params.items(), key=lambda item: item[0])
    # 2. Join as key=value&key=value.
    sign_str = "&".join(f"{key}={value}" for key, value in sorted_params)
    # 3. Append the appsecret and hash the UTF-8 bytes with MD5.
    sign_str += self.appsecret
    return hashlib.md5(sign_str.encode("utf-8")).hexdigest().upper()
def _get_timestamp(self) -> str:
    """Return the current local time formatted as ``yyyy-MM-dd HH:mm:ss``.

    The original format string had a space inside every directive
    (``"% Y-% m-% d ..."``), which is not a valid strftime format; the
    directives are restored here.
    """
    # strftime uses the current local time when no time tuple is passed.
    return time.strftime("%Y-%m-%d %H:%M:%S")
def get_item_reviews(self,
                     product_id: str,
                     page: int = 1,
                     page_size: int = 20,
                     review_type: str = "all",
                     sort: str = "create_time_desc") -> Optional[Dict]:
    """Fetch one page of reviews for a product and return it formatted.

    Every failure (invalid argument, HTTP/network error, API error payload,
    undecodable body, empty result) is logged and reported as ``None``; no
    exception from the request itself is propagated to the caller.

    :param product_id: 1688 product ID
    :param page: page number, 1-100
    :param page_size: reviews per page, 1-50
    :param review_type: one of ``all`` / ``positive`` / ``negative``
    :param sort: sort order, ``create_time_desc`` or ``create_time_asc``
        (passed through without validation)
    :return: dict produced by ``_format_review_data`` (pagination, stats,
        review list), or ``None`` on any failure
    """
    # 1. Validate arguments before spending a request on them.
    if review_type not in ("all", "positive", "negative"):
        logging.error(f"无效评论类型:{review_type},仅支持 all/positive/negative")
        return None
    if page < 1 or page > 100:
        logging.error(f"页码超出范围:{page},仅支持 1~100")
        return None
    if page_size < 1 or page_size > 50:
        logging.error(f"每页数量超出范围:{page_size},仅支持 1~50")
        return None

    # 2. Build the request parameters.
    params = {
        "method": "alibaba.item.review.get",
        "app_key": self.appkey,
        "timestamp": self._get_timestamp(),
        "format": "json",
        "v": "2.0",
        "sign_method": "md5",
        "product_id": product_id,
        "page": str(page),
        "page_size": str(page_size),
        "review_type": review_type,
        "sort": sort,
    }

    # 3. Sign last, so the signature covers every other parameter.
    params["sign"] = self._generate_sign(params)

    try:
        # 4. Send the GET request.
        response = self.session.get(
            self.base_url,
            params=params,
            timeout=15,
        )
        response.raise_for_status()  # raise on HTTP errors (e.g. 401, 429)
        result = response.json()

        # 5. Handle an error payload returned with a successful HTTP status.
        if "error_response" in result:
            error = result["error_response"]
            logging.error(
                f"接口错误:{error.get('msg', ' 未知错误 ')}(错误码:{error.get('code', ' 未知 ')},子码:{error.get('sub_code', ' 无 ')})"
            )
            return None

        # 6. Extract and format the payload.
        raw_response = result.get("alibaba_item_review_get_response", {})
        raw_data = raw_response.get("result", {})
        if not raw_data:
            logging.warning("未获取到评论数据")
            return None
        return self._format_review_data(raw_data)
    except json.JSONDecodeError:
        # Must come before RequestException: recent versions of requests raise
        # a JSONDecodeError that subclasses both, and the request handler
        # would otherwise swallow it with the wrong message.
        # Only response.json() raises this, so `response` is always bound here.
        logging.error(f"响应解析失败:{response.text[:200]}...")
        return None
    except RequestException as e:
        logging.error(f"请求异常:{e}")
        return None
def _format_review_data(self, raw_data: Dict) -> Dict:
    """Normalise the raw ``result`` payload into a stable structure.

    Numeric fields are coerced with ``int``/``float``, HTML tags are stripped
    from review text, and the image list is always returned as a list.

    Compared with the original script this also tolerates: a ``page_size``
    of 0 (previously ``ZeroDivisionError``), ``reviews`` / ``images`` /
    ``content`` being ``None``, and a single review delivered as a dict
    instead of a list.

    :param raw_data: the ``result`` object of the API response
    :return: dict with keys ``product_id``, ``pagination``, ``stats`` and
        ``reviews``
    """
    total_results = int(raw_data.get("total_results", 0))
    page_size = int(raw_data.get("page_size", 20))

    # 1. Pagination; total_pages is a ceiling division, 0 if page_size is unusable.
    pagination = {
        "total_reviews": total_results,
        "current_page": int(raw_data.get("page", 1)),
        "page_size": page_size,
        "total_pages": (total_results + page_size - 1) // page_size if page_size > 0 else 0,
    }

    # 2. Aggregate statistics.
    if raw_data.get("total_results"):
        has_image_ratio = round(
            int(raw_data.get("has_image_count", 0)) / max(total_results, 1),
            4,
        )
    else:
        has_image_ratio = 0
    stats = {
        "positive_rate": round(float(raw_data.get("positive_rate", 0)), 4),  # 4 decimals
        "positive_count": int(raw_data.get("positive_count", 0)),
        "negative_count": int(raw_data.get("negative_count", 0)),
        "has_image_ratio": has_image_ratio,  # share of reviews with images
    }

    # 3. Review list: clean the text and extract the key fields.
    raw_reviews = (raw_data.get("reviews") or {}).get("review") or []
    if isinstance(raw_reviews, dict):
        # A lone review may arrive as an object rather than a one-element list.
        raw_reviews = [raw_reviews]

    html_tag = re.compile(r"<[^>]+>")
    formatted_reviews = []
    for raw_review in raw_reviews:
        # Strip HTML tags and surrounding whitespace from the review text.
        content = html_tag.sub("", raw_review.get("content") or "").strip()

        # Images: always a list of URLs; a single image may arrive as a string.
        images = (raw_review.get("images") or {}).get("image_url") or []
        if isinstance(images, str):
            images = [images]

        formatted_reviews.append({
            "review_id": raw_review.get("review_id"),
            "buyer_nick": raw_review.get("buyer_nick", "匿名买家"),
            "create_time": raw_review.get("create_time"),
            "star_level": int(raw_review.get("star_level") or 0),  # 1-5 stars, 0 if absent
            "star_desc": raw_review.get("star_desc", ""),
            "content": content,
            "images": images,
            "purchase_info": {
                "quantity": int(raw_review.get("purchase_quantity") or 0),
                "unit": raw_review.get("purchase_unit", "件"),
                "usage": raw_review.get("usage", "未填写"),
            },
        })

    return {
        "product_id": raw_data.get("product_id"),
        "pagination": pagination,
        "stats": stats,
        "reviews": formatted_reviews,
    }
def get_all_reviews(self, product_id: str, review_type: str = "all", max_pages: int = 10,
                    request_interval: float = 1.5) -> Optional[Dict]:
    """Fetch a product's reviews page by page and merge them.

    Pages are requested with the maximum page size (50) until either
    ``max_pages`` or the page count reported by the first page is reached,
    or a page request fails (reviews collected so far are still returned).
    Statistics are recomputed from the merged reviews: 4-5 stars count as
    positive, 1-2 stars as negative.

    :param product_id: product ID
    :param review_type: review type, forwarded to ``get_item_reviews``
    :param max_pages: upper bound on pages fetched, to avoid excessive requests
    :param request_interval: seconds to wait between two page requests; no
        wait happens after the final page
    :return: merged dict (``product_id``, ``pagination``, ``stats``,
        ``reviews``), or ``None`` if no review was retrieved
    """
    all_reviews: List[Dict] = []
    pages_fetched = 0
    total_pages = 1  # placeholder until the first page reports the real count

    while pages_fetched < max_pages and pages_fetched < total_pages:
        current_page = pages_fetched + 1
        logging.info(f"获取商品 {product_id} 第 {current_page} 页评论(类型:{review_type})")
        page_result = self.get_item_reviews(
            product_id=product_id,
            page=current_page,
            page_size=50,  # largest page size keeps the request count low
            review_type=review_type,
            sort="create_time_desc",
        )
        if not page_result:
            break

        # The first page tells us how many pages exist in total.
        if current_page == 1:
            total_reviews = page_result["pagination"]["total_reviews"]
            total_pages = page_result["pagination"]["total_pages"]
            logging.info(f"商品 {product_id} 共 {total_reviews} 条评论,共 {total_pages} 页")

        all_reviews.extend(page_result["reviews"])
        pages_fetched += 1

        # Throttle only when another request will follow.
        if request_interval > 0 and pages_fetched < max_pages and pages_fetched < total_pages:
            time.sleep(request_interval)

    if not all_reviews:
        logging.warning(f"未获取到商品 {product_id} 的任何评论")
        return None

    # Recompute the statistics over the merged review list.
    review_count = len(all_reviews)
    positive_count = sum(1 for r in all_reviews if r["star_level"] >= 4)
    negative_count = sum(1 for r in all_reviews if r["star_level"] <= 2)
    has_image_count = sum(1 for r in all_reviews if len(r["images"]) > 0)
    return {
        "product_id": product_id,
        "pagination": {
            "total_reviews": review_count,
            "total_pages": pages_fetched,
            "page_size": 50,
            "current_page": pages_fetched,
        },
        "stats": {
            "positive_rate": round(positive_count / review_count, 4),
            "positive_count": positive_count,
            "negative_count": negative_count,
            "has_image_ratio": round(has_image_count / review_count, 4),
        },
        "reviews": all_reviews,
    }
def analyze_reviews (self, reviews: List [Dict]) -> Dict:
"""
评论基础分析(关键词提取、评分分布、采购用途统计)
:param reviews: 格式化后的评论列表
:return: 分析结果
"""
if not reviews:
return {"