淘宝的 item_review 接口是获取商品评论数据的核心接口，可返回淘宝平台上指定商品的用户评价、评分、追评等详细信息。这些数据对商品口碑分析、用户需求挖掘、竞品对比等场景具有重要价值，是电商数据分析中不可或缺的一环。
一、接口核心特性分析
1. 接口功能与定位
2. 认证机制
3. 核心参数与响应结构
请求参数
响应核心字段
二、Python 脚本实现
import requests
import time
import json
import logging
import re
from typing import Dict, Optional, List, Tuple
from requests.exceptions import RequestException
from snownlp import SnowNLP # 用于情感分析,需安装:pip install snownlp
# Configure the root logger once at import time: INFO level and above,
# each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
class TaobaoItemReviewAPI:
    """Client for fetching, normalizing and summarizing Taobao item reviews.

    The client caches an access token, requests review pages over HTTP,
    converts each raw review into a uniform dictionary (with a SnowNLP
    sentiment score attached) and can aggregate a list of such dictionaries
    into a small statistical report.

    Network and API failures are logged and reported by returning ``None``
    (or an empty list for :meth:`get_all_reviews`); they are not raised.
    """

    # Sort orders accepted by ``get_item_reviews``.
    VALID_SORTS = ("newest", "helpful", "bad")

    def __init__(self, appkey: str, appsecret: str):
        """
        Initialize the review API client.

        :param appkey: Taobao Open Platform appkey
        :param appsecret: Taobao Open Platform appsecret
        """
        self.appkey = appkey
        self.appsecret = appsecret
        self.base_url = "https://eco.taobao.com/router/rest"
        self.access_token: Optional[str] = None
        self.token_expires_at: float = 0  # epoch seconds at which the token expires
        self.session = requests.Session()
        self.session.headers.update({
            "Content-Type": "application/x-www-form-urlencoded",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
        })

    # ------------------------------------------------------------------
    # Small internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_int(value: object, default: int = 0) -> int:
        """Coerce *value* to ``int``; return *default* if it is missing or malformed.

        Numeric strings with a fractional part (``"4.5"``) are truncated.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            pass
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _error_details(error: object) -> Tuple[object, object]:
        """Return ``(msg, code)`` from an ``error_response`` payload of any shape."""
        if isinstance(error, dict):
            return error.get("msg"), error.get("code")
        return error, None

    def _request_json(self, params: Dict, timeout: float, action: str, subject: str) -> Optional[Dict]:
        """Send a GET request to the gateway and return the decoded JSON object.

        :param params: query parameters to send
        :param timeout: request timeout in seconds
        :param action: prefix used in "request failed" / "API error" log lines
        :param subject: prefix used in "response could not be parsed" log lines
        :return: the decoded JSON object, or ``None`` on a transport error,
                 an undecodable body, or an ``error_response`` payload
        """
        try:
            response = self.session.get(self.base_url, params=params, timeout=timeout)
            response.raise_for_status()
        except RequestException as e:
            logging.error(f"{action}请求异常: {str(e)}")
            return None

        # Every JSON decode error raised by ``response.json()`` is a ValueError
        # subclass, so this covers old and new versions of requests alike.
        try:
            result = response.json()
        except ValueError:
            result = None
        if not isinstance(result, dict):
            logging.error(f"{subject}响应解析失败: {response.text[:200]}...")
            return None

        if "error_response" in result:
            msg, code = self._error_details(result["error_response"])
            logging.error(f"{action}失败: {msg} (错误码: {code})")
            return None
        return result

    # ------------------------------------------------------------------
    # Authentication
    # ------------------------------------------------------------------
    def _get_access_token(self) -> Optional[str]:
        """Return a valid access token, requesting a new one when necessary.

        A cached token is reused while it has more than 60 seconds of life
        left. Returns ``None`` when a new token cannot be obtained.
        """
        if self.access_token and self.token_expires_at > time.time() + 60:
            return self.access_token

        logging.info("获取新的access_token")
        # NOTE(review): the app secret travels as a URL query parameter here and
        # may end up in proxy/server logs — confirm whether the endpoint accepts
        # POST before changing the transport.
        params = {
            "method": "taobao.oauth.token",
            "appkey": self.appkey,
            "appsecret": self.appsecret,
            "grant_type": "client_credentials",
            "format": "json"
        }
        result = self._request_json(params, 10, "获取access_token", "access_token")
        if result is None:
            return None

        token = result.get("access_token")
        if not token:
            logging.error("获取access_token失败: 响应中缺少access_token")
            return None

        self.access_token = token
        # Fall back to 24 hours when the lifetime is absent or not numeric.
        self.token_expires_at = time.time() + self._to_int(result.get("expires_in"), 86400)
        return self.access_token

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------
    def get_item_reviews(self,
                         item_id: str,
                         page: int = 1,
                         page_size: int = 20,
                         sort: str = "newest",
                         has_image: bool = False) -> Optional[Dict]:
        """
        Fetch one page of reviews for an item.

        :param item_id: item ID
        :param page: 1-based page number
        :param page_size: reviews per page (1-100)
        :param sort: sort order, one of ``VALID_SORTS``
        :param has_image: only return reviews that carry images
        :return: dict with ``pagination``, ``reviews`` and ``raw_data`` keys,
                 or ``None`` on invalid arguments, request failure or an
                 empty result
        """
        # Validate arguments before spending a network round trip.
        if sort not in self.VALID_SORTS:
            valid_sorts = list(self.VALID_SORTS)
            logging.error(f"无效的排序方式: {sort},支持: {valid_sorts}")
            return None
        if page_size < 1 or page_size > 100:
            logging.error(f"每页条数必须在1-100之间,当前为: {page_size}")
            return None
        if page < 1:
            logging.error(f"页码必须大于等于1,当前为: {page}")
            return None

        if not self._get_access_token():
            return None

        params = {
            "method": "taobao.item.review.get",
            "appkey": self.appkey,
            "access_token": self.access_token,
            "item_id": item_id,
            "page": page,
            "page_size": page_size,
            "sort": sort,
            "has_image": "true" if has_image else "false",
            "format": "json",
            "v": "2.0"
        }
        result = self._request_json(params, 15, "获取评论", "评论")
        if result is None:
            return None

        review_response = result.get("item_review_get_response")
        reviews_data = review_response.get("reviews") if isinstance(review_response, dict) else None
        if not isinstance(reviews_data, dict) or not reviews_data:
            logging.warning("未获取到评论数据")
            return None

        # Pass the requested paging values so pagination stays correct even
        # when the response does not echo them back.
        return self._format_review_data(reviews_data, page=page, page_size=page_size)

    def get_all_reviews(self,
                        item_id: str,
                        max_pages: int = 10,
                        has_image: bool = False,
                        sort: str = "newest",
                        page_size: int = 100,
                        delay: float = 2.0) -> List[Dict]:
        """
        Fetch up to ``max_pages`` consecutive pages of reviews.

        Paging stops early on the first failed or empty page, or once the
        reported last page has been fetched.

        :param item_id: item ID
        :param max_pages: maximum number of pages to request
        :param has_image: only return reviews that carry images
        :param sort: sort order forwarded to ``get_item_reviews``
        :param page_size: reviews per page (default 100, the largest accepted
                          size, to minimize the number of requests)
        :param delay: seconds to wait between two requests (rate limiting)
        :return: flat list of formatted reviews (possibly empty)
        """
        all_reviews: List[Dict] = []
        for page in range(1, max_pages + 1):
            logging.info(f"获取第 {page} 页评论")
            result = self.get_item_reviews(
                item_id=item_id,
                page=page,
                page_size=page_size,
                sort=sort,
                has_image=has_image
            )
            if not result or not result["reviews"]:
                break
            all_reviews.extend(result["reviews"])

            # Stop before sleeping when no further request will be made.
            if page >= max_pages or page >= result["pagination"]["total_pages"]:
                break
            if delay > 0:
                time.sleep(delay)
        return all_reviews

    # ------------------------------------------------------------------
    # Normalization
    # ------------------------------------------------------------------
    def _format_review_data(self, review_data: Dict, page: int = 1, page_size: int = 20) -> Dict:
        """Normalize one raw ``reviews`` payload.

        :param review_data: raw ``reviews`` object from the API response
        :param page: page number to assume when the payload has none
        :param page_size: page size to assume when the payload has none or a
                          non-positive one
        :return: dict with ``pagination``, ``reviews`` (formatted list) and
                 ``raw_data`` (the untouched input)
        """
        total = max(self._to_int(review_data.get("total"), 0), 0)
        size = self._to_int(review_data.get("page_size"), page_size)
        if size <= 0:
            # Guards the ceiling division below against division by zero.
            size = page_size if page_size > 0 else 20

        pagination = {
            "total_reviews": total,
            "total_pages": (total + size - 1) // size,
            "current_page": self._to_int(review_data.get("page"), page),
            "page_size": size
        }

        raw_reviews = review_data.get("review") or []
        if isinstance(raw_reviews, dict):
            # Tolerate a single review delivered as an object instead of a list.
            raw_reviews = [raw_reviews]
        reviews = [self._format_review(item) for item in raw_reviews if isinstance(item, dict)]

        return {
            "pagination": pagination,
            "reviews": reviews,
            "raw_data": review_data  # keep the raw payload for callers that need it
        }

    def _format_review(self, review: Dict) -> Dict:
        """Convert one raw review object into the normalized review dict."""
        content = self._clean_text(review.get("content", ""))

        # Sentiment score lies in [0, 1]; closer to 1 means more positive.
        sentiment_score = self._analyze_sentiment(content)
        if sentiment_score > 0.6:
            sentiment = "positive"
        elif sentiment_score < 0.4:
            sentiment = "negative"
        else:
            sentiment = "neutral"

        raw_tags = review.get("tags") or ""
        if isinstance(raw_tags, str):
            raw_tags = raw_tags.split(",")
        # Drop empty entries so an empty tag string yields [] rather than [""].
        tags = [str(tag).strip() for tag in raw_tags if tag and str(tag).strip()]

        return {
            "review_id": review.get("review_id"),
            "user": {
                "nick": review.get("nick"),
                "avatar": review.get("user_avatar")
            },
            "rating": {
                "total": self._to_int(review.get("rating"), 0),                    # overall
                "description": self._to_int(review.get("description_rating"), 0),  # matches description
                "service": self._to_int(review.get("service_rating"), 0),          # service attitude
                "delivery": self._to_int(review.get("delivery_rating"), 0)         # shipping speed
            },
            "content": content,
            "created_time": review.get("created"),
            "images": self._extract_image_urls(review.get("images")),
            "append_comment": self._format_followup(review.get("append_comment")),
            "reply": self._format_followup(review.get("reply")),
            "useful": self._to_int(review.get("useful"), 0),  # "helpful" votes
            "tags": tags,
            "sentiment": {
                "score": round(sentiment_score, 4),
                "label": sentiment
            }
        }

    @staticmethod
    def _extract_image_urls(images: object) -> List[str]:
        """Collect non-empty image URLs from an ``images`` payload of any shape."""
        if isinstance(images, dict):
            images = images.get("image") or []
        if isinstance(images, (dict, str)):
            images = [images]
        if not isinstance(images, list):
            return []

        urls = []
        for img in images:
            url = img.get("url") if isinstance(img, dict) else img
            if url and isinstance(url, str):
                urls.append(url)
        return urls

    def _format_followup(self, entry: object) -> Optional[Dict]:
        """Normalize a follow-up review or seller reply; ``None`` when absent."""
        if not entry:
            return None
        if isinstance(entry, dict):
            return {
                "content": self._clean_text(entry.get("content", "")),
                "created": entry.get("created")
            }
        if isinstance(entry, str):
            return {"content": self._clean_text(entry), "created": None}
        return None

    def _clean_text(self, text: str) -> str:
        """Strip HTML tags from *text* and collapse whitespace runs to one space."""
        if not text:
            return ""
        # ``[^>]*`` (unlike ``.*?``) also matches tags that span a line break.
        clean = re.sub(r'<[^>]*>', '', str(text))
        clean = re.sub(r'\s+', ' ', clean).strip()
        return clean

    def _analyze_sentiment(self, text: str) -> float:
        """Return the SnowNLP sentiment score of *text*.

        Empty text, or any failure inside the analyzer, yields the neutral
        score ``0.5``.
        """
        if not text:
            return 0.5  # neutral
        try:
            return SnowNLP(text).sentiments
        except Exception as e:
            # Best effort: a failed analysis must not abort review formatting,
            # but KeyboardInterrupt/SystemExit are allowed to propagate.
            logging.debug(f"情感分析失败: {e}")
            return 0.5

    # ------------------------------------------------------------------
    # Aggregation
    # ------------------------------------------------------------------
    def analyze_reviews(self, reviews: List[Dict]) -> Dict:
        """Aggregate formatted reviews into a statistical report.

        :param reviews: reviews as produced by ``get_item_reviews`` /
                        ``get_all_reviews``
        :return: ``{}`` for empty input; otherwise a dict with
                 ``total_reviews``, ``sentiment_distribution`` (counts and
                 percentages), ``average_rating`` (per dimension, one
                 decimal), ``image_review_ratio`` (percent) and ``top_tags``
                 (up to 10 ``(tag, count)`` pairs, most frequent first)
        """
        if not reviews:
            return {}

        total = len(reviews)
        sentiment_counts = {"positive": 0, "neutral": 0, "negative": 0}
        rating_stats = {
            "total": [],
            "description": [],
            "service": [],
            "delivery": []
        }
        tag_counts = {}
        has_image_count = 0

        for review in reviews:
            label = review["sentiment"]["label"]
            sentiment_counts[label] = sentiment_counts.get(label, 0) + 1

            for key, values in rating_stats.items():
                if key in review["rating"]:
                    values.append(review["rating"][key])

            for tag in review["tags"]:
                if tag:
                    tag_counts[tag] = tag_counts.get(tag, 0) + 1

            if review["images"]:
                has_image_count += 1

        avg_ratings = {
            key: round(sum(values) / len(values), 1) if values else 0
            for key, values in rating_stats.items()
        }

        # Stable sort: tags with equal counts keep first-seen order.
        top_tags = sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:10]

        return {
            "total_reviews": total,
            "sentiment_distribution": {
                "count": sentiment_counts,
                "percentage": {
                    k: round(v / total * 100, 1) for k, v in sentiment_counts.items()
                }
            },
            "average_rating": avg_ratings,
            "image_review_ratio": round(has_image_count / total * 100, 1),
            "top_tags": top_tags
        }
# 示例调用
if __name__ == "__main__":
# 替换为实际的appkey和appsecret(从淘宝开放平台获取)
APPKEY = "your_appkey"
APPSECRET = "your_appsecret"
# 替换为目标商品ID
ITEM_ID = "123456789"
# 初始化API客户端
api = TaobaoItemReviewAPI(APPKEY, APPSECRET)
# 方式1:获取单页评论
# review_result = api.get_item_reviews(
# item_id=ITEM_ID,
# page=1,
# page_size=20,
# sort="newest",
# has_image=False
# )
# 方式2:获取多页评论
review_result = api.get_all_reviews(
item_id=ITEM_ID,
max_pages=3,
has_image=False
)
if isinstance(review_result, dict) and "reviews" in review_result:
print(f"共获取到 {review_result['pagination']['total_reviews']} 条评论")
print(f"当前第 {review_result['pagination']['current_page']}/{review_result['pagination']['total_pages']} 页\n")
# 打印前3条评论
for i, review in enumerate(review_result["reviews"][:3], 1):
print(f"{i}. 用户: {review['user']['nick']}")
print(f" 评分: {review['rating']['total']}分 (描述: {review['rating']['description']}, 服务: {review['rating']['service']}, 物流: {review['rating']['delivery']})")
print(f" 时间: {review['created_time']}")
print(f" 内容: {review['content'][:100]}{'...' if len(review['content'])>100 else ''}")
print(f" 情感: {review['sentiment']['label']} (得分: {review['sentiment']['score']})")
print(f" 有用数: {review['useful']}")
if review['images']:
print(f" 图片数: {len(review['images'])}")
if review['append_comment']:
print(f" 追评: {review['append_comment']['content'][:50]}{'...' if len(review['append_comment']['content'])>50 else ''}")
print("-" * 100)
# 分析评论
analysis = api.analyze_reviews(review_result["reviews"])
print("\n=== 评论分析报告 ===")
print(f"总评论数: {analysis['total_reviews']}")
print(f"情感分布: 正面 {analysis['sentiment_distribution']['percentage']['positive']}%, 中性 {analysis['sentiment_distribution']['percentage']['neutral']}%, 负面 {analysis['sentiment_distribution']['percentage']['negative']}%")
print(f"平均评分: 总评分 {analysis['average_rating']['total']}, 描述相符 {analysis['average_rating']['description']}, 服务态度 {analysis['average_rating']['service']}, 物流速度 {analysis['average_rating']['delivery']}")
print(f"有图评价占比: {analysis['image_review_ratio']}%")
print("热门标签:")
for tag, count in analysis['top_tags']:
print(f" {tag}: {count}次")
elif isinstance(review_result, list):
# 处理多页评论结果
print(f"共获取到 {len(review_result)} 条评论")
# 分析评论
analysis = api.analyze_reviews(review_result)
print("\n=== 评论分析报告 ===")
print(f"总评论数: {analysis['total_reviews']}")
print(f"情感分布: 正面 {analysis['sentiment_distribution']['percentage']['positive']}%, 中性 {analysis['sentiment_distribution']['percentage']['neutral']}%, 负面 {analysis['sentiment_distribution']['percentage']['negative']}%")