Shopee 的 item_review 接口是获取平台商品评论数据的核心接口,能够返回特定商品的用户评价、评分、图文内容等关键信息。对于跨境电商卖家而言,这些数据在了解用户反馈、优化产品描述、改进产品质量以及制定营销策略等方面具有重要价值。
一、接口核心特性分析
1. 接口功能与定位
2. 认证机制
3. 核心参数与响应结构
请求参数
响应核心字段
二、Python 脚本实现
import requests
import time
import json
import logging
import hashlib
import hmac
import os
import re
from typing import Dict, Optional, List, Tuple
from requests.exceptions import RequestException
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from collections import defaultdict
from textblob import TextBlob # 用于情感分析
# Configure logging: emit INFO and above, each record formatted as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
class ShopeeItemReviewAPI:
    """Client for a Shopee item-review endpoint plus offline review analytics.

    The client signs requests with HMAC-SHA256, normalises the review payload
    into plain dictionaries, and offers helpers that aggregate ratings,
    sentiment, keywords, variants, images and seller replies, download review
    images and render summary charts.

    NOTE(review): the endpoint path, query parameter names, millisecond
    timestamp, signature base string and response envelope used here are kept
    exactly as this script had them and have not been checked against the
    official Shopee Open Platform reference -- confirm before production use.
    """

    # Partner API host per lower-case region code.
    _REGION_HOSTS = {
        "sg": "https://partner.shopeemobile.com",  # Singapore
        "my": "https://partner.shopeemobile.com",  # Malaysia
        "th": "https://partner.shopeemobile.com",  # Thailand
        "tw": "https://partner.shopee.tw",  # Taiwan
        "id": "https://partner.shopeemobile.com",  # Indonesia
        "vn": "https://partner.shopeemobile.com",  # Vietnam
        "ph": "https://partner.shopeemobile.com",  # Philippines
    }

    def __init__(self, partner_id: int, partner_key: str, shopid: int, region: str = "sg"):
        """Create a client bound to one partner account and one shop.

        :param partner_id: partner ID
        :param partner_key: partner secret; stored as UTF-8 bytes for signing
        :param shopid: shop ID
        :param region: region code such as sg, my, th, tw, id, vn, ph
        """
        self.partner_id = partner_id
        self.partner_key = partner_key.encode('utf-8')  # HMAC key must be bytes
        self.shopid = shopid
        self.region = region
        self.base_url = self._get_base_url()
        self.access_token = None
        self.session = requests.Session()
        self.session.headers.update({
            "Content-Type": "application/json",
            "Accept": "application/json",
            "User-Agent": "ShopeeAPI/1.0.0 (Python)"
        })

    def _get_base_url(self) -> str:
        """Return the API host for ``self.region``.

        The lookup is case-insensitive. Unknown (or empty) region codes fall
        back to the "sg" host and a warning is logged.
        """
        region_key = str(self.region or "").strip().lower()
        host = self._REGION_HOSTS.get(region_key)
        if host is None:
            logging.warning(f"未知地区代码: {self.region!r},使用默认地区 sg")
            host = self._REGION_HOSTS["sg"]
        return host

    def _generate_signature(self, path: str, timestamp: int, access_token: str = "") -> str:
        """Return the hex HMAC-SHA256 signature for a request.

        The signed string is ``partner_id + path + timestamp`` with
        ``access_token`` appended when it is non-empty; the key is
        ``self.partner_key``.
        """
        base_string = f"{self.partner_id}{path}{timestamp}"
        if access_token:
            base_string += access_token
        return hmac.new(
            self.partner_key,
            base_string.encode('utf-8'),
            hashlib.sha256
        ).hexdigest()

    def set_access_token(self, access_token: str) -> None:
        """Store the access token used by subsequent requests."""
        self.access_token = access_token

    def get_item_reviews(self,
                         item_id: int,
                         page: int = 1,
                         page_size: int = 20,
                         rating_filter: int = 0,
                         with_images: bool = False,
                         language: Optional[str] = None) -> Optional[Dict]:
        """Fetch one page of reviews for an item.

        :param item_id: item ID
        :param page: 1-based page number
        :param page_size: reviews per page, 1-50
        :param rating_filter: 0 for all ratings, 1-5 for one specific rating
        :param with_images: only return reviews that carry images
        :param language: optional review-language filter
        :return: the dict built by ``_format_review_data``, or ``None`` when
            an argument is invalid, no access token is set, the request
            fails, the body is not a JSON object, or the API reports an error
        """
        # Validate arguments before doing any network work.
        if page < 1:
            logging.error(f"页码必须大于等于1,当前为: {page}")
            return None
        if page_size < 1 or page_size > 50:
            logging.error(f"每页条数必须在1-50之间,当前为: {page_size}")
            return None
        if rating_filter < 0 or rating_filter > 5:
            logging.error(f"评分筛选值必须在0-5之间,当前为: {rating_filter}")
            return None
        if not self.access_token:
            logging.error("请先设置access_token")
            return None

        path = "/api/v2/item/get_reviews"
        timestamp = int(time.time() * 1000)  # milliseconds since the epoch
        signature = self._generate_signature(path, timestamp, self.access_token)
        params = {
            "partner_id": self.partner_id,
            "shopid": self.shopid,
            "item_id": item_id,
            "page": page,
            "page_size": page_size,
            "rating_filter": rating_filter,
            "with_images": with_images,
            "timestamp": timestamp,
            "access_token": self.access_token,
            "sign": signature
        }
        if language:
            params["language"] = language

        url = f"{self.base_url}{path}"
        try:
            response = self.session.get(url, params=params, timeout=15)
            response.raise_for_status()
        except RequestException as e:
            logging.error(f"获取评论请求异常: {str(e)}")
            return None

        # Decode separately from the transport step: the JSON error raised by
        # recent ``requests`` versions is also a RequestException, so a shared
        # try block would report a malformed body as a request failure.
        try:
            result = response.json()
        except ValueError:
            logging.error(f"评论响应解析失败: {response.text[:200]}...")
            return None
        if not isinstance(result, dict):
            logging.error(f"评论响应解析失败: {response.text[:200]}...")
            return None

        if result.get("error") != 0:
            logging.error(f"获取评论失败: {result.get('message', '未知错误')} (错误码: {result.get('error')})")
            return None

        return self._format_review_data(result, item_id)

    @staticmethod
    def _coerce_rating(value) -> int:
        """Return ``value`` as an int rating, or 0 when it is missing/invalid."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def _format_review_data(self, review_data: Dict, item_id: int) -> Dict:
        """Normalise a raw API response into pagination info and review dicts.

        Missing or null ``data`` / ``reviews`` are treated as empty, a missing
        or zero ``page_size`` falls back to 20 (so the page count never
        divides by zero), and a missing or non-numeric rating becomes 0.

        :param review_data: decoded JSON response
        :param item_id: item ID copied into every review
        :return: dict with ``pagination``, ``reviews`` and ``raw_data`` keys
        """
        data = review_data.get("data") or {}
        total_count = data.get("total_count") or 0
        page_size = data.get("page_size") or 20
        pagination = {
            "total_reviews": total_count,
            # Ceiling division without floats.
            "total_pages": (total_count + page_size - 1) // page_size,
            "current_page": data.get("page", 1),
            "page_size": page_size
        }

        reviews = []
        for review in data.get("reviews") or []:
            rating = self._coerce_rating(review.get("rating", 0))
            # Clamp only for display so the star string is always 5 glyphs.
            stars = min(max(rating, 0), 5)

            images = [
                img.get("url")
                for img in review.get("images") or []
                if img.get("url")
            ]

            variant_info = ", ".join(
                v.get("name", "")
                for v in review.get("variation") or []
                if v.get("name")
            )

            seller_reply = None
            raw_reply = review.get("seller_reply")
            if raw_reply:
                seller_reply = {
                    "content": raw_reply.get("comment", ""),
                    "created_time": raw_reply.get("created_at", 0)
                }

            content = review.get("comment", "")
            translated_content = self._translate_comment(content, review.get("language", ""))
            sentiment = self._analyze_sentiment(content or translated_content)

            reviews.append({
                "review_id": review.get("review_id"),
                "item_id": item_id,
                "user": {
                    "user_id": review.get("user_id"),
                    "username": review.get("author_username", "匿名用户")
                },
                "rating": rating,
                "rating_star": "★" * stars + "☆" * (5 - stars),
                "content": content,
                "translated_content": translated_content,
                "language": review.get("language", ""),
                "sentiment": sentiment,
                "images": images,
                "variant_info": variant_info,
                "purchase_time": review.get("purchase_time", 0),
                "created_time": review.get("created_at", 0),
                "like_count": review.get("like_count", 0),
                "reply_count": review.get("cmt_count", 0),
                "seller_reply": seller_reply,
                "is_verified_purchase": review.get("is_verified", False)
            })

        return {
            "pagination": pagination,
            "reviews": reviews,
            "raw_data": review_data  # keep the untouched response for callers
        }

    def _translate_comment(self, content: str, language: str) -> str:
        """Best-effort translation of a comment to zh-CN via TextBlob.

        Empty content, zh-CN content and unsupported languages are returned
        unchanged; so is the original text whenever translation fails.

        NOTE(review): ``TextBlob.translate`` may be unavailable in newer
        TextBlob releases -- confirm; a failure here falls back to the
        original text.
        """
        if not content or language == "zh-CN":
            return content
        if language not in ["en", "zh-TW", "th", "id", "vi", "ms", "tl"]:
            return content
        try:
            return str(TextBlob(content).translate(to="zh-CN"))
        except Exception as exc:
            # Deliberate best-effort: translation is optional enrichment, but
            # do not swallow KeyboardInterrupt/SystemExit as a bare except did.
            logging.debug(f"评论翻译失败,返回原文: {exc}")
            return content

    def _analyze_sentiment(self, content: str) -> Dict:
        """Classify a comment with TextBlob.

        :return: dict with ``polarity`` and ``subjectivity`` rounded to four
            decimals and a ``sentiment`` label: "positive" above 0.1,
            "negative" below -0.1, otherwise "neutral". Empty content yields
            zeros and "neutral".
        """
        if not content:
            return {"polarity": 0, "subjectivity": 0, "sentiment": "neutral"}

        scores = TextBlob(content).sentiment
        polarity = scores.polarity
        subjectivity = scores.subjectivity

        if polarity > 0.1:
            label = "positive"
        elif polarity < -0.1:
            label = "negative"
        else:
            label = "neutral"
        return {
            "polarity": round(polarity, 4),
            "subjectivity": round(subjectivity, 4),
            "sentiment": label
        }

    def get_all_reviews(self, item_id: int, max_pages: int = 10, **kwargs) -> List[Dict]:
        """Fetch reviews page by page until exhausted or ``max_pages`` is hit.

        :param item_id: item ID
        :param max_pages: maximum number of pages to request
        :param kwargs: forwarded to ``get_item_reviews``; ``page_size``
            overrides the default of 50 and ``page`` is ignored because
            paging is driven by this method
        :return: list of formatted reviews (empty when the first page fails)
        """
        # Pull out the keywords this method passes explicitly so forwarding
        # **kwargs cannot raise "got multiple values for keyword argument".
        kwargs.pop("page", None)
        page_size = kwargs.pop("page_size", 50)

        all_reviews = []
        page = 1
        while page <= max_pages:
            logging.info(f"获取第 {page} 页评论")
            result = self.get_item_reviews(
                item_id=item_id,
                page=page,
                page_size=page_size,
                **kwargs
            )
            if not result or not result["reviews"]:
                break
            all_reviews.extend(result["reviews"])
            if page >= result["pagination"]["total_pages"]:
                break
            page += 1
            # Throttle between requests to stay under the API rate limit.
            time.sleep(0.1)
        logging.info(f"共获取到 {len(all_reviews)} 条评论")
        return all_reviews

    def analyze_reviews(self, reviews: List[Dict]) -> Dict:
        """Run every analysis over formatted reviews.

        :return: dict of analysis sections, or ``{}`` when ``reviews`` is empty
        """
        if not reviews:
            return {}
        return {
            "total_reviews": len(reviews),
            "rating_analysis": self._analyze_ratings(reviews),
            "sentiment_analysis": self._analyze_sentiments(reviews),
            "keyword_analysis": self._analyze_keywords(reviews),
            "variant_analysis": self._analyze_variants(reviews),
            "image_analysis": self._analyze_images(reviews),
            "reply_analysis": self._analyze_seller_replies(reviews),
            "typical_reviews": self._get_typical_reviews(reviews)
        }

    def _analyze_ratings(self, reviews: List[Dict]) -> Dict:
        """Return the average rating, per-rating counts and percentage split."""
        rating_counts = defaultdict(int)
        total_rating = 0
        for review in reviews:
            rating = review["rating"]
            rating_counts[rating] += 1
            total_rating += rating

        n = len(reviews)
        return {
            "average": round(total_rating / n, 1) if n else 0,
            "counts": dict(rating_counts),
            "distribution": {k: round(v / n * 100, 1) for k, v in rating_counts.items()}
        }

    def _analyze_sentiments(self, reviews: List[Dict]) -> Dict:
        """Return sentiment label counts, percentages, mean polarity and a
        per-rating breakdown of labels."""
        sentiment_counts = defaultdict(int)
        rating_sentiment = defaultdict(lambda: defaultdict(int))
        polarity_scores = []
        for review in reviews:
            label = review["sentiment"]["sentiment"]
            sentiment_counts[label] += 1
            rating_sentiment[review["rating"]][label] += 1
            polarity_scores.append(review["sentiment"]["polarity"])

        n = len(reviews)
        avg_polarity = round(sum(polarity_scores) / len(polarity_scores), 4) if polarity_scores else 0
        return {
            "distribution": dict(sentiment_counts),
            "ratio": {k: round(v / n * 100, 1) for k, v in sentiment_counts.items()},
            "avg_polarity": avg_polarity,
            "by_rating": {k: dict(v) for k, v in rating_sentiment.items()}
        }

    def _analyze_keywords(self, reviews: List[Dict]) -> Dict:
        """Count how many reviews mention each keyword category.

        A review counts at most once per category. Matching is done on the
        translated content (falling back to the original) because the keyword
        lists are Chinese.

        :return: dict with per-category ``counts``, the categories ``sorted``
            by count descending, and the ``negative_ratio`` (percentage of a
            category's mentions that come from negative reviews)
        """
        keywords = {
            "质量": ["质量", "品质", "做工", "材料", "耐用", "差", "好"],
            "物流": ["快递", "物流", "速度", "慢", "快", "包装", "运输"],
            "尺寸": ["大小", "尺寸", "合适", "太大", "太小", "长短"],
            "外观": ["颜色", "外观", "样子", "好看", "漂亮", "难看"],
            "价格": ["价格", "划算", "便宜", "贵", "性价比"],
            "功能": ["功能", "好用", "实用", "方便", "难用"]
        }
        keyword_counts = defaultdict(int)
        negative_keywords = defaultdict(int)
        for review in reviews:
            content = review["translated_content"] or review["content"]
            if not content:
                continue
            content_lower = content.lower()
            is_negative = review["sentiment"]["sentiment"] == "negative"
            for category, kws in keywords.items():
                if any(kw in content_lower for kw in kws):
                    keyword_counts[category] += 1
                    if is_negative:
                        negative_keywords[category] += 1

        keyword_negative_ratio = {
            category: round(negative_keywords[category] / count * 100, 1)
            for category, count in keyword_counts.items()
            if count > 0
        }
        sorted_keywords = sorted(keyword_counts.items(), key=lambda x: x[1], reverse=True)
        return {
            "counts": dict(keyword_counts),
            "sorted": sorted_keywords,
            "negative_ratio": keyword_negative_ratio
        }

    def _analyze_variants(self, reviews: List[Dict]) -> Dict:
        """Return review counts and average rating per variant.

        Reviews without variant info are skipped; ``sorted`` holds the ten
        most-reviewed variants.
        """
        variant_ratings = defaultdict(list)
        for review in reviews:
            variant = review["variant_info"]
            if variant:
                variant_ratings[variant].append(review["rating"])

        variant_counts = {variant: len(ratings) for variant, ratings in variant_ratings.items()}
        variant_avg_rating = {
            variant: round(sum(ratings) / len(ratings), 1)
            for variant, ratings in variant_ratings.items()
        }
        sorted_variants = sorted(variant_counts.items(), key=lambda x: x[1], reverse=True)
        return {
            "counts": variant_counts,
            "avg_rating": variant_avg_rating,
            "sorted": sorted_variants[:10]
        }

    def _analyze_images(self, reviews: List[Dict]) -> Dict:
        """Return the share of reviews with images, the mean image count per
        review, and the five reviews carrying the most images."""
        image_reviews = [
            {
                "review_id": review["review_id"],
                "image_count": len(review["images"]),
                "rating": review["rating"],
                "images": review["images"]
            }
            for review in reviews
            if review["images"]
        ]
        total_images = sum(entry["image_count"] for entry in image_reviews)
        image_reviews_sorted = sorted(image_reviews, key=lambda x: x["image_count"], reverse=True)

        n = len(reviews)
        return {
            "has_image_ratio": round(len(image_reviews) / n * 100, 1) if n else 0,
            "avg_images_per_review": round(total_images / n, 1) if n else 0,
            "top_image_reviews": image_reviews_sorted[:5]
        }

    def _analyze_seller_replies(self, reviews: List[Dict]) -> Dict:
        """Return the seller reply rate and the mean reply delay in hours.

        The delay divides the timestamp difference by 3,600,000, i.e. it
        treats both timestamps as milliseconds.
        NOTE(review): confirm the API really returns millisecond timestamps.
        """
        ms_per_hour = 3600000
        replied_count = 0
        reply_times = []
        for review in reviews:
            reply = review["seller_reply"]
            if not reply:
                continue
            replied_count += 1
            if review["created_time"] and reply["created_time"]:
                reply_times.append((reply["created_time"] - review["created_time"]) / ms_per_hour)

        avg_reply_time = round(sum(reply_times) / len(reply_times), 1) if reply_times else 0
        return {
            "reply_ratio": round(replied_count / len(reviews) * 100, 1) if reviews else 0,
            "avg_reply_time_hours": avg_reply_time,
            "replied_count": replied_count
        }

    def _get_typical_reviews(self, reviews: List[Dict]) -> Dict:
        """Pick up to three representative reviews for each of four buckets:
        positive with rating >= 4, negative with rating <= 2, most liked, and
        most images."""
        def by_likes(r):
            return r["like_count"]

        return {
            "positive_high_rating": self._get_top_reviews(
                reviews,
                lambda x: x["sentiment"]["sentiment"] == "positive" and x["rating"] >= 4,
                sort_key=by_likes, reverse=True)[:3],
            "negative_low_rating": self._get_top_reviews(
                reviews,
                lambda x: x["sentiment"]["sentiment"] == "negative" and x["rating"] <= 2,
                sort_key=by_likes, reverse=True)[:3],
            "most_helpful": self._get_top_reviews(
                reviews,
                lambda x: True,
                sort_key=by_likes, reverse=True)[:3],
            "with_images": self._get_top_reviews(
                reviews,
                lambda x: len(x["images"]) > 0,
                sort_key=lambda x: len(x["images"]), reverse=True)[:3]
        }

    def _get_top_reviews(self, reviews: List[Dict], filter_func, sort_key, reverse=True) -> List[Dict]:
        """Return the reviews accepted by ``filter_func``, sorted by ``sort_key``."""
        return sorted((r for r in reviews if filter_func(r)), key=sort_key, reverse=reverse)

    def download_review_images(self, reviews: List[Dict], output_dir: str = "review_images") -> None:
        """Download every image of the given reviews into ``output_dir``.

        Files are named ``review_<review_id>_img_<index>.jpg``. A failed
        download or write is logged and skipped so one bad URL does not abort
        the batch.
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)
        for review in reviews:
            for j, img_url in enumerate(review["images"] or []):
                img_path = os.path.join(output_dir, f"review_{review['review_id']}_img_{j}.jpg")
                try:
                    response = self.session.get(img_url, timeout=10)
                    response.raise_for_status()
                    with open(img_path, 'wb') as f:
                        f.write(response.content)
                    logging.debug(f"已下载图片: {img_path}")
                except (RequestException, OSError) as e:
                    logging.error(f"下载图片失败 {img_url}: {str(e)}")

    def visualize_analysis(self, analysis: Dict, output_dir: str = "review_analysis") -> None:
        """Render the analysis as PNG charts in ``output_dir``.

        Up to four charts are written: rating distribution, sentiment pie,
        keyword counts and average rating of the most-reviewed variants. A
        chart whose data is empty is skipped, and an empty ``analysis`` (what
        ``analyze_reviews`` returns for no reviews) is a logged no-op.
        """
        if not analysis:
            logging.warning("没有可用于可视化的分析数据")
            return

        os.makedirs(output_dir, exist_ok=True)

        # Fonts able to render the Chinese chart labels.
        plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]
        plt.rcParams["axes.unicode_minus"] = False

        # Each figure is created only once its data is known to be non-empty,
        # so a skipped chart never leaves an unclosed figure behind.

        # 1. Rating distribution bar chart
        ratings = sorted(analysis["rating_analysis"]["counts"].items())
        if ratings:
            plt.figure(figsize=(10, 6))
            labels = [f"{k}星" for k, _ in ratings]
            values = [v for _, v in ratings]
            x = np.arange(len(labels))
            plt.bar(x, values, color='skyblue')
            plt.xticks(x, labels)
            plt.ylabel('评论数量')
            plt.title(f'商品评分分布 (平均评分: {analysis["rating_analysis"]["average"]})')
            for i, v in enumerate(values):
                plt.text(i, v + 1, str(v), ha='center')
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "rating_distribution.png"))
            plt.close()

        # 2. Sentiment pie chart
        sentiments = analysis["sentiment_analysis"]["distribution"]
        data = [
            sentiments.get("positive", 0),
            sentiments.get("negative", 0),
            sentiments.get("neutral", 0)
        ]
        if any(data):  # a pie of all zeros cannot be drawn
            plt.figure(figsize=(8, 8))
            labels = ["正面", "负面", "中性"]
            colors = ['#4CAF50', '#F44336', '#FFC107']
            plt.pie(data, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
            plt.title('评论情感分布')
            plt.axis('equal')
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "sentiment_distribution.png"))
            plt.close()

        # 3. Keyword category bar chart
        keywords = analysis["keyword_analysis"]["sorted"][:10]
        if keywords:
            plt.figure(figsize=(12, 6))
            labels = [k[0] for k in keywords]
            values = [k[1] for k in keywords]
            x = np.arange(len(labels))
            plt.bar(x, values, color='#2196F3')
            plt.xticks(x, labels)
            plt.ylabel('出现次数')
            plt.title('评论关键词分布')
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "keyword_distribution.png"))
            plt.close()

        # 4. Average rating of the most-reviewed variants
        variants = analysis["variant_analysis"]["sorted"][:8]
        if variants:
            plt.figure(figsize=(12, 6))
            # Long variant names are truncated so the tick labels stay legible.
            variant_names = [v[0][:15] + '...' if len(v[0]) > 15 else v[0] for v in variants]
            variant_ratings = [analysis["variant_analysis"]["avg_rating"].get(v[0], 0) for v in variants]
            x = np.arange(len(variant_names))
            plt.bar(x, variant_ratings, color='#FF9800')
            plt.xticks(x, variant_names, rotation=45)
            plt.ylabel('平均评分')
            plt.title('热门变体平均评分对比')
            plt.ylim(0, 5)
            for i, v in enumerate(variant_ratings):
                plt.text(i, v + 0.1, str(v), ha='center')
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "variant_rating_comparison.png"))
            plt.close()

        logging.info(f"分析图表已保存至 {output_dir} 目录")
# Example usage: fetch reviews for one item, print a text report, render the
# charts and download the images of the most image-rich reviews.
if __name__ == "__main__":
    # Replace these placeholders with real values from the Shopee Open Platform.
    PARTNER_ID = 123456  # partner ID
    PARTNER_KEY = "your_partner_key"  # partner key
    SHOPID = 987654  # shop ID
    ACCESS_TOKEN = "your_access_token"  # access token
    ITEM_ID = 1234567890  # item ID
    REGION = "sg"  # region code

    # Build the client and attach the access token.
    api = ShopeeItemReviewAPI(PARTNER_ID, PARTNER_KEY, SHOPID, region=REGION)
    api.set_access_token(ACCESS_TOKEN)

    # Fetch at most five pages of reviews.
    all_reviews = api.get_all_reviews(
        item_id=ITEM_ID,
        max_pages=5,
        # rating_filter=5,  # only 5-star reviews
        # with_images=True  # only reviews with images
    )

    if all_reviews:
        analysis = api.analyze_reviews(all_reviews)
        print(f"=== Shopee商品评论分析报告 (商品ID: {ITEM_ID}) ===")
        print(f"总评论数: {analysis['total_reviews']}条")

        # Ratings
        print("\n评分分析:")
        print(f" 平均评分: {analysis['rating_analysis']['average']}星")
        print(" 评分分布:")
        rating_rows = sorted(analysis['rating_analysis']['distribution'].items())
        for rating, ratio in rating_rows:
            print(f" {rating}星: {ratio}% ({analysis['rating_analysis']['counts'][rating]}条)")

        # Sentiment
        print("\n情感分析:")
        print(f" 正面评论: {analysis['sentiment_analysis']['ratio'].get('positive', 0)}%")
        print(f" 负面评论: {analysis['sentiment_analysis']['ratio'].get('negative', 0)}%")
        print(f" 中性评论: {analysis['sentiment_analysis']['ratio'].get('neutral', 0)}%")

        # Keywords
        print("\n关键词分析:")
        print(" 出现频率最高的5个关键词:")
        top_keywords = analysis['keyword_analysis']['sorted'][:5]
        for i, (keyword, count) in enumerate(top_keywords, 1):
            neg_ratio = analysis['keyword_analysis']['negative_ratio'].get(keyword, 0)
            print(f" {i}. {keyword}: {count}次 (负面提及: {neg_ratio}%)")

        # Variants
        print("\n变体分析:")
        if analysis['variant_analysis']['sorted']:
            print(" 评论最多的3个变体:")
            top_variants = analysis['variant_analysis']['sorted'][:3]
            for i, (variant, count) in enumerate(top_variants, 1):
                rating = analysis['variant_analysis']['avg_rating'].get(variant, 0)
                print(f" {i}. {variant}: {count}条评论, 平均评分: {rating}星")

        # Images
        print("\n图片分析:")
        print(f" 带图评论占比: {analysis['image_analysis']['has_image_ratio']}%")
        print(f" 平均每条评论图片数: {analysis['image_analysis']['avg_images_per_review']}")

        # Seller replies
        print("\n卖家回复分析:")
        print(f" 回复率: {analysis['reply_analysis']['reply_ratio']}%")
        print(f" 平均回复时间: {analysis['reply_analysis']['avg_reply_time_hours']}小时")

        # Typical reviews: the positive and negative sections share one
        # layout, so both are printed by the same loop.
        typical_sections = (
            ("\n典型正面评论:", 'positive_high_rating'),
            ("\n典型负面评论:", 'negative_low_rating'),
        )
        for heading, bucket in typical_sections:
            print(heading)
            for i, review in enumerate(analysis['typical_reviews'][bucket][:2], 1):
                print(f" {i}. {review['user']['username']} ({review['rating_star']})")
                print(f" 内容: {review['translated_content'][:100]}{'...' if len(review['translated_content'])>100 else ''}")

        # Render the charts.
        api.visualize_analysis(analysis)

        # Download the images of the most image-rich reviews.
        api.download_review_images(analysis['typical_reviews']['with_images'])