大麦网的演出搜索功能(对应item_search接口,非官方命名)是获取特定关键词(如艺人、演出类型、城市)相关演出列表的核心工具,广泛应用于票务聚合平台、演出推荐系统、行业数据分析等场景。由于大麦网无公开官方 API,开发者需通过合规的页面解析或第三方服务实现对接。本文将系统讲解item_search接口的对接逻辑、技术实现、反爬应对及最佳实践,帮助开发者构建稳定高效的演出列表获取系统。
一、接口基础认知(核心功能与场景)
二、对接前置准备(环境与工具)
三、接口调用流程(基于页面解析)
四、代码实现示例(Python)
import requests
import time
import random
import re
from urllib.parse import quote
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from typing import List, Dict
class DamaiSearchApi:
    """Client that scrapes Damai.cn's show-search pages (the unofficial "item_search" API)."""

    def __init__(self, proxy_pool: List[str] = None):
        """
        :param proxy_pool: optional list of proxy URLs, e.g. ["http://ip:port", ...]
        """
        self.base_url = "https://search.damai.cn/search.htm"
        self.ua = UserAgent()
        self.proxy_pool = proxy_pool
        self.city_code = self._load_city_code()  # city-name -> Damai city-code table

    def _load_city_code(self) -> Dict[str, str]:
        """Return the city-name -> city-code mapping (simplified subset)."""
        return {
            "全国": "0", "北京": "110", "上海": "310",
            "广州": "4401", "深圳": "4403", "杭州": "3301"
        }

    def _get_headers(self) -> Dict[str, str]:
        """Build randomized request headers (rotating User-Agent) to reduce fingerprinting."""
        return {
            "User-Agent": self.ua.random,
            "Referer": "https://www.damai.cn/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Cookie": "cna=xxx; isg=xxx"  # replace with a real Cookie captured from a browser session
        }

    def _get_proxy(self) -> Dict[str, str]:
        """Pick a random proxy from the pool; return None when no pool is configured."""
        if self.proxy_pool:
            proxy = random.choice(self.proxy_pool)
            return {"http": proxy, "https": proxy}
        return None

    @staticmethod
    def _text_of(soup, selector: str) -> str:
        """Return the stripped text of the first node matching *selector*, or '' if absent."""
        node = soup.select_one(selector)
        return node.text.strip() if node else ""

    def _parse_item(self, item_soup) -> Dict[str, str]:
        """Parse one search-result card into a flat dict of show fields.

        All fields default to "" when the corresponding node is missing, so a
        partially rendered card never raises.
        """
        # Extract the show id from the detail-page link. The anchor, its href,
        # or the id= query parameter may each be missing — guard every step
        # (the original `select_one(...)["href"]` crashed on a missing anchor).
        link_node = item_soup.select_one("a.item__link")
        link = link_node.get("href", "") if link_node is not None else ""
        id_match = re.search(r"id=(\d+)", link) if link else None
        item_id = id_match.group(1) if id_match else ""
        img_node = item_soup.select_one(".item__img img")
        return {
            "item_id": item_id,
            "title": self._text_of(item_soup, ".item__name"),
            "poster": (img_node.get("src") or "") if img_node is not None else "",
            "time": self._text_of(item_soup, ".item__time"),
            "venue": self._text_of(item_soup, ".item__venue"),
            "price_range": self._text_of(item_soup, ".item__price"),
            "status": self._text_of(item_soup, ".item__status"),
            # Relative hrefs are served from the detail host; absolute URLs pass through.
            "url": f"https://detail.damai.cn{link}" if link.startswith("/") else link
        }

    def item_search(self, keyword: str, city: str = "全国", date: str = "0",
                    page_limit: int = 5, timeout: int = 10) -> Dict:
        """
        Search Damai for shows matching a keyword.

        :param keyword: search keyword, e.g. an artist name
        :param city: city name (looked up in the city-code table; unknown names fall back to nationwide)
        :param date: date-range filter ("0" = all, "1" = next 30 days)
        :param page_limit: maximum number of result pages to fetch (throttles crawling)
        :param timeout: per-request timeout in seconds
        :return: on success {"success": True, "total", "page_processed", "items"};
                 on failure {"success": False, "error_msg", "code"}
        """
        response = None  # kept visible to the except handlers below
        try:
            city_code = self.city_code.get(city, "0")
            all_items = []
            current_page = 1
            while current_page <= page_limit:
                # Pass the raw keyword: requests percent-encodes query params
                # itself, so pre-quoting here would double-encode non-ASCII text.
                params = {
                    "keyword": keyword,
                    "city": city_code,
                    "date": date,
                    "page": current_page
                }
                # Random 2-4s pause between pages to stay under anti-bot rate limits.
                time.sleep(random.uniform(2, 4))
                response = requests.get(
                    url=self.base_url,
                    params=params,
                    headers=self._get_headers(),
                    proxies=self._get_proxy(),
                    timeout=timeout
                )
                response.raise_for_status()
                soup = BeautifulSoup(response.text, "lxml")
                item_list = soup.select("div.items > div.item")
                if not item_list:
                    break  # empty page: stop paginating
                all_items.extend(self._parse_item(item) for item in item_list)
                if not soup.select_one("a.pagination-next"):
                    break  # no "next" link: last page reached
                current_page += 1
            return {
                "success": True,
                "total": len(all_items),
                "page_processed": current_page - 1,
                "items": all_items
            }
        except requests.exceptions.HTTPError as e:
            # `response` may still be None if the failure happened before any
            # request completed — never dereference it unconditionally.
            status = response.status_code if response is not None else -1
            if status == 403 or "403" in str(e):
                return {"success": False, "error_msg": "触发反爬,建议更换代理或Cookie", "code": 403}
            return {"success": False, "error_msg": f"HTTP错误: {str(e)}", "code": status}
        except Exception as e:
            return {"success": False, "error_msg": f"搜索失败: {str(e)}", "code": -1}
# Usage example
if __name__ == "__main__":
    # Proxy pool (swap in live proxies before running).
    proxy_servers = [
        "http://123.45.67.89:8888",
        "http://98.76.54.32:8080"
    ]
    # Build the API client.
    client = DamaiSearchApi(proxy_pool=proxy_servers)
    # Look up "周杰伦" shows in Shanghai within the next 30 days.
    outcome = client.item_search(
        keyword="周杰伦",
        city="上海",
        date="1",  # next 30 days
        page_limit=3  # at most 3 pages
    )
    if not outcome["success"]:
        print(f"搜索失败:{outcome['error_msg']}(错误码:{outcome['code']})")
    else:
        print(f"搜索成功:共找到 {outcome['total']} 场演出,处理 {outcome['page_processed']} 页")
        # Print the first 5 results only.
        for idx, show in enumerate(outcome["items"][:5], start=1):
            print(f"\n演出 {idx}:")
            print(f"标题:{show['title']}")
            print(f"时间:{show['time']} | 场馆:{show['venue']}")
            print(f"票价:{show['price_range']} | 状态:{show['status']}")
            print(f"详情页:{show['url']}")