58 同城作为国内知名的分类信息平台,涵盖二手房、租房、招聘、二手车等多个领域。由于其未正式开放公共 API,以下分析基于网页端 / 移动端的非官方接口(通过抓包和页面解析),并提供 Python 实现方案。需注意:非官方接口存在稳定性风险,使用需遵守平台规则。
### 一、58 同城接口核心特性分析
#### 1. 接口体系与功能域
通过分析,58 同城的核心数据接口可分为以下几类:
- **分类信息列表**:二手房(`/ershoufang/`)、租房(`/zufang/`)、招聘(`/job/`)等,支持按地区、价格、筛选条件查询;
- **详情信息**:单个信息的详细内容(如房源详情、职位描述);
- **地区与分类**:城市列表、分类标签(用于筛选);
- **搜索接口**:全局搜索或分类内搜索(含关键词匹配)。
#### 2. 接口特点与反爬机制
- **URL 结构**:列表页 URL 多为 `https://{城市缩写}.58.com/{分类}/p{页码}/?{筛选参数}` 的形式,如北京二手房列表为 `https://bj.58.com/ershoufang/p1/`;
- **请求类型**:以 GET 请求为主,筛选参数通过 URL 查询字符串传递;
- **反爬措施**:
- 检测`User-Agent`(需模拟浏览器);
- 频繁请求触发验证码或 IP 封禁;
- 部分页面数据通过 JavaScript 动态加载(需解析 AJAX 接口);
- 详情页可能存在 Referer 验证。
- **数据格式**:列表页多为 HTML 渲染,部分异步加载数据为 JSON 格式。
### 二、Python 脚本实现:58 同城数据获取框架
以下实现基于页面解析和 AJAX 接口抓取,支持二手房列表查询和详情获取,包含反爬处理机制。
import requests
import re
import json
import time
import random
import logging
from typing import Dict, List, Optional
from bs4 import BeautifulSoup
from requests.exceptions import RequestException, TooManyRedirects
# Configure module-wide logging: INFO level, timestamped single-line records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
class WubaAPI:
    """Unofficial client for scraping 58.com (Wuba) classified listings.

    All data is obtained by parsing HTML pages — there is no official
    public API. Basic anti-scraping countermeasures are built in:
    rotating User-Agent headers, a persistent cookie session, and
    randomized delays between requests.
    """

    def __init__(self, city: str = "bj"):
        """Initialize the client for one city.

        :param city: city pinyin abbreviation, e.g. "bj" (Beijing),
                     "sh" (Shanghai), "gz" (Guangzhou)
        """
        self.city = city
        self.base_url = f"https://{city}.58.com"
        # Pool of desktop/mobile User-Agents rotated on every request.
        self.user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/1A537a",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15"
        ]
        # A single Session keeps cookies/connection state across calls.
        self.session = requests.Session()
        self._update_headers()

    def _update_headers(self) -> None:
        """Refresh request headers with a random User-Agent to lower
        the chance of being fingerprinted as a bot."""
        self.session.headers.update({
            "User-Agent": random.choice(self.user_agents),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Referer": f"{self.base_url}/",
            "Connection": "keep-alive"
        })

    def _random_sleep(self) -> None:
        """Sleep 1.5–3.5 s to mimic human browsing cadence."""
        time.sleep(random.uniform(1.5, 3.5))

    def get_ershoufang_list(self, price_range: tuple = (0, 1000), page: int = 1) -> Optional[List[Dict]]:
        """Fetch one page of second-hand housing listings.

        :param price_range: (min, max) price in units of 10k CNY,
                            e.g. (0, 500) means 0–5,000,000 CNY
        :param page: 1-based page number
        :return: list of dicts (title/price/area/address/detail_url/
                 house_id); [] when the page has no listings; None on
                 request failure or an anti-scraping redirect
        """
        # NOTE(review): the site's price filter appears to use units of
        # 1,000 CNY, hence the *10 conversion — verify against live URLs.
        min_price = price_range[0] * 10
        max_price = price_range[1] * 10
        url = f"{self.base_url}/ershoufang/p{page}/?price={min_price}_{max_price}"
        try:
            self._update_headers()  # rotate UA on every request
            response = self.session.get(url, timeout=10, allow_redirects=False)
            # A redirect usually means we were bounced to a CAPTCHA page.
            if response.status_code in [301, 302]:
                logging.warning("请求被重定向,可能触发反爬")
                return None
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            # Listing cards live under ".house-list-wrap .house-cell".
            house_list = soup.select(".house-list-wrap .house-cell")
            if not house_list:
                logging.info("未找到二手房数据,可能已无更多页")
                return []
            result = []
            for house in house_list:
                title_elem = house.select_one(".title a")
                price_elem = house.select_one(".price .sum")
                area_elem = house.select_one(".baseinfo .area")
                addr_elem = house.select_one(".baseinfo .address")
                if not all([title_elem, price_elem, area_elem]):
                    continue  # skip incomplete listing cards
                # Normalize the detail URL; href may be missing,
                # protocol-relative ("//...") or host-relative ("/...").
                detail_url = title_elem.get("href") or ""
                if detail_url.startswith("//"):
                    detail_url = f"https:{detail_url}"
                elif detail_url.startswith("/"):
                    detail_url = f"{self.base_url}{detail_url}"
                # BUGFIX: guard the regex result — the original called
                # .group(1) unconditionally and raised AttributeError
                # whenever the href did not match "<digits>.shtml".
                id_match = re.search(r"(\d+)\.shtml", detail_url)
                result.append({
                    "title": title_elem.get_text(strip=True),
                    "price": price_elem.get_text(strip=True) + "万",
                    "area": area_elem.get_text(strip=True),
                    "address": addr_elem.get_text(strip=True) if addr_elem else "",
                    "detail_url": detail_url,
                    "house_id": id_match.group(1) if id_match else None
                })
            logging.info(f"获取二手房列表成功,页码:{page},数量:{len(result)}")
            self._random_sleep()
            return result
        except RequestException as e:
            logging.error(f"二手房列表请求失败:{str(e)}")
            self._random_sleep()
            return None

    def get_ershoufang_detail(self, house_id: str) -> Optional[Dict]:
        """Fetch the detail page of one listing.

        :param house_id: numeric listing id (from get_ershoufang_list)
        :return: dict with house_id/title/price/params/description/
                 detail_url, or None on request failure
        """
        url = f"{self.base_url}/ershoufang/{house_id}.shtml"
        try:
            self._update_headers()
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            # Headline fields; missing nodes degrade to empty strings.
            # (Each node is looked up once, not twice as before.)
            title_node = soup.select_one(".house-title h1")
            title = title_node.get_text(strip=True) if title_node else ""
            price_node = soup.select_one(".price-wrap .price")
            price = price_node.get_text(strip=True) if price_node else ""
            # Key/value attribute rows (area, layout, orientation, ...).
            params = {}
            for param in soup.select(".house-basic-desc .content li"):
                key_elem = param.select_one(".label")
                value_elem = param.select_one(".value")
                if key_elem and value_elem:
                    key = key_elem.get_text(strip=True).replace(":", "")
                    params[key] = value_elem.get_text(strip=True)
            desc_node = soup.select_one("#generalDesc")
            desc = desc_node.get_text(strip=True) if desc_node else ""
            result = {
                "house_id": house_id,
                "title": title,
                "price": price,
                "params": params,
                "description": desc,
                "detail_url": url
            }
            logging.info(f"获取房源详情成功,ID:{house_id}")
            self._random_sleep()
            return result
        except RequestException as e:
            logging.error(f"房源详情请求失败:{str(e)}")
            self._random_sleep()
            return None

    def search_general(self, keyword: str, category: str = "") -> Optional[List[Dict]]:
        """Keyword search, optionally restricted to one category.

        :param keyword: search terms, e.g. "北京 两居室"
        :param category: category slug such as "ershoufang" or "zufang";
                         empty string searches all categories
        :return: list of {title, url, summary} dicts, or None on failure
        """
        category_path = category + "/" if category else ""
        url = f"{self.base_url}/{category_path}?key={requests.utils.quote(keyword)}"
        try:
            self._update_headers()
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            # NOTE(review): ".serach-list" is spelled as-is (sic) —
            # confirm the selector against the live page markup.
            results = []
            for item in soup.select(".serach-list .list-item"):
                title_elem = item.select_one(".title a")
                if not title_elem:
                    continue
                summary_elem = item.select_one(".desc")
                results.append({
                    "title": title_elem.get_text(strip=True),
                    "url": title_elem.get("href"),
                    "summary": summary_elem.get_text(strip=True) if summary_elem else ""
                })
            logging.info(f"全局搜索成功,关键词:{keyword},结果数:{len(results)}")
            self._random_sleep()
            return results
        except RequestException as e:
            logging.error(f"全局搜索失败:{str(e)}")
            self._random_sleep()
            return None
# Example usage: list -> detail -> search, all against Beijing ("bj").
if __name__ == "__main__":
    client = WubaAPI(city="bj")

    # 1. Second-hand housing list: 0-500 (10k CNY), first page.
    listings = client.get_ershoufang_list(price_range=(0, 500), page=1)
    if listings:
        print("二手房列表(前3条):")
        for entry in listings[:3]:
            print(f"[{entry['house_id']}] {entry['title']} | {entry['price']} | {entry['area']} | {entry['address']}")

        # 2. Detail of the first listing, when it carries a usable id.
        first_id = listings[0]["house_id"]
        if first_id:
            info = client.get_ershoufang_detail(first_id)
            if info:
                print(f"\n房源详情:{info['title']}")
                print(f"价格:{info['price']}")
                print("基本参数:")
                for label, value in info["params"].items():
                    print(f"- {label}: {value}")
                print(f"描述:{info['description'][:100]}...")

    # 3. Keyword search within the rental category.
    hits = client.search_general(keyword="北京 租房", category="zufang")
    if hits:
        print("\n搜索结果(前3条):")
        for hit in hits[:3]:
            print(f"- {hit['title']}:{hit['summary'][:50]}...")