import requests
from bs4 import BeautifulSoup
def fetch_page(url, timeout=10):
    """Fetch a web page and return its body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as text, or ``None`` when the request fails
        (connection error, timeout, or an HTTP error status). The error
        is printed before ``None`` is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turn 4xx/5xx statuses into exceptions
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
def parse_page(html_content):
    """Extract the ``href`` value of every link in an HTML document.

    Args:
        html_content: HTML markup as a string. ``None`` or an empty string
            is treated as a document without links.

    Returns:
        A list with the ``href`` attribute value of each ``<a>`` tag that
        has one. Values are returned exactly as written in the markup;
        relative URLs are not resolved.
    """
    # Nothing to parse: avoid handing None to BeautifulSoup, which raises.
    if not html_content:
        return []
    soup = BeautifulSoup(html_content, 'html.parser')
    # href=True skips anchors that carry no href attribute.
    return [link['href'] for link in soup.find_all('a', href=True)]
def main():
    """Fetch one page and print every link found on it, one per line."""
    url = "http://example.com"  # replace with the URL you want to crawl
    html_content = fetch_page(url)
    # fetch_page signals failure with None; an empty body is still a
    # successful fetch and must not be reported as an error.
    if html_content is None:
        print("Failed to fetch the page.")
        return
    for link in parse_page(html_content):
        print(link)
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Make sure requests and BeautifulSoup are installed. If they are not,
# install them with the following command:
#     pip install requests beautifulsoup4