"""Scrape the campus-news list page at ``url`` and print article metadata.

For every news entry on the list page the article page is downloaded and
its ``.show-info`` text is parsed for the publish time, source, author and
photographer, which are printed to stdout.
"""

import re
from datetime import datetime
from urllib.parse import urljoin

url = "http://news.gzcc.cn/html/xiaoyuanxinwen/"

# Label/value separator.  Both the ASCII colon and the full-width colon
# (U+FF1A) are accepted so labels match whichever form the page uses.
_COLON = r"[:\uff1a]"

_PUBLISH_TIME_RE = re.compile(
    "发布时间" + _COLON
    + r"\s*([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2})"
)


def parse_publish_time(info):
    """Return the publish time found in *info*, or ``None``.

    The timestamp is expected right after the ``发布时间`` label in the
    form ``YYYY-MM-DD HH:MM:SS``.  ``None`` is returned when the label is
    absent or the digits do not form a valid date/time.
    """
    match = _PUBLISH_TIME_RE.search(info)
    if match is None:
        return None
    try:
        return datetime.strptime(match.group(1), '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Right shape but impossible value (e.g. month 13).
        return None


def extract_field(info, label):
    """Return the value that follows ``label`` + colon in *info*.

    The value is the run of non-whitespace characters immediately after
    the colon; it is ``''`` when the label is present but empty, and
    ``None`` when the label does not occur at all.

    The label is matched as a literal prefix.  ``str.lstrip(label)`` must
    not be used for this: it strips a *set of characters* and would eat
    the start of a value that happens to begin with one of them.
    """
    match = re.search(re.escape(label) + _COLON + r"(\S*)", info)
    return match.group(1) if match else None


def fetch_soup(page_url, timeout=10):
    """Download *page_url* and return it parsed with ``html.parser``.

    The body is decoded as UTF-8.  Connection errors and timeouts raised
    by ``requests`` propagate to the caller.
    """
    # Imported here rather than at module level so the pure text helpers
    # above stay importable without the HTTP/HTML stack installed.
    import requests
    from bs4 import BeautifulSoup

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(page_url, timeout=timeout)
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def main():
    """Print publish time, then source/author/photographer, per article."""
    listing = fetch_soup(url)
    for item in listing.select('li'):
        # Only <li> elements carrying a news title are article entries.
        if not item.select('.news-list-title'):
            continue
        link = item.select_one('a')
        if link is None or not link.get('href'):
            continue
        # urljoin leaves absolute links untouched and resolves relative ones.
        article = fetch_soup(urljoin(url, link['href']))
        info_node = article.select_one('.show-info')
        if info_node is None:
            # Article page without the metadata line: nothing to report.
            continue
        info = info_node.text
        print(parse_publish_time(info))
        print(
            extract_field(info, '来源'),
            extract_field(info, '作者'),
            extract_field(info, '摄影'),
        )


if __name__ == "__main__":
    main()