Yahooトピックスのリンクを抜き出す

投稿者: | 2022-04-09

import re
import sys
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Print the headline text and link of every topic listed on each
# Yahoo! News category page.

# Visual separator so the scraped output stands out in the terminal.
print('\n')
print('■'*120)
print('\n')

# Common prefix shared by every category page URL.
BASE_URL = 'https://news.yahoo.co.jp/categories/'

category_list = [
    'business',
    'entertainment',
    'sports',
    'domestic',
    'world',
    'it',
    'local',
]

for cate_name in category_list:

    url = BASE_URL + cate_name

    # BeautifulSoup reads the whole response while constructing the tree,
    # so the connection can be released as soon as parsing is done.
    with urlopen(url) as html:
        soup = BeautifulSoup(html, 'html.parser')

    print(f'<{cate_name}>\n')

    # find() returns None when nothing matches; check each step instead of
    # chaining so a page-layout change yields a clear message rather than
    # an AttributeError.
    contents = soup.find(id='contents')
    topics = contents.find('ul') if contents is not None else None

    if topics is None:
        print(f'topic list not found: {url}', file=sys.stderr)
    else:
        for li in topics.find_all('li'):
            print(li.get_text())
            # An <li> without an anchor (or an anchor without href) has no
            # link to report; print only the text in that case.
            anchor = li.a
            href = anchor.get('href') if anchor is not None else None
            if href:
                print(href)

    print('\n')
	
	
# https://news.yahoo.co.jp/
# https://news.yahoo.co.jp/categories/business
# https://news.yahoo.co.jp/categories/entertainment
# https://news.yahoo.co.jp/categories/sports
# https://news.yahoo.co.jp/categories/domestic
# https://news.yahoo.co.jp/categories/world
# https://news.yahoo.co.jp/categories/it
# https://news.yahoo.co.jp/categories/local