from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Opening banner: a blank gap, a rule of 120 '■' characters, and another
# blank gap. Joining the pieces with newlines emits the same characters as
# printing each piece on its own.
print('\n', '■' * 120, '\n', sep='\n')

# Address of the news.yahoo.co.jp category index.
url = 'https://news.yahoo.co.jp/categories/'

# Category names; each one names a page under the category index.
category_list = [
    'business',
    'entertainment',
    'sports',
    'domestic',
    'world',
    'it',
    'local',
]
# Fetch every category page and print the text and link of each list item
# found in the first <ul> under the element with id="contents".
for cate_name in category_list:
    url = 'https://news.yahoo.co.jp/categories/' + cate_name

    # Parse while the response is open, then let the context manager close
    # the connection instead of leaking it.
    with urlopen(url) as html:
        soup = BeautifulSoup(html, 'html.parser')

    print(f'<{cate_name}>\n')

    # find() returns None when nothing matches, so each step of the lookup
    # is checked before going further; otherwise a changed page layout would
    # abort the whole run with an AttributeError.
    contents = soup.find(id='contents')
    topics = contents.find('ul') if contents is not None else None

    if topics is None:
        print('(no headline list found)')
    else:
        for li in topics.find_all('li'):
            link = li.a
            if link is None:
                # A list item without an anchor has no href to report.
                continue
            print(li.get_text())
            print(link.get('href'))

    # Blank gap separating this category from the next one.
    print('\n')
# https://news.yahoo.co.jp/
# https://news.yahoo.co.jp/categories/business
# https://news.yahoo.co.jp/categories/entertainment
# https://news.yahoo.co.jp/categories/sports
# https://news.yahoo.co.jp/categories/domestic
# https://news.yahoo.co.jp/categories/world
# https://news.yahoo.co.jp/categories/it
# https://news.yahoo.co.jp/categories/local