# 1. Import the required package.
from html.parser import HTMLParser
# 2. Complete the implementation below.
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints every parse event it receives.

    Each handler below overrides the corresponding ``HTMLParser`` callback
    and writes a one-line description of the event to standard output.
    """

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """
        Report an opening tag, like <div>.

        :param tag: name of the tag that was opened
        :param attrs: list of ``(name, value)`` attribute pairs; not used here
        :return: None
        """
        print("Encountered a start tag:", tag)

    def handle_endtag(self, tag: str) -> None:
        """
        Report a closing tag, like </div>.

        :param tag: name of the tag that was closed
        :return: None
        """
        print("Encountered an end tag :", tag)

    def handle_data(self, data: str) -> None:
        """
        Report text content found between tags.

        :param data: the text content
        :return: None
        """
        print("Encountered some data :", data)

    def handle_startendtag(self, tag: str, attrs: list) -> None:
        """
        Report a self-closing tag, like <img />.

        Because this method is overridden, such tags are reported only here
        and are not forwarded to handle_starttag / handle_endtag.

        :param tag: name of the self-closing tag
        :param attrs: list of ``(name, value)`` attribute pairs; not used here
        :return: None
        """
        print("Encountered startendtag :", tag)

    def handle_comment(self, data: str) -> None:
        """
        Report an HTML comment.

        :param data: the comment text passed in by the parser
        :return: None
        """
        print("Encountered comment :", data)
# Demo: run the parser over a small HTML document; each handler prints
# the event it receives.
parser = MyHTMLParser()
# The comment delimiters must be ASCII "<!--" / "-->"; typographic en dashes
# do not form a valid HTML comment.
html_code = '''<html>
<head>这是头标签</head>
<body>
<!-- test html parser -->
<p>Some <a href=\"#\">html</a> HTML Ӓ Ӓtutorial…<br>END</p>
</body></html>'''
parser.feed(html_code)
# close() forces processing of any data still buffered by the parser.
parser.close()