【无标题】获取网页文本_新闻动态

__author__ = '李元豪 from https://www.zhilu.space'

from DrissionPage import Chromium
from lxml import etree

# Connect to the browser and work with its most recent tab.
tab = Chromium().latest_tab

# Load the page and parse the HTML reported by the tab with lxml.
tab.get('https://blog.csdn.net/rank/list')
t = tab.html
parser = etree.HTMLParser()
tree = etree.fromstring(t, parser)

# Select every text node in the document.
all_text = tree.xpath('//text()')

# Strip each node once and drop the ones that were whitespace-only.
all_text = [
    stripped
    for stripped in (text.strip() for text in all_text)
    if stripped
]

for text in all_text:
    print(text)

# Toggle the tab's working mode (browser-driven <-> HTTP session).
tab.change_mode()

# NOTE(review): these class selectors do not obviously belong to the page
# loaded above -- confirm the tab is on the intended page at this point,
# otherwise the lookup will find nothing.
items = tab.ele('.ui relaxed divided items explore-repo__list').eles('.item')

for item in items:
    # Print the element's text: heading, description, then a blank line.
    print(item('t:h3').text)
    print(item('.project-desc mb-1').text)
    print()

# -*- coding: utf-8 -*-
__author__ = '李元豪 from https://www.zhilu.space'

from DrissionPage import Chromium
from lxml import etree

# Connect to the browser and work with its most recent tab.
tab = Chromium().latest_tab

# Load the page and parse the HTML reported by the tab with lxml.
tab.get('https://blog.csdn.net/rank/list')
t = tab.html
parser = etree.HTMLParser()
tree = etree.fromstring(t, parser)

# Select every text node in the document.
all_text = tree.xpath('//text()')

# Strip each node once and drop the ones that were whitespace-only.
all_text = [
    stripped
    for stripped in (text.strip() for text in all_text)
    if stripped
]

for text in all_text:
    print(text)

# Toggle the tab's working mode (browser-driven <-> HTTP session).
tab.change_mode()

# NOTE(review): these class selectors do not obviously belong to the page
# loaded above -- confirm the tab is on the intended page at this point,
# otherwise the lookup will find nothing.
items = tab.ele('.ui relaxed divided items explore-repo__list').eles('.item')

for item in items:
    # Print the element's text: heading, description, then a blank line.
    print(item('t:h3').text)
    print(item('.project-desc mb-1').text)
    print()

相关文章

相关动态

最新文章