# Print the slug and a short plain-text preview of a few calculator posts
# fetched from the howdoyouconvert.com WordPress REST API.
import html
import http.client
import json
import re
import urllib.request

URL = 'https://howdoyouconvert.com/wp-json/wp/v2/calculator?per_page=5'

# Seconds to wait on the server; without a timeout urlopen can block forever.
REQUEST_TIMEOUT = 10

# Maximum number of characters of post text shown in the preview.
PREVIEW_LENGTH = 200

# Matches <p> elements with or without attributes (e.g. <p class="intro">).
# The \b keeps other tags that merely start with "p" (such as <pre>) out.
PARAGRAPH_RE = re.compile(r'<p\b[^>]*>(.*?)</p>', re.DOTALL)

# Matches any tag left inside a paragraph (<a>, <strong>, ...).
TAG_RE = re.compile(r'<[^>]+>')


def fetch_posts(url=URL, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the decoded JSON payload.

    Args:
        url: Endpoint to request; defaults to the module-level ``URL``.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        Whatever JSON value the endpoint returned. The caller iterates it
        as a list of post objects -- the schema is not validated here.

    Raises:
        OSError: On network failures, HTTP errors, or timeouts
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        http.client.HTTPException: On a malformed or truncated HTTP response.
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    # A browser-like User-Agent is sent, as in the original request.
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return json.loads(response.read().decode('utf-8'))


def extract_text(rendered_html):
    """Return the plain text of every ``<p>`` element in *rendered_html*.

    Paragraph bodies are joined with single spaces, remaining inline tags
    are removed, and HTML entities are decoded. Content outside ``<p>``
    elements is ignored. Returns ``""`` when there are no paragraphs.
    """
    paragraphs = PARAGRAPH_RE.findall(rendered_html)
    text = TAG_RE.sub('', " ".join(paragraphs))
    # Decode entities only after stripping tags, so an escaped "&lt;b&gt;"
    # stays visible as literal text instead of being removed as a tag.
    return html.unescape(text)


def preview(text, limit=PREVIEW_LENGTH):
    """Return *text* cut to *limit* characters, with ``...`` only if cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def main():
    """Fetch the posts and print each slug with a preview of its text."""
    try:
        posts = fetch_posts()
    except (OSError, http.client.HTTPException, ValueError) as exc:
        print(f"Failed to fetch posts: {exc}")
        return

    for post in posts:
        try:
            slug = post['slug']
            content = post['content']['rendered']
        except (KeyError, TypeError) as exc:
            # One malformed entry should not hide the remaining posts.
            print(f"Skipping malformed post: {exc!r}")
            continue
        print(f"Slug: {slug}")
        print(f"Content: {preview(extract_text(content))}\n")


if __name__ == "__main__":
    main()