Ensure basic HTML parsing is in retry block

This commit is contained in:
Joshua Boniface 2023-04-06 12:54:10 -04:00
parent 6ec8923336
commit ca36555e6b
1 changed file with 7 additions and 4 deletions

11
c3dbdl
View File

@@ -111,14 +111,17 @@ def buildDatabase(pages, concurrency):
try: try:
click.echo(f"Parsing page {i} (attempt {attempts}/3)...") click.echo(f"Parsing page {i} (attempt {attempts}/3)...")
p = requests.get(f"{config['base_songs_url']}?page={i}") p = requests.get(f"{config['base_songs_url']}?page={i}")
if p is None or p.status_code != 200:
raise
parsed_html = BeautifulSoup(p.text, 'html.parser')
if parsed_html.body is None:
raise
if parsed_html.body.find('div', attrs={'class':'portlet-body'}) is None:
raise
break break
except Exception: except Exception:
sleep(attempts) sleep(attempts)
attempts += 1 attempts += 1
if p is None or p.status_code != 200:
break
parsed_html = BeautifulSoup(p.text, 'html.parser')
table_html = parsed_html.body.find('div', attrs={'class':'portlet-body'}).find('tbody') table_html = parsed_html.body.find('div', attrs={'class':'portlet-body'}).find('tbody')