Add instrument entries to database

This commit is contained in:
Joshua Boniface 2023-04-07 04:10:40 -04:00
parent 083ff1884b
commit cb1a5c5d58
2 changed files with 80 additions and 16 deletions

View File

@ -5,5 +5,6 @@
# * E203 (whitespace before ':'): Black recommends this as disabled # * E203 (whitespace before ':'): Black recommends this as disabled
ignore = W503, E501 ignore = W503, E501
extend-ignore = E203 extend-ignore = E203
exclude = build/
# Set the max line length to 88 for Black # Set the max line length to 88 for Black
max-line-length = 88 max-line-length = 88

View File

@ -33,11 +33,24 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], max_content_width=120) CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], max_content_width=120)
def fetchSongData(entry): def fetchSongData(entries):
song_entry = dict() song_entry = {
"artist": None,
"title": None,
"album": None,
"song_link": None,
"genre": None,
"year": None,
"length": None,
"author": None,
"instruments": dict(),
"dl_links": list(),
}
messages = list() messages = list()
found_instruments = False
for idx, td in enumerate(entry.find_all("td")): # Find song details
for idx, td in enumerate(entries[0].find_all("td")):
if idx == 2: if idx == 2:
# Artist # Artist
song_entry["artist"] = td.find("a").get_text().strip().replace("/", "+") song_entry["artist"] = td.find("a").get_text().strip().replace("/", "+")
@ -74,6 +87,35 @@ def fetchSongData(entry):
# Author (of chart) # Author (of chart)
song_entry["author"] = td.find("a").get_text().strip().replace("/", "+") song_entry["author"] = td.find("a").get_text().strip().replace("/", "+")
# Find song instruments and difficulties
for idx, td in enumerate(entries[1].find_all("td")):
if (
len(list(td.find_all("div", attrs={"style": "width:110px;float:left"}))) > 0
and not found_instruments
):
for instrument in td.find_all(
"div", attrs={"style": "width:110px;float:left"}
):
difficulty_link = (
instrument.find_all(
"a", attrs={"style": "text-decoration: none;color:#000"}
)[1]
.get("href")
.split("/")
)
instrument_name = (
difficulty_link[-2].split("_")[-1].replace("prokeys", "keys")
)
instrument_diff = int(difficulty_link[-1])
if instrument_diff < 1:
# No part
instrument_difficulty = None
else:
# Link difficulty - 1
instrument_difficulty = instrument_diff - 1
song_entry["instruments"][instrument_name] = instrument_difficulty
found_instruments = True
if ( if (
song_entry song_entry
and song_entry["author"] and song_entry["author"]
@ -83,9 +125,6 @@ def fetchSongData(entry):
messages.append( messages.append(
f"> Found song entry for {song_entry['artist']} - {song_entry['title']} by {song_entry['author']}" f"> Found song entry for {song_entry['artist']} - {song_entry['title']} by {song_entry['author']}"
) )
for entry_type in ["artist", "album", "genre", "year", "length"]:
if not song_entry[entry_type]:
song_entry[entry_type] = "None"
# Get download links from the actual song page # Get download links from the actual song page
attempts = 1 attempts = 1
@ -130,7 +169,8 @@ def fetchSongData(entry):
return None return None
song_entry["dl_links"] = dl_links song_entry["dl_links"] = dl_links
# Append to the database # Return messages and song entry
print(song_entry)
return messages, song_entry return messages, song_entry
@ -178,11 +218,22 @@ def buildDatabase(pages, concurrency):
"tbody" "tbody"
) )
# This is weird, but because of the table layout, there are two table rows for
# each song: the first is the song info, the second is the instruments.
# So we must make a single "entry" that is a list of the two elements, then
# handle that later in fetchSongData.
entries = list() entries = list()
entry_idx = 0
entry_data = list()
for entry in table_html.find_all("tr", attrs={"class": "odd"}): for entry in table_html.find_all("tr", attrs={"class": "odd"}):
if len(entry) < 1: if len(entry) < 1:
break break
entries.append(entry) entry_data.append(entry)
entry_idx += 1
if entry_idx == 2:
entries.append(entry_data)
entry_idx = 0
entry_data = list()
click.echo("Fetching and parsing song pages...") click.echo("Fetching and parsing song pages...")
with ThreadPoolExecutor(max_workers=concurrency) as executor: with ThreadPoolExecutor(max_workers=concurrency) as executor:
@ -534,9 +585,13 @@ def download(_filters, _id, _desc, _limit, _file_structure):
add_to_pending = True add_to_pending = True
else: else:
try: try:
add_to_pending = all(_filter[1].lower() in song[_filter[0]].lower() for _filter in _filters) pending_filters = [
except KeyError: _filter[1].lower() in song[_filter[0]].lower()
click.echo(f"Invalid filter field {_filter[0]}") for _filter in _filters
]
add_to_pending = all(pending_filters)
except KeyError as e:
click.echo(f"Invalid filter field {e}")
exit(1) exit(1)
if add_to_pending: if add_to_pending:
@ -591,19 +646,27 @@ def search(_filters):
add_to_pending = True add_to_pending = True
else: else:
try: try:
add_to_pending = all(_filter[1].lower() in song[_filter[0]].lower() for _filter in _filters) pending_filters = [
except KeyError: _filter[1].lower() in song[_filter[0]].lower()
click.echo(f"Invalid filter field {_filter[0]}") for _filter in _filters
]
add_to_pending = all(pending_filters)
except KeyError as e:
click.echo(f"Invalid filter field {e}")
exit(1) exit(1)
if add_to_pending: if add_to_pending:
pending_songs.append(song) pending_songs.append(song)
click.echo(f"Found {len(pending_songs)} matchin song files:") click.echo(f"Found {len(pending_songs)} matching songs:")
click.echo()
for entry in pending_songs: for entry in pending_songs:
click.echo( click.echo(
f"""> "{entry['artist']} - {entry['title']}" by {entry['author']}...""" f"""> "{entry['artist']} - {entry['title']}" from "{entry['album']} ({entry['year']})" by {entry['author']}"""
) )
for link in entry["dl_links"]:
click.echo(f""" * {link['description']}""")
click.echo()
@click.group(context_settings=CONTEXT_SETTINGS) @click.group(context_settings=CONTEXT_SETTINGS)