Add instrument entries to database

2023-04-07 04:10:40 -04:00
parent 083ff1884b
commit cb1a5c5d58
2 changed files with 80 additions and 16 deletions
--- a/.flake8
+++ b/.flake8
@ -5,5 +5,6 @@
 #   * E203 (whitespace before ':'): Black recommends this as disabled
 ignore = W503, E501
 extend-ignore = E203
 exclude = build/
 # Set the max line length to 88 for Black
 max-line-length = 88
--- a/c3dbdl/c3dbdl.py
+++ b/c3dbdl/c3dbdl.py
@ -33,11 +33,24 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], max_content_width=120)
-def fetchSongData(entry):
+def fetchSongData(entries):
-    song_entry = dict()
+    song_entry = {
        "artist": None,
        "title": None,
        "album": None,
        "song_link": None,
        "genre": None,
        "year": None,
        "length": None,
        "author": None,
        "instruments": dict(),
        "dl_links": list(),
    }
    messages = list()
    found_instruments = False
-    for idx, td in enumerate(entry.find_all("td")):
+    # Find song details
    for idx, td in enumerate(entries[0].find_all("td")):
        if idx == 2:
            # Artist
            song_entry["artist"] = td.find("a").get_text().strip().replace("/", "+")
@ -74,6 +87,35 @@ def fetchSongData(entry):
            # Author (of chart)
            song_entry["author"] = td.find("a").get_text().strip().replace("/", "+")
    # Find song instruments and difficulties
    for idx, td in enumerate(entries[1].find_all("td")):
        if (
            len(list(td.find_all("div", attrs={"style": "width:110px;float:left"}))) > 0
            and not found_instruments
        ):
            for instrument in td.find_all(
                "div", attrs={"style": "width:110px;float:left"}
            ):
                difficulty_link = (
                    instrument.find_all(
                        "a", attrs={"style": "text-decoration: none;color:#000"}
                    )[1]
                    .get("href")
                    .split("/")
                )
                instrument_name = (
                    difficulty_link[-2].split("_")[-1].replace("prokeys", "keys")
                )
                instrument_diff = int(difficulty_link[-1])
                if instrument_diff < 1:
                    # No part
                    instrument_difficulty = None
                else:
                    # Stored difficulty is the number from the link, minus 1
                    instrument_difficulty = instrument_diff - 1
                song_entry["instruments"][instrument_name] = instrument_difficulty
            found_instruments = True
    if (
        song_entry
        and song_entry["author"]
@ -83,9 +125,6 @@ def fetchSongData(entry):
        messages.append(
            f"> Found song entry for {song_entry['artist']} - {song_entry['title']} by {song_entry['author']}"
        )
        for entry_type in ["artist", "album", "genre", "year", "length"]:
            if not song_entry[entry_type]:
                song_entry[entry_type] = "None"
        # Get download links from the actual song page
        attempts = 1
@ -130,7 +169,8 @@ def fetchSongData(entry):
            return None
        song_entry["dl_links"] = dl_links
-        # Append to the database
+        # Return messages and song entry
        print(song_entry)
        return messages, song_entry
@ -178,11 +218,22 @@ def buildDatabase(pages, concurrency):
            "tbody"
        )
        # This is weird, but because of the table layout, there are two table rows for
        # each song: the first is the song info, the second is the instruments.
        # So we must make a single "entry" that is a list of the two elements, then
        # handle that later in fetchSongData.
        entries = list()
        entry_idx = 0
        entry_data = list()
        for entry in table_html.find_all("tr", attrs={"class": "odd"}):
            if len(entry) < 1:
                break
-            entries.append(entry)
+            entry_data.append(entry)
            entry_idx += 1
            if entry_idx == 2:
                entries.append(entry_data)
                entry_idx = 0
                entry_data = list()
        click.echo("Fetching and parsing song pages...")
        with ThreadPoolExecutor(max_workers=concurrency) as executor:
@ -534,9 +585,13 @@ def download(_filters, _id, _desc, _limit, _file_structure):
            add_to_pending = True
        else:
            try:
-                add_to_pending = all(_filter[1].lower() in song[_filter[0]].lower() for _filter in _filters)
+                pending_filters = [
-            except KeyError:
+                    _filter[1].lower() in song[_filter[0]].lower()
-                click.echo(f"Invalid filter field {_filter[0]}")
+                    for _filter in _filters
                ]
                add_to_pending = all(pending_filters)
            except KeyError as e:
                click.echo(f"Invalid filter field {e}")
                exit(1)
        if add_to_pending:
@ -591,19 +646,27 @@ def search(_filters):
            add_to_pending = True
        else:
            try:
-                add_to_pending = all(_filter[1].lower() in song[_filter[0]].lower() for _filter in _filters)
+                pending_filters = [
-            except KeyError:
+                    _filter[1].lower() in song[_filter[0]].lower()
-                click.echo(f"Invalid filter field {_filter[0]}")
+                    for _filter in _filters
                ]
                add_to_pending = all(pending_filters)
            except KeyError as e:
                click.echo(f"Invalid filter field {e}")
                exit(1)
        if add_to_pending:
            pending_songs.append(song)
-    click.echo(f"Found {len(pending_songs)} matchin song files:")
+    click.echo(f"Found {len(pending_songs)} matching songs:")
    click.echo()
    for entry in pending_songs:
        click.echo(
-            f"""> "{entry['artist']} - {entry['title']}" by {entry['author']}..."""
+            f"""> "{entry['artist']} - {entry['title']}" from "{entry['album']} ({entry['year']})" by {entry['author']}"""
        )
        for link in entry["dl_links"]:
            click.echo(f"""  * {link['description']}""")
        click.echo()
@click.group(context_settings=CONTEXT_SETTINGS)