Add instrument entries to database

2023-04-07 04:10:40 -04:00
parent 083ff1884b
commit cb1a5c5d58
2 changed files with 80 additions and 16 deletions
--- a/.flake8
+++ b/.flake8
@@ -5,5 +5,6 @@
 #   * E203 (whitespace before ':'): Black recommends this as disabled
 ignore = W503, E501
 extend-ignore = E203
+exclude = build/
 # Set the max line length to 88 for Black
 max-line-length = 88
--- a/c3dbdl/c3dbdl.py
+++ b/c3dbdl/c3dbdl.py
@@ -33,11 +33,24 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], max_content_width=120)


-def fetchSongData(entry):
-    song_entry = dict()
+def fetchSongData(entries):
+    song_entry = {
+        "artist": None,
+        "title": None,
+        "album": None,
+        "song_link": None,
+        "genre": None,
+        "year": None,
+        "length": None,
+        "author": None,
+        "instruments": dict(),
+        "dl_links": list(),
+    }
    messages = list()
+    found_instruments = False

-    for idx, td in enumerate(entry.find_all("td")):
+    # Find song details
+    for idx, td in enumerate(entries[0].find_all("td")):
        if idx == 2:
            # Artist
            song_entry["artist"] = td.find("a").get_text().strip().replace("/", "+")
@@ -74,6 +87,35 @@ def fetchSongData(entry):
            # Author (of chart)
            song_entry["author"] = td.find("a").get_text().strip().replace("/", "+")

+    # Find song instruments and difficulties
+    for idx, td in enumerate(entries[1].find_all("td")):
+        if (
+            len(list(td.find_all("div", attrs={"style": "width:110px;float:left"}))) > 0
+            and not found_instruments
+        ):
+            for instrument in td.find_all(
+                "div", attrs={"style": "width:110px;float:left"}
+            ):
+                difficulty_link = (
+                    instrument.find_all(
+                        "a", attrs={"style": "text-decoration: none;color:#000"}
+                    )[1]
+                    .get("href")
+                    .split("/")
+                )
+                instrument_name = (
+                    difficulty_link[-2].split("_")[-1].replace("prokeys", "keys")
+                )
+                instrument_diff = int(difficulty_link[-1])
+                if instrument_diff < 1:
+                    # No part
+                    instrument_difficulty = None
+                else:
+                    # Stored difficulty is the number at the end of the link minus 1
+                    instrument_difficulty = instrument_diff - 1
+                song_entry["instruments"][instrument_name] = instrument_difficulty
+            found_instruments = True
+
    if (
        song_entry
        and song_entry["author"]
@@ -83,9 +125,6 @@ def fetchSongData(entry):
        messages.append(
            f"> Found song entry for {song_entry['artist']} - {song_entry['title']} by {song_entry['author']}"
        )
-        for entry_type in ["artist", "album", "genre", "year", "length"]:
-            if not song_entry[entry_type]:
-                song_entry[entry_type] = "None"

        # Get download links from the actual song page
        attempts = 1
@@ -130,7 +169,8 @@ def fetchSongData(entry):
            return None
        song_entry["dl_links"] = dl_links

-        # Append to the database
+        # Return messages and song entry
+        print(song_entry)
        return messages, song_entry


@@ -178,11 +218,22 @@ def buildDatabase(pages, concurrency):
            "tbody"
        )

+        # The table layout uses two consecutive table rows for each song: the first
+        # holds the song info and the second holds the instruments. Group each pair
+        # of rows into a single "entry" (a two-element list) here; fetchSongData
+        # reads the song details from element 0 and the instruments from element 1.
        entries = list()
+        entry_idx = 0
+        entry_data = list()
        for entry in table_html.find_all("tr", attrs={"class": "odd"}):
            if len(entry) < 1:
                break
-            entries.append(entry)
+            entry_data.append(entry)
+            entry_idx += 1
+            if entry_idx == 2:
+                entries.append(entry_data)
+                entry_idx = 0
+                entry_data = list()

        click.echo("Fetching and parsing song pages...")
        with ThreadPoolExecutor(max_workers=concurrency) as executor:
@@ -534,9 +585,13 @@ def download(_filters, _id, _desc, _limit, _file_structure):
            add_to_pending = True
        else:
            try:
-                add_to_pending = all(_filter[1].lower() in song[_filter[0]].lower() for _filter in _filters)
-            except KeyError:
-                click.echo(f"Invalid filter field {_filter[0]}")
+                pending_filters = [
+                    _filter[1].lower() in song[_filter[0]].lower()
+                    for _filter in _filters
+                ]
+                add_to_pending = all(pending_filters)
+            except KeyError as e:
+                click.echo(f"Invalid filter field {e}")
                exit(1)

        if add_to_pending:
@@ -591,19 +646,27 @@ def search(_filters):
            add_to_pending = True
        else:
            try:
-                add_to_pending = all(_filter[1].lower() in song[_filter[0]].lower() for _filter in _filters)
-            except KeyError:
-                click.echo(f"Invalid filter field {_filter[0]}")
+                pending_filters = [
+                    _filter[1].lower() in song[_filter[0]].lower()
+                    for _filter in _filters
+                ]
+                add_to_pending = all(pending_filters)
+            except KeyError as e:
+                click.echo(f"Invalid filter field {e}")
                exit(1)

        if add_to_pending:
            pending_songs.append(song)

-    click.echo(f"Found {len(pending_songs)} matchin song files:")
+    click.echo(f"Found {len(pending_songs)} matching songs:")
+    click.echo()
    for entry in pending_songs:
        click.echo(
-            f"""> "{entry['artist']} - {entry['title']}" by {entry['author']}..."""
+            f"""> "{entry['artist']} - {entry['title']}" from "{entry['album']} ({entry['year']})" by {entry['author']}"""
        )
+        for link in entry["dl_links"]:
+            click.echo(f"""  * {link['description']}""")
+        click.echo()


@click.group(context_settings=CONTEXT_SETTINGS)