This is the source code for a bot that runs on the Reddit account u/EmotionalField.

The program continually scans comments across the entire site for mentions of science fiction and fantasy book titles. It uses a dataset of book title/author pairs scraped from the Internet Speculative Fiction Database (ISFDB), a compendium of science fiction and fantasy literary works.
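Each row of the catalog pairs a title with its author in its first two columns. A couple of illustrative rows (made up for this post, not copied from the file):

    Dune,Frank Herbert
    The Dispossessed,Ursula K. Le Guin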

If an SF/F book title is detected, the bot checks whether the author’s name is also present in the comment, to reduce false positives. If both conditions are met, the bot searches YouTube for an audiobook of the mentioned title and, if one is found, replies with a link to it.
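In miniature, the detection heuristic is just two case-insensitive substring checks against the comment text. An illustrative sketch, not the bot itself:

    text = "Just finished Dune by Frank Herbert, loved it.".lower().replace(".", "")

    title, author = "Dune", "Frank Herbert"

    # Treat the mention as real only if both title and author appear.
    if title.lower() in text and author.lower() in text:
        print("candidate match: go look for an audiobook")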

Link to full repo after the code.

 

temp.py

    import collections
    import json
    import os
    import subprocess
    import time

    import clevercsv
    import loguru
    import pendulum
    import praw


    # Configure the error log once, so repeated restarts don't attach
    # duplicate sinks.
    loguru.logger.add(
        sink="events.log",
        level="WARNING",
        format="\n\n\n\n{level} {time:DD-MM-YYYY HH:mm:ss}\n"
               "Elapsed Time: {elapsed}\n"
               "File: {file}\n"
               "Message: {message}")


    def speculative():
        """
        Uses scraped data from the Internet Speculative Fiction Database to
        search YouTube for SF/Fantasy audiobooks. Title/author pairs are
        read from the "isfdb_catalog.csv" file.
        """
        bot = praw.Reddit(
            client_id=os.environ.get("CLIENT_ID"),
            client_secret=os.environ.get("CLIENT_SECRET"),
            username=os.environ.get("USERNAME"),
            password=os.environ.get("PASSWORD"),
            user_agent=os.environ.get("USER_AGENT"))

        comments = bot.subreddit("all").stream.comments(skip_existing=True)

        # Remember the last 100 threads replied to, so the bot never
        # answers the same submission twice in quick succession.
        responded = collections.deque(maxlen=100)

        with open("isfdb_catalog.csv", "r",
                  encoding="UTF-8") as isfdb_catalog:
            reader = clevercsv.reader(isfdb_catalog)
            catalog = [[row[0], row[1]] for row in reader if len(row) > 1]

        for comment in comments:
            text = comment.body.lower().replace(".", "")

            for card in catalog:

                if (card[0].lower() in text
                        and card[1].lower() in text
                        and comment.submission.id not in responded
                        and not comment.subreddit.user_is_banned):

                    # Ask youtube-dl for metadata only (-j) on the top
                    # search result; nothing is downloaded.
                    info = subprocess.check_output(
                        ["youtube-dl",
                         "-i",
                         "-j",
                         f"ytsearch: {card[0]} {card[1]} audiobook"])

                    jdict = json.loads(info)

                    audio = ["audiobook", "audio book"]

                    author_format = [
                        name.lower() for name in card[1].split(" ")
                        if len(name) >= 3]

                    # Only link videos longer than three hours, rated
                    # above 4.0, whose titles mention the book, the
                    # author, and "audiobook"/"audio book".
                    if (jdict["duration"] > 10800
                            and jdict["average_rating"] > 4.0
                            and card[0].lower() in jdict["title"].lower()
                            and any(item in jdict["title"].lower()
                                    for item in audio)
                            and all(item in jdict["title"].lower()
                                    for item in author_format)):

                        saw_the_sign = (
                            "[^(Source Code)](https://capybasilisk.com/posts/"
                            "2020/04/speculative-fiction-bot/) "
                            "^| [^(Feedback)](https://www.reddit.com/message/"
                            "compose?to=Capybasilisk&subject=Robot) "
                            "^| [^(Programmer)](https://www.reddit.com/u/"
                            "capybasilisk) "
                            "^| ^(Downvote To Remove) "
                            "^| ^(Version 1.4.0) "
                            "^| ^(Support Robot Rights!)")

                        comment.reply(
                            f"Hi. You just mentioned *{card[0]}* by "
                            f"{card[1]}.\n\nI've found an audiobook of "
                            "that novel on YouTube. You can listen to it here"
                            f":\n\n[YouTube | {jdict['title']}]"
                            f"({jdict['webpage_url']})\n\n*I'm a bot that "
                            "searches YouTube for science fiction and fantasy"
                            f" audiobooks.*\n***\n{saw_the_sign}")

                        responded.append(comment.submission.id)

                        # Log every reply to "activity.csv", writing the
                        # header row only when the file is new.
                        with open("activity.csv", "a",
                                  encoding="UTF-8") as actlog:

                            activity = clevercsv.writer(actlog)

                            if actlog.tell() == 0:
                                activity.writerow(
                                    ["Book",
                                     "Comment",
                                     "Author",
                                     "Thread",
                                     "Subreddit",
                                     "Time"])

                            activity.writerow(
                                [f"{card[0]} by {card[1]}",
                                 f"{comment.body}",
                                 f"{comment.author}",
                                 f"{comment.submission.title}",
                                 f"{comment.subreddit}",
                                 f"{pendulum.now().to_datetime_string()}"])

                        break

            # Every ten minutes, sweep recent replies and delete any
            # that have been downvoted, logging them to "deleted.csv".
            if pendulum.now().to_time_string().endswith("0:00"):

                replies = bot.user.me().comments.new(limit=100)

                for reply in replies:

                    if reply.score < 0:

                        with open("deleted.csv", "a",
                                  encoding="UTF-8") as removed:

                            deleted = clevercsv.writer(removed)

                            if removed.tell() == 0:
                                deleted.writerow(
                                    ["Comment",
                                     "Parent",
                                     "Thread",
                                     "Subreddit",
                                     "Time",
                                     "Score"])

                            deleted.writerow(
                                [f"{reply.body}",
                                 f"{reply.parent().body}",
                                 f"{reply.submission.title}",
                                 f"{reply.subreddit}",
                                 f"{pendulum.from_timestamp(reply.created_utc)}",
                                 f"{reply.score}"])

                        reply.delete()

                time.sleep(1)


    def eventlogger(event):
        loguru.logger.exception(event)


    if __name__ == "__main__":

        while True:

            try:
                speculative()

            except Exception as event:
                # Log the failure, back off for ten minutes, then
                # restart the comment stream.
                eventlogger(event)
                time.sleep(600)
                continue
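If you're curious what the bot's filter is actually looking at, you can run the same youtube-dl search by hand and inspect the JSON it returns. A minimal sketch (the query is just an example; average_rating is missing for some videos, so it's fetched with .get() here):

    import json
    import subprocess

    # "-j" makes youtube-dl print metadata as JSON without downloading
    # anything; "ytsearch:" returns the top search result.
    info = subprocess.check_output(
        ["youtube-dl", "-i", "-j", "ytsearch:Dune Frank Herbert audiobook"])

    jdict = json.loads(info)

    print(jdict["title"])               # must mention title, author, "audiobook"
    print(jdict["duration"])            # seconds; the bot wants > 10800 (3 hours)
    print(jdict.get("average_rating"))  # the bot wants > 4.0
    print(jdict["webpage_url"])         # the link the bot posts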
 

 

Below is the script I wrote to scrape the Internet Speculative Fiction Database. It pulls the data and builds a CSV file of basic info on about 125,000 science fiction and fantasy books. A full run takes several hours on a basic Linux server.

 

temp.py

    import re

    import bs4
    import clevercsv
    import requests


    def catalog():
        """
        Scrapes metadata of science fiction and fantasy literary works
        from the Internet Speculative Fiction Database and stores it in
        a CSV file. If the site structure changes significantly, this
        code may stop functioning properly.
        """
        accepted_booktype = [
            "NOVEL",
            "SHORTFICTION",
            "COLLECTION",
            "ANTHOLOGY",
            "OMNIBUS",
            "POEM",
            "NONFICTION",
            "ESSAY"]

        # ISFDB title records have sequential numeric IDs, so just walk
        # through them.
        for card in range(1, 199001):

            try:
                page = requests.get(
                    f"http://www.isfdb.org/cgi-bin/title.cgi?{card}")
                parsed = bs4.BeautifulSoup(
                    page.content,
                    "html.parser")
                content = parsed.find(id="content").text.split("\n")
                content_string = "##".join(content)

                content_title = re.search(
                    r"Title:\s+[^#]+", content_string).group(0)
                title = content_title.split(": ")[1]

                content_author = re.search(
                    r"Author:##[^#]+", content_string).group(0)
                author = content_author.split("##")[1]

                content_date = re.search(
                    r"Date:\s+\d+-\d+-\d+", content_string).group(0)
                pubdate = content_date.split("  ")[1]

                content_type = re.search(
                    r"Type:\s+[^#]+", content_string).group(0)
                booktype = content_type.split(": ")[1]

                with open(
                        "SFF_Dataset.csv", "a", encoding="UTF-8") as sff:

                    dataset = clevercsv.writer(sff)

                    # Write the header row once, when the file is empty.
                    if sff.tell() == 0:
                        dataset.writerow(
                            ["Title",
                             "Author",
                             "Publication Date",
                             "Type"])

                    if booktype in accepted_booktype:
                        dataset.writerow(
                            [title, author, pubdate, booktype])

            except Exception:
                print(
                    f"Skipping entry no. {card}: Empty article.", "\n" * 4)
                continue


    if __name__ == "__main__":
        catalog()
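Because the scraper leans entirely on ISFDB's page structure, it's worth confirming a single record still parses before committing to the multi-hour run. A quick sanity check along the same lines (title ID 1 is an arbitrary choice):

    import re

    import bs4
    import requests

    # Fetch one ISFDB title page and extract the same fields the scraper uses.
    page = requests.get("http://www.isfdb.org/cgi-bin/title.cgi?1")
    parsed = bs4.BeautifulSoup(page.content, "html.parser")
    content_string = "##".join(parsed.find(id="content").text.split("\n"))

    title = re.search(r"Title:\s+[^#]+", content_string).group(0).split(": ")[1]
    author = re.search(r"Author:##[^#]+", content_string).group(0).split("##")[1]

    print(f"{title} by {author}")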
       

 

Below is how the final dataset looks in LibreOffice Calc:

 

[Screenshot: the dataset in LibreOffice Calc]

 

The bot is run continuously on a remote Linux server, which I also use to host this website.

Full code repo available on GitHub.

I’ve also put the dataset up on Kaggle.

 
