GoalKicker.com PDF bulk-download in Clojure

You like GoalKicker.com 🔥 — so do I — but downloading every updated book by hand is a waste of precious time.

The solution was to write a tiny bulk downloader for GoalKicker in Clojure that speeds up the process a little.

Clojure PDF/eBook bulk-download for GoalKicker.com

(ns console.goalkicker
  (:require [clj-http.client :as c]
            [clojure.string :as str]
            [clojure.pprint :as pp]
            [clojure.java.io :as io]
            [hickory.core :as h]
            [hickory.select :as s]
            ,))

;;==============================================================================

;; Root URL of the GoalKicker book index page; referenced by `leech!` as the
;; starting point of the crawl.
(def url-base "https://books.goalkicker.com")

(defn- get-body
  "Performs an HTTP GET on `url`, parses the response body as HTML and
  returns the document in Hickory format."
  [url]
  (let [response (c/get url)
        html     (:body response)]
    (h/as-hickory (h/parse html))))

(defn- get-ahref
  "Runs the `a`-tag selector over the Hickory document `body` and returns
  the matching nodes."
  [body]
  (let [anchor-selector (s/child (s/tag :a))]
    (s/select anchor-selector body)))

(defn- get-urls
  "Extracts the `:href` attribute from each Hickory node in `ahref`.

  Nodes without an `:href` (e.g. named anchors) are skipped, so the result
  never contains nil; the string predicates applied downstream
  (`str/ends-with?`) would throw a NullPointerException on nil.

  Returns a lazy sequence of href strings, in document order."
  [ahref]
  (keep #(get-in % [:attrs :href]) ahref))

(defn- filter-urls
  "Keeps only the entries of `urls` that end with \"Book/\", i.e. the links
  that point at a book page."
  [urls]
  (let [book-link? (fn [href] (str/ends-with? href "Book/"))]
    (filter book-link? urls)))

(defn- get-urls-books
  "Fetches the page at `url`, collects the anchor hrefs ending in \"Book/\"
  and returns each one joined onto `url` with a \"/\" separator."
  [url]
  (->> (get-body url)
       get-ahref
       get-urls
       filter-urls
       (map (fn [href] (str url "/" href)))))

;;==============================================================================

(defn- filter-pdf
  "Keeps only the entries of `urls` that end with \".pdf\"."
  [urls]
  (->> urls
       (filter (fn [href] (str/ends-with? href ".pdf")))))

(defn- get-pdf-url
  "Fetches the book page at `url` and returns `url` concatenated with the
  first href on that page that ends in \".pdf\".

  NOTE(review): when the page contains no \".pdf\" link, `(str url nil)`
  evaluates to `url` itself, so the caller receives the page URL rather
  than nil."
  [url]
  ;; The unused `thread-id` binding from the original was dropped; it was
  ;; computed on every call and never read.
  (let [body     (get-body url)
        ahref    (get-ahref body)
        urls     (get-urls ahref)
        urls-pdf (filter-pdf urls)]
    (str url (first urls-pdf))))

(defn- fetch-pdf!
  "Requests `url` as a byte array with HTTP-status exceptions disabled.
  Returns the response body when the status is 200, otherwise nil."
  [url]
  (let [{:keys [status body]} (c/get url {:as :byte-array :throw-exceptions false})]
    (when (= status 200)
      body)))

(defn- save-pdf-clojure!
  "Downloads the PDF at URL `pdf` and writes it under ./data/, named after
  the last path segment of the URL.

  Does nothing (and returns nil) when `fetch-pdf!` yields no content.
  The ./data directory is created on demand; without that, `io/copy` fails
  with a FileNotFoundException on a fresh checkout."
  [pdf]
  (when-let [content (fetch-pdf! pdf)]
    (let [target (io/file (str "./data/" (last (str/split pdf #"/"))))]
      ;; Ensure ./data exists before opening the output stream.
      (io/make-parents target)
      (io/copy content target))))

;;==============================================================================

(defn leech!
  "Alright, just download all the PDF!

  Crawls the book index at `url`, resolves the PDF link of every book page
  and saves each PDF via `save-pdf-clojure!`.

  `url` was previously ignored in favour of `url-base`; it is now honoured.
  To stay compatible with callers that passed a placeholder, a nil, blank
  or non-string `url` falls back to `url-base`.

  Returns the realized sequence of PDF URLs that were processed."
  [url]
  (let [root       (if (and (string? url) (not (str/blank? url)))
                     url
                     url-base)
        urls-books (get-urls-books root)
        ;; speed up with pmap: one independent HTTP request per book page
        urls-pdf   (doall (pmap get-pdf-url urls-books))]
    ;; Downloads are side effects only, so use run! instead of building and
    ;; discarding a sequence of nils.
    (run! save-pdf-clojure! urls-pdf)
    urls-pdf))

;;==============================================================================