Originally written on Feb 24, 2018.

NOTE 2024-08-22: the parseopt2 module is deprecated (parseopt is its standard-library replacement), so this script needs to be rewritten.

 
#[
downthemall.nim
 
Download documents matching a pattern from a webpage.
 
Build:
  nim c -r -d:ssl -d:release downthemall.nim
 
 
Example: (downloads all the PDFs linked in the URL)
  ./downthemall https://www.btbytes.com/pl.html -A .pdf
]#
 
import htmlparser
import httpclient
import xmltree
import strtabs
import os
import strutils
import streams
import parseopt2
import uri
 
func parseArgs(args: openArray[string]): tuple[url: string,
                                               extns: seq[string]] =
  ## Splits the command-line arguments into the page URL and the list of
  ## wanted filename suffixes.
  ##
  ## A suffix may be written as `-A .pdf`, `-A.pdf`, `-A:.pdf` or `-A=.pdf`.
  ## The last positional argument is taken as the URL and every other option
  ## is ignored. Arguments are parsed by hand so that the documented
  ## `-A .pdf` form (value given as a separate argument) is honoured.
  var i = 0
  while i < args.len:
    let arg = args[i]
    if arg.startsWith("-A"):
      var value = arg[2 .. ^1]
      if value.len > 0 and value[0] in {':', '='}:
        value = value[1 .. ^1]
      elif value.len == 0 and i + 1 < args.len:
        # Bare `-A`: the suffix is the next argument.
        inc i
        value = args[i]
      # An empty suffix would match every link, so it is dropped.
      if value.len > 0:
        result.extns.add(value)
    elif arg.startsWith("-"):
      discard # unknown options are ignored
    else:
      result.url = arg
    inc i

func hasWantedSuffix(name: string, extns: openArray[string]): bool =
  ## Returns true when `name` ends with at least one entry of `extns`.
  for extn in extns:
    if name.endsWith(extn):
      return true

proc main() =
  ## Fetches the page named on the command line and downloads every linked
  ## document whose file name ends with one of the requested suffixes.
  ## Each file is saved in the current directory under the last segment of
  ## its URL path. Exits with a failure status when the URL or the suffix
  ## list is missing.
  let (url, extns) = parseArgs(commandLineParams())
  if url.len == 0 or extns.len == 0:
    quit("usage: downthemall URL -A .extension [-A .another]", QuitFailure)

  let client = newHttpClient()
  defer: client.close()

  let base = parseUri(url)
  let html = parseHtml(newStringStream(client.getContent(url)))
  for a in html.findAll("a"):
    # `attr` yields "" for anchors without an href (or without attributes).
    let href = a.attr("href")
    if href.len == 0:
      continue
    # Resolve relative links against the page URL before downloading.
    let target = combine(base, parseUri(href))
    if target.scheme.toLowerAscii notin ["http", "https"]:
      continue # e.g. mailto: or javascript: links
    # Match on the path only, so a query string does not hide the suffix.
    let fname = target.path.split('/')[^1]
    if fname.len == 0 or not fname.hasWantedSuffix(extns):
      continue
    client.downloadFile($target, fname)
    echo "Downloaded: $1 to $2" % [$target, fname]

when isMainModule:
  main()