downthemall

A Nim Script to download documents matching a pattern from a webpage.

Created: by Pradeep GowdaUpdated:Aug 22, 2024Tagged: utility · nim · code .

Originally written on Feb 24, 20218.

NOTE 2024-08-22: parseopt2 is deprecated. This script needs to be rewritten.


#[
downthemall.nim

Download documents matching a pattern from a webpage.

Build:
  nim c -r -d:ssl -d:release downthemall.nim


Example: (downloads all the PDFs linked in the URL)
  ./downthemall https://www.btbytes.com/pl.html -A .pdf
]#

import htmlparser
import httpclient
import xmltree
import strtabs
import os
import strutils
import streams
import parseopt2

when isMainModule:
  var url: string
  var extns: seq[string]
  for kind, key, val in getopt():
    case kind
    of cmdArgument:
      url = key
    of cmdShortOption:
      case key
      of "A": extns.add(val)
    of cmdLongOption: discard
    of cmdEnd: discard

  if url == nil or extns == nil:
    echo "usage: downthemall URL -A .extension [-A .another]"
    quit(0)


  let client = newHttpClient()
  let content = client.getContent(url)
  let html = parseHtml(newStringStream(content))
  for a in html.findall("a"):
    let href = a.attrs["href"]
    for extn in extns:
      if href.endswith(extn):
        let fname = href.split('/')[^1]
        downloadFile(href, fname)
        echo "Downloaded: $1 to $2" % [href, fname]