diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..e919e4c
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+# Editor config -- see http://editorconfig.org/
+root = true
+
+# Unix-style newlines
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# 4 space indentation
+[*.py]
+indent_style = space
+indent_size = 4
diff --git a/slideslurp/__init__.py b/slideslurp/__init__.py
index c98e58f..a36cb74 100644
--- a/slideslurp/__init__.py
+++ b/slideslurp/__init__.py
@@ -1,45 +1,55 @@
+""" Slideslurp scrapes a PDF from SlideShare.net (LinkedIn). """
+
 import argparse
 import sys
-
+import urllib.parse
 import bs4
 import requests
-from urllib.parse import urlparse
-
-from reportlab.pdfgen import canvas
-
+import reportlab.pdfgen.canvas
 
 def parse_args():
+    """ Parse the command-line arguments.
+
+    When no output file is forced, its name is the part of the URL path
+    after its second '/', with '.pdf' appended.
+    """
     descr = "Generate PDFs from Slideshare presentations"
     parser = argparse.ArgumentParser(description=descr)
     parser.add_argument('url', metavar='url', nargs=1,
                         help='the URL to slurp')
-    parser.add_argument('--output', '-o', default=None,
-                        help='the file to write to (default: out.pdf)')
+    parser.add_argument('--out', '--output', '-o', default=None,
+                        help='force the output file name')
     args = parser.parse_args()
 
-    if not args.output:
-        path = urlparse(args.url[0]).path
-        args.output = '%s.pdf'%path[path.find('/', 1) + 1:]
+    if not args.out:
+        path = urllib.parse.urlparse(args.url[0]).path
+        args.out = '%s.pdf'%path[path.find('/', 1) + 1:]
 
     return args
 
 
 def main():
+    """ Fetch the presentation page and write its slides to a PDF.
+
+    Each <img class="slide_image"> on the page becomes one landscape
+    page, drawn from the URL in its data-full attribute.
+    """
     args = parse_args()
     res = requests.get(args.url[0])
-
     tree = bs4.BeautifulSoup(res.text, "html.parser")
-    c = canvas.Canvas(args.output)
+    canvas = reportlab.pdfgen.canvas.Canvas(args.out)
+    # setPageSize() also applies to all following pages, so switch to
+    # landscape once here; swapping inside the loop would flip the
+    # orientation back and forth on every slide.
+    page_width, page_height = canvas._pagesize
+    canvas.setPageSize((page_height, page_width))
 
     for img in tree.findAll("img", class_="slide_image"):
         img_url = img.attrs["data-full"]
-        page_width, page_height = c._pagesize
-        c.setPageRotation(90)
-        c.drawImage(img_url, 0, 0, page_height, page_width,
-                    preserveAspectRatio=True)
-        c.showPage()
-    c.save()
-
+        canvas.drawImage(img_url, 0, 0, page_height, page_width,
+                         preserveAspectRatio=True)
+        canvas.showPage()
+    canvas.save()
 
 if __name__ == '__main__':
     main()