Editorconfig, lintian, slightly better output.

This commit is contained in:
Thomas "Cakeisalie5" Touhey
2017-05-05 17:04:03 +02:00
parent e8275c472b
commit 6e9cc213ad
2 changed files with 30 additions and 19 deletions

12
.editorconfig Normal file
View File

@@ -0,0 +1,12 @@
# Editor config -- see http://editorconfig.org/
root = true
# Unix-style newlines
[*]
end_of_line = lf
insert_final_newline = true
# 4 space indentation
[*.py]
indent_style = space
indent_size = 4

View File

@@ -1,45 +1,44 @@
""" Slideslurp scrapes a PDF from SlideShare.net (LinkedIn). """
import argparse import argparse
import sys import sys
import urllib
import bs4 import bs4
import requests import requests
from urllib.parse import urlparse import reportlab.pdfgen.canvas
from reportlab.pdfgen import canvas
def parse_args(): def parse_args():
""" Parse the command-line arguments. """
descr = "Generate PDFs from Slideshare presentations" descr = "Generate PDFs from Slideshare presentations"
parser = argparse.ArgumentParser(description=descr) parser = argparse.ArgumentParser(description=descr)
parser.add_argument('url', metavar='url', nargs=1, parser.add_argument('url', metavar='url', nargs=1,
help='the URL to slurp') help='the URL to slurp')
parser.add_argument('--output', '-o', default=None, parser.add_argument('--out', '--output', '-o', default=None,
help='the file to write to (default: out.pdf)') help='force the output file name')
args = parser.parse_args() args = parser.parse_args()
if not args.output: if not args.out:
path = urlparse(args.url[0]).path path = urllib.parse.urlparse(args.url[0]).path
args.output = '%s.pdf'%path[path.find('/', 1) + 1:] args.out = '%s.pdf'%path[path.find('/', 1) + 1:]
return args return args
def main(): def main():
""" Main function. """
args = parse_args() args = parse_args()
res = requests.get(args.url[0]) res = requests.get(args.url[0])
tree = bs4.BeautifulSoup(res.text, "html.parser") tree = bs4.BeautifulSoup(res.text, "html.parser")
c = canvas.Canvas(args.output) canvas = reportlab.pdfgen.canvas.Canvas(args.out)
for img in tree.findAll("img", class_="slide_image"): for img in tree.findAll("img", class_="slide_image"):
img_url = img.attrs["data-full"] img_url = img.attrs["data-full"]
page_width, page_height = c._pagesize page_width, page_height = canvas._pagesize
c.setPageRotation(90) canvas.drawImage(img_url, 0, 0, page_height, page_width,
c.drawImage(img_url, 0, 0, page_height, page_width,
preserveAspectRatio=True) preserveAspectRatio=True)
c.showPage() canvas.setPageSize((page_height, page_width))
c.save() canvas.showPage()
canvas.save()
if __name__ == '__main__': if __name__ == '__main__':
main() main()