Editorconfig, lintian, slightly better output.
This commit is contained in:
12
.editorconfig
Normal file
12
.editorconfig
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Editor config -- see http://editorconfig.org/
|
||||||
|
root = true
|
||||||
|
|
||||||
|
# Unix-style newlines
|
||||||
|
[*]
|
||||||
|
end_of_line = lf
|
||||||
|
insert_final_newline = true
|
||||||
|
|
||||||
|
# 4 space indentation
|
||||||
|
[*.py]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 4
|
@@ -1,45 +1,44 @@
|
|||||||
|
""" Slideslurp scrapes a PDF from SlideShare.net (LinkedIn). """
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
|
import urllib
|
||||||
import bs4
|
import bs4
|
||||||
import requests
|
import requests
|
||||||
from urllib.parse import urlparse
|
import reportlab.pdfgen.canvas
|
||||||
|
|
||||||
from reportlab.pdfgen import canvas
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
|
""" Parse the command-line arguments. """
|
||||||
descr = "Generate PDFs from Slideshare presentations"
|
descr = "Generate PDFs from Slideshare presentations"
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description=descr)
|
parser = argparse.ArgumentParser(description=descr)
|
||||||
parser.add_argument('url', metavar='url', nargs=1,
|
parser.add_argument('url', metavar='url', nargs=1,
|
||||||
help='the URL to slurp')
|
help='the URL to slurp')
|
||||||
parser.add_argument('--output', '-o', default=None,
|
parser.add_argument('--out', '--output', '-o', default=None,
|
||||||
help='the file to write to (default: out.pdf)')
|
help='force the output file name')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if not args.output:
|
if not args.out:
|
||||||
path = urlparse(args.url[0]).path
|
path = urllib.parse.urlparse(args.url[0]).path
|
||||||
args.output = '%s.pdf'%path[path.find('/', 1) + 1:]
|
args.out = '%s.pdf'%path[path.find('/', 1) + 1:]
|
||||||
return args
|
return args
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
""" Main function. """
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
res = requests.get(args.url[0])
|
res = requests.get(args.url[0])
|
||||||
|
|
||||||
tree = bs4.BeautifulSoup(res.text, "html.parser")
|
tree = bs4.BeautifulSoup(res.text, "html.parser")
|
||||||
c = canvas.Canvas(args.output)
|
canvas = reportlab.pdfgen.canvas.Canvas(args.out)
|
||||||
|
|
||||||
for img in tree.findAll("img", class_="slide_image"):
|
for img in tree.findAll("img", class_="slide_image"):
|
||||||
img_url = img.attrs["data-full"]
|
img_url = img.attrs["data-full"]
|
||||||
page_width, page_height = c._pagesize
|
page_width, page_height = canvas._pagesize
|
||||||
c.setPageRotation(90)
|
canvas.drawImage(img_url, 0, 0, page_height, page_width,
|
||||||
c.drawImage(img_url, 0, 0, page_height, page_width,
|
preserveAspectRatio=True)
|
||||||
preserveAspectRatio=True)
|
canvas.setPageSize((page_height, page_width))
|
||||||
c.showPage()
|
canvas.showPage()
|
||||||
c.save()
|
canvas.save()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
Reference in New Issue
Block a user