Merge pull request #1 from cakeisalie5/master

Corrected one or two things.
This commit is contained in:
2017-05-06 17:07:13 +02:00
committed by GitHub
2 changed files with 43 additions and 15 deletions

12
.editorconfig Normal file
View File

@@ -0,0 +1,12 @@
# Editor config -- see http://editorconfig.org/
root = true
# Unix-style newlines
[*]
end_of_line = lf
insert_final_newline = true
# 4 space indentation
[*.py]
indent_style = space
indent_size = 4

View File

@@ -1,40 +1,56 @@
""" Slideslurp scrapes a PDF from SlideShare.net (LinkedIn). """
import argparse
import sys
import urllib
import bs4
import requests
from reportlab.pdfgen import canvas
import reportlab.pdfgen.canvas
def parse_args(argv=None):
    """ Parse the command-line arguments and return them as an object.

        `argv` is the list of arguments to parse; when it is `None`
        (the default), argparse falls back to `sys.argv[1:]`.

        The returned object has the following properties:
        - `url` (list holding one string): the URL of the slideshare
          to download (a list because the argument uses `nargs=1`);
        - `out` (string): the output file path. """
    # `import urllib` alone does not guarantee that the `urllib.parse`
    # submodule is loaded, so import exactly what is needed here.
    from urllib.parse import urlparse

    descr = "Generate PDFs from Slideshare presentations"
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('url', metavar='url', nargs=1,
                        help='the URL to slurp')
    # A single registration for the output option: declaring `--output`
    # and `-o` twice makes argparse raise a conflicting-option error.
    parser.add_argument('--out', '--output', '-o', default=None,
                        help='force the output file name')
    args = parser.parse_args(argv)

    # If the user doesn't specify an output filename, we'll deduce
    # one from the URL. Here's a URL example:
    #   https://www.slideshare.net/FrisodeJong/iso-20022-for-dummies
    #
    # We're taking the path (`/FrisodeJong/iso-20022-for-dummies`),
    # dropping leading/trailing slashes, keeping the last segment and
    # appending `.pdf` to it. Keeping only the last segment means the
    # result never contains a slash, so it cannot point outside the
    # current directory. If the path is empty, fall back to `out.pdf`.
    if not args.out:
        slug = urlparse(args.url[0]).path.strip('/').rpartition('/')[2]
        args.out = ('%s.pdf' % slug) if slug else 'out.pdf'
    return args
def main():
    """ Main function of the program.
        Uses the global command-line arguments (not custom ones)!

        Downloads the slideshare page given on the command line, looks
        for every `<img class="slide_image">` element and writes one
        PDF page per slide image to the output path. """
    args = parse_args()
    res = requests.get(args.url[0])
    tree = bs4.BeautifulSoup(res.text, "html.parser")

    # The local is deliberately not called `canvas`: assigning to that
    # name inside this function would shadow the imported module.
    pdf = reportlab.pdfgen.canvas.Canvas(args.out)

    # Swap the canvas' initial dimensions once, up front. The page size
    # set here stays in effect for the following pages, so re-reading
    # `_pagesize` and swapping it inside the loop would flip the
    # orientation back and forth on every page.
    page_width, page_height = pdf._pagesize
    pdf.setPageSize((page_height, page_width))

    for img in tree.find_all("img", class_="slide_image"):
        img_url = img.attrs.get("data-full")
        if not img_url:
            # No full-resolution source on this tag: nothing to draw.
            continue
        # The image fills the swapped page (width = old height,
        # height = old width) while keeping its aspect ratio.
        pdf.drawImage(img_url, 0, 0, page_height, page_width,
                      preserveAspectRatio=True)
        pdf.showPage()
    pdf.save()
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()