Merge pull request #1 from cakeisalie5/master

Corrected one or two things.
2017-05-06 17:07:13 +02:00
parent f38a6fb596 6860f7bb31
commit 2b30157636
2 changed files with 43 additions and 15 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,12 @@
 # Editor config -- see http://editorconfig.org/
 root = true
 # Unix-style newlines
 [*]
 end_of_line = lf
 insert_final_newline = true
 # 4 space indentation
 [*.py]
 indent_style = space
 indent_size = 4
--- a/slideslurp/init.py
+++ b/slideslurp/init.py
@@ -1,40 +1,56 @@
 """ Slideslurp scrapes a PDF from SlideShare.net (LinkedIn). """
 import argparse
 import sys
-
+import urllib
 import bs4
 import requests
-
+import reportlab.pdfgen.canvas
 from reportlab.pdfgen import canvas
 def parse_args():
    """ Parse the command-line arguments and return them as an object.
    The returned object has the following properties:
    - `url` (string): the URL of the slideshare to download;
    - `out` (string): the output file path. """
    descr = "Generate PDFs from Slideshare presentations"
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('url', metavar='url', nargs=1,
                        help='the URL to slurp')
-    parser.add_argument('--output', '-o', default='out.pdf',
+    parser.add_argument('--out', '--output', '-o', default=None,
-                        help='the file to write to (default: out.pdf)')
+                        help='force the output file name')
-    return parser.parse_args()
+    args = parser.parse_args()
    # If the user doesn't specify an output filename, we'll deduce
    # one from the URL. Here's a URL example:
    # https://www.slideshare.net/FrisodeJong/iso-20022-for-dummies
    #
    # We're taking the path (`/FrisodeJong/iso-20022-for-dummies`),
    # isolating the part after the second slash (the first slash
    # always being at the first position), and appending `.pdf` to it.
    if not args.out:
        path = urllib.parse.urlparse(args.url[0]).path
        args.out = '%s.pdf'%path[path.find('/', 1) + 1:]
    return args
 def main():
    """ Main function of the program.
    Uses the global command-line arguments (not custom ones)! """
    args = parse_args()
    res = requests.get(args.url[0])
    tree = bs4.BeautifulSoup(res.text, "html.parser")
-    c = canvas.Canvas(args.output)
+    canvas = reportlab.pdfgen.canvas.Canvas(args.out)
    for img in tree.findAll("img", class_="slide_image"):
        img_url = img.attrs["data-full"]
-        page_width, page_height = c._pagesize
+        page_width, page_height = canvas._pagesize
-        c.setPageRotation(90)
+        canvas.drawImage(img_url, 0, 0, page_height, page_width,
-        c.drawImage(img_url, 0, 0, page_height, page_width, preserveAspectRatio=True)
+                         preserveAspectRatio=True)
-        c.showPage()
+        canvas.setPageSize((page_height, page_width))
-    c.save()
+        canvas.showPage()
-
+    canvas.save()
 if __name__ == '__main__':
    main()