From e8275c472b2f114ad435692236b8a6f46e4169b0 Mon Sep 17 00:00:00 2001
From: "Thomas \"Cakeisalie5\" Touhey" <thomas@touhey.fr>
Date: Fri, 5 May 2017 16:40:01 +0200
Subject: [PATCH 1/3] Better file name detection.

---
 slideslurp/__init__.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/slideslurp/__init__.py b/slideslurp/__init__.py
index d76d8a7..c98e58f 100644
--- a/slideslurp/__init__.py
+++ b/slideslurp/__init__.py
@@ -3,6 +3,7 @@ import sys
 
 import bs4
 import requests
+from urllib.parse import urlparse
 
 from reportlab.pdfgen import canvas
 
@@ -13,11 +14,14 @@ def parse_args():
     parser = argparse.ArgumentParser(description=descr)
     parser.add_argument('url', metavar='url', nargs=1,
                         help='the URL to slurp')
-    parser.add_argument('--output', '-o', default='out.pdf',
+    parser.add_argument('--output', '-o',  default=None,
                         help='the file to write to (default: out.pdf)')
 
-    return parser.parse_args()
-
+    args = parser.parse_args()
+    if not args.output:
+        path = urlparse(args.url[0]).path
+        args.output = '%s.pdf'%path[path.find('/', 1) + 1:]
+    return args
 
 def main():
     args = parse_args()
@@ -31,7 +35,8 @@ def main():
         img_url = img.attrs["data-full"]
         page_width, page_height = c._pagesize
         c.setPageRotation(90)
-        c.drawImage(img_url, 0, 0, page_height, page_width, preserveAspectRatio=True)
+        c.drawImage(img_url, 0, 0, page_height, page_width,
+                    preserveAspectRatio=True)
         c.showPage()
     c.save()
 

From 6e9cc213ad52601f9520304adcd0bd8dc1280230 Mon Sep 17 00:00:00 2001
From: "Thomas \"Cakeisalie5\" Touhey" <thomas@touhey.fr>
Date: Fri, 5 May 2017 17:04:03 +0200
Subject: [PATCH 2/3] Editorconfig, lintian, slightly better output.

---
 .editorconfig          | 12 ++++++++++++
 slideslurp/__init__.py | 37 ++++++++++++++++++-------------------
 2 files changed, 30 insertions(+), 19 deletions(-)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..e919e4c
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+# Editor config -- see http://editorconfig.org/
+root = true
+
+# Unix-style newlines
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# 4 space indentation
+[*.py]
+indent_style = space
+indent_size = 4
diff --git a/slideslurp/__init__.py b/slideslurp/__init__.py
index c98e58f..a36cb74 100644
--- a/slideslurp/__init__.py
+++ b/slideslurp/__init__.py
@@ -1,45 +1,44 @@
+""" Slideslurp scrapes a PDF from SlideShare.net (LinkedIn). """
+
 import argparse
 import sys
-
+import urllib.parse
 import bs4
 import requests
-from urllib.parse import urlparse
-
-from reportlab.pdfgen import canvas
-
+import reportlab.pdfgen.canvas
 
 def parse_args():
+    """ Parse the command-line arguments. """
     descr = "Generate PDFs from Slideshare presentations"
 
     parser = argparse.ArgumentParser(description=descr)
     parser.add_argument('url', metavar='url', nargs=1,
                         help='the URL to slurp')
-    parser.add_argument('--output', '-o',  default=None,
-                        help='the file to write to (default: out.pdf)')
+    parser.add_argument('--out', '--output', '-o', default=None,
+                        help='force the output file name')
 
     args = parser.parse_args()
-    if not args.output:
-        path = urlparse(args.url[0]).path
-        args.output = '%s.pdf'%path[path.find('/', 1) + 1:]
+    if not args.out:
+        path = urllib.parse.urlparse(args.url[0]).path
+        args.out = '%s.pdf'%path[path.find('/', 1) + 1:]
     return args
 
 def main():
+    """ Main function. """
     args = parse_args()
 
     res = requests.get(args.url[0])
-
     tree = bs4.BeautifulSoup(res.text, "html.parser")
-    c = canvas.Canvas(args.output)
+    canvas = reportlab.pdfgen.canvas.Canvas(args.out)
 
     for img in tree.findAll("img", class_="slide_image"):
         img_url = img.attrs["data-full"]
-        page_width, page_height = c._pagesize
-        c.setPageRotation(90)
-        c.drawImage(img_url, 0, 0, page_height, page_width,
-                    preserveAspectRatio=True)
-        c.showPage()
-    c.save()
-
+        page_width, page_height = canvas._pagesize
+        canvas.drawImage(img_url, 0, 0, page_height, page_width,
+                         preserveAspectRatio=True)
+        canvas.setPageSize((page_height, page_width))
+        canvas.showPage()
+    canvas.save()
 
 if __name__ == '__main__':
     main()

From 6860f7bb312d56cc4f2965ef455112ae510b25cf Mon Sep 17 00:00:00 2001
From: "Thomas \"Cakeisalie5\" Touhey" <thomas@touhey.fr>
Date: Sat, 6 May 2017 16:32:06 +0200
Subject: [PATCH 3/3] Enhanced docstrings, commented an unclear bit of my
 contribution.

---
 slideslurp/__init__.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/slideslurp/__init__.py b/slideslurp/__init__.py
index a36cb74..4166888 100644
--- a/slideslurp/__init__.py
+++ b/slideslurp/__init__.py
@@ -8,7 +8,10 @@ import requests
 import reportlab.pdfgen.canvas
 
 def parse_args():
-    """ Parse the command-line arguments. """
+    """ Parse the command-line arguments and return them as an object.
+    The returned object has the following properties:
+    - `url` (list with one string): the URL of the slideshare to download;
+    - `out` (string): the output file path. """
     descr = "Generate PDFs from Slideshare presentations"
 
     parser = argparse.ArgumentParser(description=descr)
@@ -18,13 +21,22 @@ def parse_args():
                         help='force the output file name')
 
     args = parser.parse_args()
+
+    # If the user doesn't specify an output filename, we'll deduce
+    # one from the URL. Here's an example URL:
+    # https://www.slideshare.net/FrisodeJong/iso-20022-for-dummies
+    #
+    # We're taking the path (`/FrisodeJong/iso-20022-for-dummies`),
+    # isolating the part after the second slash (the first slash
+    # always being at the first position), and appending `.pdf` to it.
     if not args.out:
         path = urllib.parse.urlparse(args.url[0]).path
         args.out = '%s.pdf'%path[path.find('/', 1) + 1:]
     return args
 
 def main():
-    """ Main function. """
+    """ Main function of the program.
+    Uses the global command-line arguments (not custom ones)! """
     args = parse_args()
 
     res = requests.get(args.url[0])