From a0014a0684c39699bdbe257caed2988153fc0dbb Mon Sep 17 00:00:00 2001
From: hellerve
Date: Thu, 6 Apr 2017 23:43:29 +0200
Subject: [PATCH] initial

---
 .gitignore             |  3 +++
 README.md              | 16 ++++++++++++++++
 requirements.txt       |  3 +++
 setup.py               | 28 ++++++++++++++++++++++++++++
 slideslurp/__init__.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 98 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 setup.py
 create mode 100644 slideslurp/__init__.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..25aacff
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build/
+dist/
+*.egg-info/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7ee63aa
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+# slideslurp
+
+Give slideslurp a URL to a [SlideShare](https://www.slideshare.net/)
+presentation and it will give you a PDF called `out.pdf`.
+
+I will clean this up, I promise.
+
+## Installation
+
+```
+git clone https://github.com/hellerve/slideslurp
+cd slideslurp
+python setup.py install
+```
+
+Happy slurping!
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c9933f7
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+requests
+bs4
+reportlab
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..8f90002
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+# coding=utf-8
+import os
+
+from setuptools import find_packages
+from setuptools import setup
+
+setup(
+    name = 'slideslurp',
+    version = '0.0.1',
+    description = 'SlideShare to PDF',
+    author = 'Veit Heller',
+    author_email = 'veit@veitheller.de',
+    license = 'MIT License',
+    url = 'https://github.com/hellerve/slideslurp',
+    download_url = 'https://github.com/hellerve/slideslurp/tarball/0.0.1',
+    packages = find_packages('.'),
+    install_requires=[
+        "bs4",
+        "reportlab",
+        "requests",
+    ],
+    entry_points={
+        'console_scripts': [
+            'slideslurp = slideslurp:main',
+        ]
+    },
+    include_package_data = True,
+)
diff --git a/slideslurp/__init__.py b/slideslurp/__init__.py
new file mode 100644
index 0000000..50edc5b
--- /dev/null
+++ b/slideslurp/__init__.py
@@ -0,0 +1,48 @@
+"""Download the slides of a SlideShare presentation into a single PDF."""
+import sys
+
+import bs4
+import requests
+
+from reportlab.pdfgen import canvas
+
+
+def main():
+    """Fetch the SlideShare page given as argv[1] and write it to `out.pdf`.
+
+    Every img tag with class `slide_image` on the page becomes one rotated
+    PDF page showing the image referenced by its `data-full` attribute.
+
+    Exits with a message on stderr when no URL is given or the page has no
+    slide images. Raises `requests.HTTPError` when the server answers with
+    an error status, and `KeyError` when a slide image lacks `data-full`.
+    """
+    if len(sys.argv) < 2:
+        sys.exit("usage: slideslurp SLIDESHARE_URL")
+    url = sys.argv[1]
+
+    res = requests.get(url)
+    # Stop on 4xx/5xx instead of scraping an error page for slides.
+    res.raise_for_status()
+
+    tree = bs4.BeautifulSoup(res.text, "html.parser")
+    # find_all is the bs4 name; findAll is only kept as a legacy alias.
+    slides = tree.find_all("img", class_="slide_image")
+    if not slides:
+        sys.exit("no slides found at {}".format(url))
+
+    c = canvas.Canvas('out.pdf')
+    # NOTE(review): _pagesize is a private reportlab attribute. Nothing in
+    # the loop below changes the page size, so it is read once up front.
+    page_width, page_height = c._pagesize
+    for img in slides:
+        img_url = img.attrs["data-full"]
+        # Width and height are passed swapped, presumably so the image
+        # fills the page once it is rotated by 90 degrees.
+        c.setPageRotation(90)
+        c.drawImage(img_url, 0, 0, page_height, page_width, preserveAspectRatio=True)
+        c.showPage()
+    c.save()
+
+
+if __name__ == '__main__':
+    main()