This commit is contained in:
2017-04-06 23:43:29 +02:00
commit a0014a0684
5 changed files with 78 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
build/
dist/
*.egg-info/

17
README.md Normal file
View File

@@ -0,0 +1,17 @@
# slideslurp
Give slideslurp a URL to a [SlideShare](https://www.slideshare.net/)
presentation and it will give you a PDF called `out.pdf`.
I will clean this up, I promise.
## Installation
```
git clone https://github.com/hellerve/slideslurp
cd slideslurp
python setup.py install
```
<hr/>
Happy slurping!

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
requests
bs4
reportlab

28
setup.py Normal file
View File

@@ -0,0 +1,28 @@
# coding=utf-8
import os
from setuptools import find_packages
from setuptools import setup
# Package metadata for the slideslurp command-line tool.
setup(
    # The distribution name must match the repository, the package directory
    # and the console script below (it was previously misspelled 'siteslurp').
    name='slideslurp',
    version='0.0.1',
    description='SlideShare to PDF',
    author='Veit Heller',
    author_email='veit@veitheller.de',
    license='MIT License',
    url='https://github.com/hellerve/slideslurp',
    download_url='https://github.com/hellerve/slideslurp/tarball/0.0.1',
    packages=find_packages('.'),
    # Keep this list in sync with requirements.txt.
    install_requires=[
        "bs4",
        "reportlab",
        "requests",
    ],
    # Installs a `slideslurp` executable that calls slideslurp.main().
    entry_points={
        'console_scripts': [
            'slideslurp = slideslurp:main',
        ]
    },
    include_package_data=True,
)

27
slideslurp/__init__.py Normal file
View File

@@ -0,0 +1,27 @@
import sys
import bs4
import requests
from reportlab.pdfgen import canvas
def main():
    """Download a SlideShare presentation and write its slides to ``out.pdf``.

    The presentation URL is taken from the first command-line argument.
    Every ``<img class="slide_image">`` element found in the page contributes
    one PDF page, drawn from the image URL in its ``data-full`` attribute.
    Slide images that lack that attribute are skipped.

    Raises:
        SystemExit: if no URL was supplied on the command line.
        requests.HTTPError: if the page request returns an error status.
    """
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        raise SystemExit("usage: slideslurp <slideshare-url>")
    url = sys.argv[1]

    res = requests.get(url)
    # Fail loudly on 4xx/5xx rather than parsing an error page for slides.
    res.raise_for_status()
    tree = bs4.BeautifulSoup(res.text, "html.parser")

    c = canvas.Canvas('out.pdf')
    # The canvas page size is never changed here, so read it once.
    # NOTE(review): `_pagesize` is a private reportlab attribute.
    page_width, page_height = c._pagesize

    for img in tree.find_all("img", class_="slide_image"):
        img_url = img.attrs.get("data-full")
        if not img_url:
            # No full-resolution URL on this element; nothing to draw.
            continue
        # Slides are landscape: rotate the page and swap width/height so the
        # image fills the rotated page while keeping its aspect ratio.
        c.setPageRotation(90)
        c.drawImage(
            img_url, 0, 0, page_height, page_width,
            preserveAspectRatio=True,
        )
        c.showPage()

    c.save()


if __name__ == '__main__':
    main()