41 lines
995 B
Python
41 lines
995 B
Python
import argparse
|
|
import sys
|
|
|
|
import bs4
|
|
import requests
|
|
|
|
from reportlab.pdfgen import canvas
|
|
|
|
|
|
def parse_args():
|
|
descr = "Generate PDFs from Slideshare presentations"
|
|
|
|
parser = argparse.ArgumentParser(description=descr)
|
|
parser.add_argument('url', metavar='url', nargs=1,
|
|
help='the URL to slurp')
|
|
parser.add_argument('--output', '-o', default='out.pdf',
|
|
help='the file to write to (default: out.pdf)')
|
|
|
|
return parser.parse_args()
|
|
|
|
|
|
def main():
|
|
args = parse_args()
|
|
|
|
res = requests.get(args.url[0])
|
|
|
|
tree = bs4.BeautifulSoup(res.text, "html.parser")
|
|
c = canvas.Canvas(args.file)
|
|
|
|
for img in tree.findAll("img", class_="slide_image"):
|
|
img_url = img.attrs["data-full"]
|
|
page_width, page_height = c._pagesize
|
|
c.setPageRotation(90)
|
|
c.drawImage(img_url, 0, 0, page_height, page_width, preserveAspectRatio=True)
|
|
c.showPage()
|
|
c.save()
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|