"""
Utility methods for the Shopping Cart app
"""

from django.conf import settings
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams, LTFigure, LTTextBox, LTTextLine
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser

from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers


def is_shopping_cart_enabled():
    """
    Utility method to check the various configuration to verify that
    all of the settings have been enabled

    Both 'ENABLE_PAID_COURSE_REGISTRATION' and 'ENABLE_SHOPPING_CART' are
    looked up through configuration_helpers.get_value, with the matching
    settings.FEATURES entry passed as the second argument (presumably the
    fallback used when no override is configured -- confirm against
    configuration_helpers).

    Returns the result of `and`-ing the two looked-up values, so it is
    truthy only when both flags are truthy. The value is not coerced to bool.
    """
    enable_paid_course_registration = configuration_helpers.get_value(
        'ENABLE_PAID_COURSE_REGISTRATION',
        settings.FEATURES.get('ENABLE_PAID_COURSE_REGISTRATION')
    )

    enable_shopping_cart = configuration_helpers.get_value(
        'ENABLE_SHOPPING_CART',
        settings.FEATURES.get('ENABLE_SHOPPING_CART')
    )

    return enable_paid_course_registration and enable_shopping_cart


def parse_pages(pdf_buffer, password):
    """
    Parse every page of the PDF held in `pdf_buffer` and collect its text.

    Arguments:
        pdf_buffer: the PDF buffer object handed to PDFParser.
        password: the password supplied to PDFDocument on initialization.

    Returns:
        list: one entry per page yielded by PDFPage.create_pages, each entry
        being the value parse_lt_objects returns for that page's layout objects.
    """
    # The document object stores the document structure; it is built from a
    # parser bound to the buffer and initialized with the password.
    document = PDFDocument(PDFParser(pdf_buffer), password)

    manager = PDFResourceManager()
    aggregator = PDFPageAggregator(manager, laparams=LAParams())
    page_interpreter = PDFPageInterpreter(manager, aggregator)

    def extract_page_text(page):
        """Process a single page and return the text of its layout objects."""
        page_interpreter.process_page(page)
        # get_result() hands back the LTPage for the page just processed; it may
        # contain child objects like LTTextBox, LTFigure, LTImage, etc.
        page_layout = aggregator.get_result()
        return parse_lt_objects(page_layout._objs)  # pylint: disable=protected-access

    return [extract_page_text(page) for page in PDFPage.create_pages(document)]


def parse_lt_objects(lt_objects):
    """
    Iterate through the list of LT* objects and capture the text data contained in each object

    Arguments:
        lt_objects: iterable of layout (LT*) objects.

    Returns:
        The text of every LTTextBox / LTTextLine, plus the recursively collected
        text of every LTFigure, joined with newlines. Objects of any other type
        are skipped. An empty input yields an empty string.
    """
    text_content = []

    for lt_object in lt_objects:
        if isinstance(lt_object, (LTTextBox, LTTextLine)):
            # Keep the text exactly as get_text() returns it instead of encoding
            # it to bytes: the LTFigure branch below appends the str produced by
            # the recursive call, and '\n'.join() raises TypeError when asked to
            # join bytes items.
            text_content.append(lt_object.get_text())
        elif isinstance(lt_object, LTFigure):
            # LTFigure objects are containers for other LT* objects, so recurse through the children
            text_content.append(parse_lt_objects(lt_object._objs))  # pylint: disable=protected-access

    return '\n'.join(text_content)