utils.py

"""
Utility methods for the Shopping Cart app
"""

from django.conf import settings
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams, LTFigure, LTTextBox, LTTextLine
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser

from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers


def is_shopping_cart_enabled():
    """
    Utility method to check the various configuration to verify that
    all of the settings have been enabled
    """
    enable_paid_course_registration = configuration_helpers.get_value(
        'ENABLE_PAID_COURSE_REGISTRATION',
        settings.FEATURES.get('ENABLE_PAID_COURSE_REGISTRATION')
    )

    enable_shopping_cart = configuration_helpers.get_value(
        'ENABLE_SHOPPING_CART',
        settings.FEATURES.get('ENABLE_SHOPPING_CART')
    )

    return enable_paid_course_registration and enable_shopping_cart


def parse_pages(pdf_buffer, password):
    """
    With an PDF buffer object, get the pages, parse each one, and return the entire pdf text
    """
    # Create a PDF parser object associated with the file object.
    parser = PDFParser(pdf_buffer)
    # Create a PDF document object that stores the document structure.
    # Supply the password for initialization.
    document = PDFDocument(parser, password)

    resource_manager = PDFResourceManager()
    la_params = LAParams()
    device = PDFPageAggregator(resource_manager, laparams=la_params)
    interpreter = PDFPageInterpreter(resource_manager, device)

    text_content = []  # a list of strings, each representing text collected from each page of the doc
    for page in PDFPage.create_pages(document):
        interpreter.process_page(page)
        # receive the LTPage object for this page
        layout = device.get_result()
        # layout is an LTPage object which may contain
        #  child objects like LTTextBox, LTFigure, LTImage, etc.
        text_content.append(parse_lt_objects(layout._objs))  # pylint: disable=protected-access

    return text_content


def parse_lt_objects(lt_objects):
    """
    Iterate through the list of LT* objects and capture the text data contained in each object
    """
    text_content = []

    for lt_object in lt_objects:
        if isinstance(lt_object, LTTextBox) or isinstance(lt_object, LTTextLine):
            # text
            text_content.append(lt_object.get_text().encode('utf-8'))
        elif isinstance(lt_object, LTFigure):
            # LTFigure objects are containers for other LT* objects, so recurse through the children
            text_content.append(parse_lt_objects(lt_object._objs))  # pylint: disable=protected-access

    return '\n'.join(text_content)