Source code for looker_powerpoint.tools.find_alt_text
from pptx import Presentation
from lxml import etree
import yaml
# XML namespace map: binds the "p" prefix to the PresentationML main namespace URI.
# NOTE(review): presumably passed as the `namespaces` argument to lxml XPath
# lookups elsewhere in this module — it is not referenced in the code visible here.
NS = {"p": "http://schemas.openxmlformats.org/presentationml/2006/main"}
[docs]
def get_presentation_objects_with_descriptions(pptx_path):
    """
    Collect every shape in a PowerPoint file that carries an alt-text description.

    Each shape on each slide is passed to ``extract_alt_text``; shapes for which
    it returns a falsy value are skipped.

    Args:
        pptx_path (str): The path to the PowerPoint presentation file.

    Returns:
        list: One dictionary per described shape, with the keys:

            - ``"shape_id"``: the description's ``"meta_name"`` value when the
              description is a dict containing that key, otherwise the string
              ``"<slide index>,<shape id>"``.
            - ``"shape_type"``: name of the shape's ``shape_type`` enum member,
              or ``None`` when the shape reports no type.
            - ``"shape_width"`` / ``"shape_height"``: size rounded to whole
              pixels, or ``None`` when the shape has no explicit size.
            - ``"integration"``: the value returned by ``extract_alt_text``.
            - ``"slide_number"``: zero-based index of the slide.
            - ``"shape_number"``: the shape's ``shape_id``.

        Returns an empty list if the presentation cannot be opened or contains
        no described shapes.
    """
    try:
        presentation = Presentation(pptx_path)
    except Exception as e:
        # Deliberate best-effort boundary: any failure to open the file is
        # reported and turned into the documented empty result.
        print(f"Error opening presentation: {e}")
        return []

    objects_with_descriptions = []
    for slide_index, slide in enumerate(presentation.slides):
        for shape in slide.shapes:
            description = extract_alt_text(shape)
            if not description:
                # Filter first so undescribed shapes never reach the size/type
                # lookups below (they are not part of the result anyway).
                continue

            if isinstance(description, dict) and "meta_name" in description:
                # An explicit name in the alt-text metadata wins.
                shape_id = description["meta_name"]
            else:
                # Fall back to a "<slide index>,<shape id>" composite key.
                shape_id = f"{slide_index},{shape.shape_id}"

            # shape_type can be None (e.g. graphic frames that are neither a
            # chart nor a table), so guard the .name access.
            shape_type = shape.shape_type

            objects_with_descriptions.append(
                {
                    "shape_id": shape_id,
                    "shape_type": shape_type.name if shape_type is not None else None,
                    "shape_width": _emu_to_pixels(shape.width),
                    "shape_height": _emu_to_pixels(shape.height),
                    "integration": description,
                    "slide_number": slide_index,
                    "shape_number": shape.shape_id,
                }
            )
    return objects_with_descriptions


# 914400 EMU per inch / 96 pixels per inch.
_EMU_PER_PIXEL = 9525


def _emu_to_pixels(emu):
    """Convert a length in EMU to whole pixels; ``None`` (no size set) passes through."""
    if emu is None:
        return None
    return round(emu / _EMU_PER_PIXEL)
if __name__ == "__main__":
# Example Usage
pptx_file = "p.pptx" # Replace with your file path
objects = get_presentation_objects_with_descriptions(pptx_file)
from rich import print