Commit 7099600c by Saroj Dhiman

Delete final_module.py

1 parent 8d656ce7
Showing with 0 additions and 73 deletions
from pdf2image import convert_from_path
from PIL import Image
import torch
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
import textract
import os
# Device string handed to `.to(...)` for both the model and the processor
# inputs in this module; hard-coded to CPU.
DEVICE = "cpu"
def generate(model, processor, img, questions):
    """Answer every entry of ``questions`` against the single image ``img``.

    The image is repeated once per question so the processor receives
    parallel ``images`` / ``text`` batches. The resulting batch is moved to
    ``DEVICE`` and the model generates at most 256 new tokens per question.

    Returns a lazy ``zip`` of ``(question, decoded_answer)`` pairs; special
    tokens are skipped when decoding.
    """
    repeated_images = [img] * len(questions)
    batch = processor(images=repeated_images, text=questions, return_tensors="pt")
    batch = batch.to(DEVICE)
    output_ids = model.generate(**batch, max_new_tokens=256)
    decoded_answers = processor.batch_decode(output_ids, skip_special_tokens=True)
    return zip(questions, decoded_answers)
def convert_pdf_to_image(filename, page_no, image_filename):
    """Render one page of a PDF to an image, save it, and return it.

    Args:
        filename: Path of the PDF to read.
        page_no: 1-based number of the page to render.
        image_filename: Path the rendered page image is saved to.

    Returns:
        The rendered page image (the object produced by
        ``convert_from_path``).

    Raises:
        ValueError: If ``page_no`` is smaller than 1 or past the last page.
    """
    out_of_range = f"Page {page_no} is out of range for the provided PDF."
    if page_no < 1:
        raise ValueError(out_of_range)

    # Ask pdf2image for just the requested page instead of rasterising the
    # whole document and throwing every other page away.
    pages = convert_from_path(filename, first_page=page_no, last_page=page_no)
    if not pages:
        # pdf2image yields no images when the requested page lies beyond the
        # end of the document.
        raise ValueError(out_of_range)

    image = pages[0]
    image.save(image_filename)
    return image
def extract_text_from_image(image):
    """Return the text that textract extracts from ``image`` as a ``str``.

    ``image`` is forwarded unchanged to ``textract.process``; the caller in
    this module passes the path of the saved page image. The bytes returned
    by textract are decoded as UTF-8.
    """
    raw_bytes = textract.process(image)
    decoded_text = raw_bytes.decode("utf-8")
    return decoded_text
def process_pdf_to_image_and_text(filename, page_no, questions, rawtext_filename,
                                  image_filename,
                                  completions_filename="output_text.txt"):
    """Run document question answering and text extraction on one PDF page.

    The page is rendered to ``image_filename``, each question is answered
    with the ``google/pix2struct-docvqa-large`` model, the question/answer
    pairs are printed, and two text files are written: the text extracted
    from the page image and the question/answer pairs.

    Args:
        filename: Path of the PDF to read.
        page_no: 1-based number of the page to process.
        questions: Sequence of question strings to ask about the page.
        rawtext_filename: Path the extracted page text is written to.
        image_filename: Path the rendered page image is saved to.
        completions_filename: Path the question/answer pairs are written to.
            Defaults to ``"output_text.txt"``, the name that was previously
            hard-coded.

    Returns:
        ``completions_filename``.

    Raises:
        ValueError: Propagated from ``convert_pdf_to_image`` when
            ``page_no`` is out of range.
    """
    # Load the Pix2Struct model and processor (done on every call).
    model_name = "google/pix2struct-docvqa-large"
    model = Pix2StructForConditionalGeneration.from_pretrained(model_name).to(DEVICE)
    processor = Pix2StructProcessor.from_pretrained(model_name)

    image = convert_pdf_to_image(filename, page_no, image_filename)

    # generate() returns a one-shot iterator; materialise it because the
    # pairs are both printed and written to disk.
    qa_pairs = list(generate(model, processor, image, questions))

    # Print the completions to the terminal
    for question, answer in qa_pairs:
        print(f"Question: {question}")
        print(f"Answer: {answer}\n")

    # Extract text from the saved page image using textract
    extracted_text = extract_text_from_image(image_filename)

    # The extracted text was decoded from UTF-8, so write it back as UTF-8
    # rather than the platform default, which may be unable to encode it.
    with open(rawtext_filename, "w", encoding="utf-8") as output_file:
        output_file.write(extracted_text)

    # Write the completions to a text file
    with open(completions_filename, "w", encoding="utf-8") as output_file:
        for question, answer in qa_pairs:
            output_file.write(f"Question: {question}\nAnswer: {answer}\n\n")

    return completions_filename
# filename = "invoice_4.pdf"
# page_no = 1 # Change to the desired page number
# questions = ["what is the name of company?",
# "what is the date of issue?",
# "What is the invoice number?",
# "What is the billed to address?",
# "what is company?"] # Add your questions
# completions_filename = process_pdf_to_image_and_text(filename, page_no, questions, "out.txt", "img.png")
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!