<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: VBO/Assets to extract data from scanned PDF invoice in Product Forum</title>
    <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113827#M50914</link>
    <description>&lt;P&gt;The solution, in my experience, very much depends on the PDF. Are we talking about 1. well-structured PDF forms with good accessibility functionality,&amp;nbsp; 2. PDF documents that always have the same structure when copied to the clipboard or exported to a text or XML format,&amp;nbsp; or 3. scanned documents?&lt;BR /&gt;For 1. you might be surprised how well the UIA interface within Blue Prism works with the document if it is made for accessibility.&amp;nbsp; For 2. you might get away with an export and an XML or text parsing solution.&amp;nbsp; For 3. OCR technologies are the way to go, and if there is a large variance, LLMs might be an addition.&lt;/P&gt;</description>
    <pubDate>Thu, 12 Sep 2024 09:53:17 GMT</pubDate>
    <dc:creator>Denis__Dennehy</dc:creator>
    <dc:date>2024-09-12T09:53:17Z</dc:date>
    <item>
      <title>VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113697#M50872</link>
      <description>&lt;P&gt;Hello team,&lt;/P&gt;&lt;P&gt;What are some of the best VBO/Assets you have used to&amp;nbsp;extract data from scanned PDF invoices?&lt;/P&gt;&lt;P&gt;I am not considering a full-scale IDP engine implementation here; instead, I am looking for a quick solution using any DX assets or open library that works reliably on both digital and scanned documents.&lt;/P&gt;</description>
      <pubDate>Tue, 10 Sep 2024 09:37:09 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113697#M50872</guid>
      <dc:creator>Tejaskumar_Darji</dc:creator>
      <dc:date>2024-09-10T09:37:09Z</dc:date>
    </item>
    <item>
      <title>Re: VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113715#M50877</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;DIV&gt;I found a few assets on the DX portal that can be used:&lt;/DIV&gt;&lt;UL&gt;&lt;LI&gt;&lt;A href="https://digitalexchange.blueprism.com/dx/entry/125953/solution/pdfco-2" target="_self"&gt;Process for PDF.co - 1.0.0&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://digitalexchange.blueprism.com/dx/entry/7742/solution/thedigitalworkers" target="_self"&gt;Process for BOT AI ML DocuBOT&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://digitalexchange.blueprism.com/dx/entry/3439/solution/pdf-management-2" target="_self"&gt;Function for PDF Management - 1.1.0&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://digitalexchange.blueprism.com/dx/entry/117528/solution/sabancidx---daily-pdf-actions" target="_blank" rel="noopener noreferrer"&gt;Daily PDF Actions&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://digitalexchange.blueprism.com/dx/entry/58413/solution/utility---pdf" target="_blank" rel="noopener noreferrer"&gt;Function for Utility - PDF&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://digitalexchange.blueprism.com/dx/entry/3470/solution/abbyy-flexicapture-connector" target="_self"&gt;Process for ABBYY FlexiCapture Connector&lt;/A&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;DIV&gt;We also have some KB's related to PDF that might be useful:&lt;/DIV&gt;&lt;UL&gt;&lt;LI&gt;&lt;A href="https://support.blueprism.com/en/support/solutions/articles/7000076870-how-can-i-work-with-adobe-acrobat-pdf-documents-when-using-blue-prism-enterprise-" target="_self"&gt;How can I work with Adobe Acrobat PDF documents when using Blue Prism Enterprise?&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://support.blueprism.com/en/support/solutions/articles/7000077392" target="_blank" rel="noopener noreferrer"&gt;How can I extract data from a PDF document which is contained in a browser window?&lt;/A&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;EM&gt;&lt;SPAN&gt;Refer to the '&lt;STRONG&gt;&lt;A 
href="https://university.blueprism.com/courses?search=Blue%20Prism%C2%AE%20Interfacing%20With%20PDF%20Documents%20(EN)" target="_blank" rel="noopener"&gt;Interfacing with PDF Documents&lt;/A&gt;&lt;/STRONG&gt;' training course in the Blue Prism&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://university.blueprism.com/" target="_blank" rel="noopener"&gt;&lt;SPAN&gt;University&lt;/SPAN&gt;&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;for additional information on interacting with PDF data.&lt;/SPAN&gt;&lt;/EM&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 10 Sep 2024 16:02:50 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113715#M50877</guid>
      <dc:creator>Brigianakopec</dc:creator>
      <dc:date>2024-09-10T16:02:50Z</dc:date>
    </item>
    <item>
      <title>Re: VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113747#M50890</link>
      <description>&lt;P&gt;We are using the built-in OCR reader in our invoice process.&lt;/P&gt;&lt;P&gt;We open the invoice PDF in an MS Edge window. The MS Edge window is spied with Region Mode, and then we can use a Read stage with "Read Text with OCR".&lt;/P&gt;</description>
      <pubDate>Wed, 11 Sep 2024 08:44:31 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113747#M50890</guid>
      <dc:creator>jktalgo</dc:creator>
      <dc:date>2024-09-11T08:44:31Z</dc:date>
    </item>
    <item>
      <title>Re: VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113827#M50914</link>
      <description>&lt;P&gt;The solution, in my experience, very much depends on the PDF. Are we talking about 1. well-structured PDF forms with good accessibility functionality,&amp;nbsp; 2. PDF documents that always have the same structure when copied to the clipboard or exported to a text or XML format,&amp;nbsp; or 3. scanned documents?&lt;BR /&gt;For 1. you might be surprised how well the UIA interface within Blue Prism works with the document if it is made for accessibility.&amp;nbsp; For 2. you might get away with an export and an XML or text parsing solution.&amp;nbsp; For 3. OCR technologies are the way to go, and if there is a large variance, LLMs might be an addition.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2024 09:53:17 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113827#M50914</guid>
      <dc:creator>Denis__Dennehy</dc:creator>
      <dc:date>2024-09-12T09:53:17Z</dc:date>
    </item>
    <item>
      <title>Re: VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113831#M50917</link>
      <description>&lt;P&gt;How do you handle the zoom level? Also, do your invoices all have the same format, or do the layouts vary?&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2024 10:11:25 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113831#M50917</guid>
      <dc:creator>Tejaskumar_Darji</dc:creator>
      <dc:date>2024-09-12T10:11:25Z</dc:date>
    </item>
    <item>
      <title>Re: VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113832#M50918</link>
      <description>&lt;P&gt;Most of these are third-party paid services or do not work with scanned PDFs.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2024 10:12:39 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113832#M50918</guid>
      <dc:creator>Tejaskumar_Darji</dc:creator>
      <dc:date>2024-09-12T10:12:39Z</dc:date>
    </item>
    <item>
      <title>Re: VBO/Assets to extract data from scanned PDF invoice</title>
      <link>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113834#M50919</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.blueprism.com/t5/user/viewprofilepage/user-id/2159"&gt;@Tejaskumar_Darji&lt;/a&gt;&amp;nbsp; - We have used Python libraries to get content from scanned PDFs.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2024 10:14:25 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/VBO-Assets-to-extract-data-from-scanned-PDF-invoice/m-p/113834#M50919</guid>
      <dc:creator>Neel1</dc:creator>
      <dc:date>2024-09-12T10:14:25Z</dc:date>
    </item>
  </channel>
</rss>

