<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic RE: PDF extraction with checkbox field in Product Forum</title>
    <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49730#M5081</link>
    <description>&lt;P&gt;Thanks Paul for your suggestion.&lt;/P&gt;
&lt;P&gt;I tried few objects from DX and tried converting the PDF into word and excel. It is not able to extract the data and the information is read either as image or blank values as the PDF is editable form.&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Amrutha Sivarajan&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
    <pubDate>Thu, 23 Feb 2023 03:45:00 GMT</pubDate>
    <dc:creator>Amruthasimplify</dc:creator>
    <dc:date>2023-02-23T03:45:00Z</dc:date>
    <item>
      <title>PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49722#M5073</link>
      <description>Hi All,&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Can you assist me in finding a solution to extract data from a PDF without relying on external applications, as my organization requires the use of only Blue Prism approved objects and native tools?&lt;BR /&gt;I have attempted using Global Send Keys, but it doesn't seem to work well for capturing data from the PDF, which includes text boxes, multi-line fields, and checkboxes. Also, there is a possibility of rearranging the field positions in the future, making it inappropriate to use field position references for data extraction. The PDF can have more than 3 pages.&lt;BR /&gt;Is there any alternative method that allows for capturing data, including checkbox values, in a more efficient manner?&lt;/SPAN&gt;&lt;BR /&gt;Sample of the fields are shown below.&lt;BR /&gt;&lt;BR /&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="20323.png"&gt;&lt;img src="https://community.blueprism.com/t5/image/serverpage/image-id/20468iDDC0A0E6B45EED48/image-size/large?v=v2&amp;amp;px=999" role="button" title="20323.png" alt="20323.png" /&gt;&lt;/span&gt;Thanks in advance.&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Amrutha Sivarajan&lt;BR /&gt;------------------------------</description>
      <pubDate>Thu, 02 Feb 2023 04:03:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49722#M5073</guid>
      <dc:creator>Amruthasimplify</dc:creator>
      <dc:date>2023-02-02T04:03:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49723#M5074</link>
      <description>Hi Amrutha,&lt;BR /&gt;&lt;BR /&gt;Two suggestions that perhaps can help you:&lt;BR /&gt;
&lt;UL&gt;
&lt;LI&gt;I think there's an object for PDFs in DX&lt;/LI&gt;
&lt;LI&gt;Last week someone with a similar challenge was advised to try and open the pdf in Word&lt;/LI&gt;
&lt;/UL&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Happy coding!&lt;BR /&gt;---------------&lt;BR /&gt;Paul&lt;BR /&gt;Sweden&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Thu, 02 Feb 2023 07:16:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49723#M5074</guid>
      <dc:creator>PvD_SE</dc:creator>
      <dc:date>2023-02-02T07:16:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49724#M5075</link>
      <description>&lt;P&gt;Hi Paul&lt;/P&gt;
&lt;P&gt;Can you please provide the DX link of the object?&lt;/P&gt;
&lt;P&gt;Many thanks in advance&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Manish Rawat&lt;BR /&gt;Project Manager&lt;BR /&gt;Mercer&lt;BR /&gt;New Delhi&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Feb 2023 05:41:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49724#M5075</guid>
      <dc:creator>ManishRaw</dc:creator>
      <dc:date>2023-02-21T05:41:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49725#M5076</link>
      <description>&lt;P&gt;Hi Manish,&lt;/P&gt;
&lt;P&gt;I wrote '...I think...' implying I am not sure as we do not use any DX objects in our shop.&lt;/P&gt;
&lt;P&gt;That said, My '...I know...' is based on earlier posts on this subject in this community, so some googling on your side will probably unearth clues as to where to find any such DX object.&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Happy coding!&lt;BR /&gt;---------------&lt;BR /&gt;Paul&lt;BR /&gt;Sweden&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Feb 2023 11:15:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49725#M5076</guid>
      <dc:creator>PvD_SE</dc:creator>
      <dc:date>2023-02-21T11:15:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49726#M5077</link>
      <description>&lt;P&gt;Hi Amrutha,&lt;/P&gt;
&lt;P&gt;I had a process last year where we had to extract some data from PDF files. I used an alternative way to do this task: I converted the PDF files to Excel files and then, with the help of the Excel utility, read the cell values.&lt;/P&gt;
&lt;P&gt;You can also try this method.&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Sahil Chankotra&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Wed, 22 Feb 2023 08:50:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49726#M5077</guid>
      <dc:creator>Chankotra1998</dc:creator>
      <dc:date>2023-02-22T08:50:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49727#M5078</link>
      <description>&lt;P&gt;Hi Amrutha,&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;Last year, I worked on an automation where I had to update and extract data from PDF forms. I used C# code and the iTextSharp DLL for this use case.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Please find below the details -&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Inputs - filePath(Text)&lt;/P&gt;
&lt;P&gt;Outputs - outputText(Text), Success(Flag), Message(Text)&lt;/P&gt;
&lt;P&gt;Code -&lt;/P&gt;
&lt;P&gt;Success = true;&lt;BR /&gt;Message = "";&lt;BR /&gt;outputText = "";&lt;BR /&gt;StringBuilder text = new StringBuilder();&lt;BR /&gt;PdfReader pdfReader = null;&lt;/P&gt;
&lt;P&gt;var pdf_filename = filePath;&lt;BR /&gt;try{&lt;BR /&gt;pdfReader = new PdfReader(pdf_filename);&lt;BR /&gt;{&lt;BR /&gt;&amp;nbsp; &amp;nbsp; var fields = pdfReader.AcroFields.Fields;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; &amp;nbsp; foreach (var key in fields.Keys)&lt;BR /&gt;&amp;nbsp; &amp;nbsp; {&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; var value = pdfReader.AcroFields.GetField(key);&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; text.Append(key+"----"+value+";");&lt;BR /&gt;&amp;nbsp; &amp;nbsp; }&lt;BR /&gt;outputText = text.ToString();&lt;BR /&gt;}&lt;BR /&gt;}&lt;BR /&gt;catch(Exception exx) {&lt;BR /&gt;&amp;nbsp; &amp;nbsp; Success = false;&lt;BR /&gt;&amp;nbsp; &amp;nbsp; Message = exx.Message;&lt;BR /&gt;}&lt;BR /&gt;finally {&lt;BR /&gt;&amp;nbsp; &amp;nbsp; if (pdfReader != null)&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; pdfReader.Close();&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&lt;BR /&gt;}&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;You will get the details in text data item and after that use the split text with character ;( as mentioned in code - text.Append(key+"----"+value+&lt;STRONG&gt;";&lt;/STRONG&gt;")).&lt;/P&gt;
&lt;P&gt;Also, you need to import the dlls in code option -&amp;nbsp;&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;C:\Program Files\Blue Prism Limited\Blue Prism Automate\itextsharp.dll&lt;/LI&gt;
&lt;LI&gt;C:\Program Files\Blue Prism Limited\Blue Prism Automate\BouncyCastle.Crypto.dll&lt;/LI&gt;
&lt;/OL&gt;
&lt;P&gt;Please let me know if you need any additional information.&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;KirtiMaan Talwar&lt;BR /&gt;Consultant&lt;BR /&gt;Deloitte&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Wed, 22 Feb 2023 13:34:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49727#M5078</guid>
      <dc:creator>kirtimaantalwar</dc:creator>
      <dc:date>2023-02-22T13:34:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49728#M5079</link>
      <description>&lt;P&gt;Thank you Sahil.&lt;/P&gt;
&lt;P&gt;I tried your approach; unfortunately, Excel is reading some fields as images and it's not returning structured data. I'm getting a mix of image and text values from the PDF-to-Excel conversion.&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Amrutha Sivarajan&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Thu, 23 Feb 2023 03:32:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49728#M5079</guid>
      <dc:creator>Amruthasimplify</dc:creator>
      <dc:date>2023-02-23T03:32:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49729#M5080</link>
      <description>&lt;P&gt;Thanks a lot for your detailed explanation.&amp;nbsp; I truly appreciate your effort.&lt;/P&gt;
&lt;P&gt;I would like to try out the method you have suggested. If you don't mind, could you share with me the authenticated URLs for downloading the DLLs?&lt;/P&gt;
&lt;P&gt;I had tried using BP objects from the Digital Exchange and worked on a few Python scripts to read the PDF. Since the PDF is editable, they are unable to read the field values and can read only the field labels.&amp;nbsp;&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Amrutha Sivarajan&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Thu, 23 Feb 2023 03:43:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49729#M5080</guid>
      <dc:creator>Amruthasimplify</dc:creator>
      <dc:date>2023-02-23T03:43:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49730#M5081</link>
      <description>&lt;P&gt;Thanks Paul for your suggestion.&lt;/P&gt;
&lt;P&gt;I tried few objects from DX and tried converting the PDF into word and excel. It is not able to extract the data and the information is read either as image or blank values as the PDF is editable form.&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Amrutha Sivarajan&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Thu, 23 Feb 2023 03:45:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49730#M5081</guid>
      <dc:creator>Amruthasimplify</dc:creator>
      <dc:date>2023-02-23T03:45:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49731#M5082</link>
      <description>&lt;P&gt;Hi &lt;A class="user-content-mention" data-sign="@" data-contactkey="c3dc92f6-1f5e-42e9-bc81-9224c9d0c7a6" data-tag-text="@Amrutha Sivarajan" href="https://community.blueprism.com/network/profile?UserKey=c3dc92f6-1f5e-42e9-bc81-9224c9d0c7a6" data-itemmentionkey="e96f02ca-b4dc-493a-b7b0-91e458098c85"&gt;@Amrutha Sivarajan&lt;/A&gt; ,&lt;/P&gt;
&lt;P&gt;Did you try opening the pdf file in chrome or any other browser? Opening a file using a browser sometimes helps in spying the relevant elements and you can try reading the checkbox values.&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Manpreet Kaur&lt;BR /&gt;Manager&lt;BR /&gt;Deloitte&lt;BR /&gt;*If you find this post helpful mark it as Best Answer&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Thu, 23 Feb 2023 07:55:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49731#M5082</guid>
      <dc:creator>ManpreetKaur1</dc:creator>
      <dc:date>2023-02-23T07:55:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49732#M5083</link>
      <description>&lt;P&gt;Hi Amrutha,&lt;/P&gt;
&lt;P&gt;Please find the links -&lt;/P&gt;
&lt;P&gt;-&amp;gt; &lt;A href="https://www.dllme.com/dll/files/itextsharp_dll.html" target="test_blank"&gt;https://www.dllme.com/dll/files/itextsharp_dll.html&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;-&amp;gt; &lt;A href="https://www.dllme.com/dll/files/bouncycastle_crypto_dll.html" target="test_blank"&gt;https://www.dllme.com/dll/files/bouncycastle_crypto_dll.html&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;PFB the example using C# code -&lt;/P&gt;
&lt;P&gt;PDF Form -&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="20319.png"&gt;&lt;img src="https://community.blueprism.com/t5/image/serverpage/image-id/20466iC7BAFCF0BDB746EB/image-size/large?v=v2&amp;amp;px=999" role="button" title="20319.png" alt="20319.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;We'll get the data in text data item and then use split action mentioned in previous step.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PDF Data extracted -&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="20320.png"&gt;&lt;img src="https://community.blueprism.com/t5/image/serverpage/image-id/20465iF9DCD28582C01CB8/image-size/large?v=v2&amp;amp;px=999" role="button" title="20320.png" alt="20320.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;Please let me know if you have any other issues.&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;KirtiMaan Talwar&lt;BR /&gt;Consultant&lt;BR /&gt;Deloitte&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Thu, 23 Feb 2023 08:55:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49732#M5083</guid>
      <dc:creator>kirtimaantalwar</dc:creator>
      <dc:date>2023-02-23T08:55:00Z</dc:date>
    </item>
    <item>
      <title>RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49733#M5084</link>
      <description>&lt;P&gt;Hi Amrutha,&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;Once you fetch the data after that you can use Excel macro to read the exact value&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;BR /&gt;&lt;BR /&gt;------------------------------&lt;BR /&gt;Sahil Chankotra&lt;BR /&gt;------------------------------&lt;BR /&gt;</description>
      <pubDate>Fri, 24 Feb 2023 14:55:00 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/49733#M5084</guid>
      <dc:creator>Chankotra1998</dc:creator>
      <dc:date>2023-02-24T14:55:00Z</dc:date>
    </item>
    <item>
      <title>Re: RE: PDF extraction with checkbox field</title>
      <link>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/110893#M49966</link>
      <description>&lt;P&gt;Hello KirtiMaan,&lt;/P&gt;&lt;P&gt;I am receiving errors&amp;nbsp;&lt;/P&gt;&lt;P&gt;"StringBuilder" and "PdfReader" could not be found (are you missing a using directive or an assembly reference?)&lt;/P&gt;&lt;P&gt;Does this have to deal with the itextsharp version? as there are many versions on the site you suggested&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Ndauti_0-1716727359586.png" style="width: 400px;"&gt;&lt;img src="https://community.blueprism.com/t5/image/serverpage/image-id/38336iB05D22918169A9A0/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="Ndauti_0-1716727359586.png" alt="Ndauti_0-1716727359586.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Thank you&lt;/P&gt;&lt;P&gt;Tim&lt;/P&gt;</description>
      <pubDate>Sun, 26 May 2024 12:44:41 GMT</pubDate>
      <guid>https://community.blueprism.com/t5/Product-Forum/PDF-extraction-with-checkbox-field/m-p/110893#M49966</guid>
      <dc:creator>Ndauti</dc:creator>
      <dc:date>2024-05-26T12:44:41Z</dc:date>
    </item>
  </channel>
</rss>

