Click here to Skip to main content
15,899,604 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
Hello

With this code I get plain text from a PDF document.

I would like to know how to extract the text with its formatting preserved (bullet points, line breaks, etc.).


Thanks a lot.

Sorry for my English


using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System.Text;
namespace Sample_2012_Web_App
{
    /// <summary>
    /// Code-behind for a page that lets the user upload a PDF and shows the
    /// text extracted from it in a label.
    /// </summary>
    public partial class WebForm1 : System.Web.UI.Page
    {
        /// <summary>
        /// Page load handler; intentionally empty.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Extracts the text of every page of the uploaded PDF and displays it
        /// in <c>lblPdfContent</c>, with line breaks preserved as <c>&lt;br/&gt;</c>.
        /// Does nothing when no file was uploaded.
        /// </summary>
        /// <param name="sender">The control that raised the click event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void btnShowContent_Click(object sender, EventArgs e)
        {
            if (!PDFFileUpload.HasFile)
            {
                return;
            }

            StringBuilder pdfContent = new StringBuilder();

            // Read the upload straight from memory. Saving it under the
            // client-supplied name inside the web application folder would let
            // a user drop arbitrary files into the site and is not needed just
            // to parse the document.
            PdfReader reader = new PdfReader(PDFFileUpload.FileBytes);
            try
            {
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    // A fresh strategy per page so no text carries over between pages.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                    string pageText = PdfTextExtractor.GetTextFromPage(reader, page, strategy);

                    // The extracted string is already a .NET (UTF-16) string; pushing it
                    // through Encoding.Default would replace every character outside the
                    // server's ANSI code page with '?', so no re-encoding is done here.
                    pdfContent.Append(ToHtml(pageText));

                    // Separate pages visually.
                    pdfContent.Append("<br/>");
                }
            }
            finally
            {
                // Close once, after all pages were read (closing inside the loop
                // would invalidate the reader for every page after the first).
                reader.Close();
            }

            lblPdfContent.Text = pdfContent.ToString();
        }

        /// <summary>
        /// Converts plain extracted text into HTML that is safe to assign to a
        /// label: markup characters are encoded and line breaks become
        /// <c>&lt;br/&gt;</c> so the browser does not collapse them.
        /// </summary>
        /// <param name="text">Plain text extracted from a PDF page; may be null or empty.</param>
        /// <returns>The HTML-encoded text, or an empty string for null/empty input.</returns>
        private static string ToHtml(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            // Encode first so the <br/> tags inserted below are not escaped.
            string encoded = HttpUtility.HtmlEncode(text);

            // Normalize all newline flavours before turning them into HTML breaks.
            return encoded
                .Replace("\r\n", "\n")
                .Replace("\r", "\n")
                .Replace("\n", "<br/>");
        }
    }
}
Posted
Comments
ZurdoDev 8-May-14 14:01pm    
I would first make sure to go through iTextSharp's documentation.
Jagbir Saini 9-May-14 5:30am    
You can define the formatting with inline styles on HTML tags and render those tags using the HTMLWorker class in iTextSharp.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900