Click here to Skip to main content
15,891,762 members
Please Sign up or sign in to vote.
2.33/5 (3 votes)
See more:
I'm reading a PDF file using VB.NET (with the iTextSharp library). How do I remove the hyperlinks and write the edited PDF file back out?

This is the piece of code where I'm reading the PDF file:
C#
// Scans every page of the PDF at pdfFilePath and records the target of each
// link annotation whose action is a URI action.
string linkTextBuilder = "";
string linkReferenceBuilder = "";

PdfDictionary PageDictionary = default(PdfDictionary);
PdfArray Annots = default(PdfArray);
PdfReader reader = new PdfReader(pdfFilePath);

// Loop through each page (the loop starts at 1, matching GetPageN's numbering here)
for (int i = 1; i <= reader.NumberOfPages; i++)
{
    // Get the current page
    PageDictionary = reader.GetPageN(i);

    // Get all of the annotations for the current page
    Annots = PageDictionary.GetAsArray(PdfName.ANNOTS);

    // Make sure we have something. Size is the element count of the array;
    // Length is inherited from PdfObject and does not count elements.
    if ((Annots == null) || (Annots.Size == 0))
    {
        continue;
    }

    // Loop through each annotation
    foreach (PdfObject A in Annots.ArrayList)
    {
        // Resolve the (possibly indirect) entry; skip anything that is not a dictionary
        PdfDictionary AnnotationDictionary = PdfReader.GetPdfObject(A) as PdfDictionary;
        if (AnnotationDictionary == null)
        {
            continue;
        }

        // Make sure this annotation is a link. Calling Equals on the constant
        // keeps the test safe when the SUBTYPE entry is missing.
        if (!PdfName.LINK.Equals(AnnotationDictionary.Get(PdfName.SUBTYPE)))
        {
            continue;
        }

        // Make sure this annotation has an ACTION dictionary
        PdfDictionary AnnotationAction = AnnotationDictionary.GetAsDict(PdfName.A);
        if (AnnotationAction == null)
        {
            continue;
        }

        // Only URI actions carry a hyperlink target
        if (PdfName.URI.Equals(AnnotationAction.Get(PdfName.S)))
        {
            // Get action link URL : linkReferenceBuilder
            PdfString Link = AnnotationAction.GetAsString(PdfName.URI);
            if (Link != null)
            {
                // Note: each link overwrites the previous value, as in the original snippet
                linkReferenceBuilder = Link.ToString();
            }
        }
    }
}
Posted
Updated 15-Jun-13 0:24am
v5
Comments
Prasad_Kulkarni 13-Jun-13 7:15am    
What exactly you want?

You want to remove hyperlinks [as you mentioned in head of question]
Or
You want to fetch the hyperlink [as you mentioned in question body]?
NeerajGSoft 13-Jun-13 7:20am    
I want to remove the hyperlinks, but when I read the PDF file I get only the text, not the hyperlink attached to that text.
Sunasara Imdadhusen 13-Jun-13 7:48am    
Where is code?
Kschuler 13-Jun-13 11:05am    
When you have more information that is helpful, it's better to click the Improve Question link and add it to your original question. That way we can see the code formatted nicely and don't have to search through comments to find it.
NeerajGSoft 14-Jun-13 2:28am    
ok thanks for that...:-)

I found a solution myself; it is shown below.
You can remove the links by calling this function before saving the document:

// Open the input document, strip its URI link annotations, then save a
// linearized copy. The using block disposes the PDFDoc even if the link
// removal or the save throws.
using (PDFDoc doc = new PDFDoc(fileIn))
{
    RemoveCertainExistingLinks(doc, ExcusableLinks);
    doc.Save(fileOut, pdftron.SDF.SDFDoc.SaveOptions.e_linearized);
}


/// <summary>
/// Removes every valid link annotation whose action is a URI action from all
/// pages of <paramref name="doc"/>, except links whose URI is listed in
/// <paramref name="excusedLinks"/>.
/// </summary>
/// <param name="doc">Document to modify in place.</param>
/// <param name="excusedLinks">
/// URIs that must be kept. May be null or empty, in which case every URI link
/// is removed.
/// </param>
public void RemoveCertainExistingLinks(PDFDoc doc, ICollection<string> excusedLinks)
{
    for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
    {
        Page p = itr.Current();

        // Walk the annotations backwards because the page is modified in
        // place: removing index i must not shift entries not yet visited.
        // A bounded for loop also cannot run past zero.
        for (int i = p.GetNumAnnots() - 1; i >= 0; i--)
        {
            Annot annot = p.GetAnnot(i);

            // Check validity first so the type is never queried on an invalid annotation.
            if (!annot.IsValid() || annot.GetType() != Annot.Type.e_Link)
            {
                continue;
            }

            pdftron.PDF.Action linkAction = annot.GetLinkAction();
            if (linkAction.GetType() != pdftron.PDF.Action.Type.e_URI)
            {
                continue;
            }

            // The action's SDF object should be a dictionary holding the "URI" entry.
            pdftron.SDF.Obj sdfobj = linkAction.GetSDFObj();
            pdftron.SDF.Obj URIobj = sdfobj.FindObj("URI");

            // A URI action without a URI entry cannot be excused; treat it as removable.
            string URI = (URIobj != null) ? URIobj.GetAsPDFText() : null;

            // Keep links the caller explicitly asked to preserve.
            if (URI != null && excusedLinks != null && excusedLinks.Contains(URI))
            {
                continue;
            }

            p.AnnotRemove(i);
        }
    }
}
 
Share this answer
 
Comments
Joezer BH 30-Jun-13 11:10am    
nice of you to post the solution too.

Just a sideshow bob notice:
while (i != 0)
is dangerous code: in more complex while loops, where i is manipulated in several places while being decreased overall, there could be cases where i drops below 0, and then the loop never terminates... :o
I'd suggest while(i > 0) in your case
NeerajGSoft 1-Jul-13 1:54am    
Thank you sir for your suggestion....:-)
You should have a look at Remove hyperlinks from a PDF document (iTextSharp)[^]

--Amit
 
Share this answer
 
Comments
NeerajGSoft 15-Jun-13 1:41am    
OK sir, and after removing the hyperlinks, how do I write the text out again as a PDF?
_Amy 15-Jun-13 1:47am    
NeerajGSoft 15-Jun-13 2:01am    
No, it's not.
Actually, when I write out the PDF, the path of my PDF file (including its name) is inserted in place of the previous link.
This is my entire code:

// Removes every URI link annotation from the PDF at pdfFilePath and writes the
// result to OutputFile.
//
// The whole link annotation is removed from the page's /Annots array. Removing
// only the /URI key from the action leaves a clickable annotation with an
// empty URI action, which viewers resolve against the document's own location
// (the "file path instead of the previous link" symptom).
PdfDictionary PageDictionary = default(PdfDictionary);
PdfArray Annots = default(PdfArray);
PdfReader reader = new PdfReader(pdfFilePath);

try
{
    // Loop through each page (the loop starts at 1, matching GetPageN's numbering here)
    for (int i = 1; i <= reader.NumberOfPages; i++)
    {
        // Get the current page
        PageDictionary = reader.GetPageN(i);

        // Get all of the annotations for the current page
        Annots = PageDictionary.GetAsArray(PdfName.ANNOTS);

        // Make sure we have something. Size is the element count of the array;
        // Length is inherited from PdfObject and does not count elements.
        if ((Annots == null) || (Annots.Size == 0))
        {
            continue;
        }

        // Walk the array backwards by index: entries are removed in place, and a
        // foreach over Annots.ArrayList would break once the list is modified.
        for (int k = Annots.Size - 1; k >= 0; k--)
        {
            // GetAsDict resolves indirect references and yields null for non-dictionaries
            PdfDictionary AnnotationDictionary = Annots.GetAsDict(k);
            if (AnnotationDictionary == null)
            {
                continue;
            }

            // Make sure this annotation is a link (null-safe if SUBTYPE is missing)
            if (!PdfName.LINK.Equals(AnnotationDictionary.Get(PdfName.SUBTYPE)))
            {
                continue;
            }

            // Make sure this annotation has an ACTION dictionary
            PdfDictionary AnnotationAction = AnnotationDictionary.GetAsDict(PdfName.A);
            if (AnnotationAction == null)
            {
                continue;
            }

            // Only URI actions are hyperlinks; other link actions are left untouched
            if (PdfName.URI.Equals(AnnotationAction.Get(PdfName.S)))
            {
                // Removing Link: drop the whole annotation, not just its URI entry
                Annots.Remove(k);
            }
        }
    }

    // Copy the (now modified) pages into the output file
    using (FileStream FS = new FileStream(OutputFile, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        using (Document Doc = new Document())
        {
            using (PdfCopy writer = new PdfCopy(Doc, FS))
            {
                Doc.Open();
                for (int j = 1; j <= reader.NumberOfPages; j++)
                {
                    writer.AddPage(writer.GetImportedPage(reader, j));
                }
                Doc.Close();
            }
        }
    }
}
finally
{
    // Release the reader whether or not the copy succeeded
    reader.Close();
}
_Amy 15-Jun-13 2:07am    
In the block of code where you remove the link, you also need to add the text. Take the removed link text, hold it in a variable, and then write it into the PDF. You can use Google if required.
--Amit
Member 9899837 7-Feb-14 7:50am    
If you have some code snippet then kindly provide.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900