Hi!
Is there a magic solution? Is it possible to create a PDF document based on an existing document without losing all its accessibility tags?
Our situation is quite complex here and I am looking for a clean and elegant solution to our problem that we have been trying to solve for quite some time.
Our system lets the customer upload a PDF file, and we provide a UI in which the customer can draw on the file, write text, and fill in signatures,
and finally download the resulting file through the browser.
The problem occurs when the customer uploads an accessible (tagged) PDF file, i.e. one prepared for users with disabilities.
When the customer downloads the resulting PDF, it is no longer accessible: all of its tags are gone.
We use PDFsharp as the library that performs all the manipulations on the PDF.
Having checked our code, we found that it never refers to the accessibility tags at all, so we are now trying to work out how to find them and preserve them while the code runs.
In a nutshell, what we do is the following code:
using PdfSharp.Drawing;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
using System;
using System.Collections.Generic;
using System.IO;
/// <summary>
/// Builds a new PDF document by drawing each page of the PDF read from
/// <paramref name="stream"/> onto a same-sized page of a fresh document and then
/// drawing that page's layer on top of it.
/// </summary>
/// <param name="stream">Stream containing the source PDF.</param>
/// <param name="layers">Layer definitions; handed to <c>GetPageLayer</c> to pick the layer for each page.</param>
/// <param name="parameters">Input values forwarded to <c>DrawPdfLayer</c>.</param>
/// <param name="bIsAutomated">Forwarded unchanged to <c>DrawPdfLayer</c>.</param>
/// <returns>The newly created document; saving it is left to the caller.</returns>
private PdfDocument DrawOnPdf(Stream stream, dynamic layers, dynamic parameters, bool bIsAutomated = false)
{
    XPdfForm formOriginal = XPdfForm.FromStream(stream);
    PdfDocument outputDocument = new PdfDocument();
    outputDocument.PageLayout = PdfPageLayout.SinglePage;

    // NOTE(review): every source page is drawn into a brand-new document as an image/form.
    // Accessibility tags of the source are reported to be missing from the result; presumably
    // document-level structure is not carried over by this approach - confirm against the
    // PDFsharp documentation before relying on it for tagged PDFs.

    // Page numbers on the form are 1-based, hence the loop from 1 to PageCount inclusive.
    for (int idx = 1; idx <= formOriginal.PageCount; idx += 1)
    {
        PdfPage page = outputDocument.AddPage();

        // Select the source page first so that formOriginal.Page refers to it.
        formOriginal.PageNumber = idx;
        page.Orientation = formOriginal.Page.Orientation;
        page.Width = formOriginal.Page.Width;
        page.Height = formOriginal.Page.Height;

        double width = page.Width;
        double height = page.Height;
        XRect box = new XRect(0, 0, width, height);

        // One graphics context per page, released as soon as the page is finished
        // instead of being left undisposed until garbage collection.
        using (XGraphics gfx = XGraphics.FromPdfPage(page))
        {
            gfx.DrawImage(formOriginal, box);

            // The third argument tells GetPageLayer whether this is the last page.
            dynamic currentPage = GetPageLayer(layers, idx, idx == formOriginal.PageCount);
            DrawPdfLayer(gfx, currentPage, parameters, bIsAutomated);
        }
    }

    return outputDocument;
}
/// <summary>
/// Draws every entry of <c>page.layer</c> onto <paramref name="gfx"/>, resolving each
/// entry's input value from <paramref name="parameters"/> by its <c>input_id</c>.
/// </summary>
/// <param name="gfx">Graphics context of the page being drawn on.</param>
/// <param name="page">Object exposing a <c>layer</c> collection; each entry supplies
/// <c>x</c>, <c>y</c>, <c>type</c>, <c>input_id</c>, <c>color</c> and the
/// <c>cleardash</c>/<c>emailuser</c>/<c>emaildomain</c> flags read below.</param>
/// <param name="parameters">Lookup of input values keyed by <c>input_id</c>; may be null.</param>
/// <param name="bIsAutomated">Forwarded to <c>WriteRadio</c>.</param>
/// <remarks>
/// Drawing is best-effort: the first exception stops processing of the remaining layers
/// and is reported through <see cref="System.Diagnostics.Trace"/> instead of being thrown.
/// </remarks>
private static void DrawPdfLayer(XGraphics gfx, dynamic page, dynamic parameters, bool bIsAutomated = false)
{
    // Remembers the layer currently being processed so a failure can be attributed to it.
    object item = null;
    try
    {
        foreach (var layer in page.layer)
        {
            item = layer;
            double x = layer.x;
            double y = layer.y;

            // A parameter is either a single value (input) or a JSON array of values (arrInput).
            string input = null;
            dynamic arrInput = null;
            if (parameters != null && layer.input_id != null &&
                parameters[layer.input_id.ToString()] != null)
            {
                var tmp = parameters[layer.input_id.ToString()];
                if (tmp is Newtonsoft.Json.Linq.JArray)
                {
                    arrInput = tmp;
                }
                else
                {
                    input = tmp.ToString();
                }
            }

            bool clear = (bool)layer.cleardash;
            if (clear && input != null)
            {
                input = input.Replace("-", " ");
            }

            bool emailUser = (bool)layer.emailuser;
            if (emailUser && !String.IsNullOrEmpty(input))
            {
                // Keep only the part before '@'. Without an '@' the value is left unchanged
                // (Remove(-1) would throw), matching the domain branch below.
                int atIndex = input.IndexOf('@');
                if (atIndex >= 0)
                {
                    input = input.Remove(atIndex);
                }
            }

            bool emailDomain = (bool)layer.emaildomain;
            if (emailDomain && !String.IsNullOrEmpty(input))
            {
                // Keep only the part after '@'; with no '@' this removes zero characters.
                input = input.Remove(0, input.IndexOf('@') + 1);
            }

            var brush = XBrushes.Black;
            if (layer.color != null)
            {
                brush = new XSolidBrush(XColor.FromArgb(255, (int)layer.color.R, (int)layer.color.G, (int)layer.color.B));
            }

            string layerType = layer.type.ToString();
            switch (layerType)
            {
                case "text":
                    WriteText(ref gfx, brush, layer, input, x, y);
                    break;
                case "numberinput":
                    int maxLength = (int)layer.maxlength;
                    // Left-pad short values with spaces up to maxlength.
                    if (input != null && maxLength > 0 && input.Length < maxLength)
                    {
                        input = input.PadLeft(maxLength);
                    }
                    WriteText(ref gfx, brush, layer, input, x, y);
                    break;
                case "date":
                    WriteDate(ref gfx, brush, layer, input, x, y);
                    break;
                case "block":
                    WriteBlock(ref gfx, brush, layer, input, x, y);
                    break;
                case "circle":
                case "radio":
                    // A single value is treated as a one-element list so both shapes share one loop.
                    if (input != null)
                    {
                        arrInput = new string[] { input };
                    }
                    if (arrInput != null)
                    {
                        foreach (var val in arrInput)
                        {
                            if (layerType == "radio")
                            {
                                WriteRadio(ref gfx, brush, layer, val.ToString(), x, y, bIsAutomated);
                            }
                            else
                            {
                                WriteCircle(ref gfx, brush, layer, val.ToString(), x, y);
                            }
                        }
                    }
                    break;
            }
        }

        // Kept from the original: saves the graphics state and immediately restores it.
        gfx.Restore(gfx.Save());
    }
    catch (Exception ex)
    {
        // Still best-effort (nothing is rethrown), but the failure is no longer silent:
        // record which layer was being processed and why it failed.
        System.Diagnostics.Trace.TraceError("DrawPdfLayer failed while processing layer {0}: {1}", item, ex);
    }
}
What I have tried:
So far we have tried cloning the PdfDocument object, saving all of its pages to a temporary list, removing all of the original document's pages, and then working on the now-empty PDF that should still carry the accessibility tags. We found that the tags are missing in that case as well.