/// <summary>
/// Opens the PDF at <paramref name="filename"/> and returns the text of all of its pages.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <returns>
/// The extracted text of every page, in page order, with a line break between consecutive pages.
/// </returns>
public static string ExtractTextFromPdf(string filename)
{
    using (PdfReader reader = new PdfReader(filename))
    {
        StringBuilder text = new StringBuilder();

        // Walk pages 1 through NumberOfPages inclusive.
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            if (i > 1)
            {
                // Separate pages so the last word of one page does not fuse with
                // the first word of the next when the text is tokenized later.
                text.AppendLine();
            }

            text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
        }

        return text.ToString();
    }
}

/// <summary>
/// Click handler: extracts the text of every "*.pdf" file in the hard-coded data-set folder,
/// adds each text to <c>docCollection.DocumentList</c>, and then shows how many files were found.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string pathName = @"D:\Data Sets\Enron";

    // Directory.GetFiles throws when the folder is missing; report that instead of
    // letting the exception escape from the event handler.
    if (!Directory.Exists(pathName))
    {
        MessageBox.Show("Folder not found: " + pathName);
        return;
    }

    string[] pdfFileNames = Directory.GetFiles(pathName, "*.pdf");

    foreach (string pdfFileName in pdfFileNames)
    {
        // NOTE(review): only d1.content is stored in the collection; the DocumentVector
        // itself is discarded. Kept as-is because DocumentVector is declared elsewhere —
        // confirm whether the wrapper is needed at all.
        DocumentVector d1 = new DocumentVector();
        d1.content = ExtractTextFromPdf(pdfFileName);
        docCollection.DocumentList.Add(d1.content);
    }

    MessageBox.Show(pdfFileNames.Length.ToString());
}
string[] pdfFileNames = Directory.GetFiles(pathName, "*.pdf");
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)