I have a text file, created from an electoral-roll PDF file, in which the data of 3 persons is placed side by side on the same line.
I would like to get each person's data from the text file. The problem I am facing is that after the person's name line, one line is kept empty in case the name is too long. Suppose the 3rd person's name continues on that next line in the text file: how can I make the reader identify that the data belongs to the 3rd person and assign it accordingly?
Any help on this matter would be helpful.
Here is the sample data:
1 EPIC NO: XYZZ989898 2 EPIC NO: XYZZ989898 3 EPIC NO: XYZZ989898
Name : abcd xyz Name : abcd xyz Name : abcd lmno
xyz
Husband's abcdefghijklm xyz Father's abcd xyz Father's abcd xyz
Name: Name: Name:
House No: - House No: - House No: -
Age: 44 Sex: Female Age: 24 Sex: Male Age: 21 Sex: Female
Here is the code. In it I have to specify the values of 2 variables (r and l) so that the reader can treat them as the boundaries of the columns, but this is not working, because different files have different indentation and I have to specify the values again and again.
' Parse the electoral-roll text export only when the file is present.
If System.IO.File.Exists(FILE_NAME) Then
' Load the whole file in one call. ReadAllLines opens and closes the file itself,
' so the extra StreamReader (never read from and never closed) has been dropped.
Dim lines As String() = IO.File.ReadAllLines(FILE_NAME)
' Scratch buffers for the left, middle and right column of the line being parsed.
Dim rsltstr As String = ""
Dim rsltstr1 As String = ""
Dim rsltstr2 As String = ""
' Parsed fields; no suffix = first voter, suffix 1 = second voter, suffix 2 = third voter.
Dim SrNo As Integer
Dim SrNo1 As Integer
Dim SrNo2 As Integer
Dim EPICNo As String
Dim EPICNo1 As String
Dim EPICNo2 As String
Dim age As Integer
Dim age1 As Integer
Dim age2 As Integer
Dim sex As String = ""
Dim sex1 As String = ""
Dim sex2 As String = ""
Dim nm As String = ""
Dim nm1 As String = ""
Dim nm2 As String = ""
Dim hno As String
Dim hno1 As String
Dim hno2 As String
' Character sets handed to TrimStart/TrimEnd. They are written as Char literals so the
' arrays no longer depend on an implicit String-to-Char narrowing of two-character strings
' (which keeps only the first character and does not compile under Option Strict On).
Dim space() As Char = {" "c}
Dim st() As Char = {"E"c, "S"c, "M"c, "Q"c, "R"c, "#"c}
' Walk every line of the file; a line containing "EPIC NO" starts a row of three voters.
For i = 0 To lines.Count - 1
If lines(i).Contains("EPIC NO") Then
' The parsing below runs only for rows longer than r characters,
' i.e. rows that reach into the third column.
If lines(i).Length > r Then
' Cut the row into its three columns: [0, l), [l, r) and [r, end).
Dim epicLine As String = lines(i)
rsltstr = If(epicLine.Length < l, epicLine, epicLine.Substring(0, l))
If epicLine.Length > l AndAlso epicLine.Length < r Then
    rsltstr1 = epicLine.Substring(l)
ElseIf epicLine.Length >= r Then
    ' The middle column is r - l characters wide. Taking l characters here ran into
    ' the third column, or past the end of the line, whenever the columns were not
    ' all the same width.
    rsltstr1 = epicLine.Substring(l, r - l)
End If
If epicLine.Length > r Then
    rsltstr2 = epicLine.Substring(r)
End If
' Each column reads "serial EPIC NO: id". Drop the label, split at the colon,
' strip blanks and the marker characters listed in st from the serial part,
' then convert the serial to a number.
rsltstr = rsltstr.Replace("EPIC NO", "").Replace(":", ">")
Dim sridinfo As String() = rsltstr.Split(">"c)
SrNo = CInt(sridinfo(0).TrimStart(space).TrimStart(st).TrimEnd(space))
EPICNo = sridinfo(1).Trim(space)
rsltstr1 = rsltstr1.Replace("EPIC NO", "").Replace(":", ">")
Dim sridinfo1 As String() = rsltstr1.Split(">"c)
SrNo1 = CInt(sridinfo1(0).TrimStart(space).TrimStart(st).TrimEnd(space))
EPICNo1 = sridinfo1(1).Trim(space)
rsltstr2 = rsltstr2.Replace("EPIC NO", "").Replace(":", ">")
Dim sridinfo2 As String() = rsltstr2.Split(">"c)
SrNo2 = CInt(sridinfo2(0).TrimStart(space).TrimStart(st).TrimEnd(space))
EPICNo2 = sridinfo2(1).Trim(space)
' Clear the column buffers before the next line is split.
rsltstr = ""
rsltstr1 = ""
rsltstr2 = ""
' Move to the line that follows the EPIC row.
i = i + 1
' Start every row with empty names. They used to be cleared only when a pre-name line
' existed, so in all other rows the appends further down kept extending the names
' collected for the previous row.
nm = ""
nm1 = ""
nm2 = ""
' If this line is not the "Name" line yet, its text becomes the first part of each name.
If Not lines(i).Contains("Name") Then
    ' Cut the line into its three columns: [0, l), [l, r) and [r, end).
    Dim preNameLine As String = lines(i)
    rsltstr = If(preNameLine.Length < l, preNameLine, preNameLine.Substring(0, l))
    If preNameLine.Length > l AndAlso preNameLine.Length < r Then
        rsltstr1 = preNameLine.Substring(l)
    ElseIf preNameLine.Length >= r Then
        ' Middle column width is r - l (was l, which overran unless r = 2 * l).
        rsltstr1 = preNameLine.Substring(l, r - l)
    End If
    If preNameLine.Length > r Then
        rsltstr2 = preNameLine.Substring(r)
    End If
    nm = rsltstr.Trim(space)
    nm1 = rsltstr1.Trim(space)
    nm2 = rsltstr2.Trim(space)
    ' Clear the column buffers and step on to the "Name" line itself.
    rsltstr = ""
    rsltstr1 = ""
    rsltstr2 = ""
    i = i + 1
End If
' Split the "Name :" line into its three columns: [0, l), [l, r) and [r, end).
Dim nameLine As String = lines(i)
rsltstr = If(nameLine.Length < l, nameLine, nameLine.Substring(0, l))
If nameLine.Length > l AndAlso nameLine.Length < r Then
    rsltstr1 = nameLine.Substring(l)
ElseIf nameLine.Length >= r Then
    ' Middle column width is r - l (was l, which overran unless r = 2 * l).
    rsltstr1 = nameLine.Substring(l, r - l)
End If
If nameLine.Length > r Then
    rsltstr2 = nameLine.Substring(r)
End If
' Remove the "Name" label and the colon, then append the remaining text to whatever
' has already been collected for that voter and trim the surrounding blanks.
nm = (nm & " " & rsltstr.Replace("Name", "").Replace(":", "").Trim(space)).Trim(space)
nm1 = (nm1 & " " & rsltstr1.Replace("Name", "").Replace(":", "").Trim(space)).Trim(space)
nm2 = (nm2 & " " & rsltstr2.Replace("Name", "").Replace(":", "").Trim(space)).Trim(space)
' Clear the column buffers and move to the line after the "Name" line.
rsltstr = ""
rsltstr1 = ""
rsltstr2 = ""
i = i + 1
' The line after the "Name" line is empty unless a name was too long for one line;
' a non-empty line holds the overflow, which is appended to the matching voter's name.
If Not lines(i) = "" Then
    ' Cut the line into its three columns: [0, l), [l, r) and [r, end).
    Dim nameTailLine As String = lines(i)
    rsltstr = If(nameTailLine.Length < l, nameTailLine, nameTailLine.Substring(0, l))
    If nameTailLine.Length > l AndAlso nameTailLine.Length < r Then
        rsltstr1 = nameTailLine.Substring(l)
    ElseIf nameTailLine.Length >= r Then
        ' Middle column width is r - l (was l, which overran unless r = 2 * l).
        rsltstr1 = nameTailLine.Substring(l, r - l)
    End If
    If nameTailLine.Length > r Then
        rsltstr2 = nameTailLine.Substring(r)
    End If
    nm = (nm & " " & rsltstr.Trim(space)).Trim(space)
    nm1 = (nm1 & " " & rsltstr1.Trim(space)).Trim(space)
    nm2 = (nm2 & " " & rsltstr2.Trim(space)).Trim(space)
    ' Clear the column buffers before the next line is split.
    rsltstr = ""
    rsltstr1 = ""
    rsltstr2 = ""
End If
' Step past the overflow line.
i = i + 1
' Skip forward to the "House No" line, ignoring whatever lines come in between
' (in the sample data these are the Husband's/Father's name lines).
Do While i < lines.Length AndAlso Not lines(i).Contains("House No")
    i = i + 1
Loop
' A truncated file can end before any "House No" line is found; stop parsing instead of
' indexing past the last line.
If i >= lines.Length Then Exit For
' Cut the line into its three columns: [0, l), [l, r) and [r, end).
Dim houseLine As String = lines(i)
rsltstr = If(houseLine.Length < l, houseLine, houseLine.Substring(0, l))
If houseLine.Length > l AndAlso houseLine.Length < r Then
    rsltstr1 = houseLine.Substring(l)
ElseIf houseLine.Length >= r Then
    ' Middle column width is r - l (was l, which overran unless r = 2 * l).
    rsltstr1 = houseLine.Substring(l, r - l)
End If
If houseLine.Length > r Then
    rsltstr2 = houseLine.Substring(r)
End If
' Remove the label and the colon, trim blanks, and keep at most the last 10 characters
' of each value.
rsltstr = rsltstr.Replace("House No", "").Replace(":", "").Trim(space)
hno = If(rsltstr.Length > 10, rsltstr.Substring(rsltstr.Length - 10), rsltstr)
rsltstr1 = rsltstr1.Replace("House No", "").Replace(":", "").Trim(space)
hno1 = If(rsltstr1.Length > 10, rsltstr1.Substring(rsltstr1.Length - 10), rsltstr1)
rsltstr2 = rsltstr2.Replace("House No", "").Replace(":", "").Trim(space)
hno2 = If(rsltstr2.Length > 10, rsltstr2.Substring(rsltstr2.Length - 10), rsltstr2)
' Clear the column buffers and move to the line after the "House No" line.
rsltstr = ""
rsltstr1 = ""
rsltstr2 = ""
i = i + 1
' A line that does not contain "Age" yet is treated as a second line of the house numbers;
' its text (at most the last 10 characters per column) is appended to hno, hno1 and hno2.
If Not lines(i).Contains("Age") Then
    ' Cut the line into its three columns: [0, l), [l, r) and [r, end).
    Dim houseTailLine As String = lines(i)
    rsltstr = If(houseTailLine.Length < l, houseTailLine, houseTailLine.Substring(0, l))
    If houseTailLine.Length > l AndAlso houseTailLine.Length < r Then
        rsltstr1 = houseTailLine.Substring(l)
    ElseIf houseTailLine.Length >= r Then
        ' Middle column width is r - l (was l, which overran unless r = 2 * l).
        rsltstr1 = houseTailLine.Substring(l, r - l)
    End If
    If houseTailLine.Length > r Then
        rsltstr2 = houseTailLine.Substring(r)
    End If
    rsltstr = rsltstr.Trim(space)
    hno = (hno & " " & If(rsltstr.Length > 10, rsltstr.Substring(rsltstr.Length - 10), rsltstr)).Trim(space)
    rsltstr1 = rsltstr1.Trim(space)
    hno1 = (hno1 & " " & If(rsltstr1.Length > 10, rsltstr1.Substring(rsltstr1.Length - 10), rsltstr1)).Trim(space)
    rsltstr2 = rsltstr2.Trim(space)
    hno2 = (hno2 & " " & If(rsltstr2.Length > 10, rsltstr2.Substring(rsltstr2.Length - 10), rsltstr2)).Trim(space)
    ' Clear the column buffers and step on to the "Age" line.
    rsltstr = ""
    rsltstr1 = ""
    rsltstr2 = ""
    i = i + 1
End If
' Last line of the row: each column reads like "Age: 44 Sex: Female".
If lines(i).Contains("Age") Then
    ' Cut the line into its three columns: [0, l), [l, r) and [r, end).
    Dim ageLine As String = lines(i)
    rsltstr = If(ageLine.Length < l, ageLine, ageLine.Substring(0, l))
    If ageLine.Length > l AndAlso ageLine.Length < r Then
        rsltstr1 = ageLine.Substring(l)
    ElseIf ageLine.Length >= r Then
        ' Middle column width is r - l (was l, which overran unless r = 2 * l).
        rsltstr1 = ageLine.Substring(l, r - l)
    End If
    If ageLine.Length > r Then
        rsltstr2 = ageLine.Substring(r)
    End If
    ' Drop "Age" and the colons and turn "Sex" into a dash, so that splitting on the dash
    ' yields the age text in element 0 and the sex text in element 1.
    rsltstr = rsltstr.Replace("Age", "").Replace(":", "").Replace("Sex", "-")
    Dim agsx As String() = rsltstr.Split("-"c)
    age = CInt(agsx(0).Trim(space))
    agsx(1) = agsx(1).Trim(space)
    ' The sex text is matched without regard to case. The second and third column used to
    ' look for lowercase "female", which never matched "Female", so those voters were
    ' always recorded as "Male".
    sex = If(agsx(1).IndexOf("female", System.StringComparison.OrdinalIgnoreCase) >= 0, "Female", "Male")
    rsltstr1 = rsltstr1.Replace("Age", "").Replace(":", "").Replace("Sex", "-")
    Dim agsx1 As String() = rsltstr1.Split("-"c)
    age1 = CInt(agsx1(0).Trim(space))
    agsx1(1) = agsx1(1).Trim(space)
    sex1 = If(agsx1(1).IndexOf("female", System.StringComparison.OrdinalIgnoreCase) >= 0, "Female", "Male")
    rsltstr2 = rsltstr2.Replace("Age", "").Replace(":", "").Replace("Sex", "-")
    Dim agsx2 As String() = rsltstr2.Split("-"c)
    age2 = CInt(agsx2(0).Trim(space))
    agsx2(1) = agsx2(1).Trim(space)
    sex2 = If(agsx2(1).IndexOf("female", System.StringComparison.OrdinalIgnoreCase) >= 0, "Female", "Male")
    ' Clear the column buffers for the next row.
    rsltstr = ""
    rsltstr1 = ""
    rsltstr2 = ""
End If
' Continue with the next line of the file.
Next
To convert the PDF to a text file, I use the pdf2text pilot software.