I am new to working with XML in Python and I need some help.
I know how I want to solve the problem, but I am finding it difficult to translate into code.
My file is named "Opta24".
Its structure is:
<Games timestamp="2012-09-11T10:20:32">
<Game id="360481" away_team_id="43" away_team_name="Manchester City" competition_id="8" competition_name="English Barclays Premier League" game_date="2011-08-21T16:00:00" home_team_id="30" home_team_name="Bolton Wanderers" matchday="2" period_1_start="2011-08-21T16:00:38" period_2_start="2011-08-21T17:03:47" season_id="2011" season_name="Season 2011/2012">
<Event id="301038339" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="43" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T15:23:06.696" last_modified="2011-08-21T15:54:56">
<Q id="2028397186" qualifier_id="130" value="4" />
<Q id="1518776786" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
<Q id="997025056" qualifier_id="59" value="25, 2, 13, 18, 4, 6, 42, 7, 10, 16, 21, 5, 11, 15, 20, 22, 32, 45" />
<Q id="955425655" qualifier_id="194" value="17476" />
<Q id="996147927" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
<Q id="1940069841" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5" />
<Q id="1529687618" qualifier_id="30" value="15749, 20492, 42593, 1632, 17476, 7551, 14664, 15157, 42544, 37572, 20664, 20658, 19959, 65807, 56827, 17336, 20312, 42493" />
</Event>
<Event id="1475524684" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="30" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T15:39:39.166" last_modified="2011-08-21T16:06:40">
<Q id="1993329296" qualifier_id="59" value="22, 2, 4, 6, 5, 12, 7, 19, 17, 14, 10, 1, 3, 16, 20, 21, 31, 38" />
<Q id="783602879" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
<Q id="1981808255" qualifier_id="30" value="1344, 28183, 2004, 27696, 19419, 1587, 18428, 14668, 9765, 3630, 10089, 45175, 82263, 19930, 1615, 15188, 19958, 105088" />
<Q id="1521261840" qualifier_id="194" value="3630" />
<Q id="459356083" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
<Q id="2003349974" qualifier_id="130" value="2" />
<Q id="1582676412" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5" />
</Event>
<Event id="2036897618" event_id="2" type_id="32" period_id="1" min="0" sec="0" team_id="30" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T16:00:38.967" last_modified="2011-08-21T16:00:39">
<Q id="530297025" qualifier_id="127" value="Left to Right" />
</Event>
<Event id="336246484" event_id="2" type_id="32" period_id="1" min="0" sec="0" team_id="43" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T16:00:39.132" last_modified="2011-08-21T16:00:39">
<Q id="1227488973" qualifier_id="127" value="Right to Left" />
</Event>
<Event id="1372839298" event_id="3" type_id="1" period_id="1" min="0" sec="1" player_id="37572" team_id="43" outcome="1" x="50.1" y="50.0" timestamp="2011-08-21T16:00:40.179" last_modified="2011-08-21T16:00:41">
<Q id="541570642" qualifier_id="212" value="2.5" />
<Q id="1202220317" qualifier_id="140" value="52.4" />
<Q id="1922915435" qualifier_id="141" value="49.1" />
<Q id="566075534" qualifier_id="56" value="Center" />
<Q id="21729623" qualifier_id="213" value="6.0" />
</Event>
<Event id="978322590" event_id="4" type_id="1" period_id="1" min="0" sec="2" player_id="20664" team_id="43" outcome="1" x="48.2" y="49.1" timestamp="2011-08-21T16:00:41.585" last_modified="2011-08-21T16:00:44">
<Q id="1842556548" qualifier_id="213" value="2.4" />
<Q id="1580811978" qualifier_id="56" value="Back" />
<Q id="1316578499" qualifier_id="140" value="29.0" />
<Q id="870951602" qualifier_id="212" value="27.5" />
<Q id="1361996302" qualifier_id="141" value="76.5" />
</Event>
<Event id="1572252644" event_id="3" type_id="7" period_id="1" min="0" sec="5" player_id="3630" team_id="30" outcome="1" x="66.4" y="13.7" timestamp="2011-08-21T16:00:44.304" last_modified="2011-08-21T16:38:51">
<Q id="1492254907" qualifier_id="167" />
<Q id="491183529" qualifier_id="56" value="Right" />
</Event>
<Event id="2077205137" event_id="5" type_id="3" period_id="1" min="0" sec="5" player_id="42593" team_id="43" outcome="0" x="28.3" y="86.1" timestamp="2011-08-21T16:00:44.304" last_modified="2011-08-22T12:29:34">
<Q id="176412213" qualifier_id="56" value="Back" />
</Event>
<Event id="403196518" event_id="4" type_id="5" period_id="1" min="0" sec="7" player_id="3630" team_id="30" outcome="0" x="72.0" y="-1.1" timestamp="2011-08-21T16:00:46.279" last_modified="2011-08-21T16:00:46">
<Q id="1046884000" qualifier_id="56" value="Right" />
</Event>
<Event id="509587597" event_id="6" type_id="5" period_id="1" min="0" sec="7" player_id="42593" team_id="43" outcome="1" x="25.9" y="101.2" timestamp="2011-08-21T16:00:46.444" last_modified="2011-08-21T16:00:58">
<Q id="1487531325" qualifier_id="56" value="Back" />
</Event>
<Event id="1962550717" event_id="7" type_id="1" period_id="1" min="0" sec="19" player_id="42593" team_id="43" outcome="0" x="27.8" y="100.0" timestamp="2011-08-21T16:00:58.445" last_modified="2011-08-21T16:01:01">
<Q id="1388961572" qualifier_id="56" value="Back" />
<Q id="1464407097" qualifier_id="141" value="93.4" />
<Q id="1290217595" qualifier_id="212" value="23.2" />
<Q id="63165432" qualifier_id="107" />
<Q id="1582662528" qualifier_id="213" value="6.1" />
<Q id="742294617" qualifier_id="140" value="49.3" />
</Event>
<Event id="1886599927" event_id="5" type_id="1" period_id="1" min="0" sec="23" player_id="27696" team_id="30" outcome="1" x="50.9" y="20.0" timestamp="2011-08-21T16:01:02.466" last_modified="2011-08-21T16:01:17">
<Q id="1358834683" qualifier_id="212" value="15.0" />
<Q id="215350901" qualifier_id="140" value="63.6" />
<Q id="1207838176" qualifier_id="3" />
<Q id="1518806348" qualifier_id="141" value="30.1" />
<Q id="71823979" qualifier_id="56" value="Center" />
<Q id="922302198" qualifier_id="213" value="0.5" />
</Event>
<Event id="604657066" event_id="6" type_id="4" period_id="1" min="0" sec="26" player_id="3630" team_id="30" outcome="0" x="63.6" y="30.1" timestamp="2011-08-21T16:01:05.482" last_modified="2011-08-21T16:01:07">
<Q id="554791351" qualifier_id="56" value="Center" />
<Q id="1096167754" qualifier_id="13" />
</Event>
<Event id="497476041" event_id="8" type_id="4" period_id="1" min="0" sec="26" player_id="1632" team_id="43" outcome="1" x="31.7" y="75.8" timestamp="2011-08-21T16:01:05.507" last_modified="2011-08-21T16:01:06">
<Q id="503664000" qualifier_id="13" />
<Q id="956789322" qualifier_id="56" value="Back" />
</Event>
<Event id="854361320" event_id="9" type_id="1" period_id="1" min="0" sec="30" player_id="7551" team_id="43" outcome="1" x="29.3" y="73.0" timestamp="2011-08-21T16:01:09.507" last_modified="2011-08-21T16:01:11">
<Q id="1681674682" qualifier_id="5" />
<Q id="43769595" qualifier_id="212" value="11.9" />
<Q id="1348926242" qualifier_id="213" value="4.8" />
<Q id="1228266170" qualifier_id="140" value="30.2" />
<Q id="2126971087" qualifier_id="56" value="Back" />
<Q id="911744327" qualifier_id="141" value="55.6" />
</Event>
<Event id="959833686" event_id="10" type_id="1" period_id="1" min="0" sec="32" player_id="14664" team_id="43" outcome="1" x="31.9" y="53.6" timestamp="2011-08-21T16:01:11.663" last_modified="2011-08-21T16:01:13">
<Q id="638330763" qualifier_id="140" value="42.3" />
<Q id="816211408" qualifier_id="212" value="16.8" />
<Q id="1375255370" qualifier_id="141" value="34.9" />
<Q id="1175687287" qualifier_id="56" value="Back" />
<Q id="1501159585" qualifier_id="213" value="5.4" />
</Event>
<Event id="1460827261" event_id="11" type_id="1" period_id="1" min="0" sec="33" player_id="20664" team_id="43" outcome="1" x="44.3" y="31.8" timestamp="2011-08-21T16:01:13.101" last_modified="2011-08-21T16:01:16">
<Q id="1355697445" qualifier_id="213" value="5.1" />
<Q id="1474043617" qualifier_id="140" value="51.8" />
<Q id="491448639" qualifier_id="141" value="5.3" />
<Q id="1696680483" qualifier_id="56" value="Right" />
<Q id="1895256131" qualifier_id="212" value="19.7" />
</Event>
<Event id="834406608" event_id="12" type_id="1" period_id="1" min="0" sec="36" player_id="20492" team_id="43" outcome="1" x="52.1" y="5.3" timestamp="2011-08-21T16:01:15.992" last_modified="2011-08-21T16:01:20">
<Q id="1242718866" qualifier_id="1" />
<Q id="283189683" qualifier_id="141" value="17.6" />
<Q id="1681189612" qualifier_id="213" value="0.2" />
<Q id="193816719" qualifier_id="212" value="48.9" />
<Q id="1022848643" qualifier_id="56" value="Right" />
<Q id="431648281" qualifier_id="140" value="98.0" />
What I have tried:
I want to do the following:
1) Search for the 'Game' tag with the findall method.
2) Inside that 'Game', search for all the 'Event' tags, again with the findall method
3) Implement a for loop over the events, and keep only those whose type_id attribute is equal to 1
4) Within each event, extract the fields we need and put them in the lists.
5) In addition, use a nested loop inside the event loop to extract the qualifiers (the 'Q' tags) and add them to the lists.
6) With the lists created, pass it all to a dataframe.
Step 1 is something like:
et_game = tree.findall("Game")
and step 2 is the same code but with [].
I don't know how to do the rest.
Thanks for all your support.