pyparsing has some helpful constructs for pulling data from HTML pages, and the results tend to be self-structuring and self-naming (if you set up the parser/scanner correctly). Here is a pyparsing solution for this particular web page:
# Scrape team/player listings out of an HTML page with pyparsing.
# Expects the page's HTML text in a variable named `recentsource`, which must
# be defined before the final loop runs.
from pyparsing import *  # wildcard import kept as in the original snippet

# --- helper for stripping HTML tags out of a cell's text --------------------
# anyTag / anyClose match any opening / closing tag.
anyTag, anyClose = makeHTMLTags(Word(alphas, alphanums + ":_"))
# NOTE: this attaches a parse action to pyparsing's shared commonHTMLEntity
# expression, so the setting is visible to any other user of that object.
commonHTMLEntity.setParseAction(replaceHTMLEntity)
# Built once here instead of on every stripHTML() call.
_htmlStripper = commonHTMLEntity | Suppress(anyTag | anyClose)


def stripHTML(tokens):
    """Parse action: join *tokens* and return the text with tags suppressed.

    HTML entities matched by commonHTMLEntity are passed through
    replaceHTMLEntity; any opening/closing tags are dropped.
    """
    return _htmlStripper.transformString(''.join(tokens))


# --- pyparsing expressions for HTML opening and closing tags ----------------
# (suppress all from results, as there is no interesting content in the tags
# or their attributes)
h3, h3End = map(Suppress, makeHTMLTags("h3"))
table, tableEnd = map(Suppress, makeHTMLTags("table"))
tr, trEnd = map(Suppress, makeHTMLTags("tr"))
th, thEnd = map(Suppress, makeHTMLTags("th"))
td, tdEnd = map(Suppress, makeHTMLTags("td"))

# nothing interesting in column headings - parse them, but suppress the results
colHeading = Suppress(th + SkipTo(thEnd) + thEnd)


def colData(name=''):
    """Return an expression matching one <td>...</td> data cell.

    *name* is passed to the SkipTo expression as its results name; the
    default '' is what the unnamed columns below use.
    """
    return td + SkipTo(tdEnd)(name) + tdEnd


# One player row: two ignored cells, then username, role (HTML stripped),
# and network ID.
playerListing = Group(
    tr
    + colData()
    + colData()
    + colData("username")
    + colData().setParseAction(stripHTML)("role")
    + colData("networkID")
    + trEnd
)

# One team: an <h3> heading whose text ends in "Match Players", followed by a
# table with a five-column header row and one or more player rows.
# failOn=h3 makes the SkipTo fail if another <h3> opening tag is reached before
# "Match Players</h3>" is found.
teamListing = (
    h3
    + ungroup(SkipTo("Match Players" + h3End, failOn=h3))("name")
    + "Match Players"
    + h3End
    + table
    + tr
    + colHeading * 5
    + trEnd
    + Group(OneOrMore(playerListing))("players")
)

for team in teamListing.searchString(recentsource):
    # use this to print out names and structures of results
    # print(team.dump())
    # Single-argument print(...) calls give the same output on Python 2 and 3.
    print("Team: %s" % (team.name,))
    for player in team.players:
        print("- %s: %s (%s)" % (player.role, player.username, player.networkID))
        # or like this
        # print("- %(role)s: %(username)s (%(networkID)s)" % player)
    print("")  # blank line between teams
Prints:
Team: Team CrYpToN Gaming EU
- Leader: CrYpToN_Crossy (CrYpToN_Crossy)
- Captain: Juddanorty (CrYpToN_Judd)
- Member: BLaZe_Elfy (CrYpToN_Elfy)

Team: eXCeL™
- Leader: Caaahil (Caaahil)
- Member: eSportsmanship (eSportsmanship)
- Member: KillBoy-NL (iClown-x)