0

I have the following HTML that I need to parse. How can I access all items of window.WIZ_global_data as a normal dictionary? Is window.WIZ_global_data also an attribute of the script tag?

I am able to access the script tag through soup.head.script

<!DOCTYPE html> <html dir="ltr" lang="no"> <head> <base href="https://consent.google.com/"/> <meta content="origin" name="referrer"/> <link href="https://consent.google.com/m" rel="canonical"/> <meta content="initial-scale=1,minimum-scale=1,maximum-scale=5,width=device-width" name="viewport"/> <link href="//www.google.com/favicon.ico" rel="shortcut icon"/> <script data-id="_gd" nonce="QI9+XeJ9TwHcCspiFyqIIQ"> window.WIZ_global_data = {"DndLYb":"","DpimGf":false,"EP1ykd":["/_/*"],"FdrFJe":"2318287307032857584","GVlsxf":"www.google.com","Im6cmf":"/_/ConsentUi","LVIXXb":1,"LoQv7e":false,"MT7f9b":[],"Mypbod":"https://www.googleapis.com/reauth","PYFuDc":"DUMMY_X_CLIENT_DATA_WIZ_GLOBAL_KEY_DO_NOT_USE","QrtxK":"","R6pIad":"%.@.]","S06Grb":"","TTHqvb":"https://kidsmanagement-pa.googleapis.com","Yllh3e":"%[email protected],170695567,503968613]","cfb2h":"boq_identityfrontenduiserver_20211111.08_p0","eNnkwf":"1637316536","ejMLCd":"DUMMY_X_GEO_WIZ_GLOBAL_KEY_DO_NOT_USE","eptZe":"/_/ConsentUi/","fPDxwd":[1763433,1772879,1782333,45814370],"gGcLoe":false,"ksKYzf":"%[email 
protected],true,false,false,null,false,null,\"\",null,[[\"RelayState\",\"SAMLRequest\",\"SigAlg\",\"Signature\",\"TL\",\"af\",\"alwf\",\"btmpl\",\"c\",\"cbflow\",\"cd\",\"checkConnection\",\"checkedDomains\",\"client_id\",\"continue\",\"cpbps\",\"dsh\",\"emr\",\"faa\",\"flowEntry\",\"flowName\",\"followup\",\"forceOsidOriginForTest\",\"gae\",\"go\",\"hd\",\"hide_status_bar\",\"hl\",\"idvToken\",\"ifkv\",\"ifr\",\"ignoreShadow\",\"kdi\",\"kid_continue\",\"ltmpl\",\"marl\",\"migrate\",\"multilogin\",\"next\",\"oauth\",\"osid\",\"pageId\",\"passwdsession\",\"platform_variant\",\"pstMsg\",\"rart\",\"rip\",\"rm\",\"sarp\",\"scc\",\"scope\",\"secure\",\"sendvemail\",\"service\",\"session\",\"skipShadow\",\"skipvpage\",\"source\",\"ss\",\"ss_mode\",\"sspa\",\"t\",\"target\",\"theme\",\"ul\"]],null,null,[],[[null,null,\"https://accounts.google.com/AccountChooser?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"],null,[null,null,\"https://accounts.google.com/signin/recovery?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"],[null,null,\"https://accounts.google.com/restart?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"]],null,false,[]]","nQyAE":{"vEMF5e":"false","LlkYkf":"false","WBBR0d":"true","ViN5Xd":"false","p6p11":"true","EoymAc":"false","FbHgvb":"false","P1ceCf":"false","tBSlob":"false","XqMd3":"false","Ee3RQb":"false","a76Enc":"false","G25Msb":"false","VL5wad":"false","XSgnJf":"false","APYQvd":"false","rONKDd":"false","Dpi3Gf":"false","lttELb":"
false","YDrknb":"false","kQrwQd":"false","ihOA2":"false","Ozjmee":"false"},"qwAQke":"ConsentUi","qymVe":"YNnh99mAz0-TXbG5yo-NTKQ-0l4","rtQCxc":-60,"thykhd":"AKH95euIF6P3sAglrRifxgEcfcB9vtI_XXJFaZ5Oty53KFviM2VkkPyRfmncVF-cgLGCK0LYtfquTSFWlA3cjJWXXNjjUICQ5v3brxMomscQEgs9760XyDgPgf0\u003d","unNRMb":"AKJVzcpKsW1gHA6-l3WGdp1MQsU_eTjL-Ufc5L6-PjxIndj8D2t6PFy3SRwl7yc0EXpFbqSSCh50","vAyiz":"ChUI/o793Lf0vtdFEK6ek/ubmfGdvgE\u003d","w2btAe":"%[email protected],null,\"\",false,null,null,true,false]","zChJod":"%.@.]"}; </script> 

EDIT: I can also use soup.head.script.text to get the content as a string, but I would like to get it as a dictionary, in a more Pythonic way.

1 Answer 1

1

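window.WIZ_global_data is not an attribute of the script tag (its only attributes here are data-id and nonce); it is JavaScript text inside the tag. You can use the re module to search for the variable assignment and extract its value, then pass that value to json.loads() to access it like a dictionary: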

json.loads(re.search(r'window.WIZ_global_data = ({.*})', html).group(1)) 

Example

html=r''' <!DOCTYPE html> <html dir="ltr" lang="no"> <head> <base href="https://consent.google.com/"/> <meta content="origin" name="referrer"/> <link href="https://consent.google.com/m" rel="canonical"/> <meta content="initial-scale=1,minimum-scale=1,maximum-scale=5,width=device-width" name="viewport"/> <link href="//www.google.com/favicon.ico" rel="shortcut icon"/> <script data-id="_gd" nonce="QI9+XeJ9TwHcCspiFyqIIQ"> window.WIZ_global_data = {"DndLYb":"","DpimGf":false,"EP1ykd":["/_/*"],"FdrFJe":"2318287307032857584","GVlsxf":"www.google.com","Im6cmf":"/_/ConsentUi","LVIXXb":1,"LoQv7e":false,"MT7f9b":[],"Mypbod":"https://www.googleapis.com/reauth","PYFuDc":"DUMMY_X_CLIENT_DATA_WIZ_GLOBAL_KEY_DO_NOT_USE","QrtxK":"","R6pIad":"%.@.]","S06Grb":"","TTHqvb":"https://kidsmanagement-pa.googleapis.com","Yllh3e":"%[email protected],170695567,503968613]","cfb2h":"boq_identityfrontenduiserver_20211111.08_p0","eNnkwf":"1637316536","ejMLCd":"DUMMY_X_GEO_WIZ_GLOBAL_KEY_DO_NOT_USE","eptZe":"/_/ConsentUi/","fPDxwd":[1763433,1772879,1782333,45814370],"gGcLoe":false,"ksKYzf":"%[email 
protected],true,false,false,null,false,null,\"\",null,[[\"RelayState\",\"SAMLRequest\",\"SigAlg\",\"Signature\",\"TL\",\"af\",\"alwf\",\"btmpl\",\"c\",\"cbflow\",\"cd\",\"checkConnection\",\"checkedDomains\",\"client_id\",\"continue\",\"cpbps\",\"dsh\",\"emr\",\"faa\",\"flowEntry\",\"flowName\",\"followup\",\"forceOsidOriginForTest\",\"gae\",\"go\",\"hd\",\"hide_status_bar\",\"hl\",\"idvToken\",\"ifkv\",\"ifr\",\"ignoreShadow\",\"kdi\",\"kid_continue\",\"ltmpl\",\"marl\",\"migrate\",\"multilogin\",\"next\",\"oauth\",\"osid\",\"pageId\",\"passwdsession\",\"platform_variant\",\"pstMsg\",\"rart\",\"rip\",\"rm\",\"sarp\",\"scc\",\"scope\",\"secure\",\"sendvemail\",\"service\",\"session\",\"skipShadow\",\"skipvpage\",\"source\",\"ss\",\"ss_mode\",\"sspa\",\"t\",\"target\",\"theme\",\"ul\"]],null,null,[],[[null,null,\"https://accounts.google.com/AccountChooser?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"],null,[null,null,\"https://accounts.google.com/signin/recovery?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"],[null,null,\"https://accounts.google.com/restart?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"]],null,false,[]]","nQyAE":{"vEMF5e":"false","LlkYkf":"false","WBBR0d":"true","ViN5Xd":"false","p6p11":"true","EoymAc":"false","FbHgvb":"false","P1ceCf":"false","tBSlob":"false","XqMd3":"false","Ee3RQb":"false","a76Enc":"false","G25Msb":"false","VL5wad":"false","XSgnJf":"false","APYQvd":"false","rONKDd":"false","Dpi3Gf":"false","lttELb":"
false","YDrknb":"false","kQrwQd":"false","ihOA2":"false","Ozjmee":"false"},"qwAQke":"ConsentUi","qymVe":"YNnh99mAz0-TXbG5yo-NTKQ-0l4","rtQCxc":-60,"thykhd":"AKH95euIF6P3sAglrRifxgEcfcB9vtI_XXJFaZ5Oty53KFviM2VkkPyRfmncVF-cgLGCK0LYtfquTSFWlA3cjJWXXNjjUICQ5v3brxMomscQEgs9760XyDgPgf0\u003d","unNRMb":"AKJVzcpKsW1gHA6-l3WGdp1MQsU_eTjL-Ufc5L6-PjxIndj8D2t6PFy3SRwl7yc0EXpFbqSSCh50","vAyiz":"ChUI/o793Lf0vtdFEK6ek/ubmfGdvgE\u003d","w2btAe":"%[email protected],null,\"\",false,null,null,true,false]","zChJod":"%.@.]"}; </script> ''' import json,re json.loads(re.search(r'window.WIZ_global_data = ({.*})', html).group(1)) 
Sign up to request clarification or add additional context in comments.

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.