Writing to a json file with Polish characters

Question

I am using a JSON file to pass data from an LDAP database on a Linux Samba AD DC on for further processing. I fetch the data with a script written in Python 3. My problem is that some fields contain Polish characters, which end up in the file as Unicode escape sequences; for example, "Bo\u017Cena \u017Ar\u00F3dlana" should appear as "Bożena źródlana". I would like the file to contain the decoded text, so that I can read it without having to work out which character is behind each escape code. Where in my code should I put something like a decoder, so that the whole file is saved already decoded, with the Polish special characters intact?

My Python 3 code:

#! /usr/bin/python3
# Interactive export script: reads its settings from searchlight.conf, asks
# which configured account to bind as, queries the LDAP server for user and
# computer objects, writes each result set to a JSON file and copies that
# file to a date-stamped archive path.
import os
import configparser
import getpass
import sys
import json
import ssl
import shutil
from ldap3 import Server, Connection, Tls, ALL_ATTRIBUTES
from datetime import date

# screen cleaner
os.system('clear')

# timestamp
# (today's date as a string; used as the prefix of the archive file names)
current_datetime = str(date.today())

# load main config files
main_conf_file = "/tmp/ldap-searchlight/config/searchlight.conf"
config = configparser.RawConfigParser()
config.read(main_conf_file)

# variables
# (path fragments are later joined by plain string concatenation, so the
# configured values have to carry their own separators)
main_path = config['GLOBAL']['main_path']
conf_path = config['GLOBAL']['conf_path']
data_path = config['GLOBAL']['data_path']
arch_patch = config['GLOBAL']['arch_patch']
json_users_file = config['USERS']['json_users_file']
json_cmptrs_file = config['CMPTRS']['json_cmptrs_file']

# ldap variables
ldap_base_dn = config['GLOBAL']["ldap-base-dn"]
ldap_users = config['USERS']['ldap-users']
ldap_cmptrs = config['CMPTRS']['ldap_cmptrs']
user1_name = config['USERS']['user1-name']
user2_name = config['USERS']['user2-name']
user3_name = config['USERS']['user3-name']
user4_name = config['USERS']['user4-name']
user5_name = config['USERS']['user5-name']

# user's choice
# (the menu shows the five configured display names; the answer selects the
# matching ldap-userN entry, and any other answer exits with status 1)
print(
    "Logujesz się jako:\n" +
    " wybierz [ 1 ] dla " + user1_name + "\n" +
    " wybierz [ 2 ] dla " + user2_name + "\n" +
    " wybierz [ 3 ] dla " + user3_name + "\n" +
    " wybierz [ 4 ] dla " + user4_name + "\n" +
    " wybierz [ 5 ] dla " + user5_name + "\n"
    )

input_name = input("WYBRANO: ")

if input_name == "1" :
    user = config["USERS"]["ldap-user1"]
elif input_name == "2" :
    user = config["USERS"]["ldap-user2"]
elif input_name == "3" :
    user = config["USERS"]["ldap-user3"]
elif input_name == "4" :
    user = config["USERS"]["ldap-user4"]
elif input_name == "5" :
    user = config["USERS"]["ldap-user5"]
else:
    print("Permission danied\n")
    sys.exit(1)

# The password is read without echo and only kept in memory.
password = getpass.getpass()

LDAP_HOST = config['GLOBAL']['ldap-host']
# Bind name built as "<selected user>,<users container>,<base DN>".
LDAP_USER = user +","+ ldap_users +","+ ldap_base_dn
LDAP_PASSWORD = password

# NOTE(review): ssl.CERT_NONE means the server certificate is not verified,
# and ssl.PROTOCOL_TLSv1 pins the old TLS 1.0 protocol - confirm both are
# intended for this environment.
tls_configuration = Tls(validate=ssl.CERT_NONE, version=ssl.PROTOCOL_TLSv1)

def ldap_server():
    """Return an ldap3 Server for LDAP_HOST using SSL and tls_configuration."""
    # NOTE(review): get_info is given ALL_ATTRIBUTES here - verify against the
    # ldap3 documentation that this is an accepted get_info value.
    return Server(LDAP_HOST, use_ssl=True, tls=tls_configuration, get_info=ALL_ATTRIBUTES)

def ldap_connection():
    """Return a Connection created with LDAP_USER / LDAP_PASSWORD and auto_bind=True."""
    # The trailing comma makes `server` a one-element tuple holding the Server,
    # not the Server object itself.
    server = ldap_server(),
    return Connection(server, user=LDAP_USER, password=LDAP_PASSWORD, auto_bind=True)

# ldap users
LDAP_BASE_DN = ldap_users +","+ ldap_base_dn
LDAP_OBJECT_FILTER = '(objectclass=user)'
user_attr_list=[ \
    'cn', \
    'sn', \
    'givenName', \
    'instanceType', \
    'whenCreated', \
    'displayName', \
    'uSNCreated', \
    'name', \
    'objectGUID', \
    'badPwdCount', \
    'codePage', \
    'countryCode', \
    'badPasswordTime', \
    'lastLogoff', \
    'lastLogon',\
    'primaryGroupID', \
    'objectSid', \
    'accountExpires', \
    'logonCount', \
    'sAMAccountName', \
    'sAMAccountType', \
    'userPrincipalName', \
    'objectCategory', \
    'pwdLastSet', \
    'userAccountControl', \
    'lastLogonTimestamp', \
    'whenChanged', \
    'uSNChanged', \
    'memberOf', \
    'distinguishedName'
    ]

conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=user_attr_list)

# output to json
json_users_data = main_path + data_path + json_users_file
# The search response is turned into a JSON string, parsed back into Python
# objects, and serialised again by json.dump below.
data = json.loads(conn.response_to_json())
# json.dump is called with its defaults, and ensure_ascii defaults to True, so
# every non-ASCII character is written as a \uXXXX escape. The file is also
# opened without an explicit encoding.
with open(json_users_data, 'w') as jsonfile:
    json.dump(data, jsonfile)

# copy data to archive
json_users_arch = main_path + arch_patch + current_datetime + "_" + json_users_file
shutil.copy2(json_users_data, json_users_arch)

# ldap computers
# (LDAP_BASE_DN, LDAP_OBJECT_FILTER, conn and data are rebound from here on;
# the user query above has to be finished before this point)
LDAP_BASE_DN = ldap_cmptrs +","+ ldap_base_dn
LDAP_OBJECT_FILTER = '(objectclass=computer)'
cmptr_attr_list=[ \
    'cn', \
    'instanceType', \
    'whenCreated', \
    'uSNCreated', \
    'name', \
    'objectGUID', \
    'badPwdCount', \
    'codePage', \
    'countryCode', \
    'badPasswordTime', \
    'lastLogoff', \
    'lastLogon',\
    'primaryGroupID', \
    'accountExpires', \
    'logonCount', \
    'sAMAccountName', \
    'sAMAccountType', \
    'objectCategory', \
    'pwdLastSet', \
    'userAccountControl', \
    'lastLogonTimestamp', \
    'whenChanged', \
    'uSNChanged', \
    'dNSHostName', \
    'isCriticalSystemObject', \
    'msDS-SupportedEncryptionTypes', \
    'operatingSystem', \
    'operatingSystemVersion', \
    'servicePrincipalName', \
    'distinguishedName'
    ]

conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=cmptr_attr_list)

# output to json
json_cmptrs_data = main_path + data_path + json_cmptrs_file
data = json.loads(conn.response_to_json())
# Same default json.dump call as for the users file, so non-ASCII characters
# are escaped here as well.
with open(json_cmptrs_data, 'w') as jsonfile:
    json.dump(data, jsonfile)

# copy data
json_cmptrs_arch = main_path + arch_patch + current_datetime + "_" + json_cmptrs_file
shutil.copy2(json_cmptrs_data, json_cmptrs_arch)

# Summary of the files written in this run.
print("USERS:")
print("Data file created at: " + json_users_data)
print("Archive file created at: " + json_users_arch)
print("------------------------------------------------------------------------------")
print("COMPUTERS")
print("Data file created at: " + json_cmptrs_data)
print("Archive file created at: " + json_cmptrs_arch)

sys.exit(0)

# exit(0) -> OK
# exit(1) -> FAULT

My JSON output looks like this:

{"entries": [ {"attributes": { "accountExpires": ["9223372036854775807"], "badPasswordTime": [], "badPwdCount": [], "cn": ["Bo\u017Cena \u017Ar\u00F3dlana"], "codePage": ["0"], "countryCode": ["0"], "displayName": ["Bo\u017Cena \u017Ar\u00F3dlana"], "distinguishedName": ["CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"], "givenName": ["Bo\u017Cena"], "instanceType": ["4"], "lastLogoff": [], "lastLogon": [], "lastLogonTimestamp": ["132978476924537530"], "logonCount": [], "memberOf": [], "name": ["Bo\u017Cena \u017Ar\u00F3dlana"], "objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"], "objectGUID": [ { "encoded": "AFvzBO0T+Ey9TL3RHGtghQ==", "encoding": "base64" } ], "objectSid": [ { "encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDIE8AAA==", "encoding": "base64" } ], "primaryGroupID": ["513"], "pwdLastSet": ["132979783101549910"], "sAMAccountName": ["pjarmolowicz"], "sAMAccountType": ["805306368"], "sn": ["\u017Ar\u00F3dlana"], "uSNChanged": ["4986"], "uSNCreated": ["4986"], "userAccountControl": ["512"], "userPrincipalName": ["[email protected]"], "whenChanged": ["20220525185150.0Z"], "whenCreated": ["20211125124337.0Z"]}, "dn": "CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local" }, {"attributes": { "accountExpires": ["9223372036854775807"], "badPasswordTime": ["133128872888506790"], "badPwdCount": ["0"], "cn": ["Jan Kowalski"], "codePage": ["0"], "countryCode": ["0"], "displayName": ["Jan Kowalski"], "distinguishedName": ["CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"], "givenName": ["Jan"], "instanceType": ["4"], "lastLogoff": [], "lastLogon": ["133129921828641420"], "lastLogonTimestamp": ["133125345565644950"], "logonCount": ["55"], "memberOf": [], "name": ["Jan Kowalski"], "objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"], "objectGUID": [ { "encoded": "AScnTASpKUun4oadMC5Qxg==", "encoding": "base64" } ], 
"objectSid": [ { "encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDngQAAA==", "encoding": "base64" } ], "primaryGroupID": ["513"], "pwdLastSet": ["131577266641617910"], "sAMAccountName": ["jkowalski"], "sAMAccountType": ["805306368"], "sn": ["Kowalski"], "uSNChanged": ["149609"], "uSNCreated": ["5397"], "userAccountControl": ["512"], "userPrincipalName": ["[email protected]"], "whenChanged": ["20221110061556.0Z"], "whenCreated": ["20130610115016.0Z"]}, "dn": "CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local" } ] }

Mark Tolonen · Accepted Answer · 2022-11-16 18:07:54Z

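By default, json.dump writes every non-ASCII character as a \uXXXX escape (ensure_ascii=True). Use the following to suppress the Unicode escape codes and write the data UTF-8-encoded, so the file can hold the non-ASCII characters. Apply the same change to both json.dump calls in your script (the users file and the computers file).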

# encoding='utf8' lets the file store the non-ASCII characters;
# ensure_ascii=False stops json.dump from replacing them with \uXXXX escapes.
with open(json_cmptrs_data, 'w', encoding='utf8') as jsonfile:
    json.dump(data, jsonfile, ensure_ascii=False)

Working example:

import json

# The literal is written with \uXXXX escapes, but Python decodes them while
# parsing the string, so the dict already holds the real characters.
data = {"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"]}

# ensure_ascii=False makes json.dump emit non-ASCII characters as they are;
# the explicit UTF-8 encoding lets the file store them.
with open('output.json', mode='w', encoding='utf8') as out:
    json.dump(data, fp=out, ensure_ascii=False)

output.json (UTF-8-encoded):

{"cn": ["Bożena źródlana"]}

Collectives™ on Stack Overflow

Writing to a json file with Polish characters

1 Answer 1

Comments

Hot Network Questions