0

I am using a JSON file to pass data from an LDAP database (Samba AD DC on Linux) on for further processing. I fetch the data with a script written in Python 3. My problem is that some fields contain Polish characters that are written to the file as Unicode escape sequences; for example, "Bo\u017Cena \u017Ar\u00F3dlana" should be "Bożena źródlana". I would like the file to contain the already-decoded text, so that I can read it without having to work out which character is behind each escape code. Where in my code should I put something like a decoder so that the whole file is saved decoded, with the Polish special characters in it?

my python3 code:

#! /usr/bin/python3
"""Dump LDAP user and computer entries to JSON files and archive dated copies.

Paths, LDAP locations and account names are read from searchlight.conf. The
bind account is chosen interactively and its password is read with getpass.
"""
import configparser
import getpass
import json
import os
import shutil
import ssl
import sys
from datetime import date

from ldap3 import ALL_ATTRIBUTES, Connection, Server, Tls

# screen cleaner
os.system("clear")

# timestamp (ISO date) used as the prefix of the archive file names
current_datetime = date.today().isoformat()

# load main config file
main_conf_file = "/tmp/ldap-searchlight/config/searchlight.conf"
config = configparser.RawConfigParser()
config.read(main_conf_file)

# path and file-name settings
global_cfg = config["GLOBAL"]
main_path = global_cfg["main_path"]
conf_path = global_cfg["conf_path"]
data_path = global_cfg["data_path"]
arch_patch = global_cfg["arch_patch"]
json_users_file = config["USERS"]["json_users_file"]
json_cmptrs_file = config["CMPTRS"]["json_cmptrs_file"]

# ldap settings
ldap_base_dn = global_cfg["ldap-base-dn"]
users_cfg = config["USERS"]
ldap_users = users_cfg["ldap-users"]
ldap_cmptrs = config["CMPTRS"]["ldap_cmptrs"]
user1_name = users_cfg["user1-name"]
user2_name = users_cfg["user2-name"]
user3_name = users_cfg["user3-name"]
user4_name = users_cfg["user4-name"]
user5_name = users_cfg["user5-name"]

# user's choice: show the numbered menu of display names
display_names = (user1_name, user2_name, user3_name, user4_name, user5_name)
menu_lines = "".join(
    f" wybierz [ {number} ] dla {name}\n"
    for number, name in enumerate(display_names, start=1)
)
print("Logujesz się jako:\n" + menu_lines)

input_name = input("WYBRANO: ")
if input_name not in ("1", "2", "3", "4", "5"):
    print("Permission danied\n")
    sys.exit(1)
# The accepted answer selects the matching "ldap-userN" config entry.
user = users_cfg["ldap-user" + input_name]

password = getpass.getpass()

LDAP_HOST = global_cfg["ldap-host"]
LDAP_USER = ",".join((user, ldap_users, ldap_base_dn))
LDAP_PASSWORD = password

# NOTE(review): certificate validation is disabled and TLSv1 is requested.
tls_configuration = Tls(validate=ssl.CERT_NONE, version=ssl.PROTOCOL_TLSv1)


def ldap_server():
    """Return a Server object for LDAP_HOST using SSL and the TLS settings."""
    # NOTE(review): ALL_ATTRIBUTES is passed as get_info, as in the original;
    # confirm this is the intended value for that parameter.
    return Server(
        LDAP_HOST,
        use_ssl=True,
        tls=tls_configuration,
        get_info=ALL_ATTRIBUTES,
    )


def ldap_connection():
    """Return an auto-bound Connection for the selected account."""
    # NOTE(review): the server is handed over as a one-element tuple, exactly
    # as the original did (trailing comma) — confirm this is intended.
    server = (ldap_server(),)
    return Connection(
        server,
        user=LDAP_USER,
        password=LDAP_PASSWORD,
        auto_bind=True,
    )


def _search_to_json(base_dn, object_filter, attributes, target_path):
    """Run one search on a fresh connection and write the response as JSON."""
    connection = ldap_connection()
    connection.search(base_dn, object_filter, attributes=attributes)
    response = json.loads(connection.response_to_json())
    with open(target_path, "w") as out:
        json.dump(response, out)


# ldap users
LDAP_BASE_DN = ldap_users + "," + ldap_base_dn
LDAP_OBJECT_FILTER = "(objectclass=user)"
user_attr_list = [
    "cn",
    "sn",
    "givenName",
    "instanceType",
    "whenCreated",
    "displayName",
    "uSNCreated",
    "name",
    "objectGUID",
    "badPwdCount",
    "codePage",
    "countryCode",
    "badPasswordTime",
    "lastLogoff",
    "lastLogon",
    "primaryGroupID",
    "objectSid",
    "accountExpires",
    "logonCount",
    "sAMAccountName",
    "sAMAccountType",
    "userPrincipalName",
    "objectCategory",
    "pwdLastSet",
    "userAccountControl",
    "lastLogonTimestamp",
    "whenChanged",
    "uSNChanged",
    "memberOf",
    "distinguishedName",
]

# output to json
json_users_data = main_path + data_path + json_users_file
_search_to_json(LDAP_BASE_DN, LDAP_OBJECT_FILTER, user_attr_list, json_users_data)

# copy data to archive
json_users_arch = main_path + arch_patch + current_datetime + "_" + json_users_file
shutil.copy2(json_users_data, json_users_arch)

# ldap computers
LDAP_BASE_DN = ldap_cmptrs + "," + ldap_base_dn
LDAP_OBJECT_FILTER = "(objectclass=computer)"
cmptr_attr_list = [
    "cn",
    "instanceType",
    "whenCreated",
    "uSNCreated",
    "name",
    "objectGUID",
    "badPwdCount",
    "codePage",
    "countryCode",
    "badPasswordTime",
    "lastLogoff",
    "lastLogon",
    "primaryGroupID",
    "accountExpires",
    "logonCount",
    "sAMAccountName",
    "sAMAccountType",
    "objectCategory",
    "pwdLastSet",
    "userAccountControl",
    "lastLogonTimestamp",
    "whenChanged",
    "uSNChanged",
    "dNSHostName",
    "isCriticalSystemObject",
    "msDS-SupportedEncryptionTypes",
    "operatingSystem",
    "operatingSystemVersion",
    "servicePrincipalName",
    "distinguishedName",
]

# output to json
json_cmptrs_data = main_path + data_path + json_cmptrs_file
_search_to_json(LDAP_BASE_DN, LDAP_OBJECT_FILTER, cmptr_attr_list, json_cmptrs_data)

# copy data
json_cmptrs_arch = main_path + arch_patch + current_datetime + "_" + json_cmptrs_file
shutil.copy2(json_cmptrs_data, json_cmptrs_arch)

# summary of the files that were written
print("USERS:")
print("Data file created at: " + json_users_data)
print("Archive file created at: " + json_users_arch)
print("------------------------------------------------------------------------------")
print("COMPUTERS")
print("Data file created at: " + json_cmptrs_data)
print("Archive file created at: " + json_cmptrs_arch)

sys.exit(0)

# exit(0) -> OK
# exit(1) -> FAULT

My JSON output looks like this:

{"entries": [ {"attributes": { "accountExpires": ["9223372036854775807"], "badPasswordTime": [], "badPwdCount": [], "cn": ["Bo\u017Cena \u017Ar\u00F3dlana"], "codePage": ["0"], "countryCode": ["0"], "displayName": ["Bo\u017Cena \u017Ar\u00F3dlana"], "distinguishedName": ["CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"], "givenName": ["Bo\u017Cena"], "instanceType": ["4"], "lastLogoff": [], "lastLogon": [], "lastLogonTimestamp": ["132978476924537530"], "logonCount": [], "memberOf": [], "name": ["Bo\u017Cena \u017Ar\u00F3dlana"], "objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"], "objectGUID": [ { "encoded": "AFvzBO0T+Ey9TL3RHGtghQ==", "encoding": "base64" } ], "objectSid": [ { "encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDIE8AAA==", "encoding": "base64" } ], "primaryGroupID": ["513"], "pwdLastSet": ["132979783101549910"], "sAMAccountName": ["pjarmolowicz"], "sAMAccountType": ["805306368"], "sn": ["\u017Ar\u00F3dlana"], "uSNChanged": ["4986"], "uSNCreated": ["4986"], "userAccountControl": ["512"], "userPrincipalName": ["[email protected]"], "whenChanged": ["20220525185150.0Z"], "whenCreated": ["20211125124337.0Z"]}, "dn": "CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local" }, {"attributes": { "accountExpires": ["9223372036854775807"], "badPasswordTime": ["133128872888506790"], "badPwdCount": ["0"], "cn": ["Jan Kowalski"], "codePage": ["0"], "countryCode": ["0"], "displayName": ["Jan Kowalski"], "distinguishedName": ["CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"], "givenName": ["Jan"], "instanceType": ["4"], "lastLogoff": [], "lastLogon": ["133129921828641420"], "lastLogonTimestamp": ["133125345565644950"], "logonCount": ["55"], "memberOf": [], "name": ["Jan Kowalski"], "objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"], "objectGUID": [ { "encoded": "AScnTASpKUun4oadMC5Qxg==", "encoding": "base64" } ], 
"objectSid": [ { "encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDngQAAA==", "encoding": "base64" } ], "primaryGroupID": ["513"], "pwdLastSet": ["131577266641617910"], "sAMAccountName": ["jkowalski"], "sAMAccountType": ["805306368"], "sn": ["Kowalski"], "uSNChanged": ["149609"], "uSNCreated": ["5397"], "userAccountControl": ["512"], "userPrincipalName": ["[email protected]"], "whenChanged": ["20221110061556.0Z"], "whenCreated": ["20130610115016.0Z"]}, "dn": "CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local" } ] } 
0

1 Answer 1

1

Use the following to suppress Unicode escape codes and write the data UTF-8-encoded to support non-ASCII characters.

# Open the target as UTF-8 text and tell json.dump not to escape non-ASCII
# characters, so they are written to the file as-is.
with open(json_cmptrs_data, 'w', encoding='utf8') as out:
    json.dump(data, out, ensure_ascii=False)

Working example:

import json

# Sample record; the Polish characters are spelled as escapes in the source.
data = {"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"]}

# UTF-8 output file, with non-ASCII characters written unescaped.
with open('output.json', 'w', encoding='utf8') as out:
    json.dump(data, out, ensure_ascii=False)

output.json (UTF-8-encoded):

{"cn": ["Bożena źródlana"]} 
Sign up to request clarification or add additional context in comments.

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.