How can i convert unicode characters into text? Preferably without importing any library. Input is a string from a list.
Sample input:
\\u006A\\u0061\\u0064\\u0072\\u006F expected output:
jadro (+ should not be a bytes class at the end) a list i am translating:
["I[0][1][0][0] = '\\u006A\\u0061\\u0064\\u0072\\u006F';", "I[1][1][0][0] = '\\u0047\\u0075\\u0074\\u0065\\u006E\\u0062\\u0065\\u0072\\u0067\\u006F\\u0076\\u0061';", "I[2][1][0][0] = '\\u0070\\u006C\\u00E1\\u0161\\u0165';", "I[3][1][0][0] = '\\u0061\\u0073\\u0074\\u0065\\u006E\\u006F\\u0073\\u0066\\u00E9\\u0072\\u0061';", "I[4][1][0][0] = '\\u004D\\u006F\\u0068\\u006F\\u0072\\u006F\\u0076\\u0069\\u010D\\u0069\\u010D\\u006F\\u0076\\u0061';", "I[5][1][0][0] = '\\u006B\\u00F4\\u0072\\u0061';", "I[6][1][0][0] = '\\u0050\\u0065\\u0076\\u006E\\u0069\\u006E\\u0073\\u006B\\u00E1';", "I[7][1][0][0] = '\\u0067\\u0072\\u0061\\u006E\\u0069\\u0074\\u006F\\u0076\\u00E1';", "I[8][1][0][0] = '\\u0034\\u002C\\u0035';", "I[9][1][0][0] = '\\u0070\\u0072\\u0076\\u006F\\u0068\\u00F4\\u0072';", "I[10][1][0][0] = '\\u0052\\u006F\\u0064\\u0069\\u006E\\u0069\\u0061';", "I[11][1][0][0] = '\\u0050\\u0061\\u006E\\u0067\\u0065\\u0061';", "I[12][1][0][0] = '\\u0065\\u0075\\u0072\\u00E1\\u007A\\u0069\\u006A\\u0073\\u006B\\u00E1';", "I[13][1][0][0] = '\\u0070\\u0061\\u0063\\u0069\\u0066\\u0069\\u0063\\u006B\\u00E1';", "I[14][1][0][0] = '\\u0041\\u0074\\u006C\\u0061\\u006E\\u0074\\u0069\\u0063\\u006B\\u00E1';", "I[15][1][0][0] = '\\u0069\\u006E\\u0064\\u006F';", "I[16][1][0][0] = '\\u0061\\u0066\\u0072\\u0069\\u0063\\u006B\\u00E1';", "I[17][1][0][0] = '\\u0061\\u006E\\u0074\\u0061\\u0072\\u006B\\u0074\\u0069\\u0063\\u006B\\u00E1';", "I[18][1][0][0] = '\\u0076\\u0072\\u00E1\\u0073\\u006E\\u0065\\u006E\\u0069\\u0065';", "I[19][1][0][0] = '\\u0068\\u0072\\u0061\\u0073\\u0165';", "I[20][1][0][0] = '\\u0070\\u0072\\u0069\\u0065\\u006B\\u006F\\u0070\\u006F\\u0076\\u00E1\\u0020\\u0070\\u0072\\u0065\\u0070\\u0061\\u0064\\u006C\\u0069\\u006E\\u0061';", "I[21][1][0][0] = '\\u006D\\u0061\\u0067\\u006D\\u0061\\u0074\\u0069\\u0063\\u006B\\u00FD\\u0020\\u006B\\u0072\\u0062';", "I[22][1][0][0] = '\\u004B\\u0061\\u006C\\u0064\\u0065\\u0072\\u0061';", "I[23][1][0][0] = '\\u0056\\u0065\\u007A\\u0075\\u0076';", "I[24][1][0][0] = '\\u0048\\u0061\\u0076\\u0061\\u006A\\u0073\\u006B\\u00FD\\u0063\\u0068';", "I[25][1][0][0] = '\\u0046\\u0075\\u0064\\u017E\\u0069';", "I[26][1][0][0] = '\\u006B\\u0079\\u0073\\u006C\\u00E9';", "I[27][1][0][0] = '\\u0062\\u0061\\u007A\\u0061\\u006C\\u0074\\u0069\\u0063\\u006B\\u00E1';", "I[28][1][0][0] = '\\u0052\\u0069\\u0063\\u0068\\u0074\\u0065\\u0072\\u006F\\u0076\\u006F\\u0075';", "I[29][1][0][0] = '\\u0073\\u0065\\u0069\\u007A\\u006D\\u006F\\u0067\\u0072\\u0061\\u0066';"] code for saving the translated and crrupted result:
f = open("dump.TXT","w+") f.close() f = open("dump.TXT","a") for i in res: unic = i.split("'")[-2] trans = bytes(unic,"utf-8").decode('unicode-escape') + "\n" trans = trans.encode('utf-8').decode('utf8') f.write(trans) Full code:
import re with open("org22.htm","r") as f: data = f.read() start = "I = new Array();" end = "State = new Array();" s = data array_r = s[s.find(start)+len(start):s.rfind(end)] array_r = array_r.split("\n") count, res = 0, [] for line in array_r: compare = "I["+str(count)+"][1][0][0] =" if compare in line: res.append(line) count += 1 f = open("dump.TXT","w+") f.close() f = open("dump.TXT","a") for i in res: unic = i.split("'")[-2] trans = bytes(unic,"utf-8").decode('unicode-escape') + "\n" trans = trans.encode('utf-8').decode('utf8') f.write(trans) f.close()