As the title suggests, I'm trying to compare one dictionary with another. The goal is to compare the bigrams stored in the first dictionary with the bigrams stored in the second.
I'm using a console application to load a file. The application reads this file and adds every pair of adjacent letters as a key (for instance, "hey" becomes "he" and "ey"); the value is how often that bigram occurs, as a percentage (in this case 50% for "he" and 50% for "ey"). I've got this part working.
Now the tricky part is to compare the recently added bigrams with a second file (text). I'm adding the second file to another dictionary. So now I have 2 dictionaries (1 for the first file and the second for the second file).
Now I want to compare those 2 dictionaries and see which bigrams are in one but not the other (for instance, the first dictionary has he, ey; the second dictionary has he, ey, yg, gu, uy, ys). So yg, gu, uy, ys should come up.
Here's my code:
// ---------------------------------------------------------------------------
// Shared working state.
//   d            : bigram -> occurrence count for the most recently loaded file
//   totaleNGram  : total number of bigrams counted into d
//   dNL .. dIT   : per-language profiles, bigram -> percentage of all bigrams
//   dConverted*  : key lists derived from the dictionaries above (option 7)
// The remaining fields are scratch buffers used while reading a file.
// ---------------------------------------------------------------------------
static StringBuilder newText = new StringBuilder();
static Dictionary<string, int> d = new Dictionary<string, int>();
static Dictionary<string, double> dNL = new Dictionary<string, double>();
static Dictionary<string, double> dDE = new Dictionary<string, double>();
static Dictionary<string, double> dFR = new Dictionary<string, double>();
static Dictionary<string, double> dSP = new Dictionary<string, double>();
static Dictionary<string, double> dEN = new Dictionary<string, double>();
static Dictionary<string, double> dIT = new Dictionary<string, double>();
static List<string> fileList = new List<string>();
static List<string> dConverted = new List<string>();
static List<string> dConvertedNL = new List<string>();
static List<string> dConvertedDE = new List<string>();
static List<string> dConvertedFR = new List<string>();
static List<string> dConvertedSP = new List<string>();
static List<string> dConvertedEN = new List<string>();
static List<string> dConvertedIT = new List<string>();
static string text;
static string languageChosen;
static string gecombineerdeLetters;
static int value = 0;
static int totaleNGram = 0;
static string[] fileRead;
static char[] tekst;

/// <summary>
/// Interactive menu loop. Options 1-6 load a text file as the profile of a
/// known language, option 7 loads a text of unknown language, and "exit"
/// terminates the process.
/// </summary>
[STAThread]
static void Main(string[] args)
{
    while (true)
    {
        Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:");
        Console.WriteLine("press 1 - 7 / enter 'exit' to close the program");
        Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language");
        Console.WriteLine();

        languageChosen = Console.ReadLine();

        switch (languageChosen)
        {
            case "1":
            case "2":
            case "3":
            case "4":
            case "5":
            case "6":
                calculateChanceKnown();
                break;

            case "7":
                OpenReadFile();

                // Snapshot the bigram keys of the unknown text and of every
                // language profile so they can be compared as plain lists.
                dConverted = d.Keys.ToList();
                dConvertedNL = dNL.Keys.ToList();
                dConvertedDE = dDE.Keys.ToList();
                dConvertedFR = dFR.Keys.ToList();
                dConvertedSP = dSP.Keys.ToList();
                dConvertedEN = dEN.Keys.ToList();
                dConvertedIT = dIT.Keys.ToList();

                // TODO: compare dConverted against each language list to see
                // which has the most similarities, e.g.
                // dConverted.Except(dConvertedNL) for bigrams missing from Dutch.
                break;

            case null:   // stdin was closed; without this the loop would spin forever
            case "exit":
                Environment.Exit(0);
                break;

            default:
                Console.WriteLine("Wrong input, try again!");
                break;
        }

        Console.WriteLine();
    }
}

/// <summary>
/// Loads a file via <see cref="OpenReadFile"/> and stores its bigram
/// percentages as the profile of the language selected in
/// <c>languageChosen</c> ("1" to "6"). Any other selection is ignored.
/// </summary>
static void calculateChanceKnown()
{
    OpenReadFile();

    // Nothing was counted (dialog cancelled or file without bigrams):
    // keep whatever profile the language already had.
    if (d.Count == 0)
    {
        return;
    }

    switch (languageChosen)
    {
        case "1":
            // The Dutch profile was never echoed to the console; keep it silent.
            StoreProfile(dNL, false);
            break;
        case "2":
            StoreProfile(dDE, true);
            break;
        case "3":
            StoreProfile(dFR, true);
            break;
        case "4":
            StoreProfile(dSP, true);
            break;
        case "5":
            StoreProfile(dEN, true);
            break;
        case "6":
            StoreProfile(dIT, true);
            break;
        default:
            break;
    }
}

// Replaces the contents of `profile` with the percentage share of every bigram
// currently in `d`, optionally printing the resulting profile.
static void StoreProfile(Dictionary<string, double> profile, bool print)
{
    // Start from a clean profile: Dictionary.Add would throw on a key that is
    // already present when the same language is loaded a second time.
    profile.Clear();

    foreach (KeyValuePair<string, int> kvp in d)
    {
        profile[kvp.Key] = ((double)kvp.Value / totaleNGram) * 100;
    }

    if (!print)
    {
        return;
    }

    foreach (KeyValuePair<string, double> kvp in profile)
    {
        Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);
    }
}

/// <summary>
/// Asks the user for a text file, strips punctuation and whitespace,
/// lower-cases the text and counts every pair of adjacent characters into
/// <c>d</c>, with the total number of pairs in <c>totaleNGram</c>.
/// If the dialog is cancelled, <c>d</c> is left empty.
/// </summary>
static void OpenReadFile()
{
    // Reset all per-file state. Without this every load is appended to the
    // previous one, so loading the same file twice yields "file + file" and a
    // spurious bigram where the two copies meet.
    fileList.Clear();
    newText.Clear();
    d.Clear();
    totaleNGram = 0;

    var fileDialog = new OpenFileDialog
    {
        Multiselect = false,
        Title = "Open Text File",
        Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx"
    };

    using (fileDialog)
    {
        if (fileDialog.ShowDialog() == DialogResult.OK)
        {
            // Dispose the reader so the file handle is released right away.
            using (var sr = new System.IO.StreamReader(fileDialog.FileName))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    fileList.Add(line);
                }
            }
        }
    }

    // Lines are joined without a separator, so the last character of one line
    // and the first character of the next form a bigram.
    fileRead = fileList.ToArray();
    tekst = string.Join(string.Empty, fileRead).ToCharArray();

    foreach (char c in tekst)
    {
        if (!char.IsPunctuation(c))
        {
            newText.Append(c);
        }
    }

    // Strings are immutable: the lower-cased copy has to be assigned back.
    text = newText.ToString().ToLowerInvariant();

    // The character class removes whitespace characters and literal '+' signs.
    text = Regex.Replace(text, @"[\s+]", "");
    tekst = text.ToCharArray();

    for (int i = 0; i < tekst.Length - 1; i++)
    {
        gecombineerdeLetters = text.Substring(i, 2);

        if (d.TryGetValue(gecombineerdeLetters, out value))
        {
            d[gecombineerdeLetters] = value + 1;
        }
        else
        {
            d.Add(gecombineerdeLetters, 1);
        }

        totaleNGram += 1;
    }
}
What I've tried:
- Using the except<>
- SequenceEqual
Every time I tried to compare the same file with itself, it would give me one bigram that wasn't in either dictionary.
`dict1.Keys.Except(dict2.Keys)` and vice versa?