Skip to main content
formatting, directory => dictionary, remove fluff
Source Link
CDspace
  • 2.7k
  • 19
  • 32
  • 39

Compare directorydictionary with another dictionary

I've been trying to get this to work for a while now, but I don't seem to make it work :p I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionaydictionary.
I'm

I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.
Now

Now the tricky part (for me) is to compare the recently added bigrams with a second file (text). I'm adding the second file to another directorydictionary. So now I have 2 directoriesdictionaries (1 for the first file and the second for the second file).
Now I want to compare those 2 directories and see what bigrams aren't in on or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys).
So yg, gu, uy, ys should come up.
Here's my code if it makes more sense:

Now I want to compare those 2 dictionaries and see what bigrams aren't in one or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys). So yg, gu, uy, ys should come up.

Here's my code:

 static StringBuilder newText = new StringBuilder();  static Dictionary<string, int> d = new Dictionary<string, int>();  static Dictionary<string, double> dNL = new Dictionary<string, double>();  static Dictionary<string, double> dDE = new Dictionary<string, double>();  static Dictionary<string, double> dFR = new Dictionary<string, double>();  static Dictionary<string, double> dSP = new Dictionary<string, double>();  static Dictionary<string, double> dEN = new Dictionary<string, double>();  static Dictionary<string, double> dIT = new Dictionary<string, double>();  static List<string> fileList = new List<string>();  static List<string> dConverted = new List<string>();  static List<string> dConvertedNL = new List<string>();  static List<string> dConvertedDE = new List<string>();  static List<string> dConvertedFR = new List<string>();  static List<string> dConvertedSP = new List<string>();  static List<string> dConvertedEN = new List<string>();  static List<string> dConvertedIT = new List<string>();  static string text;  static string languageChosen;  static string gecombineerdeLetters;  static int value = 0;  static int totaleNGram = 0;  static string[] fileRead;  static char[] tekst; [STAThread] static void Main(string[] args) {  [STAThread] Start: staticConsole.WriteLine("Welcome voidto Mainthis program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine(string[]"press args1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLinecalculateChanceKnown("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); }    staticelse voidif calculateChanceKnown(languageChosen == "7") { OpenReadFile();  // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList();  switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); }   foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) *goto 100);Start;   }   foreachelse if(KeyValuePair<string, double> kvplanguageChosen in== dEN"exit")   {   ConsoleEnvironment.WriteLineExit("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);   } break; case "6":   foreach (KeyValuePair<string, int> kvp in d)else   {   //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); "Wrong dIT.Add(kvp.Keyinput, ((double)kvp.Value / totaleNGram) *try 100again!"); }   foreach (KeyValuePair<string, double> kvp in dIT)  { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);   } break; default: break; goto }Start; } Console.Read(); } static void calculateChanceKnown() {  OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) {  using  if (fileDialog.ShowDialog() == DialogResult.OK) { ifSystem.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.ShowDialog(FileName);  == DialogResult string line; while ((line = sr.OKReadLine()) != null) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } }  fileRead = fileList.ToArray();   tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in foreachtekst) {  if (!char.IsPunctuation(c)) newText.Append(c);  in } text = newText.ToString(); text.ToLower();  text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { if (!chard.IsPunctuationAdd(c))gecombineerdeLetters, newText.Append(c1); totaleNGram += 1; } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1)else { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1;   totaleNGram += 1; } }    }  } 

I hope you guys can help me out, because I've been going at this for about 5 hours now.
WhatWhat I've tried:

  • Using the except<>
  • SequenceEqual
  • More I can't remember :/

EverytimeEvery time I tried to compare the same file it would give me one bigram that wasn't in either directory.

I hope this makes a little sense, and I hope you guys can help me.

Cheers, Remco

Compare directory with another dictionary

I've been trying to get this to work for a while now, but I don't seem to make it work :p I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionay.
I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.
Now the tricky part (for me) is to compare the recently added bigrams with a second file (text). I'm adding the second file to another directory. So now I have 2 directories (1 for the first file and the second for the second file).
Now I want to compare those 2 directories and see what bigrams aren't in on or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys).
So yg, gu, uy, ys should come up.
Here's my code if it makes more sense:

 static StringBuilder newText = new StringBuilder();  static Dictionary<string, int> d = new Dictionary<string, int>();  static Dictionary<string, double> dNL = new Dictionary<string, double>();  static Dictionary<string, double> dDE = new Dictionary<string, double>();  static Dictionary<string, double> dFR = new Dictionary<string, double>();  static Dictionary<string, double> dSP = new Dictionary<string, double>();  static Dictionary<string, double> dEN = new Dictionary<string, double>();  static Dictionary<string, double> dIT = new Dictionary<string, double>();  static List<string> fileList = new List<string>();  static List<string> dConverted = new List<string>();  static List<string> dConvertedNL = new List<string>();  static List<string> dConvertedDE = new List<string>();  static List<string> dConvertedFR = new List<string>();  static List<string> dConvertedSP = new List<string>();  static List<string> dConvertedEN = new List<string>();  static List<string> dConvertedIT = new List<string>();  static string text;  static string languageChosen;  static string gecombineerdeLetters;  static int value = 0;  static int totaleNGram = 0;  static string[] fileRead;  static char[] tekst; [STAThread] static void Main(string[] args) { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); }    static void calculateChanceKnown() { OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); }   foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100);   }   foreach (KeyValuePair<string, double> kvp in dEN)   {   Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);   } break; case "6":   foreach (KeyValuePair<string, int> kvp in d)   {   //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100);  dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); }   foreach (KeyValuePair<string, double> kvp in dIT)  { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);   } break; default: break;  } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) { if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } fileRead = fileList.ToArray();   tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) { if (!char.IsPunctuation(c)) newText.Append(c); } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1;   totaleNGram += 1; } }    } 

I hope you guys can help me out, because I've been going at this for about 5 hours now.
What I've tried:

  • Using the except<>
  • SequenceEqual
  • More I can't remember :/

Everytime I tried to compare the same file it would give me one bigram that wasn't in either directory.

I hope this makes a little sense, and I hope you guys can help me.

Cheers, Remco

Compare dictionary with another dictionary

I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionary.

I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.

Now the tricky part is to compare the recently added bigrams with a second file (text). I'm adding the second file to another dictionary. So now I have 2 dictionaries (1 for the first file and the second for the second file).

Now I want to compare those 2 dictionaries and see what bigrams aren't in one or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys). So yg, gu, uy, ys should come up.

Here's my code:

static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>(); static List<string> fileList = new List<string>(); static List<string> dConverted = new List<string>(); static List<string> dConvertedNL = new List<string>(); static List<string> dConvertedDE = new List<string>(); static List<string> dConvertedFR = new List<string>(); static List<string> dConvertedSP = new List<string>(); static List<string> dConvertedEN = new List<string>(); static List<string> dConvertedIT = new List<string>(); static string text; static string languageChosen; static string gecombineerdeLetters; static int value = 0; static int totaleNGram = 0; static string[] fileRead; static char[] tekst; [STAThread] static void Main(string[] args) {   Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile();  // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList();  Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); } static void calculateChanceKnown() {  OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) {    if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName);   string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } }  fileRead = fileList.ToArray(); tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) {  if (!char.IsPunctuation(c)) newText.Append(c);   } text = newText.ToString(); text.ToLower();  text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1; totaleNGram += 1; } }  } 

What I've tried:

  • Using the except<>
  • SequenceEqual

Every time I tried to compare the same file it would give me one bigram that wasn't in either directory.

added 6 characters in body
Source Link
Remco1250
  • 85
  • 1
  • 13

I've been trying to get this to work for a while now, but I don't seem to make it work :p I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionay.
I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.
Now the tricky part (for me) is to compare the recently added bigrams with a second file (text). I'm adding the second file to another directory. So now I have 2 directories (1 for the first file and the second for the second file).
Now I want to compare those 2 directories and see what bigrams aren't in on or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys).
So yg, gu, uy, ys should come up.
Here's my code if it makes more sense: static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>();

 static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>(); static List<string> fileList = new List<string>(); static List<string> dConverted = new List<string>(); static List<string> dConvertedNL = new List<string>(); static List<string> dConvertedDE = new List<string>(); static List<string> dConvertedFR = new List<string>(); static List<string> dConvertedSP = new List<string>(); static List<string> dConvertedEN = new List<string>(); static List<string> dConvertedIT = new List<string>(); static string text; static string languageChosen; static string gecombineerdeLetters; static int value = 0; static int totaleNGram = 0; static string[] fileRead; static char[] tekst; [STAThread] static void Main(string[] args) { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); } static void calculateChanceKnown() { OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) { if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } fileRead = fileList.ToArray(); tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) { if (!char.IsPunctuation(c)) newText.Append(c); } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1; totaleNGram += 1; } } } 

I've been trying to get this to work for a while now, but I don't seem to make it work :p I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionay.
I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.
Now the tricky part (for me) is to compare the recently added bigrams with a second file (text). I'm adding the second file to another directory. So now I have 2 directories (1 for the first file and the second for the second file).
Now I want to compare those 2 directories and see what bigrams aren't in on or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys).
So yg, gu, uy, ys should come up.
Here's my code if it makes more sense: static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>();

 static List<string> fileList = new List<string>(); static List<string> dConverted = new List<string>(); static List<string> dConvertedNL = new List<string>(); static List<string> dConvertedDE = new List<string>(); static List<string> dConvertedFR = new List<string>(); static List<string> dConvertedSP = new List<string>(); static List<string> dConvertedEN = new List<string>(); static List<string> dConvertedIT = new List<string>(); static string text; static string languageChosen; static string gecombineerdeLetters; static int value = 0; static int totaleNGram = 0; static string[] fileRead; static char[] tekst; [STAThread] static void Main(string[] args) { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); } static void calculateChanceKnown() { OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) { if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } fileRead = fileList.ToArray(); tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) { if (!char.IsPunctuation(c)) newText.Append(c); } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1; totaleNGram += 1; } } } 

I've been trying to get this to work for a while now, but I don't seem to make it work :p I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionay.
I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.
Now the tricky part (for me) is to compare the recently added bigrams with a second file (text). I'm adding the second file to another directory. So now I have 2 directories (1 for the first file and the second for the second file).
Now I want to compare those 2 directories and see what bigrams aren't in on or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys).
So yg, gu, uy, ys should come up.
Here's my code if it makes more sense:

 static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>(); static List<string> fileList = new List<string>(); static List<string> dConverted = new List<string>(); static List<string> dConvertedNL = new List<string>(); static List<string> dConvertedDE = new List<string>(); static List<string> dConvertedFR = new List<string>(); static List<string> dConvertedSP = new List<string>(); static List<string> dConvertedEN = new List<string>(); static List<string> dConvertedIT = new List<string>(); static string text; static string languageChosen; static string gecombineerdeLetters; static int value = 0; static int totaleNGram = 0; static string[] fileRead; static char[] tekst; [STAThread] static void Main(string[] args) { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); } static void calculateChanceKnown() { OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) { if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } fileRead = fileList.ToArray(); tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) { if (!char.IsPunctuation(c)) newText.Append(c); } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1; totaleNGram += 1; } } } 
Source Link
Remco1250
  • 85
  • 1
  • 13

Compare directory with another dictionary

I've been trying to get this to work for a while now, but I don't seem to make it work :p I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionay.
I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.
Now the tricky part (for me) is to compare the recently added bigrams with a second file (text). I'm adding the second file to another directory. So now I have 2 directories (1 for the first file and the second for the second file).
Now I want to compare those 2 directories and see what bigrams aren't in on or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys).
So yg, gu, uy, ys should come up.
Here's my code if it makes more sense: static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>();

 static List<string> fileList = new List<string>(); static List<string> dConverted = new List<string>(); static List<string> dConvertedNL = new List<string>(); static List<string> dConvertedDE = new List<string>(); static List<string> dConvertedFR = new List<string>(); static List<string> dConvertedSP = new List<string>(); static List<string> dConvertedEN = new List<string>(); static List<string> dConvertedIT = new List<string>(); static string text; static string languageChosen; static string gecombineerdeLetters; static int value = 0; static int totaleNGram = 0; static string[] fileRead; static char[] tekst; [STAThread] static void Main(string[] args) { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); } static void calculateChanceKnown() { OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) { if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } fileRead = fileList.ToArray(); tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) { if (!char.IsPunctuation(c)) newText.Append(c); } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1; totaleNGram += 1; } } } 

I hope you guys can help me out, because I've been going at this for about 5 hours now.
What I've tried:

  • Using the except<>
  • SequenceEqual
  • More I can't remember :/

Everytime I tried to compare the same file it would give me one bigram that wasn't in either directory.

I hope this makes a little sense, and I hope you guys can help me.

Cheers, Remco