0

I'm trying, as the title suggests, to compare a dictionary with another dictionary. It's for comparing bigrams from 1 dictionary with another dictionary.

I'm using a console application to load a file. The application reads this file and adds every 2 letters (for instance; hey becomes he and ey) as a key, and the value will be how often the bigram is seen in percentage (in this case 50% for he and 50% for ey). I got to the part where I'm able to do this.

Now the tricky part is to compare the recently added bigrams with a second file (text). I'm adding the second file to another dictionary. So now I have 2 dictionaries (1 for the first file and the second for the second file).

Now I want to compare those 2 dictionaries and see what bigrams aren't in one or the other (for instance; first directory has he, ey; second directory has he, ey, yg, gu, uy, ys). So yg, gu, uy, ys should come up.

Here's my code:

static StringBuilder newText = new StringBuilder(); static Dictionary<string, int> d = new Dictionary<string, int>(); static Dictionary<string, double> dNL = new Dictionary<string, double>(); static Dictionary<string, double> dDE = new Dictionary<string, double>(); static Dictionary<string, double> dFR = new Dictionary<string, double>(); static Dictionary<string, double> dSP = new Dictionary<string, double>(); static Dictionary<string, double> dEN = new Dictionary<string, double>(); static Dictionary<string, double> dIT = new Dictionary<string, double>(); static List<string> fileList = new List<string>(); static List<string> dConverted = new List<string>(); static List<string> dConvertedNL = new List<string>(); static List<string> dConvertedDE = new List<string>(); static List<string> dConvertedFR = new List<string>(); static List<string> dConvertedSP = new List<string>(); static List<string> dConvertedEN = new List<string>(); static List<string> dConvertedIT = new List<string>(); static string text; static string languageChosen; static string gecombineerdeLetters; static int value = 0; static int totaleNGram = 0; static string[] fileRead; static char[] tekst; [STAThread] static void Main(string[] args) { Start: Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:"); Console.WriteLine("press 1 - 7 / enter 'exit' to close the program"); Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language"); Console.WriteLine(); languageChosen = Console.ReadLine(); if (languageChosen == "1" || languageChosen == "2" || languageChosen == "3" || languageChosen == "4" || languageChosen == "5" || languageChosen == "6") { calculateChanceKnown(); Console.WriteLine(); goto Start; } else if (languageChosen == "7") { OpenReadFile(); // compare to every language // first convert to list dConverted = d.Keys.ToList(); dConvertedNL = dNL.Keys.ToList(); dConvertedDE = dDE.Keys.ToList(); dConvertedFR = dFR.Keys.ToList(); dConvertedSP = dSP.Keys.ToList(); dConvertedEN = dEN.Keys.ToList(); dConvertedIT = dIT.Keys.ToList(); // compare d against each other list to see which has the most simularities //List<string> firstNotSecond = dConverted.Except(dConvertedNL).ToList(); Console.WriteLine(); goto Start; } else if(languageChosen == "exit") { Environment.Exit(0); } else { Console.WriteLine("Wrong input, try again!"); Console.WriteLine(); goto Start; } Console.Read(); } static void calculateChanceKnown() { OpenReadFile(); switch (languageChosen) { case "1": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dNL.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); //if key already exists -> add +1 to totaleNGram -> d[gecombineerdeletters] +1 -> calculate % again } d.Clear(); //foreach (KeyValuePair<string, double> kvp in dNL) //{ // Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); //} break; case "2": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dDE.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dDE) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "3": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dFR.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dFR) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "4": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dSP.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dSP) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "5": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dEN.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dEN) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; case "6": foreach (KeyValuePair<string, int> kvp in d) { //Console.WriteLine("Key = {0}, Value = {1}, Chance = {2}%", kvp.Key, kvp.Value, ((double)kvp.Value / totaleNGram) * 100); dIT.Add(kvp.Key, ((double)kvp.Value / totaleNGram) * 100); } foreach (KeyValuePair<string, double> kvp in dIT) { Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value); } break; default: break; } } static void OpenReadFile() { var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" }; using (fileDialog) { if (fileDialog.ShowDialog() == DialogResult.OK) { System.IO.StreamReader sr = new System.IO.StreamReader(fileDialog.FileName); string line; while ((line = sr.ReadLine()) != null) { fileList.Add(line); } } } fileRead = fileList.ToArray(); tekst = string.Join(string.Empty, fileRead).ToCharArray(); foreach (char c in tekst) { if (!char.IsPunctuation(c)) newText.Append(c); } text = newText.ToString(); text.ToLower(); text = Regex.Replace(text, @"[\s+]", ""); tekst = text.ToCharArray(); for (int i = 0; i < tekst.Count() - 1; i += 1) { gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]); //Console.WriteLine(gecombineerdeLetters); if (!d.TryGetValue(gecombineerdeLetters, out value)) { d.Add(gecombineerdeLetters, 1); totaleNGram += 1; } else { d[gecombineerdeLetters] += 1; totaleNGram += 1; } } } 

What I've tried:

  • Using the except<>
  • SequenceEqual

Every time I tried to compare the same file it would give me one bigram that wasn't in either directory.

4
  • dict1.Keys.Except(dict2.Keys) and vice versa? Commented Nov 3, 2016 at 16:26
  • @itsme86 that's what I thought as well, but for some reason I still get a bigram that isn't in either directory. Commented Nov 3, 2016 at 16:30
  • Well that's just impossible! Commented Nov 3, 2016 at 16:33
  • @itsme86 Either I'm making a really dumb mistake, but when I do your answer, i get for (file 1 hey guys and file 2 hey) sh returned Commented Nov 3, 2016 at 16:34

1 Answer 1

1
public class DictionaryComparer { public List<string> CompareDictionaries(IDictionary<string, double> first, IDictionary<string, double> second) { var dictionary = new Dictionary<string, int>(); foreach (var f in first) { if (!dictionary.ContainsKey(f.Key)) { dictionary.Add(f.Key, 1); } else { dictionary[f.Key]++; } } foreach (var f in second) { if (!dictionary.ContainsKey(f.Key)) { dictionary.Add(f.Key, 1); } else { dictionary[f.Key]++; } } return dictionary.Where(s => s.Value == 1).Select(a => a.Key).ToList(); } } 

And test:

[TestFixture] public class Test { [Test] public void Compare() { IDictionary<string, double> dictionaryOne = new Dictionary<string, double>() { {"he", 0},{"ey", 0 } }; Dictionary<string, double> dictionaryTwo = new Dictionary<string, double>() { {"he", 0},{"ey", 0 },{"yg", 0 },{"gu", 0 },{"uy", 0 },{"ys", 0 } }; var comparer = new DictionaryComparer(); var list = comparer.CompareDictionaries(dictionaryOne, dictionaryTwo); Assert.That(4, Is.EqualTo(list.Count)); Assert.That("yg", Is.EqualTo(list[0])); Assert.That("gu", Is.EqualTo(list[1])); Assert.That("uy", Is.EqualTo(list[2])); Assert.That("ys", Is.EqualTo(list[3])); } } 

Hope it make sense

Sign up to request clarification or add additional context in comments.

3 Comments

Sorry forget to mention, the test is written in Nunit
But this is assuming I'm actually using hey and hey guys. I want to insert any text I want.
I think I will use your answer! I think with the foreach loop it might work.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.