Search Unity

  1. Megacity Metro Demo now available. Download now.
    Dismiss Notice
  2. Unity support for visionOS is now available. Learn more in our blog post.
    Dismiss Notice

Question Working DFS algorithm - Major lag - Help please.

Discussion in 'Scripting' started by nobluff67, Nov 8, 2022.

  1. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    Code (CSharp):
    1. using System;
    2. using System.Collections;
    3. using System.Collections.Generic;
    4. using UnityEngine;
    5. using UnityEngine.Jobs;
    6. using Unity.Burst;
    7. using Unity.Collections;
    8. using Unity.Jobs;
    9. using Unity.Mathematics;
    10. using System.Threading.Tasks;
    11.  
    12. public class FindAllPossibleWords : MonoBehaviour
    13. {
    14.  
    15.     static int tempStopCount = 0;
    16.     public static int wordFoundCount = 0;
    17.     private float startTime;
    18.     public static HashSet<string> wordFound = new HashSet<string>();
    19.     private int countBackupBreak;
    20.     private int complete;
    21.      public void HammerSpecial()
    22.      {
    23.          startTime = Time.realtimeSinceStartup;
    24.          FindValidWords();
    25.      }
    26.  
    27.      public async void FindValidWords()
    28.      {
    29.          tempStopCount = 0;
    30.          string word = String.Empty;
    31.  
    32. // Col = 6; Row = 8
    33.          for (int col = 0; col < 6; col++)
    34.          {
    35.              for (int row = 0; row < 8; row++)
    36.              {
    37.  
    38.                  await Task.Yield();
    39.                  if (GlobalVars.Instance.letterArray[col, row] == '?')
    40.                  {
    41. //                     Debug.Log("?: " +col+ " ROW: " +row);  
    42.                  }
    43.                  else
    44.                  {
    45.                      GlobalVars.Instance.visited[col,row] = true;
    46. //indexWords, ie the dictionary = 50k words
    47.                      var wordTask = FindWordAsync(GlobalVars.Instance.letterArray,GlobalVars.Instance.visited,col,row,word+GlobalVars.Instance.letterArray[col,row],GlobalVars.Instance.indexedWords);
    48.                      complete = wordTask.Result;
    49.                      await wordTask;
    50.                      GlobalVars.Instance.visited[col,row] = false;
    51.                  }
    52.              }
    53.              Debug.Log("Col: " + col + " " + ((Time.realtimeSinceStartup - startTime) * 1000f) + "ms");
    54.          }
    55.          Debug.Log(((Time.realtimeSinceStartup - startTime) * 1000f) + "ms");
    56.  
    57.          Debug.Log("words found: " + wordFoundCount +" Unique words: " + wordFound.Count + " Search Count: " + tempStopCount);
    58.      }
    59.  
    60.      private static int[] pathCol = {-1, 0, 1,-1, 1,-1,0,1};
    61.      private static int[] pathRow = {-1,-1,-1, 0, 0, 1,1,1};
    62.  
    63.      private async Task<int> FindWordAsync(char[,] board, bool[,] visited, int col, int row, string word,
    64.          HashSet<string> englishDictionary)
    65.      {
    66.          tempStopCount++;
    67. // Yes 50Million
    68.          if (tempStopCount > 50000000)
    69.          {
    70.              Debug.Log(tempStopCount);
    71.              return 50000000;
    72.          }
    73.          if (englishDictionary.Contains(word))
    74.          {
    75.              if (word.Length > 3 && word.Length < 9)
    76.              {
    77.                  wordFoundCount++;
    78.                  wordFound.Add(word);
    79.                  //Debug.Log("found word: " + word);
    80.                  return 1;
    81.              }
    82.          }
    83.  
    84.          if (word.Length > 9)
    85.          {
    86.              return 9;
    87.          }
    88.          for (int i = 0; i < pathRow.Length; i++)
    89.          {
    90.              int rowNew = row + pathRow[i];
    91.              int colNew = col + pathCol[i];
    92.              if (IfValid(colNew, rowNew, visited,word))
    93.              {
    94.                  visited[colNew,rowNew] = true;
    95.                  var wordTask = FindWordAsync(board,visited,colNew,rowNew,word+board[colNew,rowNew],englishDictionary);
    96.                  complete = wordTask.Result;
    97.                  await wordTask;
    98.                  visited[colNew,rowNew] = false;
    99.              }
    100.          }
    101.  
    102.          return 0;
    103.      }
    104.    
    105.      private static bool IfValid(int colNew, int rowNew, bool[,] visited,string word)
    106.      {
    107.          if ((rowNew >= 0) && (colNew >= 0) && (rowNew < GlobalVars.Instance.letterRowSize) &&
    108.              (word.Length <= GlobalVars.Instance.maxWordSize) &&
    109.              (colNew < GlobalVars.Instance.letterColSize) && (!visited[colNew,rowNew]))
    110.          {
    111.              return true;
    112.          }
    113.          return false;
    114.      }
    115. }
    The issue for me, I guess understandably (50 M searches), is that the UI input is very slow when this is running.

    Help needed
    1. I am looking for speed up suggestions.
    2. Or looking for alternatives (e.g. jobs, which I've looking into but I am stuck - need to flatten 2d array for jobs to work)
    Constraints:
    1. This has to run more than once, however it can however be delayed and/or executed a bit at a time.
    2. This will have to run at the same time as UI input.

    Stats:

    Capture6.JPG

    Profiles (I cant deep profile - hangs editor)

    Capture7.JPG

    Surprised me as I thought it would highlight the cs script. I also can not dig deeper so have no idea why 72+23 adds up to 612.

    What I have attempted

    1. Jobs - I'm butchering it as my experience on this is limited. I have attempted to follow a few tutorials however they are not really base on this type of situation (dfs algo).
    2. The above code is my attempt at async and tasks. I think the issue here is that the tasks are being run in the 10's of thousands, not sure though.

    Help or guide please.
     
  2. Bunny83

    Bunny83

    Joined:
    Oct 18, 2010
    Posts:
    3,916
    Well, your bottleneck is that you create over 2 million objects which sums to over 50MB of data. Using a Task here doesn't really help the case as Tasks are also objects which generate garbage.

    It's quite hard to disect your code to fully understand it. However it looks like you're searching for valid words in a grid that kinda snakes through the grid? Since you seem to allow the 8 directions around a cell, there are really many potential matches.

    First of all you should optimise your word check. Do not create a string for each word, especially not by adding characters together. You will create a new string each time. Instead you probably should convert your dictionary into a "hash tree", character by character. You could use a
    Dictionary<char, Node>
    inside each Node to go down the tree, however for performance reasons it probably makes more sense to just create an Node array where the element matches a certain character (0 = a, 1=b, ...). That way you would only populate the elements when there are actual "sub words". This makes the search absolutely garbage free. Additionally, you search through the grid recursively. You should record the longest possible word and stop as soon as you hit that length.

    Building a hash tree has the additional advantage that you can easily tell if there are more words following a certain combination or not. So you can stop quite early searching for more words.

    I would probably go with something like that:

    Code (CSharp):
    1.  
    2.     public class Node
    3.     {
    4.         public bool isWord = false;
    5.         public char ch;
    6.         public Node parent = null;
    7.         public Node[] childs = null;
    8.         public Node Find(char aCh)
    9.         {
    10.             if (childs == null)
    11.                 return null;
    12.             int index = char.ToUpper(aCh) - 'A';
    13.             if (index < 0 || index >= 26)
    14.                 return null;
    15.             return childs[index];
    16.         }
    17.         public Node GetOrCreateChild(char aCh)
    18.         {
    19.             int index = char.ToUpper(aCh) - 'A';
    20.             if (index < 0 || index >= 26)
    21.                 return null;
    22.             if (childs == null)
    23.                 childs = new Node[26];
    24.             var node = childs[index];
    25.             if (node == null)
    26.                 childs[index] = node = new Node { parent = this, ch = char.ToLower(aCh) };
    27.             return node;
    28.         }
    29.         public string GetWord()
    30.         {
    31.             var n = this;
    32.             var sb = new System.Text.StringBuilder();
    33.             while (n.parent != null)
    34.             {
    35.                 sb.Insert(0, n.ch);
    36.                 n = n.parent;
    37.             }
    38.             return sb.ToString();
    39.         }
    40.     }
    41.  
    42.  
    You can use the GetOrCreateChild method to build the tree and the Find method to traverse it. Assuming you have a List of all english words, you can create our tree like this

    Code (CSharp):
    1. Node root = new Node();
    2.  
    3. foreach(string word in allWords)
    4. {
    5.     var n = root;
    6.     foreach(char c in word)
    7.     {
    8.         n = n.GetOrCreateChild(c);
    9.         if (n == null)
    10.             throw new Exception("word does contain characters other than A-Z");
    11.     }
    12.     n.IsWord = true;
    13. }
    This should create the whole tree. So whenever you want to check a combination of characters, you simply start at the root and pass character by character to the Find method and use the returned Node as the next node for the next character. Whenever Find returns null, it means this is a dead end and you have to go back one step in your recursive search. After each character make sure you check "IsWord". If it's true, you have found a valid word. To get the word you can use the GetWord method. It traverses the tree back up to the root and collects all the characters on the way and puts them into a string. Note you could avoid the allocation of the StringBuilder as long as you don't try to multithread that code. You could declare a private static string builder inside the Node class and reuse it. Just make sure you call Clear on the builder before reusing it. However since you only need a stringbuilder for each word you found, it's not that bad. You have to create the final strings anyways. Of course if you're interested in the actual cells that make up the word, that's something you have to take care of yourself in your recursive function. Maybe use a
    Stack<Vector2Int>
    where you push the current position to whenever you enter a level of recursion and of course pop one when you leave one. That way the stack contains the whole path that makes up a word the moment you found one.

    Currently it seems that your code tries to find really all possible word combinations. It's not clear if that's intended or not. Of course you would most likely get overlapping and intersecting word paths. Performance would increase if you would take out words you have already found. But of course that would not give you the best outcome if that's what you're after. For example if the word "a" is actually in your dictionary, it would be impossible to get any longer word starting with a in such a case. Anyways, that's up to you to decide.
     
    jvo3dc and nobluff67 like this.
  3. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    Yes that's it. I will try your suggestion.

    A quick question about yours. Will UI input be possible at the same time, without much lag?
     
  4. Bunny83

    Bunny83

    Joined:
    Oct 18, 2010
    Posts:
    3,916
    Since I just wrote that Node class, I had some fun with it and added a GetSuggestions methods ^^ You could feed it the start or a word and it returns all words that start with those letters. I tested it with the words_alpha list I've found there.

    Code (CSharp):
    1.     public class Node
    2.     {
    3.         public bool isWord = false;
    4.         public char ch;
    5.         public Node parent = null;
    6.         public Node[] childs = null;
    7.         public Node Find(char aCh)
    8.         {
    9.             if (childs == null)
    10.                 return null;
    11.             int index = char.ToUpper(aCh) - 'A';
    12.             if (index < 0 || index >= 26)
    13.                 return null;
    14.             return childs[index];
    15.         }
    16.         public Node GetOrCreateChild(char aCh)
    17.         {
    18.             int index = char.ToUpper(aCh) - 'A';
    19.             if (index < 0 || index >= 26)
    20.                 return null;
    21.             if (childs == null)
    22.                 childs = new Node[26];
    23.             var node = childs[index];
    24.             if (node == null)
    25.                 childs[index] = node = new Node { parent = this, ch = char.ToLower(aCh) };
    26.             return node;
    27.         }
    28.         public string GetWord()
    29.         {
    30.             var n = this;
    31.             var sb = new System.Text.StringBuilder();
    32.             sb.Clear();
    33.             while (n.parent != null)
    34.             {
    35.                 sb.Insert(0, n.ch);
    36.                 n = n.parent;
    37.             }
    38.             return sb.ToString();
    39.         }
    40.         public List<string> GetSuggestions(string aPart)
    41.         {
    42.             var n = this;
    43.             foreach(char c in aPart)
    44.             {
    45.                 n = n.Find(c);
    46.                 if (n == null)
    47.                     return null;
    48.             }
    49.             List<string> result = new List<string>();
    50.             n.GetChilds(result);
    51.             return result;
    52.         }
    53.         void GetChilds(List<string> aList)
    54.         {
    55.             if (isWord)
    56.                 aList.Add(GetWord());
    57.             if (childs == null)
    58.                 return;
    59.             for(int i = 0; i < childs.Length; i++)
    60.             {
    61.                 if (childs[i] != null)
    62.                     childs[i].GetChilds(aList);
    63.             }
    64.         }
    65.         public static Node CreateDictionary(string[] aWords)
    66.         {
    67.             Node root = new Node();
    68.             foreach (string word in aWords)
    69.             {
    70.                 var n = root;
    71.                 foreach (char c in word)
    72.                 {
    73.                     n = n.GetOrCreateChild(c);
    74.                     if (n == null)
    75.                         throw new Exception("word does contain characters other than A-Z");
    76.                 }
    77.                 n.isWord = true;
    78.             }
    79.             return root;
    80.         }
    81.     }
    82.  

    Loading the word list from a file took about 55ms, creating the node tree took about 700ms, searching for all words starting with "a" (there are 25417 of them) took only 14ms. Fun fact: running GetSuggestions with an empty string (which would return the whole dictionary) took about 380ms. That includes the actual creation of all those strings ^^.

    ps: about your latest question.
    That's hard to tell because the performance highly depends on the size of the grid as well as the number of words in the dictionary. Though the size of the grid is the most important factor. Just looking up a word should be pretty fast. Don't make it a "Task" unless you have to. Adding a yield would wait for a frame. If that happens like 1 million times it would take ages to complete :)

    Recursive searches are generally a pretty unpredictable beast. I once made a quite simple sudoku solver. Using brute force was actually doable but the performance greatly varies depending on the number of given numbers and how good / bad the initial predictions match. If you start at guessing a 1 for the first field and and it actually is a 9, it could take a really long time. Though with just some very simple elimination rules, nowhere near a perfect analytical approach, it found the solutions pretty much instantly. I don't think I ever timed it. Though at each recursion I checked what are the possibilities for each empty cell and I bruteforced the once first with the fewest possibiilties left. Doing that at each recursion very quickly rules out wrong combinations. As soon as the possibilities for a cell is 0 we can stop and go back immediately.
     
    nobluff67 likes this.
  5. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    Code (CSharp):
    1.     Node root = new Node();
    2.    
    3.     foreach(string word in allWords)
    4.     {
    5.         var n = root;
    6.         foreach(char c in word)
    7.         {
    8.             n = n.GetOrCreateChild(c);
    9.             if (n == null)
    10.                 throw new Exception("word does contain characters other than A-Z");
    11.         }
    12.         n.IsWord = true;
    13.     }
    14.  
    A quick question on this. This creates one node for every word, correct?
     
  6. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    The grid is 6x8.

    I have not fully comprehended your code yet, but it seems that I still have to do the grid search section, correct? And then use your code to check if the word exist.

    CAT
    DOG
    DRD

    So from the above, traverse the grid

    CD - Use your node code - no possible word so forget this path.
    CO - Use your node code - possible word, continue.
    COD - Use your node code - word found, continue.
    .
    .
    .
    .
     
  7. Bunny83

    Bunny83

    Joined:
    Oct 18, 2010
    Posts:
    3,916
    Yes, it does create one node for each word as well as all previous tree nodes. With those 370k words we get about 1M nodes in total. They require about 100MB in total. So yes, compared to the word list itself which has about 5MB it's a quite a bit of extra memory, though it should speed up the lookup. A Hashmap also has additional memory.

    Yes, that's correct. As I said, the problem you try to solve is not an easy one and there's not much you can do to further simplify it unless you actually come up with more constraints.
     
    nobluff67 likes this.
  8. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    The current constrains is a 3-9 letter word.

    Im also thinking I could get all the letters in the grid and search one at a time when the user is not interacting with the UI.

    So in a grid:
    ABC
    DEF
    HIJ

    Search for A
    If user not interacting
    Search for B, etc

    I also don't quite understand your code 100% or how to implement it as your coding knowledge is beyond my level.

    I am currently initiating your code from another class with -
    global::Node.CreateDictionary(GlobalVars.Instance.allDictWords);

    // This works I think, just not sure how to check if it is working (takes about 500ms)

    I don't know how to do the actual word search. If I wanted to search for "DOG", which could find dog or dogwood.

    And thanks for the help, this is 100% sending me down a better path to what I had.
     
  9. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    @Bunny83 I ham stuck trying to figure this out myself, can you help with one specific thing?

    GetSuggestions(string aPart)

    How to do you run this? Any attempts either from the current class or other classes (which is what I want) leads to error.
    I am relatively sure there is a simple solution however I can not figure it out myself.

    If I make it static then the "var n = this;" doesn't work.
    If I leave as is, then I cant access non-static method.

    I have a few gaps in my knowledge that keeps me running in circles when trying to research this and solve by myself.
     
  10. Bunny83

    Bunny83

    Joined:
    Oct 18, 2010
    Posts:
    3,916
    Well, the "root" node can be stored where ever you like. You would call GetSuggestions on that root node.
     
  11. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    I'm still not following you. What do you mean by "root" can be stored anywhere? How does that allow me to use GetSuggestions in another class (monobehaviour)? I cant get around "cannot access non-static method in static context"

    EDIT: Ignore this for now, I am a couple of steps further in my understanding, let me see if I can figure this out.
     
    Last edited: Nov 18, 2022
  12. spiney199

    spiney199

    Joined:
    Feb 11, 2021
    Posts:
    7,606
    Bunny's code is just a plain class, so you need to make an instance of it to work with.

    Code (CSharp):
    1. public class MyMonobehaviour : Monobehaviour
    2. {
    3.     public Node RootNote = new Node();
    4.    
    5.     //more logic here
    6. }
    C# 101 stuff.
     
    nobluff67 likes this.
  13. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    I missed class 100.5 to 101.2:)

    Thanks, I just figured out that part. I am working my way through it, and struggling is sometimes a good way to learn. I have 1 or 2 more understanding issues but think I have it.
     
  14. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    Understand it now, and got it working. Thanks especially @Bunny83 and a special mention to @spiney199.

    public static Node RootNode = new Node(); // Missing static was one of my issues.
     
  15. nobluff67

    nobluff67

    Joined:
    Nov 3, 2016
    Posts:
    338
    Update:

    Using mostly your code:

    28ms and only checking on 20k words, while discarding the rest.

    Using my old code:

    8400ms and checking on 35million combos (most of them impossible words).

    Also should note that your code finds more words as my initial code stopped when it found the first word, so it would find DOG but would miss DOGWOOD. Your code fixes that.

    As you mentioned before, the main speed improvement comes from stopping the search when there are no more possible words, while the original would carry on looking for the next word even though there is zero chance of there being one.

    Once again @Bunny83 thanks.