Question

我正在尝试 LeetCode 中的一个问题，看起来快完成了。这就是我到目前为止所做的：和问题链接 – 。以下是我到目前为止所做的代码片段：

static IList < int > FindSubstring(string s, string[] words) {
    var list = new List < IList < string >> ();
    DoPermutation(words, 0, words.Length - 1, list);

    return TrackIndexes(s, list);
  }

  static IList < IList < string >> DoPermutation(string[] str, int start, int end, IList < IList < string >> list) {
    if (start == end) {
      list.Add(new List < string > (str));
    } else {
      for (var i = start; i <= end; i++) {
        Swap(ref str[start], ref str[i]);

        DoPermutation(str, start + 1, end, list);

        Swap(ref str[start], ref str[i]);
      }
    }

    return list;
  }

  static void Swap(ref string a, ref string b) {
    var temp = a;
    a = b;
    b = temp;
  }

  static List < int > TrackIndexes(string pattern, IList < IList < string >> aLst) {
    int count = 0;

    List < string > strings;
    List < int > indexes = new List < int > ();

    foreach(var item in aLst) {
      string str = "";

      strings = new List < string > {
        String.Concat(item)
      };

      foreach(var list in item) {
        str += list;

        if (str.Length == strings[0].Length && pattern.Contains(str)) {
          indexes.AddRange(AllIndexesOf(pattern, str));
          count += 1;
        }
      }
    }

    indexes.Sort();
    return indexes.Distinct().ToList();
  }

public static int[] AllIndexesOf(string str, string substr, bool ignoreCase = false) 
{
  if (string.IsNullOrWhiteSpace(str) || string.IsNullOrWhiteSpace(substr)) 
  {
    throw new ArgumentException("String or substring is not specified.");
  }

  var indexes = new List<int>();
  int index = 0;

  while ((index = str.IndexOf(substr, index, ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal)) != -1) 
  {
    indexes.Add(index++);
  }

  return indexes.ToArray();
}

我遇到的问题是以下输入：

s = "pjzkrkevzztxductzzxmxsvwjkxpvukmfjywwetvfnujhweiybwvvsrfequzkhossmootkmyxgjgfordrpapjuunmqnxxdrqrfgkrsjqbszgiqlcfnrpjlcwdrvbumtotzylshdvccdmsqoadfrpsvnwpizlwszrtyclhgilklydbmfhuywotjmktnwrfvizvnmfvvqfiokkdprznnnjycttprkxpuykhmpchiksyucbmtabiqkisgbhxngmhezrrqvayfsxauampdpxtafniiwfvdufhtwajrbkxtjzqjnfocdhekumttuqwovfjrgulhekcpjszyynadxhnttgmnxkduqmmyhzfnjhducesctufqbumxbamalqudeibljgbspeotkgvddcwgxidaiqcvgwykhbysjzlzfbupkqunuqtraxrlptivshhbihtsigtpipguhbhctcvubnhqipncyxfjebdnjyetnlnvmuxhzsdahkrscewabejifmxombiamxvauuitoltyymsarqcuuoezcbqpdaprxmsrickwpgwpsoplhugbikbkotzrtqkscekkgwjycfnvwfgdzogjzjvpcvixnsqsxacfwndzvrwrycwxrcismdhqapoojegggkocyrdtkzmiekhxoppctytvphjynrhtcvxcobxbcjjivtfjiwmduhzjokkbctweqtigwfhzorjlkpuuliaipbtfldinyetoybvugevwvhhhweejogrghllsouipabfafcxnhukcbtmxzshoyyufjhzadhrelweszbfgwpkzlwxkogyogutscvuhcllphshivnoteztpxsaoaacgxyaztuixhunrowzljqfqrahosheukhahhbiaxqzfmmwcjxountkevsvpbzjnilwpoermxrtlfroqoclexxisrdhvfsindffslyekrzwzqkpeocilatftymodgztjgybtyheqgcpwogdcjlnlesefgvimwbxcbzvaibspdjnrpqtyeilkcspknyylbwndvkffmzuriilxagyerjptbgeqgebiaqnvdubrtxibhvakcyotkfonmseszhczapxdlauexehhaireihxsplgdgmxfvaevrbadbwjbdrkfbbjjkgcztkcbwagtcnrtqryuqixtzhaakjlurnumzyovawrcjiwabuwretmdamfkxrgqgcdgbrdbnugzecbgyxxdqmisaqcyjkqrntxqmdrczxbebemcblftxplafnyoxqimkhcykwamvdsxjezkpgdpvopddptdfbprjustquhlazkjfluxrzopqdstulybnqvyknrchbphcarknnhhovweaqawdyxsqsqahkepluypwrzjegqtdoxfgzdkydeoxvrfhxusrujnmjzqrrlxglcmkiykldbiasnhrjbjekystzilrwkzhontwmehrfsrzfaqrbbxncphbzuuxeteshyrveamjsfiaharkcqxefghgceeixkdgkuboupxnwhnfigpkwnqdvzlydpidcljmflbccarbiegsmweklwngvygbqpescpeichmfidgsjmkvkofvkuehsmkkbocgejoiqcnafvuokelwuqsgkyoekaroptuvekfvmtxtqshcwsztkrzwrpabqrrhnlerxjojemcxel", 

words = ["dhvf","sind","ffsl","yekr","zwzq","kpeo","cila","tfty","modg","ztjg","ybty","heqg","cpwo","gdcj","lnle","sefg","vimw","bxcb"]

我的代码在方法中创建了一个可能的排列列表DoPermutation，对于上述输入，它生成了一个出现内存不足错误的字符串列表。我不确定是否可以使用我拥有的代码片段来更改任何内容。

目前，我的代码通过了 180 个测试用例中的 151 个。无论如何，是否使用现有的代码片段来消除内存异常，或者我必须重新考虑该问题？

目标：代码的主要目标是为字符串列表创建可能的排列，并将模式中的这些字符串作为子字符串进行匹配。

您真的需要创建完全具体化List<List<string>>的排列吗？或者您可以创建一个IEnumerable<List<string>>可枚举，其中一次只具体化一个列表吗？ — 
中指出的那样，您当前的算法创建了大量的排列：6,402,373,705,728,000仅 18 个单词。即使避免内存分配，也需要几天时间才能完成执行。你需要找到算法复杂度较低的东西。 —

Answer 1

你的问题是你正在具体化你的单词的所有可能排列的列表。然而，没有必要这样做。您需要做的就是枚举可能的排列并针对某种条件测试每一种排列。在 C# 中，这可以通过实现IEnumerable<string []>一种一次仅具体化一个字符串数组的方法来完成。

以下是针对 .NET 8 / C# 12 编写的此类内容之一：

public static class EnumerableExtensions
{
    ///<summary>
    ///Enumerate the possible permutations of the incoming array.  
    ///Note that the memory of the returned array is reused for the next permutation
    ///</summary>
    public static IEnumerable<T []> EnumeratePermutations<T>(this T [] array)
    {
        if (array.Length == 0)
        {
            yield return array;
            yield break;
        }
        
        // Mutate a copy rather than the original
        var copy = array.ToArray();
        
        static IEnumerable<T []> EnumeratePermutations(T [] array, int start)
        {
            if (start >= array.Length)
                throw new ArgumentException("start >= array.Length");
            for (int i = start; i < array.Length; i++)
            {
                Swap(ref array[start], ref array[i]);
                yield return array;
                Swap(ref array[start], ref array[i]);
            }
        }

        var stack = new Stack<IEnumerator<T []>>();
        try
        {
            stack.Push(EnumeratePermutations(copy, 0).GetEnumerator());
            while (stack.Count != 0)
            {
                var enumerator = stack.Peek();
                if (!enumerator.MoveNext())
                    stack.Pop().Dispose();
                else if (stack.Count < copy.Length)
                    stack.Push(EnumeratePermutations(enumerator.Current, stack.Count).GetEnumerator());
                else
                    yield return enumerator.Current;
            }
        }
        finally
        {
            foreach (var enumerator in stack)
                enumerator.Dispose();
        }
    }
    
    static void Swap<T>(ref T a, ref T b) 
    {
        var temp = a;
        a = b;
        b = temp;
    }
}

出于性能原因，我选择使用显式堆栈而不是递归来实现此目的，并避免潜在的堆栈溢出。

然后你可以修改你的参数TrackIndexes()以接受IEnumerable<string []> aLst参数，并像以前一样继续：

static List<int> TrackIndexes(string pattern, IEnumerable<string []> aLst) {
  // TODO: write and test this yourself
}

话虽这么说，虽然这可以解决内存不足的问题，但它不会通过。简单地生成每个排列，然后检查连接是否包含在目标字符串中，O(s.Length * Factorial(words.Length))这样的速度不足以通过测试。您将需要通过进行一些快速拒绝来大大减少枚举的排列数量，或者切换到在中建议的替代方法。

Answer 2

你现在让我真正开始做有趣的拼图了。虽然您的解决方案可能可以进行一些优化，但我认为您应该放弃当前的想法并重新思考问题。尝试向自己重新解释问题。

提示：“排列”一词的使用是一种诱饵，让您生成此列表。尝试不使用这种用法来表达问题。

有轻微剧透的提示

我解决这个问题的方法是使用这些方法名称和签名：

public IList<int> FindSubstring(string s, string[] words)

Dictionary<string, int> ExtractWordOccurances(string candidateSubstring, int chopSize)

bool SubStringMeetsRequirements(Dictionary<string, int> extractedChunks, string[] words)

这个解决方案仍然非常低效，并且只是我的第一个方法，没有过多考虑运行时间和内存使用情况。也许还有更聪明的方法。

是的，当然，如果解决方案无效或者我不确定，我不会给出答案。正如我提到的，它的效率并不高。 —

Answer 3

由于数组中所有字符串的words长度相同，并且最大长度（30）很小，因此我们可以尝试以一个单词的长度为模的所有可能的等价类。对于每一个，我们可以使用滑动窗口来跟踪中的最大匹配子串O(|s|)（其中|s|表示的长度s）。

总体时间复杂度：（O(|w| * |s|)其中w代表一个单词）。这也相当于O(30 * |s|)一个单词的最大长度是 30 个字符。

public IList<int> FindSubstring(string s, string[] words) {
    int wlen = words[0].Length;
    var freq = words.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count());
    var res = new List<int>();
    for (int i = 0; i < wlen; i++) {
        var curr = new Dictionary<string, int>();
        for (int l = i, r = l; r + wlen <= s.Length; r += wlen) {
            var w = s.Substring(r, wlen);
            curr.TryGetValue(w, out var currCnt);
            curr[w] = currCnt + 1;
            for (; curr[w] > freq.GetValueOrDefault(w, 0); l += wlen)
                --curr[s.Substring(l, wlen)];
            if (r - l == (words.Length - 1) * wlen)
                res.Add(l);
        }
    }
    return res;
}

c# – 字符串列表内存不足 – 代码日志

3 个回答
3

有轻微剧透的提示

3 个回答 3

有轻微剧透的提示

3 个回答
3