问题是找到具有 k 个重复元素的最长子串,但是 k 是重复元素的数量,例如如果我们有字符串“ababcbb”并且 k=1,那么检索到的最长子字符串是“babc”和“abcc”,因为在第一个实例中“b”是重复元素,在第二个实例中“c”是重复元素。如果k为2,则最长的子串是“babccb”,因为“b”重复了3次,“c”重复了两次,这就是“k是重复元素的数量”的意思。在最终的程序中,必须从文件中读取字符串。
def find_longest_substring(s, k):
longest_substring = []
current_substring = []
current_counts = {}
for char in s:
if char in current_counts:
current_counts[char] += 1
else:
current_counts[char] = 1
current_substring.append(char)
if max(current_counts.values()) > k:
removed_char = current_substring.pop(0)
current_counts[removed_char] -= 1
if current_counts[removed_char] == 0:
del current_counts[removed_char]
if len(current_substring) > len(longest_substring):
longest_substring = current_substring.copy()
return longest_substring
def read_string_from_file(filename):
with open(filename, 'r') as file:
return file.read().strip()
def print_substring_and_frequency(substring):
frequency = {}
for char in substring:
if char in frequency:
frequency[char] += 1
else:
frequency[char] = 1
print("Longest substring is", len(substring), ",", substring)
print("Frequency:", [(char, freq) for char, freq in frequency.items()])
def main():
filename = input("Enter the filename: ")
k = int(input("Enter the value of k: "))
s = read_string_from_file(filename)
longest_substring = find_longest_substring(s, k)
if longest_substring:
print_substring_and_frequency(longest_substring)
else:
print("No valid substring found.")
if __name__ == "__main__":
main()
这是我尝试过的代码,但给出了错误的结果。
代码的预期字符串是:
jgigecbggjjihhefefkjhffbagdbckkbgkfgiekeefffjcbbikbidhjgedjdcabdkeccfecijajjgckihdhedaejjidaaddbifcebffafcbfibahjffgijbjchhceejihkidkbigkccbdbcechggeekhedjhahdhhgkihiacbgfifjbdjkeeccjaekhcibefajkeehcbeiihigekfcdihdbhjigbgfgdcckaddkffjiiddbdjdjdiekfkdiefhjjakjaijahgdabcdgghkkdggfdabhhgiehbfiiagghkkbjfbabkeejjacfbgbedejkhgkfjjccfkcacjcfehgagbbbghefghkdgehicbaajkbgiffachhkfcahhibkidjgbeccakbfcbkgibikahgajghahcgcjfageiddkcdhakdkjhfkibbijfbcceghagaggfhibfijefcijidhdcaegcgkiadhgedjegggajfeijkbbceejgddhjdkbghchfifghcabkkdfeiigjcbbbdfjbjakkgfjabifjdcediicebfefbjckkhccgbfbcijdjfckkgdhiegjkkkdckcjffcdabfjejjebgbajibgcaidcgiijjccigghbdicfgabhcafcbfceikhgijjdedagcgbbffidjjkbdhbceifjajbfakbfhdefhkbecijgedhcddidikckdhhcckafikkjkbidkdheckjdibfhjjfbfjkkdeckkkfcgicfjigaccikjgijbbhdddfaehkhibejaeadafhffiikgjfbhbachgdeaihcedebfajcbifaedihhgcjgkhfieiabfjhgfgcjdcfejedhhggbkikadkadieggjcjghbjadeafdjafbjhehacbbebcaekaafddegeecaacjfidgcjjbjahebagacjciggdfjfceifkbbcijjcgiijckejbbecaabdihfgifefgccjjdgcjkkbakfckddcgbbcfaegcajgkghfhbcacbfiihcgbkcckdjghjaeibhfhdifiigbhhekkfkacdfkafbfgbkdagkkgiidahidgebfgcjjabfdjcchbhedgagghkfhfbghcbjfkbjjhefcigiibfifkgfbddhgfdeiifacfghcfghifkcafekbjabcaidefaecaddjaiejcfafgbbaedhbiiigekbjdaigjgbfgbbcefehcagckcfcakegjichkceiihebiakhagcdggfgfbadhhkjehgekifaaddeafdbbfihchacakbdfhhbaggagkhbabhieiaehjkdccfgcfbfjdgddbigdjifijhdkbaheaajaefjhaddjfckkcekjkcbacgdibjhbdekhbdgfaccehabbgfehgidejgdefjagkfecjgcjakfgcaaeghiaffdghjebaagbaajfffjigekajhdeghfcdkdgjgjbbdddiabcajdgdkdihidefiikhhjigjhcdjebahjfhcbedjfkiiaiccjjdjbbabajhbkbfjjjiggehidgbefdfbddchdbfahedhahehhiakfcehifdbheejdkjfejhcebhjgbhbjfbbgbdffjbikfhjhgkcifjgcckcaadcbhagbihidfbidgfdbgihkceefccjgbfdbgbbgkhkejccbadfdjfbecbbfkcgcadbkfeejbjkdbjghhjehhdakjfjkfgbcfbgbjkjbhfafaabgekbcggakhfeidchhahhjghcjhefgehcaheghchdkggfffgifjgkidgfchijbgifffdebjcjechddijgbekhjjejjddkkgfdbjaechfgkfagbihhcbjhefejggkefcdkdeaekejdgdiddhcejfjhiiaakjjhjbbdeickijbfdccefahdjbggffekeeikgikbdafajdfiecijgbjihaadaigkegheejhaceghbidkjhgaakdejhigjdhdbciehbbfdeaaagebhbidhdjfkijhabggeccfbhheaaggegjeajadbddbecbjcdegekebdgjehidccaggagifagcjdaakachgdjejjjjeckiekjgcacdiffgbdadjfiabgdkfhkcdajhcbjbgaajihhebgbfeeibceddachgegagegfjdjbefcaafgabbhgjijjabbcfhjfakabbebkgjihdbbabfddhhggceadadhggiaefcegjabhcgcfkaidfkikfceejkbffgijbikabfgggfkcfbekdhkfcaidkddaddgbhcbifdhfkikbfihaicijhiidjefgijdgdhheakhcaijfjjbhchbiffcdkkhjdgbcedicjfbdfkbifjabkjiicgdccjahcfabgeecbfaefhbceacbkbbkeikaikcidgjegghhikibgcjgejhfaciikbfjfeejkkekbbkebghacfkbbijbfdijchgbdjbiedgiaafddbkihicjajifcekkefjhggbcikhjeackcgbgibfghjgdfadfbhhfibeibfcdjbdbcbjjciajehdhjciegghcfkhckgjbkgjdjjeahhjiiibhkbddfccckeakfcibkjbiakbecihbcgeafhjeaggkabcjeabgcgaaghdgijbaichideejaegeebhfiibdejicbedagdedfegbekjidbcihfhcjcggifcifaghhdbhdcbfcbafbacedibdecjdkigfhjkehajfhckgijdjdkecihkjbcfdgadgjjfiabcffkdejdicjkcfikieggbefakfchhceaigfbbhkcaigbefjafiajcbkdcdiceaighdaghhbgkkebgggachhdcbckhekhhbbihhdkjbefcifaajdffceakcceifaejgcbekgccgbjjhhgaeheggagkjgcjaeefbgfcfccficjbfgkjcfbikbbgdbjabjkcecbgccbdcckbbhjaabbdcadbchjgccdghjbjihageikcghbajdkkikghcdbfkejeiidggehgiddaijfccekjbfchfeckgkhkbhjdehhekajjjcibbageibfiihcidbdjifhaiffghgeibefjakekbbdaehagkdjfcchhgibdicicbcfkbbeiddjifghcjjefbbdkkiihjegikjgckcdhjahkjbkeidbbaikcigaekeffaekeahgehgcigdihdgeibdkicfkbbiefbckefihgdchbjkkhjfcbecgfjdicjekdicdbgjhbchkgkbdkifchegahjaiicccaadejaeijbdbjhagihecaejdefiaabhejiidjaedckhgkgdcggkhjgcjcijhhfjcaabhghcfgagfafdbbbfegeigefcidbcdcedhcibadgagbbibhdfjgjkfbgjddjgdgikdfkfjhdiaeihdbefjkhcgfiigcbdfkffidafijegahdkhjajkiehihfjcaadbkgdchjcjgaebehekchdgcfkgcijddckkgdiaibehdefkikghaggjcggbfijjkagfegedcdekegibihkhbakcicfbikegkjcfkhegddjfkkegkhgidibefgabdkcciabhgkbakghajfhfgkgjbacgkbkbffhgjhkihdhaicbhbdiagibajdebgjaddaffehcjjhjckjkcdfefejefjccjaejkfceadfjgdcagbbbahdfcbjgikadkahggkkfhajffibhdfdijbdcbbdkeehcgdhchabakfadbfddicahafjhjghfjdfkaciihjaefebeabdehigfeaggbdidkfekjegcdbbaggjhidhfckgfeedkbkdacjdfhgkjdebadeakekchaefcjkfahhgcahbhkcabjefkbdiihdbhhdigkhabbafifiececdkjecabbkgehkikckbeaaibeicbffjjhicahfbbaaccbbaeajdadcggfkifccgeckbiffgifdcabjhdjfakjchgcdkbekgekfeiibgieebheeckhefhijddjcecbchbhfdcjahabhcciedhacgdjbdgbajckbkiifdhkdbecjgkeeifjjfaebfgdfkjbkbahbbbihajbgbbaacjaafjddkdkadgfbfdjihhfabjhifkfiaekjggjiejjfahcgjddgabhjbahcgidbiccdhjhefikhbcgdafacejcaiiaefhdhgcegkggdhjcihieadkdjfkbfihbhdjadgkjcacafhcaaiajkejdgifiafgjbkbjadghjicjjkhjhicaifibibaeeafjgekfkcjbcaihihgbdbecichcbedbcfkabkgdifhegbcbcgjigaiakedkgdegeaaacakhgjbkeckjeggikiekhjhefcbdjecheffciffdikkcdbeiidkfkhdbdjbjchiigadikdegffgggdbffjbkbbakcfjejbhbadjiajiabfikkajkibgdfgikcafgdichjgbeaedecjjdjadfdabgbfbfgeddjhcabagigdjhieifdhjgbbgdabfeechgeikdghgccacgjeiehbjckgeghecchiddefighgcjijeeghddijabidfagjicgkechggfegkhccgagjdgbjkdcdkegchdbdeagibkjggjcheacagfcijefgbhcabjdhiffakcbdikahgfjgkdgjkckfbcdgbkdfkdakikiihheffcahcbaghegifafecjcgdcikjkkkccecheaakigdjkhafjaiikcdagghiickdgjgicjcghbfcdgdchahafkhfiiafajaccegkajjkfihaghchbdkhifffjikfkdgddcjadhbejfbfjhefdbfaihckdbkgaijdagkiabkidfdhbjkbakggiikhachggaacgbfaeccieakdbbcjghijjjfiachbgjkchiahkejikdkifjhecceehidgcgagfaccbkcagjkeaiciiagdbikgdfkcffjaiejfffbghfjjddcbaifehbjdhkgjfabdgdbffhagijaigekgfidebgiagjicgbcjgbjcbjdhehgchdiibeighckgihcgdaigaeihbgcgckgcdedkihgehheiaddgakhdhggfcecfafifehihicfdgajdjekbhadgegbbjjjieieffahdfdageigbcbjacihcfgbheffbegcgccfgcabbjdeaeicdcdjieibdfekbgdcacdkjhkkekceaiaakdffdihdkehkijjcbbijghkficdafjhcekejagcdkfeiffdkjhekiagbcfhgcfeefkkifefcikkehcifegachghedbfhahibeihfieikcgdeagcbjagjiggfgfekcijefeefbihhhbkakaiafkhaeekkeikfkfeaekhbkehhgcbcgecifkceeikhkcccfgdkgjgfjgbjckikacejcdiacagbgfgceafejcfeejifkiibfagjjbfcaaeccfffkgfkehjdagdbbkadcjjgibjibjejaiijcbijbbfabidgfbbjeciiegbgbhffkjcgfahfkfccdkbaihabfhcejeicdabkkjfcaibgajjaddahachjifddkjcfjkkhbaiabkiahfbiachchgbagabhkijkgjdbdkejchfiiiieijkjgjfkefcahcdcceijeehkhehfahbhfiiikejcbjeecjgejbagkdbkbibficbahiddafgibjkgdedfbaababhghghbdiacbgcifchdhicgfbbbefbdfjfbjaagbikfgjdcjebdgeicffdbhaheihcddcahdaidjahdahcgihgcgbdgcbefcfgeahaebaecbicaeibccjjbcegkafaaffekgaidefghfbdfjfabaddbeceebdgdbdjjgeafkkbijdajcefbkjkcjjfaigkkaahjjdekdcfjijgfifkijaekgbggahkebccgahfecddccdedejfkfgakjjfafkjjbddaadjjhkcjigagedhkikgeacickebfaedebgigbajcggcgceaffkjgicjjcagbdbgeaeckikdiickdhkfdkhcdfddcbbecjigeejdbajikikficajihkfdfcfeafjfjchdibeghfcfaifkekfdehfcfhjabajbjegikihahcjekfgddajgccaaffjdbekhkfikfefbiidjaakaaiabjadbdcgddekbecfakkjhchjafihidiahcgefgcdheegahjjfhaeaabcbfeakbiiejgefijcjfdgjfjjjdkddighiehihckddchdbchhecjckbbigbhejcdafhhajghdjifadhbggbjiahjgjcaacehjeejceghebbdbhhiifeijkkkfakebhahhhchbbiheacgjickbfgdijkkebkfafkcejbkegkgeafdfegiakfegbbkcdihejckdbfgkdcifcccdkihgeffcabbdaaagbajefjgfecdgecbdkdbheiidhjjckbafdhjccedjchbahkeiaikakgfcaaabibhdgafbhghbekefibbhdjkfeejbfhdkbaaheikakhfghcbicjgiebcicefbciajhjdifibcgbdkhfbhhbicgaacajedfhdcjaehjjfgfkjiajdghhhbbhkfbfffjfcegdcegdggdbhhecgfffggekdfjfieihfkcghkaahbhdhiejahfeihgeicjbkekegbebbgfkakccjdhicjgkcdcbacfjkgajhbhaabbghfjceiaccfeceegbghkcdbfebhjkdkfgdekgfceakjjhgjadeegcejkbgffkcbheigccfadjcdjjfejfhbgebkbggdabfcajacaegffecggikgihjjidejbjkffhhgaccdacigdgjbkdgiigjajiigdfdhiggcggdaggckehbjbhijbbfeeacgkaehigiiiigagggckbadjhkbjahfehihhbafbbjiagejfaecahfhdikaajkbeabbbjfgiajjjfjakddchfhbkjkgekjhbabaiheehcaifafideeigcebgjeidfdijfcfeggcgaaejgjhgfdgfbbhejchfdkgeekhhckieagabgiffjdahfkchibgagcifdjffhbcihcehidgkhbbakdacgiefhgagcibeghjbcdkdifbkjgehgaaigdidfaacekkiigjkdcdbbggjhakkbickkajkgchjjikdkbigddjadhbchiefjeaihkibbcgdegiajbbbfehjhbbefakdebdfecjidhjdigfekdfdiekdhiddhceackcihfafajdaaaddhafdkejdegccjfebhhkehgekdcjkbhbddaiekgiachiaafaefhbfabjkchghkkcjghacidhdkddjkbcgihhgckbbhfebaiighggehgkgkbfkkejgficbgbhfdcckhhihijdiiebhdggkhbakkadgeibcidkafjijjedfgajcdhhhaeiejchjecfifdchbkkjbdeecfeaaejeeibabhckdgejjjgkckekhicfchbkcickhicejiaifaikjhikghkaaghfikbgaefidaakddihdachceicbchbebiggaahgjbaekbbfefibchgdjfkjcahfhjkiefjjicfeegjdfhfakacijcaakchdfjecigcigihkjdkgabjfbfcijgbbiidkkddefiijgjjbjcdihbidfihicjibkkhggcifckchdffebhgdijbbdfebiaebiafchcaiedgihbceagahbgkbkbdcghaaakhhcighgchegaeddfaakbgciadhiaejeigfhcdjacajjfcfahhbeajjbfibiaibjddiihddbeicjbdacfdjidjbgdahfjagefgakcidfghahjgibhdkcgdijcdaeecjgbikijfhkijbffiiididgbehbfhcbkedbhhkhhbkheeajhfeafagigkhahhgjkcffcfhjbkbehefeefbkfjfacdhjkabhkhccefddbheajheheeihegiiffiiiedidcicekeccddidhcjjdddfekfjcibekkfgiicjihfkbhhkcjjciakjccgkhjcaahkdfebeebcjdjkbijcffcjahdjgijkgadagbkfebgcfecfigkdeekaefhfeighedhkcaechhiaddeijagaikkkjfdfekigckgkfahdfdigdffaefbkfhjebdedibafcjckjbcihhddgdbjjciffedfcjadfijdhgfdicbjijeiabhfkkhkbckjeggehfhdbeaahabhgifackiagijifahiffgkjicjidjkjjegcfcghjgeefbijagfbbiehjedfigekadfehebifjhjfifaecikkijckhgcajihhhaejkdggkifffiicejbbdgggffccdehhcehjijieiiadkhiiccbhacdafgdejgicigagcbkfckjdjjdbcicahfeaakedaiiihdkbfdjffcjfiajfjhkadghjihccbfckadjgkiibgdaheaaehdjjhaidhjkeeaiedfkghajejgehbakkadgidcbdehejakegehfbhgaifkiageakidbekcgjcfhjdhicbbhjhgbhbbkfhhcakccgbcagieabeajcfcbaifgjfgaffegjjadkfbhdejchkjeaedhachcehgbefhaacfijaefjgfhccjkjeidgcfgceekejbdcgacgfdcaeechkfcbahcbgfcdajikjafkaekekfejhggjeagidekiceefbeiieffafecjccefdcddbgabbhghekkgbjhkjibkdecgcbbceccadcjjhgacgifhbdjadhjkkdbkhbegkbhibfacfgdjajcjhkjagbaaghjafcdghabccccehchjkgfkkkdjjakffkeihjhjdkabdchbkgefkfbceccdjikdcjabaedfajfjabkajdekfhiaaaejajfeefkhbjgfdebfbgddahieejhedfcjfjhbckdkgdcegidaddechadbcekkjihchahagafgbgacfebdkhadicheidfafcgkjdkkhgkebhfkjagkhgjigkifjaddgkhajbdibgcicdekbjjjihfficfjecdiafkhgbacajhehgjagcadjdcgjidjabgeekjebbihcjkakhkgbifhdcbabjjkhafggekehihjdkchbkdgcchefeccgfckbhhdekhcbbdiidhahihjeakjjiadijadihjkedcdjjiabkiecceacagjicjjgdgcfhjhfjgfkdekjcjcjdkkijifegkiggjcbecjbkjfhcghfacfaciagakejaickkkideihchkadjeihhidghiakakfbfgfagejekkaabkkdedfgbjbkdgedbjgfgeabkjjffgdajgbkbfjaeihhbeakeicajijcafaggakdghgigeabegcihejfbejekjhceikghhahjkjbcjbighjeehdjgebbdkgiafciecgbebiiajccafhciigeefchacgigadhbedbgaakcfbhbibjfeekfahdidbhjjdafekbdgadadjjchiicffhbdiheadghdechkdgjgkhdcciibigcgkfeccjbhhkdgbkgdjaeeigchhbdkeeeckcbhfjaafjaiaajacebdagfcbfdecaebbbedihicidfhjkieakffbikiaaicjebjjeajkjbaeekgjhbabafbcicgbdighabgjciachbjfajddaedkejfaeecdgkhhkhdahfkabdjijiejgkfhjefebckkgbchkhghabhecdekgekhfikghebccbbijahbiakibddjggahchfdeafbgdhegkhjakaffhgickjhfhcfkhcaakafbifdbhbgbcjacck
the intended result for k=3 is k=3, longest substring is 22, ['j', 'j', 'g', 'i', 'b', 'j', 'i', 'b', 'j', 'e', 'j', 'a', 'i', 'i', 'j', 'c', 'b', 'i', 'j', 'b', 'b', 'f'] with the frequency [('i', 5), ('b', 5), ('j', 7)]
['d', 'd', 'd', 'i', 'a', 'b', 'c', 'a', 'j', 'd', 'g', 'd', 'k', 'd', 'i', 'h', 'i', 'd', 'e', 'f', 'i', 'i'] with the frequency [('i', 5), ('a', 2), ('d', 7)]
['d', 'f', 'k', 'c', 'f', 'f', 'j', 'a', 'i', 'e', 'j', 'f', 'f', 'f', 'b', 'g', 'h', 'f', 'j', 'j', 'd', 'd'] with the frequency [('j', 4), ('d', 3), ('f', 7)]
我得到的答案是最长的子串是 32 ,
['b', 'a', 'f', 'b', 'a', 'c', 'e', 'd', 'i', 'b', 'd', 'e', 'c', 'j', 'd', 'k', 'i', 'g', 'f', 'h', 'j', 'k', 'e', 'h', 'a', 'j', 'f', 'h', 'c', 'k', 'g', 'i']
频率:
[('b', 3), ('a', 3), ('f', 3), ('c', 3), ('e', 3), ('d', 3), ('i', 3), ('j', 3), ('k', 3), ('g', 2), ('h', 3)]
您可以通过准确维护匹配或超过“k”的字符数量的计数来完善滑动窗口方法。这就需要根据这个条件是否满足来调整滑动窗口大小。
这是您的代码,其中包含详细注释,可帮助您了解必要的调整:
def find_longest_substring(s, k):
if k == 0:
return "" # Special case, if k is 0, return empty string
n = len(s)
max_len = 0
longest_substrings = []
# Try to find the longest substring for every possible starting character
for start in range(n):
counts = {}
freq = {}
max_char_freq = 0
# Sliding window from start
for end in range(start, n):
char = s[end]
if char in counts:
counts[char] += 1
else:
counts[char] = 1
# Frequency of the character count
if counts[char] in freq:
freq[counts[char]] += 1
else:
freq[counts[char]] = 1
# Maintain max_char_freq which tracks the number of chars meeting the k requirement
if counts[char] == k:
max_char_freq += 1
# Check if we have exactly 'k' distinct characters all with frequencies >= k
if max_char_freq == k:
current_len = end - start + 1
if current_len > max_len:
max_len = current_len
longest_substrings = [s[start:end+1]]
elif current_len == max_len:
longest_substrings.append(s[start:end+1])
# Break early if the number of possible characters with freq >= k exceeds
if max_char_freq > k:
break
return longest_substrings, max_len
此实现修改了滑动窗口如何根据满足重复要求(k)的字符数进行调整。现在,它返回所有此类最大长度的子字符串,其中 k 个字符至少重复 k 次。此外,它可以更好地处理满足条件的多个等长子字符串。
这是完整代码:
def find_longest_substring(s, k):
if k == 0:
return "" # Special case, if k is 0, return empty string
n = len(s)
max_len = 0
longest_substrings = []
# We try to find the longest substring for every possible starting character
for start in range(n):
counts = {}
freq = {}
max_char_freq = 0
# Sliding window from start
for end in range(start, n):
char = s[end]
if char in counts:
counts[char] += 1
else:
counts[char] = 1
# Frequency of the character count
if counts[char] in freq:
freq[counts[char]] += 1
else:
freq[counts[char]] = 1
# Maintain max_char_freq which tracks the number of chars meeting the k requirement
if counts[char] == k:
max_char_freq += 1
# Check if we have exactly 'k' distinct characters all with frequencies >= k
if max_char_freq == k:
current_len = end - start + 1
if current_len > max_len:
max_len = current_len
longest_substrings = [s[start:end+1]]
elif current_len == max_len:
longest_substrings.append(s[start:end+1])
# Breaking out early if the number of possible characters with freq >= k exceeds
if max_char_freq > k:
break
return longest_substrings, max_len
def read_string_from_file(filename):
with open(filename, 'r') as file:
return file.read().strip()
def print_substrings_and_frequencies(substrings):
print(f"Longest substrings of length {len(substrings[0])} with exactly k repeated elements:")
for substring in substrings:
frequency = {}
for char in substring:
if char in frequency:
frequency[char] += 1
else:
frequency[char] = 1
print(f"Substring: {substring}")
print("Frequency:", [(char, freq) for char, freq in frequency.items()])
def main():
filename = input("Enter the filename: ")
k = int(input("Enter the value of k: "))
s = read_string_from_file(filename)
longest_substrings, max_len = find_longest_substring(s, k)
if longest_substrings:
print_substrings_and_frequencies(longest_substrings)
else:
print("No valid substring found.")
if __name__ == "__main__":
main()