python 中的二叉搜索树与 10000 长数组。如何处理递归

问题描述 投票:0回答:0

我的大学项目有问题。我们必须做一些与二叉分割、二叉搜索树和搜索相关的任务。我们必须测量整个过程的时间,我们被迫使用大量数据来使测量时间具有可比性。这是我们制作的代码:


import copy
import random
import time
import csv
import sys
import threading
sys.setrecursionlimit(2**31-31)
threading.stack_size(2**27)

def generateArrayNonRepeating(n):
    return random.sample(range(0, n), n)

def QSmid(table):
    if len(table) <= 1:
        return table

    pivot_index = len(table) // 2
    pivot = table[pivot_index]
    left = []
    right = []
    for i in range(len(table)):
        if i != pivot_index:
            if table[i] < pivot:
                left.append(table[i])
            else:
                right.append(table[i])
    return QSmid(left) + [pivot] + QSmid(right)


def search(table, value):
    for i in range(len(table)):
        if table[i] == value:
            return i

def binarySearch(table, value):
    left = 0
    right = len(table) - 1
    while left <= right:
        mid = (left + right) // 2
        if table[mid] == value:
            return mid
        elif table[mid] < value:
            left = mid + 1
        else:
            right = mid - 1

def binarySplit(table):
    if len(table) <= 1:
        return table
    mid_pivot = len(table) // 2
    mid = []
    mid.append(table[mid_pivot])
    left = table[:mid_pivot]
    right = table[mid_pivot+1:]
    return mid + binarySplit(left) + binarySplit(right)


class Node():
    def __init__(self, data=None):
        self.data = data
        self.left_child = None
        self.right_child = None


class BST():
    def __init__(self):
        self.root = Node(None)

    def add(self, data):
        if self.root.data is None:
            self.root.data = data


        else:
            self.add_to_top(data, self.root)

    def add_to_top(self, data, top):
        if data != top.data:
            if data < top.data:
                if top.left_child is None:
                    top.left_child = Node(data)

                    return top.left_child
                else:
                    return self.add_to_top(data, top.left_child)
            else:
                if top.right_child is None:
                    top.right_child = Node(data)

                    return top.right_child
                else:
                    return self.add_to_top(data, top.right_child)
        else:
            return top

    def height(self):
        return self._height(self.root)

    def _height(self, node):
        if node is None:
            return 0
        else:
            left_height = self._height(node.left_child)
            right_height = self._height(node.right_child)
            return max(left_height, right_height) + 1

    def search(self, data):
        return self._search(data, self.root)

    def _search(self, data, node):
        if node is None or node.data == data:
            return node
        elif data < node.data:
            return self._search(data, node.left_child)
        else:
            return self._search(data, node.right_child)





with open('resultsCreate.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["n", "CB", "CTA", "CTB"])
with open('resultsSearch.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["n", "SA", "SB", "STA", "STB"])
with open('resultsHeight.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["n", "HTA", "HTB"])

for j in range(5):
    for i in range(10):
        n = 10000 + i * 1000

        A = generateArrayNonRepeating(n)


        start = time.time()
        B = copy.deepcopy(A)
        QSmid(B)
        end = time.time()
        CB = end - start
        print("kopiowanie i sortowanie")

        start = time.time()
        for i in B:
            search(A, i)
        end = time.time()
        SA = end - start
        print("sa")

        start = time.time()
        for i in A:
            binarySearch(B, i)
        end = time.time()
        SB = end - start
        print("sb")


        start = time.time()
        TA = BST()
        TA.add(A[0])
        for i in range(1, len(A)):
            TA.add(i)
        end = time.time()
        CTA = end - start

        HTA = TA.height()
        print("git")

        start = time.time()
        for i in A:
            TA.search(i)
        end = time.time()
        STA = end - start
        print("drzewo")

        start = time.time()
        TB = BST()
        TB.add(A[0])
        for i in range(1, len(A)):
            TB.add(i)
        end = time.time()
        CTB = end - start

        HTB = TB.height()
        print("wysokosc drzewa")

        start = time.time()
        for i in A:
            TB.search(i)
        end = time.time()
        STB = end - start
        print("search in tree")

        with open('resultsCreate.csv', 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([n, CB, CTA, CTB])
        with open('resultsSearch.csv', 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([n, SA, SB, STA, STB])
        with open('resultsHeight.csv', 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([n, HTA, HTB])

    with open('resultsCreate.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([" ", " ", " ", " "])
    with open('resultsSearch.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([" ", " ", " ", " ", " "])
    with open('resultsHeight.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([" ", " ", " "])
        del A
        del B
        del TA
        del TB

我们面临的问题是“进程完成,退出代码为 -1073741571 (0xC00000FD)”,因此深度递归/堆栈溢出

我们已经尝试添加 thread.lock 来避免这种情况,但问题仍然存在,只是在稍长一段时间后突然出现。我已将 pycharm 中的最大堆大小更改为 99999999 MiB,但它仍然不会使用我所有的 ram。老实说,当程序在我的 ram 消耗中运行时,我什至无法识别任何变化。 我如何强制 pycharm/python 使用我的更多资源来完成这项工作?当我使用小于 ~3500 的数据时,它可以工作,但我们无法发现任何恒定的时间差异。

python pycharm binary-search-tree
© www.soinside.com 2019 - 2024. All rights reserved.