Monday 18 January 2016

Dynamic Programming - Find If A Given Tree Has Duplicate Sub-Tree (Part I)

1. Problem Description
This is a Google interview question for software develop engineer from careercup. Here is the original thread,
"
Find if a given binary tree has duplicate sub trees or not. 

neer.1304 24 days ago in United States Report Duplicate | Flag ".

2. Analysis
This is a tree structure problem and very naturally think of using recursive implementation (dynamic solution) is an intuitive option. I am going to use binary tree to illustrate the solution.

The idea is quite simple that find out if one duplicate sub-tree is located in the left branch and another is located in the right branch and keep the sub-problem of both left and right branch if it doesn't have.

F(Root) = true , if a sub-tree found in the left branch found in the right branch
             = false, if not and turn into a sub-problem of F(Root->left) and F(Root->right)

Pseudo-code
1. Find out if a sub-tree in left branch exists in the right branch
    * Return true, if yes
2. Root <= Root->left (sub-problem)
    * Repeat Step 1
3. Root <= Root->right (sub-problem)
    * Repeat Step 1
4. Return false if exhausted the whole tree and not found

How to achieve Step 1
A. Find out if LEFT(Root->left) can be found in RIGHT (Root->right)
    * Return true, if yes
B. Try LEFT->left can be found in RIGHT
    * Repeat Step A
C. Try LEFT->right can be found in RIGHT
    * Repeat Step A
D. Return false  if exhausted the whole LEFT and not found

How to achieve Step A
I. Compare "SubTree1" (as constant) from LEFT with RIGHT
    * Return ture, if yes
II. Try "SubTree1" with RIGHT->left
    * Repeat Step I
III. Try "SubTree1" with RIGHT->right
    * Repeat Step I
IV. Return false if exhausted the whole RIGHT and not found

The dynamic solution produces very clear and nice logic and code. But the complexity is very heavy and no optimization at all and literally it is to use the brute force to examine all the possibilities.

Note: as the discussion of the thread revealed that a single leaf node with its parent can't regarded a sub-tree, therefore any "Root" passed into including the sub-problem has to have at least 3 nodes (including itself).

3. C++ Implementation
The implementation is based on binary tree.
// ********************************************************************************
// Implementation
// ********************************************************************************
// compare two trees if have exactly values
template<typename T>
bool CompareTwoTrees_R(TreeNode<T>* lhs, TreeNode<T>* rhs)
{
    if (lhs == nullptr && rhs == nullptr) {
        return true;
    }
    if ((lhs == nullptr || rhs == nullptr) ||
        *lhs->data != *rhs->data) {
        return false;
    }
    return CompareTwoTrees_R(lhs->left, rhs->left) ||
           CompareTwoTrees_R(lhs->right, rhs->right);
}

template <typename T>
bool ContainSubTree_R(TreeNode<T> *base,
                      TreeNode<T>* sub,
                      TreeNode<T>* &entry1,
                      TreeNode<T>* &entry2)
{
    if (base == nullptr || sub == nullptr) {
        return false;
    }
    if (CompareTwoTrees_R(base, sub)) {
        entry1 = base;
        entry2 = sub;
        return true;
    }
    // Implementation of Step I
    return ContainSubTree_R(base->left, sub, entry1, entry2) ||
           ContainSubTree_R(base->right, sub, entry1, entry2);
}

template <typename T>
bool MoreThanNNodes_R(TreeNode<T>* root, const size_t N, size_t &curSize)
{
    if (curSize > N) {
        return true;
    }
    if (!root) {
        return false;
    }

    ++curSize;
    return MoreThanNNodes_R(root->left, N, curSize) ||
           MoreThanNNodes_R(root->right, N, curSize);
}

template <typename T>
bool MoreThanNNodes_R(TreeNode<T>* root, const size_t N)
{
    // Implementation of comparing the nodes in the tree against a size
    size_t curSize = 0;
    return MoreThanNNodes_R(root, N, curSize);
}

template <typename T>
bool FindCommonTree_R(TreeNode<T>* base1,
                      TreeNode<T>* base2,
                      TreeNode<T>* &entry1,
                      TreeNode<T>* &entry2)
{
    // sub-problems have to have at least 3 nodes
    if (!MoreThanNNodes_R(base1, 2) || !MoreThanNNodes_R(base2, 2)) {
        return false;
    }
    // Implementation of Step A
    return ContainSubTree_R(base1, base2, entry1, entry2) ||
           FindCommonTree_R(base1, base2->left, entry1, entry2) ||
           FindCommonTree_R(base1, base2->right, entry1, entry2);
}

template <typename T>
bool HaveDuplicateSubTree_R(TreeNode<T> *root, TreeNode<T>* &entry1, TreeNode<T>* &entry2)
{
    if (root == nullptr) {
        return false;
    }
    // Implementation of Step 1
    return FindCommonTree_R(root->left, root->right, entry1, entry2) ||
           HaveDuplicateSubTree_R(root->left, entry1, entry2) ||
           HaveDuplicateSubTree_R(root->right, entry1, entry2);
}

// ********************************************************************************
// Test
// ********************************************************************************
void TestFindDuplicateSubTree()
{
    {
        TreeNode<int>* root = nullptr;
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);
    }

    {
        const std::vector<int> testInput = { 1, 2 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);
        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);
        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);
        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root);
        auto rightestNode = FindRightestNode(root);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 1);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101, 102 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root);
        auto rightestNode = FindRightestNode(root);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 1);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == true);
        assert(entry1 != nullptr);
        assert(entry2 != nullptr);
        assert(CheckTwoTreesWithSameValues(entry1, subTree1));
        assert(CheckTwoTreesWithSameValues(entry2, subTree2));
        assert(CheckTwoTreesWithSameValues(entry1, entry2));

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101, 102 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->left);
        auto rightestNode = FindRightestNode(root->left);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 1);
        assert(*rightestNode->data == 4);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == true);
        assert(entry1 != nullptr);
        assert(entry2 != nullptr);
        assert(CheckTwoTreesWithSameValues(entry1, subTree1));
        assert(CheckTwoTreesWithSameValues(entry2, subTree2));
        assert(CheckTwoTreesWithSameValues(entry1, entry2));

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101, 102 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->right);
        auto rightestNode = FindRightestNode(root->right);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 6);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree_R(root, entry1, entry2) == true);
        assert(entry1 != nullptr);
        assert(entry2 != nullptr);
        assert(CheckTwoTreesWithSameValues(entry1, subTree1));
        assert(CheckTwoTreesWithSameValues(entry2, subTree2));
        assert(CheckTwoTreesWithSameValues(entry1, entry2));

        DeleteTree(&root);
    }
}

No comments:

Post a Comment