Data Structures Mastery

Data Structures Mastery: Complete Algorithm Foundation

Table of Contents

  1. Introduction to Data Structures
  2. Arrays and Dynamic Arrays
  3. Linked Lists
  4. Stacks and Queues
  5. Trees and Binary Search Trees
  6. Hash Tables and Hash Maps
  7. Heaps and Priority Queues
  8. Graphs and Graph Algorithms
  9. Advanced Data Structures
  10. Time and Space Complexity Analysis

Introduction to Data Structures {#introduction}

Data structures are fundamental building blocks of computer science that organize and store data efficiently. Understanding data structures is crucial for writing efficient algorithms and solving complex programming problems.

Why Data Structures Matter

  • Efficiency: Choose the right structure for optimal performance
  • Memory Management: Efficient use of system resources
  • Problem Solving: Foundation for algorithmic thinking
  • Scalability: Handle growing amounts of data
  • Code Organization: Better software architecture

Big O Notation Primer

# O(1) - Constant Time
def get_first_element(arr):
    return arr[0] if arr else None

# O(n) - Linear Time
def find_element(arr, target):
    for element in arr:
        if element == target:
            return True
    return False

# O(n²) - Quadratic Time
def bubble_sort(arr):
    n = len(arr)
    for i in range(n):
        for j in range(0, n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
    return arr

# O(log n) - Logarithmic Time
def binary_search(arr, target):
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1

Arrays and Dynamic Arrays {#arrays}

Static Arrays

class StaticArray:
    def __init__(self, capacity):
        self.capacity = capacity
        self.data = [None] * capacity
        self.size = 0

    def get(self, index):
        if 0 <= index < self.size:
            return self.data[index]
        raise IndexError("Index out of bounds")

    def set(self, index, value):
        if 0 <= index < self.size:
            self.data[index] = value
        else:
            raise IndexError("Index out of bounds")

    def append(self, value):
        if self.size < self.capacity:
            self.data[self.size] = value
            self.size += 1
        else:
            raise OverflowError("Array is full")

    def __len__(self):
        return self.size

    def __str__(self):
        return str(self.data[:self.size])

# Usage
arr = StaticArray(5)
arr.append(1)
arr.append(2)
arr.append(3)
print(arr)  # [1, 2, 3]

Dynamic Arrays (Resizable)

class DynamicArray:
    def __init__(self):
        self.capacity = 2
        self.size = 0
        self.data = [None] * self.capacity

    def _resize(self, new_capacity):
        """Resize the underlying array."""
        old_data = self.data
        self.data = [None] * new_capacity
        self.capacity = new_capacity

        for i in range(self.size):
            self.data[i] = old_data[i]

    def append(self, value):
        """Add element to the end - O(1) amortized."""
        if self.size >= self.capacity:
            self._resize(2 * self.capacity)

        self.data[self.size] = value
        self.size += 1

    def get(self, index):
        """Get element at index - O(1)."""
        if 0 <= index < self.size:
            return self.data[index]
        raise IndexError("Index out of bounds")

    def set(self, index, value):
        """Set element at index - O(1)."""
        if 0 <= index < self.size:
            self.data[index] = value
        else:
            raise IndexError("Index out of bounds")

    def insert(self, index, value):
        """Insert element at index - O(n)."""
        if index < 0 or index > self.size:
            raise IndexError("Index out of bounds")

        if self.size >= self.capacity:
            self._resize(2 * self.capacity)

        # Shift elements to the right
        for i in range(self.size, index, -1):
            self.data[i] = self.data[i - 1]

        self.data[index] = value
        self.size += 1

    def delete(self, index):
        """Delete element at index - O(n)."""
        if index < 0 or index >= self.size:
            raise IndexError("Index out of bounds")

        # Shift elements to the left
        for i in range(index, self.size - 1):
            self.data[i] = self.data[i + 1]

        self.size -= 1

        # Shrink if necessary
        if self.size <= self.capacity // 4 and self.capacity > 2:
            self._resize(self.capacity // 2)

    def find(self, value):
        """Find first occurrence of value - O(n)."""
        for i in range(self.size):
            if self.data[i] == value:
                return i
        return -1

    def __len__(self):
        return self.size

    def __str__(self):
        return str([self.data[i] for i in range(self.size)])

# Advanced array operations
class ArrayOperations:
    @staticmethod
    def merge_sorted_arrays(arr1, arr2):
        """Merge two sorted arrays - O(n + m)."""
        result = []
        i = j = 0

        while i < len(arr1) and j < len(arr2):
            if arr1[i] <= arr2[j]:
                result.append(arr1[i])
                i += 1
            else:
                result.append(arr2[j])
                j += 1

        # Add remaining elements
        result.extend(arr1[i:])
        result.extend(arr2[j:])

        return result

    @staticmethod
    def rotate_array(arr, k):
        """Rotate array to the right by k positions - O(n)."""
        if not arr or k == 0:
            return arr

        n = len(arr)
        k = k % n  # Handle k > n

        def reverse(start, end):
            while start < end:
                arr[start], arr[end] = arr[end], arr[start]
                start += 1
                end -= 1

        # Reverse entire array
        reverse(0, n - 1)
        # Reverse first k elements
        reverse(0, k - 1)
        # Reverse remaining elements
        reverse(k, n - 1)

        return arr

    @staticmethod
    def find_max_subarray_sum(arr):
        """Kadane's algorithm for maximum subarray sum - O(n)."""
        if not arr:
            return 0

        max_sum = current_sum = arr[0]

        for i in range(1, len(arr)):
            current_sum = max(arr[i], current_sum + arr[i])
            max_sum = max(max_sum, current_sum)

        return max_sum

Linked Lists {#linked-lists}

Singly Linked List

class ListNode:
    def __init__(self, val=0, next=None):
        self.val = val
        self.next = next

    def __str__(self):
        return str(self.val)

class SinglyLinkedList:
    def __init__(self):
        self.head = None
        self.size = 0

    def append(self, val):
        """Add element to the end - O(n)."""
        new_node = ListNode(val)

        if not self.head:
            self.head = new_node
        else:
            current = self.head
            while current.next:
                current = current.next
            current.next = new_node

        self.size += 1

    def prepend(self, val):
        """Add element to the beginning - O(1)."""
        new_node = ListNode(val)
        new_node.next = self.head
        self.head = new_node
        self.size += 1

    def insert(self, index, val):
        """Insert element at index - O(n)."""
        if index < 0 or index > self.size:
            raise IndexError("Index out of bounds")

        if index == 0:
            self.prepend(val)
            return

        new_node = ListNode(val)
        current = self.head

        for _ in range(index - 1):
            current = current.next

        new_node.next = current.next
        current.next = new_node
        self.size += 1

    def delete(self, val):
        """Delete first occurrence of value - O(n)."""
        if not self.head:
            return False

        if self.head.val == val:
            self.head = self.head.next
            self.size -= 1
            return True

        current = self.head
        while current.next:
            if current.next.val == val:
                current.next = current.next.next
                self.size -= 1
                return True
            current = current.next

        return False

    def find(self, val):
        """Find element - O(n)."""
        current = self.head
        index = 0

        while current:
            if current.val == val:
                return index
            current = current.next
            index += 1

        return -1

    def reverse(self):
        """Reverse the linked list - O(n)."""
        prev = None
        current = self.head

        while current:
            next_node = current.next
            current.next = prev
            prev = current
            current = next_node

        self.head = prev

    def get_middle(self):
        """Find middle element using two pointers - O(n)."""
        if not self.head:
            return None

        slow = fast = self.head

        while fast.next and fast.next.next:
            slow = slow.next
            fast = fast.next.next

        return slow.val

    def has_cycle(self):
        """Detect cycle using Floyd's algorithm - O(n)."""
        if not self.head:
            return False

        slow = fast = self.head

        while fast and fast.next:
            slow = slow.next
            fast = fast.next.next

            if slow == fast:
                return True

        return False

    def __len__(self):
        return self.size

    def __str__(self):
        if not self.head:
            return "[]"

        result = []
        current = self.head
        while current:
            result.append(str(current.val))
            current = current.next

        return " -> ".join(result)

Doubly Linked List

class DoublyListNode:
    def __init__(self, val=0, prev=None, next=None):
        self.val = val
        self.prev = prev
        self.next = next

class DoublyLinkedList:
    def __init__(self):
        # Dummy head and tail nodes
        self.head = DoublyListNode()
        self.tail = DoublyListNode()
        self.head.next = self.tail
        self.tail.prev = self.head
        self.size = 0

    def append(self, val):
        """Add element to the end - O(1)."""
        new_node = DoublyListNode(val)
        prev_node = self.tail.prev

        prev_node.next = new_node
        new_node.prev = prev_node
        new_node.next = self.tail
        self.tail.prev = new_node

        self.size += 1

    def prepend(self, val):
        """Add element to the beginning - O(1)."""
        new_node = DoublyListNode(val)
        next_node = self.head.next

        self.head.next = new_node
        new_node.prev = self.head
        new_node.next = next_node
        next_node.prev = new_node

        self.size += 1

    def delete_node(self, node):
        """Delete a specific node - O(1)."""
        if node == self.head or node == self.tail:
            return False

        prev_node = node.prev
        next_node = node.next

        prev_node.next = next_node
        next_node.prev = prev_node

        self.size -= 1
        return True

    def find_node(self, val):
        """Find node with value - O(n)."""
        current = self.head.next

        while current != self.tail:
            if current.val == val:
                return current
            current = current.next

        return None

    def __str__(self):
        if self.size == 0:
            return "[]"

        result = []
        current = self.head.next

        while current != self.tail:
            result.append(str(current.val))
            current = current.next

        return " <-> ".join(result)

Stacks and Queues {#stacks-queues}

Stack Implementation

class Stack:
    def __init__(self):
        self.items = []

    def push(self, item):
        """Add item to top - O(1)."""
        self.items.append(item)

    def pop(self):
        """Remove and return top item - O(1)."""
        if self.is_empty():
            raise IndexError("Stack is empty")
        return self.items.pop()

    def peek(self):
        """Return top item without removing - O(1)."""
        if self.is_empty():
            raise IndexError("Stack is empty")
        return self.items[-1]

    def is_empty(self):
        """Check if stack is empty - O(1)."""
        return len(self.items) == 0

    def size(self):
        """Return number of items - O(1)."""
        return len(self.items)

    def __str__(self):
        return str(self.items)

# Stack Applications
class StackApplications:
    @staticmethod
    def is_balanced_parentheses(s):
        """Check if parentheses are balanced - O(n)."""
        stack = Stack()
        pairs = {'(': ')', '[': ']', '{': '}'}

        for char in s:
            if char in pairs:  # Opening bracket
                stack.push(char)
            elif char in pairs.values():  # Closing bracket
                if stack.is_empty():
                    return False

                opening = stack.pop()
                if pairs[opening] != char:
                    return False

        return stack.is_empty()

    @staticmethod
    def evaluate_postfix(expression):
        """Evaluate postfix expression - O(n)."""
        stack = Stack()
        operators = {'+', '-', '*', '/'}

        for token in expression.split():
            if token in operators:
                if stack.size() < 2:
                    raise ValueError("Invalid expression")

                b = stack.pop()
                a = stack.pop()

                if token == '+':
                    result = a + b
                elif token == '-':
                    result = a - b
                elif token == '*':
                    result = a * b
                elif token == '/':
                    result = a / b

                stack.push(result)
            else:
                stack.push(float(token))

        if stack.size() != 1:
            raise ValueError("Invalid expression")

        return stack.pop()

    @staticmethod
    def infix_to_postfix(expression):
        """Convert infix to postfix notation - O(n)."""
        stack = Stack()
        result = []
        precedence = {'+': 1, '-': 1, '*': 2, '/': 2, '^': 3}

        for char in expression:
            if char.isalnum():  # Operand
                result.append(char)
            elif char == '(':
                stack.push(char)
            elif char == ')':
                while not stack.is_empty() and stack.peek() != '(':
                    result.append(stack.pop())
                stack.pop()  # Remove '('
            elif char in precedence:
                while (not stack.is_empty() and 
                       stack.peek() != '(' and
                       precedence.get(stack.peek(), 0) >= precedence[char]):
                    result.append(stack.pop())
                stack.push(char)

        while not stack.is_empty():
            result.append(stack.pop())

        return ''.join(result)

Queue Implementation

from collections import deque

class Queue:
    def __init__(self):
        self.items = deque()

    def enqueue(self, item):
        """Add item to rear - O(1)."""
        self.items.append(item)

    def dequeue(self):
        """Remove and return front item - O(1)."""
        if self.is_empty():
            raise IndexError("Queue is empty")
        return self.items.popleft()

    def front(self):
        """Return front item without removing - O(1)."""
        if self.is_empty():
            raise IndexError("Queue is empty")
        return self.items[0]

    def is_empty(self):
        """Check if queue is empty - O(1)."""
        return len(self.items) == 0

    def size(self):
        """Return number of items - O(1)."""
        return len(self.items)

    def __str__(self):
        return str(list(self.items))

class CircularQueue:
    def __init__(self, capacity):
        self.capacity = capacity
        self.queue = [None] * capacity
        self.front = 0
        self.rear = -1
        self.size = 0

    def enqueue(self, item):
        """Add item to rear - O(1)."""
        if self.is_full():
            raise OverflowError("Queue is full")

        self.rear = (self.rear + 1) % self.capacity
        self.queue[self.rear] = item
        self.size += 1

    def dequeue(self):
        """Remove and return front item - O(1)."""
        if self.is_empty():
            raise IndexError("Queue is empty")

        item = self.queue[self.front]
        self.queue[self.front] = None
        self.front = (self.front + 1) % self.capacity
        self.size -= 1

        return item

    def is_empty(self):
        return self.size == 0

    def is_full(self):
        return self.size == self.capacity

    def __str__(self):
        if self.is_empty():
            return "[]"

        result = []
        for i in range(self.size):
            index = (self.front + i) % self.capacity
            result.append(str(self.queue[index]))

        return "[" + ", ".join(result) + "]"

# Priority Queue (Min-Heap based)
import heapq

class PriorityQueue:
    def __init__(self):
        self.heap = []
        self.index = 0

    def enqueue(self, item, priority):
        """Add item with priority - O(log n)."""
        heapq.heappush(self.heap, (priority, self.index, item))
        self.index += 1

    def dequeue(self):
        """Remove and return highest priority item - O(log n)."""
        if self.is_empty():
            raise IndexError("Priority queue is empty")

        priority, _, item = heapq.heappop(self.heap)
        return item

    def peek(self):
        """Return highest priority item without removing - O(1)."""
        if self.is_empty():
            raise IndexError("Priority queue is empty")

        return self.heap[0][2]

    def is_empty(self):
        return len(self.heap) == 0

    def size(self):
        return len(self.heap)

This comprehensive guide covers the fundamental data structures that every programmer should master. Understanding these structures and their operations is crucial for solving algorithmic problems efficiently and building scalable software systems.

Each data structure has its strengths and optimal use cases. The key is understanding when to use which structure based on the specific requirements of your problem, such as the types of operations you need to perform and their frequency.