defsift_down(array,index): left = index*2+1 right = index*2+2 small = indexif left<len(array)and array[small]>array[left]: small = leftif right<len(array)and array[small]>array[right]: small = rightif small!=index: array[small], array[index]= array[index], array[small]sift_down(array, small)defpop(self): res = self.array[0] self.array[0], self.array[-1]= self.array[-1], self.array[0] self.array.pop() self.sift_down(self.array,0)return res
Time Complexity: O (log n)
Initialization/Heapify
从Array初始化一个Heap 其实是一个Sift down
defbuild_heap(arr):for i inrange(len(arr)/2-1, -1, -1)sift_down(arr)
Time Complexity: O(n) 等比数列求和、求极限推导
Smallest K Elements
soln1: sort
O(nlogn)
soln2: quick select/partition
Average Case: O(n+n/2+n/4+n/8+...+1) = O(n)
Worst Case: O(n+ (n-1) + (n-2) + ... ) = O(n^2)
O(kn)
Soln3: 小根堆 Space O(n)
Step 1: Heapify all elements O(n)
Step 2: Call pop() k times to get the k smallest elements. O(klogn)
Time Complexity Total: O(n+klogn)
classSolution(object):defkSmallest(self,array,k):""" input: int[] array, int k return: int[] """# write your solution hereimport heapq result = [] heapq.heapify(array)for i inrange(min(k,len(array))): result.append(heapq.heappop(array))return result
Soln4: 更适合online 大根堆 Space O(k)
任何时候maxheap里的都是当前的smallest k stream的数据进来,任何时候fixed window with size of k里的元素都是当前的k个最小,之后再有新的stream,都会update when appropriate 流数据找current min之类的也是类似的算法
Step 1: Heapify the first k elements to form a max-heap of size k O(k)
Step 2: Iterate over the remaining n-k elements one by one.
Compare with the largest element of the previous smallest k candidates.
case 1: new element>=top: ignore
case 2: new element<top: update (top ->new element) O((n-k)logk)
Total O(k+(n-k)logk)
defkSmallest2(array,k):ifnot array:return arrayif k>=len(array):return array res = [-elem for elem in array[0:k]] heapq.heapify(res)for i inrange(k,len(array)):if array[i]<-res[0] heapq.heappop(res) heapq.heappush(res, -array[i])return [-elem for elem in res]
Solution 2:用k-size min heap,不断push,把最小的弹出去。注意输出时需要reverse一下。
deftopKFrequentPQ(nums,k):# 统计频率 O() freq_hash =dict()#0 frequency, 1 numberfor n in numbers:if n in freq_hash: freq_hash[n]+=1else: freq_hash[n]=1# 末尾淘汰 n*2*O(logk) heap=[]for n, f in freq_hash.items():# items()返回的是列表对象,iteritems()返回的是iterator对象 heapq.heappush(heap,(f,n))iflen(heap)>k: heapq.heappop(heap)# 调换顺序 O(klogk)whilelen(heap)!=0: ret.append(heapq.heappop(heap)[1]) ret.reverse()return ret
在这种方法下不需要每次heapify
Merge K sorted array
Step 1: Create a min heap, put the first element of each array into the heap
Step 2: Each time pop an element from the heap, and then push the next element into the heap.
defmergek(arrays):ifnot arrays:returnNone heap = []for i inrange (len()):iflen(arrays[i]): heap.append((arrays[i][0], i, 0)) heapq.heapify(heap) result=[]while heap: val, index_array, index_element = heapq.heappop(heap) result.append(val)if index_element+1<len(arrays[index_array]): heapq.heappush(heap, (arrays[index_array][index_element+1], index_array, index_element+1))return result
Time: O(2K+nlogk)
k读入、k来heapify、nlogk来push和popk次
Merge in Stream
defmerge_two_streams(A,B): INVALID = sys.maxsize streams = (A, B) cache = [(INVALID,0), (INVALID,1)]for i in [0,1]:try: cache[i]= (next(streams[i]), i)exceptStopIteration: cache[i]= (INVALID, i) while cache: min_val, index =min(cache)if min_val == INVALID:breaktry: cache[index]= (next(streams[index]), index)exceptStopIteration: cache[index]= (INVALID, index)yield min_valdefmerge_three_streams(A,B,C): INVALID = sys.maxsize streams = (A, B, C) cache = [(INVALID,0), (INVALID,1), (INVALID,2)]for i in [0,1,2]:try: cache[i]= (next(streams[i]), i)exceptStopIteration: cache[i]= (INVALID, i) while cache: min_val, index =min(cache)if min_val == INVALID:breaktry: cache[index]= (next(streams[index]), index)exceptStopIteration: cache[index]= (INVALID, index)yield min_valfrom heapq import heapify, heappop, heappushdefmerge_k_streams(streams): cache, k = [],len(streams)for i inrange(k):try: cache.append((next(streams[i]),i))exceptStopIteration:passheapify(cache)while cache: min_val, index =heappop(cache)try:heappush(cache, (next(streams[index]), index))exceptStopIteration:passyield min_valdefstreaming(nums):for num in nums:yield numif__name__=="__main__":print([num for num inmerge_two_streams(streaming([1,3,5]),streaming([2,4,6]))])print([num for num inmerge_three_streams(streaming([1,4,7]),streaming([2,5,8]),streaming([3,6,9]))])print([num for num inmerge_k_streams([streaming(nums) for nums in [[1,3,5,7],[2,4,6,8],[1,4,7,10],[2,5,8,11]]])])
面试题
Linkedin:Can you say something you know about stack and heap?