def merge(array1, array2):
i=j=0
results=[]
while i<len(array1) and j<len(array2):
if array1[i]<array2[j]:
results.append(array1[i])
i+=1
else:
results.append(array2[j])
j+=1
while i<len(array1):
results.append(array1[i])
i+=1
while j<len(array2):
results.append(array2[j])
j+=1
return results
#Time: O(n)
#对于这个function来说 Space: O(1)
# slicing
def merge_sort(array):
if len(array)==0 or len(array)==1:
return array
mid = len(array)/2
left = merge_sort(array[:middle]) #O(n) space due to slicing
right = merge_sort(array[middle:]) #O(n) space due to slicing
return merge(left, right) #调用一次merge是O(n)Time,在这里调用了logn次
时间复杂度
Upper Half to split: Time O(n)=1+2+4+...n/2
Lower Half to merge: Time O(nlogn) 每一层merge的时间复杂度是O(n) 一共有h=logn层
Total Time Complexity O(nlogn)
store index是一个隔板,左边都是比pivot小的,右边都是比pivot大的。所以一开始store index是从0开始移动
(..., store_index): <pivot
[store_index, i): >pivot
[i,...) unknown
def partition (lst, start, end, pivot_index):
lst[pivot_index], lst[end] = lst[end], lst[pivot_index]
store_index = start
pivot = lst[end]
for i in range (start, end):
if lst[i]<pivot:
lst[i], lst[store_index] = lst[store_index], lst[i]
store_index+=1
lst[store_index],lst[end]=lst[end], lst[store_index]
return store_index
from random import randrange
def quick_sort(lst, start, end):
if start>=end:
return
pivot_index = randrange(start, end+1)
new_pivot_index = partition(lst, start, end, pivot_index)
quick_sort(lst, start, new_pivot_index-1)
quick_sort(lst, new_pivot_index+1, end)
randrange (start, stop, step) doesn’t consider the last item i.e. it is exclusive. For example, randrange (10,20,1) will return any random number from 10 to 19 (exclusive). it will never select 20.
Time: Best Case and On average, both O(nlogn)
为什么Average是O(nlogn)
下面这个推导的最后结论
Space: O(logn) worst case O(n) 这次是只取决于高度了,因为每层的call stack上只分配了常数级别的空间(pivot_index)
class Solution(object):
def quickSort(self, array):
"""
input: int[] array
return: int[]
"""
# write your solution here
if not array or len(array)<=1:
return array
return self.helper(array, 0, len(array)-1)
def helper(self, array, start, end):
from random import randrange #保证稳定的,防止被hacker攻击
if start>=end:
return
pivot_index=randrange(start, end+1)
new_index=self.partition(array, pivot_index, start, end)
self.helper(array, start, new_index-1)
self.helper(array, new_index+1, end)
return array
def partition(self, array, index, start, end):
store_index=start
array[end],array[index]=array[index],array[end]
pivot=array[end]
for i in range (start, end):
if array[i]<pivot:
array[i], array[store_index] = array[store_index],array[i]
store_index+=1
array[store_index], array[end] = array[end], array[store_index]
return store_index
class Solution(object):
def rainbowSort(self, array):
"""
input: int[] array
return: int[]
"""
# write your solution here
if not array or len(array)<=1:
return array
ones, zeros, negs = len(array)-1, 0, 0
while zeros<=ones:
if array[zeros]==-1:
array[zeros],array[negs]=array[negs],array[zeros]
zeros+=1
negs+=1
elif array[zeros]==0:
zeros+=1
else:
array[zeros],array[ones]=array[ones],array[zeros]
ones-=1
return array
Find the k-th largest element in an array
Soln1: Brute Force: Sort the array in descending order, and return the element at index (k-1). O(nlogn)
Soln2: Heap
每次扔一半 左边是比pivot大的,右边是比pivot小的
import random
def find_kth_largest(arr, k):
left, right = 0, len(arr)-1
while left<=right:
pivot_idx=random.randint(left, right)
new_pivot_idx=partition(left, right, pivot_idx, arr)
if new_pivot_idx == k-1:
return arr[new_pivot_idx]
elif new_pivot_idx > k-1:
right = new_pivot_idx -1
else:
left = new_pivot_idx + 1
def partition(left, right, pivot_idx, arr):
pivot = arr[pivot_idx]
new_pivot_idx = left
arr[pivot_idx], arr[right] = arr[right], arr[pivot_idx]
for i in range(left, right):
if arr[i]>pivot:
arr[i], arr[new_pivot_idx]=arr[new_pivot_idx], arr[i]
new_pivot_idx += 1
arr[right], arr[new_pivot_idx] = arr[new_pivot_idx], arr[right]
return new_pivot_idx
arr = [3,2,5,1,4,0]
print(find_kth_largest(arr,1))
Rainbow Sort 系列
I 3种颜色 -1,0,1
下面这个赶紧忘掉 写的什么玩意
class Solution(object):
def rainbowSort(self, array):
"""
input: int[] array
return: int[]
"""
# write your solution here
if not array or len(array)<=1:
return array
left,index,right=0,0,len(array)-1
while index<=right:
if array[index]==-1:
array[index], array[left]= array[left], array[index]
index+=1
left+=1
elif array[index]==1:
array[index], array[right] = array[right],array[index]
right-=1
else:
index+=1
return array
II 4种颜色 0,1,2,3
只有在大量重复数字的sort才有意义
class Solution(object):
def rainbowSortII(self, array):
"""
input: int[] array
return: int[]
"""
# write your solution here
return self.quickSort(array, 0, len(array) - 1)
def quickSort(self, A, start, end):
if start >= end:
return A
left, right = start, end
# key point 1: pivot is the value, not the index
pivot = A[(start + end) // 2];
# key point 2: every time you compare left & right, it should be
# left <= right not left < right
while left <= right:
while left <= right and A[left] < = pivot:
left += 1
while left <= right and A[right] > pivot:
right -= 1
if left <= right:
A[left], A[right] = A[right], A[left]
left += 1
right -= 1
self.quickSort(A, start, right)
self.quickSort(A, left, end)
return A
III K种颜色,用quick sort的思想
传入两个区间,一个是颜色区间 color_from, color_to。另外一个是待排序的数组区间 index_from, index_to.
找到颜色区间的中点,将数组范围内进行 partition,<= color 的去左边,>color 的去右边。
然后继续递归。
时间复杂度 O(nlogk)n是数的个数, k 是颜色数目。这是基于比较的算法的最优时间复杂度。
不基于比较的话,可以用计数排序(Counting Sort)
class Solution(object):
def rainbowSortIII(self, array, k):
"""
input: int[] array, int k
return: int[]
"""
# write your solution here
if not array or len(array)<=1:
return array
return self.sort(array, 0, len(array)-1,1,k)
def sort(self, array, idx_l, idx_r, color_l, color_r):
if idx_l==idx_r or color_l==color_r:
return
pivot=(color_l+color_r)//2
left, right = idx_l, idx_r
while left<=right:
while left<=right and array[left]<=pivot:
left+=1
while left<=right and array[right]>pivot:
right-=1
if left<=right:
array[left],array[right]=array[right],array[left]
left+=1
right-=1
self.sort(array,idx_l,right,color_l,pivot)
self.sort(array,left,idx_r,pivot+1,color_r)
return array
面试题目:
为什么基于比较的排序 时间复杂度下界是Omega(nlogn)
Given n numbers, there are n! possible permutations. For any value, x!=y, x<y is half of the permutation. In each comparison, we are left with n!/2 permutation. In the end, we operate log(n!) to achieve sorting.
Olog(n!)=O(nlogn) (Stirling Formula)
如果有1Mb的数据,用哪个sorting algorithm? What about 10Mb,100Mb,1Gb,1Tb,1Pb?
1Mb: Memory sorting 随便算法
1Tb: Disk sorting, External Sorting 外排,单机,多机,最后merge,写回磁盘
For any pivot position i;i∈{0,…,n−1}⋅ Time for partitioning an array : cn⋅ The head and tail subarrays contain i and n−1−i items, respectively: T(n)=cn+T(i)+T(n−1−i) Average running time for sorting (a more complex recurrence): T(n)=n1∑i=0n−1(T(i)+T(n−1−i)+cn)
最坏的情况不取决于input本身是否有序,而是取决于每次的pivot都选的特别差,比如每次都选到了最大的或者最小的,导致每一次都是n-1比它小,这就成了一个直上直下的一叉树,每一层都需要做n次交换,高度是n, worst case O( n2 )
Soln3: QuickSort Average O(n) Worst Case O(n2)
Time complexity: T(n) Let nT(n) Space complexity: O(1)=T(n/2)+O(n)=T(n/4)+O(n/2+n)=…=T(n/2nk)+O(n+n/2+…+n/2∧k)]=T(n/2nk)+O(n+2∗(1−0.5∧k))=2∧k=T(1)+O(2n−2)⇒T(n)=O(n)