1. 哈希表(计算一个字符串或数组里的元素存不存在、唯不唯一、重复、计数、匹配)
数据成员(Data Member)
操作(Operations)
魔法盒:哈希函数进行操作
哈希(Hash):
数值K经过哈希函数hash_function生成哈希码hash_code会得到索引值Index
冲突(Collisions):
两个不同的值经过hash_function产生的哈希索引值或哈希码相同,这种叫冲突
解决方案(Resolution)
开放地址一:如果发生冲突,将冲突的相同的值,通过相同线性探索的方式寻找下一个地址
2. 哈希表起源:
数组查找:线性增加,时间复杂度是O(n)
我们可以通过数组索引,直接访问块(Block),这种方法的访问时间是常数
数组:
牺牲空间换取时间 ——‘holes’会吃掉很多存储空间
依赖于元素之间的顺序,元素之间的顺序将会转化为数据存储在内存空间上的顺序
3. Python中关于hash的模块:
1. Dictionary<key, value>
2. set(与dictionary是孪生兄弟)<key>
3. Counter
4. 自己实现一个dictionary的代码
父类:
class MapBase(): class _Item: __slots__ = '_key' , '_value' def __init__ (self, k, v): self._key = k self._value = v def __eq__ (self, other): return self._key == other._key def __ne__ (self, other): return not (self == other) def __lt__ (self, other): return self._key < other._key def __print__ (self): print(str(self._key) + ":" + str(self._value), end = ", ")
子类:
from MapBase import MapBase from random import randrange class HashMapBase(MapBase): def __init__ (self, cap=11, p=109345121): self._table = cap * [ None ] self._n=0 self._prime = p self._scale = 1 + randrange(p-1) self._shift = randrange(p) def _hash_function(self, k): return (hash(k) * self._scale + self._shift) % self._prime % len(self._table) # index def __len__ (self): return self._n # O(1) def __getitem__ (self, k): j = self._hash_function(k) #index return self._bucket_getitem(j, k) # O(1) def __setitem__ (self, k, v): j = self._hash_function(k) #index print("hash for", k, "is", j) self._bucket_setitem(j, k, v) if self._n > len(self._table) // 2: # keep load factor <= 0.5 self.resize(2 * len(self._table) - 1) # O(1) def __delitem__ (self, k): j = self._hash_function(k) self._bucket_delitem(j, k) self._table[j] = None self._n -= 1 def resize(self, c): old = list(self.items( )) self._table = c * [None] self._n = 0 for (k,v) in old: self[k] = v
from HashMapBase import HashMapBase from SimpleUnsortedTableMap import UnsortedTableMap class ProbeHashMap(HashMapBase): """Hash map implemented with linear probing for collision resolution.""" _AVAIL = object() # sentinal marks locations of previous deletions def _is_available(self, j): """Return True if index j is available in table.""" return self._table[j] is None or self._table[j] is ProbeHashMap._AVAIL def _find_slot(self, j, k): """Search for key k in bucket at index j. Return (success, index) tuple, described as follows: If match was found, success is True and index denotes its location. If no match found, success is False and index denotes first available slot. """ firstAvail = None while True: if self._is_available(j): if firstAvail is None: firstAvail = j # mark this as first avail if self._table[j] is None: return (False, firstAvail) # search has failed elif k == self._table[j]._key: return (True, j) # found a match j = (j + 1) % len(self._table) # keep looking (cyclically) def _bucket_getitem(self, j, k): found, s = self._find_slot(j, k) if not found: raise KeyError('Key Error: ' + repr(k)) # no match found return self._table[s]._value def _bucket_setitem(self, j, k, v): found, s = self._find_slot(j, k) if not found: self._table[s] = self._Item(k,v) # insert new item self._n += 1 # size has increased else: self._table[s]._value = v # overwrite existing def _bucket_delitem(self, j, k): print(j, k) found, s = self._find_slot(j, k) print(found, s) if not found: raise KeyError('Key Error: ' + repr(k)) # no match found self._table[s] = ProbeHashMap._AVAIL # mark as vacated def __iter__(self): for j in range(len(self._table)): # scan entire table if not self._is_available(j): yield self._table[j]._key def _print_ (self): for bucket in self._table: if bucket is not None: # a nonempty slot bucket.__print__()
5. 自己动手写一个自定义可hash对象
class People: def __init__(self, name, age, salary): self.name = name self.age = age self.salary = salary def __hash__(self): return hash((self.name, self.age)) def __eq__(self, other): return (self.name, self.age, self.salary) == (other.name, other.age, other.salary) def __ne__(self, other): return not (self == other) def __str__(self): return self.name + str(self.age) + str(self.salary) def eat(self): print("eat") def sleep(self): pass p1 = People("Tom","1",20) p2 = People("Tom","1",18) p3 = People("zion","2",18) p4 = People("Adam","3",20) p5 = People("Alice","4",18) dict = {p1:'A', p2:'B', p3:'C', p4:'D', p5:'E'} print(dict) for key in dict: print(key, 'cooresponds to', dict[key])
6. 计算一段字符串中的字母出现频率最多的一个
def letterCount(s): freq = {} for piece in s: word = ''.join(c for c in piece if c.isalpha()) if word: freq[word] = 1 + freq.get(word, 0) max_word = '' max_count = 0 print(freq) for (w, c) in freq.items(): if c > max_count: max_word = w max_count = c print("The most frequent word is", max_word) print("The most frequent occurrences is", max_count) s = "Hello World How are you" letterCount(s)
from collections import Counter def letterCount2(s): c = Counter(x for x in s if x != " ") for letter, count in c.most_common(4): print('%s: %7d' % (letter, count))
7. 计算单词中出现次数最多的一个
from collections import Counter def wordCount(s): wordcount = Counter(s.split()) print(wordcount)
8. 在一个字符串中找到第一个唯一的字符
def firstUniqChar(s): letters = 'abcdefghigklmnopqrstuvwxyz' index = [s.index(l) for l in letters if s.count(l) == 1] return min(index) if len(index)>0 else -1
9. 找出两个数组的交集
def intersection(num1, num2): return list(set(num1) & set(num2)) num1 = [1, 2, 2, 1] num2 = [2, 2] print(intersection(num1, num2))
10. 找出两个数组的交集,重复的元素也打印出来
def intersection1(num1, num2): dict1 = dict() for i in num1: if i not in dict1: dict1[i] = 1 else: dict1[i] += 1 ret = [] for i in num2: if i in dict1 and dict1[i]>0: ret.append(i) dict1[i] -= 1 return ret
11. 珠宝和普通石头的故事
Example1: J = "aA", S = "aAAbbbb"
问石头中有几个珠宝
def numJewelsInStones_bf(J, S): count = 0 for c in S: if c in J: count += 1 return count def numJewelsInStones_bf1(J, S): setJ = set(J) return sum(s in setJ for s in S) J = "aA" S = "aAAbbbb" print(numJewelsInStones_bf1(J, S))
12. 访问的网站次数进行计数
Example1:
Input : ["9001 scholar.google.com"]
output: ["9001 scholar.google.com"] ["9001 google.com"]["9001 com"]
Example2:
Input:
["900 google.mail.com", "50 yahoo.com", "1 intel.mail.com", "5 wiki.org"]
Output:
["901 mail.com","50 yahoo.com","900 google.mail.com","5 wiki.org","5 org","1 intel.mail.com","951 com"]
import collections def subdomainVisits(cpdomains): ans = collections.Counter() for domain in cpdomains: count, domain = domain.split() count = int(count) frages = domain.split('.') for i in range(len(frages)): ans[".".join(frages[i:])] += count return ["{} {}".format(ct, dom) for dom, ct in ans.items()] cp = ["900 google.mail.com","50 yahoo.com", "1 intel.mail.com","5 wiki.org"] print(subdomainVisits(cp))
13. 找到字符串在键盘上是一行的字符
Example 1:
Input: ["Hello", "Alaska", "Dad", "Peace"]
Output: ["Alaska", "Dad"]
def findWords(words): line1, line2, line3 = set('qwertyuiop'), set('asdfghjkl'), set('zxcvbnm') ret = [] for word in words: w = set(word.lower()) if w.issubset(line1) or w.issubset(line2) or w.issubset(line3): ret.append(word) return ret
14. 单词匹配
Examples:
pattern = "abba", str = "dog cat cat dog" should return true.
pattern = "abba", str = "dog cat cat fish" should return false.
pattern = "aaaa", str = "dog cat cat dog" should return false.
pattern = "abba", str = "dog dog dog dog" should return false.
def WordPattern(pattern, str): s = pattern t = str.split() return len(set(zip(s, t))) == len(set(s)) == len(set(t)) and len(s) == len(t)