This week is based on Lessons 5 of the Udacity course CS101 "Introduction to Computer Science". It would be best if you sign up and watch these lectures.
We will cover:
Check out Subsection 5.5. "Dictionaries" of the Python Tutorial.
Also, check out Subsection 4.4 "Dictionaries" of the Programming Knights book. Here is the link to Chapter 4.
String | List | Dictionary |
---|---|---|
sequence of characters | list of elements | set of <key, value> pairs |
|
|
|
immutable | mutable | mutable |
|
|
|
elements = {'hydrogen' : 1, 'helium' : 2, 'carbon' : 6}
elements['lithium']=3
elements['nitrogen']=8 # mistake
elements['nitrogen']=7
print(elements)
print(elements['hydrogen'])
print(elements['lithium'])
print('lithium' in elements)
elements = {}
elements['H'] = {'name' : 'Hydrogen', 'number' : 1, 'weight' : 1.00794}
elements['He'] = {'name' : 'Helium', 'number' : 2, 'weight' : 4.002602, 'noble gas' : True}
We need to modify the following methods:
crawl_web
add_to_index
lookup
def crawl_web(seed):
tocrawl = [seed]
crawled = []
index = []
while tocrawl:
url = tocrawl.pop()
if url not in crawled:
content = get_page(url)
add_page_to_index(index, url, content)
union(tocrawl, get_all_links(content))
crawled.append(url)
return index
The required modification is very simple.
def crawl_web(seed):
# same code
index = {} # <= this is the only modification
# same code
def add_to_index(index,keyword,url):
for entry in index:
if entry[0] == keyword:
entry[1].append(url)
return
index.append([keyword,[url]])
The modified add_to_index method that use a dictionary to realize a fast index:
def add_to_index(index,keyword,url):
if keyword in index:
index[keyword].append(url)
else:
index[keyword] = [url]
def lookup(index,keyword):
for entry in index:
if entry[0] == keyword:
return entry[1]
return []
The modified look_up method that uses a dictionary to realize a fast index:
def lookup(index,keyword):
if keyword in index:
return index[keyword]
return None
def make_big_index(size):
index = []
letters = ['a','a','a','a','a','a','a','a']
while len(index) < size:
word = make_string(letters)
add_to_index(index, word, 'fake')
for i in range(len(letters) - 1, 0, -1):
if letters[i] < 'z':
letters[i] = chr(ord(letters[i]) + 1)
break
else:
letters[i] = 'a'
return index
The required modification is very simple.
def make_big_index(size):
index = {} # <== This is the only modification
# same code
import time
def time_execution(code):
start = time.clock()
result = eval(code)
run_time = time.clock() - start
return result, run_time
def add_to_index(index,keyword,url):
if keyword in index:
index[keyword].append(url)
else:
index[keyword] = [url]
def lookup(index,keyword):
if keyword in index:
return index[keyword]
return None
def make_string(p):
s = ""
for e in p:
s = s + e
return s
# building a fake index
def make_big_index(size):
index = {}
letters = ['a','a','a','a','a','a','a','a']
while len(index) < size:
word = make_string(letters)
add_to_index(index, word, 'fake')
for i in range(len(letters) - 1, 0, -1):
if letters[i] < 'z':
letters[i] = chr(ord(letters[i]) + 1)
break
else:
letters[i] = 'a'
return index
index100k = make_big_index(100000)
# timing lookup
print(time_execution("lookup(index100k,'aaaaaaaa')"))
print(time_execution("lookup(index100k,'zzzzzzzz')"))
import time
def time_execution(code):
start = time.clock()
result = eval(code)
run_time = time.clock() - start
return result, run_time
def add_to_index(index,keyword,url):
for entry in index:
if entry[0] == keyword:
entry[1].append(url)
return
index.append([keyword,[url]])
def lookup(index,keyword):
for entry in index:
if entry[0] == keyword:
return entry[1]
return []
def make_string(p):
s = ""
for e in p:
s = s + e
return s
# building a fake index
def make_big_index(size):
index = []
letters = ['a','a','a','a','a','a','a','a']
while len(index) < size:
word = make_string(letters)
add_to_index(index, word, 'fake')
for i in range(len(letters) - 1, 0, -1):
if letters[i] < 'z':
letters[i] = chr(ord(letters[i]) + 1)
break
else:
letters[i] = 'a'
return index
index100k = make_big_index(100000)
print("first element of our fake index: ", index100k[0])
print("last element of our fake index: ", index100k[-1])
# timing lookup
print(time_execution("lookup(index10k,'aaaaaaaa')"))
print(time_execution("lookup(index10k,'zzzzzzzz')"))