This week is based on Lesson 2 of the Udacity course CS101 "Introduction to Computer Science". It would be best if you sign up and watch these lectures. If you watch them, then you'll see the solution to problem 3 on HW #1 :)
We will cover:
Finish reading Chapter 1 of "Knights Programming". The remaining sections are Section 1.8 (useful for problem 2 on HW #1) and Section 1.9.
Read Chapter 2 of "Knights Programming" to learn about functions/procedures and if statements.
Read Section 3.1 of "Knights Programming" to learn about while loops.
To do this, we need to learn:
# NOTE(review): assumes `page` already holds the page's HTML as a string -- confirm.
# Locate the start of the first link tag.
start_link = page.find('<a href=')
# The URL is the text between the first pair of double quotes found
# from the tag start onward.
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
# Slice strictly between the two quotes (the quotes themselves are excluded).
url = page[start_quote + 1 : end_quote]
print(url)
# finding the first URL
# NOTE(review): assumes `page` already holds the page's HTML as a string -- confirm.
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
print(url)
# finding the second URL
# Keep only the text from the first URL's closing quote onward, so the
# search below moves past the link that was just printed.
page = page[end_quote:]
# same code as above
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
print(url)
To avoid this repetition of code, we will use procedural abstraction.
#
#  input   +---------------+   output
#  ----->  |               |  ----->
#  ----->  |   Procedure   |  ----->
#  ----->  |               |  ----->
#          +---------------+
#
# def <name>(<parameters>):
#     <block>
# turn this into a procedure
# (the four statements down to the bare "#" line are the repeated part;
#  they read `page` and produce `url` and `end_quote`)
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
#
print(url)
# Continue from the closing quote of the URL just found.
page = page[end_quote:]
# turn this into a procedure
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
#
print(url)
What should the inputs be for the procedure get_next_target?
What should the outputs be for the procedure get_next_target?
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        A ``(url, end_quote)`` pair: ``url`` is the text between the first
        pair of double quotes found from the first ``<a href=`` onward, and
        ``end_quote`` is the index in ``s`` of the closing quote.

    Note:
        Nothing checks whether a link exists. ``str.find`` returns -1 when
        it finds nothing, so text without ``<a href=`` produces a
        meaningless pair rather than an error.
    """
    start_link = s.find('<a href=')
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote
import urllib.request
# Open the page and read its whole body. The `with` block closes the
# response as soon as the read has finished.
with urllib.request.urlopen('http://www.cs.ucf.edu/courses/cop3223/spr2014/section1/simple.html') as response:
    # PW added decode("utf-8") to convert
    # the buffer returned by response.read()
    # into a string
    html = response.read().decode("utf-8")
print(html)
You can find more information about the urllib package at http://docs.python.org/3/howto/urllib2.html
The above code snippet is a modification of http://docs.python.org/3/howto/urllib2.html#fetching-urls
import urllib.request
def get_html(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The body, converted from bytes to ``str`` by decoding it as UTF-8.
    """
    # `with` closes the response once the body has been read.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")
import urllib.request
def get_html(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The body, converted from bytes to ``str`` by decoding it as UTF-8.
    """
    # `with` closes the response once the body has been read.
    with urllib.request.urlopen(url) as response:
        # read() must be *called*; decode() is a method of the bytes it
        # returns, not of the read method itself.
        return response.read().decode("utf-8")
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        A ``(url, end_quote)`` pair. Slicing ``s`` from ``end_quote``
        onward gives the text that follows the URL.

    Note:
        There is no check for text that contains no ``<a href=``; in that
        case the returned pair is meaningless.
    """
    start_link = s.find('<a href=')
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote
def main():
    """Fetch the sample page and print the first two link URLs in it."""
    page = get_html('http://www.cs.ucf.edu/courses/cop3223/spr2014/section1/simple.html')
    first, end = get_next_target(page)
    # Drop everything before the first URL's closing quote so the next
    # search finds the following link instead of the same one.
    page = page[end:]
    # The position of the second URL is not needed afterwards.
    second, _ = get_next_target(page)
    print("The first URL is", first)
    print("The second URL is", second)
def inc(n):
    """Return ``n`` plus one."""
    return n + 1
What does the inc procedure defined above do?
def sum(a, b):
    """Add ``b`` to ``a`` inside the procedure; nothing is returned.

    The assignment only rebinds the local name ``a`` and there is no
    ``return`` statement, so every call evaluates to ``None``.

    Args:
        a: First value.
        b: Second value; must support ``a + b``.
    """
    a = a + b
What does the sum procedure defined above do?
def sum(a, b):
    """Return ``a + b``.

    Works for any pair of values that supports ``+``: numbers are added,
    strings and lists are concatenated.

    Args:
        a: First value.
        b: Second value; must support ``a + b``.
    """
    a = a + b
    return a
What does the modified sum procedure defined above do? Keep different data types in mind!
# Define a procedure, square, that takes one number
# as its input, and returns the square of that
# number (result of multiplying
# the number by itself).
def square(a):
# fill in the missing code
# Define a procedure, square, that takes one number
# as its input, and returns the square of that
# number (result of multiplying
# the number by itself).
def square(a):
    """Return ``a`` multiplied by itself."""
    return a * a
# Define a procedure, sum3, that takes three numbers
# as its input, and returns the sum of the three
# input numbers.
def sum3(a,b,c):
# fill in the missing code
# Define a procedure, sum3, that takes three numbers
# as its inputs, and returns the sum of the three
# input numbers.
def sum3(a, b, c):
    """Return the sum of the three inputs."""
    return a + b + c
# Define a procedure, find_second, that takes
# two strings as its inputs: a search string
# and a target string. It should return a
# number that is the position of the second
# occurrence of the target string in the
# search string.
def find_second(search, target):
# fill in missing code
# Define a procedure, find_second, that takes
# two strings as its inputs: a search string
# and a target string. It should return a
# number that is the position of the second
# occurrence of the target string in the
# search string.
def find_second(search, target):
    """Return the position of the second occurrence of ``target`` in ``search``.

    Args:
        search: The string to look in.
        target: The string to look for.

    Returns:
        The index where the second occurrence starts, or -1 when
        ``target`` occurs fewer than two times.
    """
    first = search.find(target)
    # Resume one character past the first hit so the same occurrence is
    # not reported again.
    second = search.find(target, first + 1)
    return second
# Python operators for comparison:
# ==, !=, <, >, <=, >=
#
# Syntax:
# <Number> <Operator> <Number>
#
# the output is a Boolean value: True / False
# Each comparison below evaluates to a Boolean, which print() displays.
print(2 < 3) # => True
print(21 < 3) # => False
print(7 * 3 < 21) # => False (21 is not less than 21)
print(7 * 3 != 21) # => False
print(7 * 3 == 21) # => True
Note that the equality comparison is done using == instead of = because = means assignment.
# if <TestExpression>:
# <Block>
# returns the absolute value of a number
def absolute(x):
    """Return the absolute value of ``x``."""
    # Only negative inputs need their sign flipped.
    if x < 0:
        x = -x
    return x
# Define a procedure, bigger, that takes in
# two numbers as inputs, and returns the
# greater of the two inputs.
def bigger(a,b):
# fill in missing code
# Define a procedure, bigger, that takes in
# two numbers as inputs, and returns the
# greater of the two inputs.
def bigger(a, b):
    """Return the greater of ``a`` and ``b`` (``b`` when they are equal)."""
    if a > b:
        return a
    return b
# `or` gives True when at least one operand is True.
print(False or False) # => False
print(False or True) # => True
print(True or False) # => True
print(True or True) # => True
# this_is_an_error is an undefined variable
# NOTE: the next statement raises on purpose. If these lines are run
# together as one script, execution stops there and the statement
# further down is never reached.
print(this_is_an_error)
# => NameError: name 'this_is_an_error' is not defined
# but the statement below is OK
print(True or this_is_an_error) # => True
# if the first operand is True, then
# Python does not evaluate the second and
# outputs True
# Define a procedure, biggest, that takes 3
# numbers as inputs, and outputs the greatest
# of the three numbers
def biggest(a,b,c):
# fill in missing code
# Define a procedure, biggest, that takes 3
# numbers as inputs, and outputs the greatest
# of the three numbers
def biggest(a, b, c):
    """Return the greatest of the three inputs."""
    if a > b:
        if a > c:
            return a
        else:  # c >= a > b
            return c
    else:  # b >= a
        if b > c:
            return b
        else:  # c >= b >= a
            return c
# Define a procedure, biggest, that takes 3
# numbers as inputs, and outputs the greatest
# of the three numbers
# alternative solution
def bigger(a, b):
    """Return the greater of ``a`` and ``b`` (``b`` when they are equal)."""
    if a > b:
        return a
    return b


def biggest(a, b, c):
    """Return the greatest of the three inputs.

    The winner of ``a`` versus ``b`` is compared against ``c``.
    """
    return bigger(bigger(a, b), c)
# if <TestExpression>:
#     <Block>    # executed 0 or 1 times
# while <TestExpression>:    # executed 0, 1, 2, ... times
#     <Block>
# Print the values 0 through 9; the loop ends once i reaches 10.
i = 0
while i < 10:
    print(i)
    i = i + 1
# i is increased before it is printed, so the values shown are 1 through 10;
# the loop ends when i equals 10.
i = 0
while i != 10:
    i = i + 1
    print(i)
# Careful: i starts at 1 and grows by 2, so it only ever holds odd values
# and never equals 10 -- the test `i != 10` stays True and the loop does
# not end on its own.
i=1
while i != 10:
i = i + 2
print(i)
# Define a procedure, print_numbers, that takes
# as input a positive whole number, and prints
# out all the whole numbers from 1 to the input
# number.
# Make sure your procedure prints "upwards", so
# from 1 up to the input number.
# Define a procedure, print_numbers, that takes
# as input a positive whole number, and prints
# out all the whole numbers from 1 to the input
# number.
def print_numbers(n):
    """Print the whole numbers from 1 up to ``n``, one per line.

    Nothing is printed when ``n`` is less than 1.
    """
    i = 1
    while i <= n:
        print(i)
        i = i + 1


# You need to call the above procedure and pass to
# it an input. Otherwise "nothing would happen".
print_numbers(3)
# Define a procedure, print_numbers, that takes
# as input a positive whole number, and prints
# out all the whole numbers from 1 to the input
# number.
def print_numbers(n):
    """Print the whole numbers from 1 up to ``n``, one per line.

    The counter starts at 0 and is increased *before* each print, so the
    first value shown is 1 and the last is ``n``.
    """
    i = 0
    while i < n:
        i = i + 1
        print(i)


print_numbers(3)
# Define a procedure, factorial, that
# takes one number as its input
# and returns the factorial of
# that number.
def factorial(n):
    """Return the factorial of ``n`` (the product 1 * 2 * ... * n).

    For ``n`` below 2 the loop body never runs and the result is 1.
    """
    result = 1
    i = 2
    while i <= n:
        result = result * i
        i = i + 1
    return result


print(factorial(1))  # => 1
print(factorial(2))  # => 2
print(factorial(3))  # => 6
print(factorial(4))  # => 24
# while <TestExpression>:
#     <Code>
#     if <BreakTest>:
#         break
#     <MoreCode>
# <AfterWhile>
def print_numbers(n):
    """Print the whole numbers from 1 up to ``n``, one per line."""
    i = 1
    while i <= n:
        print(i)
        i = i + 1
def print_numbers(n):
    """Print the whole numbers from 1 up to ``n``, one per line.

    Written with ``while True`` and ``break`` to show how ``break``
    leaves a loop from inside its body.
    """
    i = 1
    while True:
        # Leave the loop as soon as the counter has passed n.
        if i > n:
            break
        print(i)
        i = i + 1
This example only illustrates the usage of break. This code is not as good as the previous code.
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        Two values, ``url`` and ``end_quote``, handed back together as a
        pair so the caller can unpack them with multiple assignment.

    Note:
        There is no check for text that contains no ``<a href=``; in that
        case the returned pair is meaningless.
    """
    start_link = s.find('<a href=')
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote
# call the function
# The two returned values are unpacked into `first` (the URL) and `end`
# (the index of its closing quote).
# NOTE(review): assumes `page` already holds the page text -- confirm it is set earlier.
first, end = get_next_target(page)
# Multiple Assignment
# <Name1>, <Name2>, ... = <Expression1>, <Expression2>, ...
# <Name> = <Expression>
# What does the code below do?
# s, t = t, s
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        A ``(url, end_quote)`` pair.

    Note:
        There is no check for text that contains no ``<a href=``. Every
        ``find`` call then returns -1, and the slice ``s[0:-1]`` comes back
        as the "URL" -- see the second call below.
    """
    start_link = s.find('<a href=')
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote


print(get_next_target('this is a <a href="www.ucf.edu">link</a>'))  # => ('www.ucf.edu', 30)
print(get_next_target('Not good'))  # => ('Not goo', -1)
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        ``(url, end_quote)`` when ``s`` contains ``<a href=``;
        ``(None, 0)`` when it does not.
    """
    start_link = s.find('<a href=')
    # find() reports "not found" as -1.
    if start_link != -1:
        start_quote = s.find('"', start_link)
        end_quote = s.find('"', start_quote + 1)
        url = s[start_quote + 1 : end_quote]
        return url, end_quote
    else:
        return None, 0


print(get_next_target('this is a <a href="www.ucf.edu">link</a>'))  # => ('www.ucf.edu', 30)
print(get_next_target('Not good'))  # => (None, 0)
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        ``(url, end_quote)`` when ``s`` contains ``<a href=``;
        ``(None, 0)`` when it does not.
    """
    start_link = s.find('<a href=')
    # Guard clause: find() reports "not found" as -1.
    if start_link == -1:
        return None, 0
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote


def print_all_links(page):
    """Print every link URL found in ``page``, one per line.

    Args:
        page: Text to scan, e.g. the HTML of a web page.
    """
    while True:
        url, endpos = get_next_target(page)
        if url:
            # print is a function in Python 3, so the parentheses are required.
            print(url)
            # Continue from the closing quote of the URL just printed.
            page = page[endpos:]
        else:
            # No (non-empty) URL came back: stop scanning.
            break
import urllib.request
def get_html(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The body, converted from bytes to ``str`` by decoding it as UTF-8.
    """
    # `with` closes the response once the body has been read.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode("utf-8")
    return html
def get_next_target(s):
    """Return the first link URL in ``s`` and the index of its closing quote.

    Args:
        s: Text to search, e.g. the HTML of a web page.

    Returns:
        ``(url, end_quote)`` when ``s`` contains ``<a href=``;
        ``(None, 0)`` when it does not.
    """
    start_link = s.find('<a href=')
    # Guard clause: find() reports "not found" as -1.
    if start_link == -1:
        return None, 0
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote
def print_all_links(page):
    """Print every link URL found in ``page``, one per line.

    Args:
        page: Text to scan, e.g. the HTML of a web page.
    """
    while True:
        url, endpos = get_next_target(page)
        if url:
            print(url)
            # Continue from the closing quote of the URL just printed.
            page = page[endpos:]
        else:
            # No (non-empty) URL came back: stop scanning.
            break
# Download the sample page, then print every link URL found in it.
page = get_html('http://www.cs.ucf.edu/courses/cop3223/spr2014/section1/simple.html')
print_all_links(page)