Changes

Jump to navigation Jump to search
import re
import string
import urllib

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
##########################
# Launch Google Chrome through Selenium. `driver` is the module-level browser
# handle that switch_window() and every later step of this script operate on.
driver = webdriver.Chrome()
##########################
def switch_window():
    """Point the module-level ``driver`` at the last window it reports.

    Reads ``driver.window_handles`` and switches to the final entry. The
    script calls this after every click that opens a window and after every
    ``driver.close()``, so that subsequent commands target a live window.

    NOTE(review): this presumes the newest window is listed last; confirm
    that ordering holds for the browser/driver in use.

    Raises:
        IndexError: if the driver reports no open windows.
    """
    handles = driver.window_handles
    # ``switch_to.window`` is the supported spelling; ``switch_to_window``
    # is a deprecated alias for the same call.
    driver.switch_to.window(handles[-1])
##########################
# Address of the page the script starts from (path is percent-encoded).
# NOTE(review): the path contains a literal space right after one hyphen
# ("...%A9- %D9..."); confirm that this is intended and not a paste artifact.
start_url = 'http://www.chambredesrepresentants.ma/ar/%D8%A7%D9%84%D8%AA%D8%B4%D8%B1%D9%8A%D8%B9/%D9%84%D8%A7%D8%A6%D8%AD%D8%A9- %D9%85%D9%82%D8%AA%D8%B1%D8%AD%D8%A7%D8%AA-%D8%A7%D9%84%D9%82%D9%88%D8%A7%D9%86%D9%8A%D9%86?body_value=&field_og_commission_target_id=All'
# Load that page in the browser window the driver currently controls.
driver.get(start_url)
##########################
# Main download loop.
#
# For every bill link on the start page:
#   1. open the bill in its own window,
#   2. open the bill's "pdf" link in another window and save that URL to a
#      file named after the last path segment of the decoded bill URL,
#   3. open every link in the bill page's attachment list and save those whose
#      file name contains ".pdf",
#   4. close the windows that were opened and return to the start page.
#
# Uses the Python 2 urllib API (urllib.unquote / urllib.urlretrieve), like the
# rest of this script. Files are written to the current working directory.
try:
    ##########################
    #Collect the link to every bill on the start page
    bills_list = driver.find_elements_by_xpath("//li/h3/a")
    for bill_link in bills_list:
        ##########################
        #Open the bill in a new window.
        # Shift is held during the click, exactly as the attachment loop
        # below does. The original only released Shift here without ever
        # pressing it, yet the close()/switch_window() pairs further down
        # rely on each click having opened its own window.
        ActionChains(driver).key_down(Keys.SHIFT).perform()
        bill_link.click()
        ActionChains(driver).key_up(Keys.SHIFT).perform()

        switch_window()
        url = driver.current_url
        unicode_url = urllib.unquote(str(url)).decode('utf8')
        ##########################
        #Name the saved file after the last path segment of the decoded URL
        tag = unicode_url.split("/")[-1]
        ##########################
        #Navigate to pdf of website (opened in its own window as well)
        change_button = driver.find_elements_by_xpath("//a [@class='pdf' and @rel='nofollow']")[0]
        ActionChains(driver).key_down(Keys.SHIFT).perform()
        change_button.click()
        ActionChains(driver).key_up(Keys.SHIFT).perform()

        switch_window()
        #########################
        #Saves file at the current window's URL to current directory
        url = driver.current_url
        urllib.urlretrieve(url, tag)

        # Close the pdf window and go back to the bill window.
        driver.close()
        switch_window()
        #########################
        #finds interior pdfs on the page
        pdfs_on_page = driver.find_elements_by_xpath("//div/div/div/article/div/ul/li/a")
        for element in pdfs_on_page:
            #######################
            #click on pdf
            ActionChains(driver).key_down(Keys.SHIFT).perform()
            element.click()
            ActionChains(driver).key_up(Keys.SHIFT).perform()

            switch_window()

            url = driver.current_url
            pdf_tag = str(url).split("/")[-1]
            ######################
            #saves interior pdf; any other link is just closed again.
            # The dot is escaped so that only a literal ".pdf" matches
            # (the unescaped pattern matched any character before "pdf").
            if re.search(r"\.pdf", pdf_tag):
                urllib.urlretrieve(url, pdf_tag)

            # Close the attachment window and go back to the bill window.
            driver.close()
            switch_window()
        ########################
        # Close the bill window and go back to the start page.
        driver.close()
        switch_window()
    #########################
    print("download complete")
finally:
    #########################
    #close browser, even when a step above failed, so no Chrome/driver
    #process is left running
    driver.quit()

Navigation menu