Commit bacd7e5e authored by Thom Wiggers 📐
Browse files

Merge branch '44-migrate-alv-documents' into 'master'

Migrate ALV documents

Closes #44

See merge request !35
parents 6da25450 c8b8789c
from django.core.files.base import ContentFile
from django.utils import timezone
from django.utils.timezone import datetime
from utils.management.commands import legacylogin
from documents.models import GeneralMeeting, GeneralMeetingDocument
from bs4 import BeautifulSoup
import requests
import os
def filefield_from_url(filefield, url):
    """Download *url* and store its content in *filefield*.

    The stored file is named after the last path component of *url*.

    :param filefield: object exposing ``save(name, content)``; the callers
        in this file pass file attributes of model instances.
    :param url: absolute URL of the file to download.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(url)
    # Fail loudly instead of storing an HTML error page as the document.
    response.raise_for_status()
    filefield.save(os.path.basename(url), ContentFile(response.content))
class Command(legacylogin.Command):
    """Import general meetings and their documents from the old website.

    For every year from 1990 up to and including 2015 the per-year overview
    is fetched, and one ``GeneralMeeting`` is created per listed meeting,
    together with its minutes and a ``GeneralMeetingDocument`` per listed
    document.
    """

    help = "Scrapes the General Meetings from the old Thalia website"

    @staticmethod
    def _parse_meeting_date(datetext, year):
        """Return the timezone-aware date of a meeting listed under *year*.

        :param datetext: day and abbreviated month as shown on the page,
            e.g. ``"12 Mar"``; surrounding whitespace is ignored.
        :param year: the year of the overview the meeting was listed under.
        :raises ValueError: if the text does not match ``%d %b`` or the
            resulting calendar date does not exist.
        """
        # Parse against a leap year so that "29 Feb" is accepted here; whether
        # the date really exists is decided below, once the real calendar
        # year is known.
        parsed = datetime.strptime(datetext.strip() + ' 2000', '%d %b %Y')
        # Months before September are stored in the following calendar year
        # (presumably because the overview year is an association year that
        # starts in September -- confirm against the old site).
        calendar_year = year + 1 if parsed.month < 9 else year
        naive = datetime(calendar_year, parsed.month, parsed.day)
        return timezone.make_aware(naive, timezone.get_current_timezone())

    def handle(self, *args, **options):
        """Scrape all yearly overviews and store the meetings found.

        Relies on ``super().handle()`` having provided ``self.session``.
        """
        super().handle(*args, **options)

        url = "https://thalia.nu/ajax/alvyearview?year={}"
        for year in range(1990, 2016):
            src = self.session.get(url.format(year)).text
            soup = BeautifulSoup(src, 'lxml')
            # Each direct child <div> of the first <div> is one meeting.
            for alv in soup.div.find_all('div', recursive=False):
                meeting = GeneralMeeting()
                datetext = alv.find(attrs={'class': 'gw-go-coinb'}).text
                meeting.datetime = self._parse_meeting_date(datetext, year)
                meeting.location = alv.find('p').text
                meeting.save()

                # The minutes, when present, are the link in the footer.
                minutes = alv.find('div', {'class': 'gw-go-footer'}).find('a')
                if minutes is not None:
                    minutes_url = 'https://thalia.nu' + minutes['href']
                    filefield_from_url(meeting.minutes, minutes_url)

                # Every list item links to one additional meeting document.
                for document in alv.find_all('li'):
                    doc_url = 'https://thalia.nu' + document.find('a')['href']
                    doc = GeneralMeetingDocument()
                    doc.meeting = meeting
                    filefield_from_url(doc.file, doc_url)
                    doc.save()
from django.core.files.base import ContentFile
from utils.management.commands import legacylogin
from documents.models import AssociationDocumentsYear
from bs4 import BeautifulSoup
import requests
import os
class Command(legacylogin.Command):
    """Import policy documents and reports from the old website.

    The documents page holds two lists inside the ``policywrapper`` element:
    the first is read as policy documents, the second as reports. Entries
    are paired up by position and stored on one ``AssociationDocumentsYear``
    per year.
    """

    help = "Scrapes the policy documents from the old Thalia website"

    def handle(self, *args, **options):
        """Scrape the documents page and store every linked file.

        Relies on ``super().handle()`` having provided ``self.session``.
        """
        super().handle(*args, **options)

        page = self.session.get("https://thalia.nu/association/documents")
        soup = BeautifulSoup(page.text, 'lxml')
        wrapper = soup(attrs={'class': 'policywrapper'})[0]
        lists = wrapper.find_all('ul', recursive=False)
        policy_items = lists[0].find_all('li', recursive=False)
        report_items = lists[1].find_all('li', recursive=False)

        for policy, report in zip(policy_items, report_items):
            # The heading starts with "Beleidsplan '" followed by a
            # two-digit year.
            heading = policy.find('h2').find(text=True)
            short_year = int(heading.replace("Beleidsplan '", '')[:2])
            # Two-digit years from 90 upwards are 19xx, the rest 20xx.
            century = 19 if short_year >= 90 else 20
            year = century * 100 + short_year

            obj, _created = AssociationDocumentsYear.objects.get_or_create(
                year=year)
            obj.year = year

            link_class = {'class': 'overlay-icon-link'}
            euro_class = {'class': 'overlay-icon-euro'}
            targets = (
                (obj.policy_document, policy.find(attrs=link_class)),
                (obj.annual_report, report.find(attrs=link_class)),
                (obj.financial_report, report.find(attrs=euro_class)),
            )
            for filefield, link in targets:
                if link is None:
                    # This kind of document is not listed for this year.
                    continue
                url = "https://thalia.nu" + link['href']
                content = ContentFile(requests.get(url).content)
                # Per the original author: file names are ignored when
                # serving files anyway.
                filefield.save(os.path.basename(url), content)
from django.core.management.base import BaseCommand
from bs4 import BeautifulSoup
import requests
import getpass
class Command(BaseCommand):
    """Base command that opens a logged-in session on the old website.

    After ``handle()`` returns, ``self.session`` is the ``requests.Session``
    that was used to log in.
    """

    help = "Logs into the old Thalia website"

    def handle(self, *args, **options):
        """Prompt for credentials until the login attempt is accepted."""
        self.session = requests.Session()

        # The login form carries a token that is posted back with the
        # credentials.
        login_html = self.session.get("https://thalia.nu/account").text
        token_fields = BeautifulSoup(login_html, 'lxml')(
            attrs={'name': 'ccm_token'})
        ccm_token = token_fields[0]['value']

        login_url = 'https://thalia.nu/login/authenticate/concrete'
        authenticated = False
        while not authenticated:
            username = input("What is your Thalia username? ")
            password = getpass.getpass("And what is your password? ")
            response = self.session.post(login_url, data={
                'uName': username,
                'uPassword': password,
                'ccm_token': ccm_token,
            })
            # An attempt counts as failed only when the response contains
            # this message.
            authenticated = "Ongeldige gebruikersnaam" not in response.text
            if not authenticated:
                print("You did not authenticate successfully. Try again.")

        print("Authentication successful.")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment