Commit b4c5d740 authored by Sébastiaan Versteeg
Browse files

Merge documents migrations into one command

parent 77f22fd9
......@@ -3,7 +3,12 @@ from django.utils import timezone
from django.utils.timezone import datetime
from utils.management.commands import legacylogin
from documents.models import GeneralMeeting, GeneralMeetingDocument
from documents.models import (
GeneralMeeting,
GeneralMeetingDocument,
AssociationDocumentsYear,
MiscellaneousDocument
)
from bs4 import BeautifulSoup
import requests
......@@ -16,12 +21,15 @@ def filefield_from_url(filefield, url):
class Command(legacylogin.Command):
    # Merged migration command: scrapes general meetings, yearly policy /
    # report documents and miscellaneous documents from the legacy Thalia
    # website into the documents app (merge of three earlier commands).
    #
    # NOTE(review): the two `help` assignments below are a diff artifact of
    # the scraped page — the first is the removed line, the second the added
    # replacement; only the second exists in the actual file.
    help = "Scrapes the General Meetings from the old Thalia website"
    help = "Scrapes the documents from the old Thalia website"

    def handle(self, *args, **options):
        """Log in via the parent command, then migrate general meetings,
        policy documents and miscellaneous documents from the old site."""
        super().handle(*args, **options)

        # --- 1. General meetings --------------------------------------
        print("Migrating the general meetings")
        # Year-by-year AJAX view of the old ALV (general meeting) archive.
        url = "https://thalia.nu/ajax/alvyearview?year={}"
        for year in range(1990, 2016):
            print("Migrating {}".format(year))
            src = self.session.get(url.format(year)).text
            soup = BeautifulSoup(src, 'lxml')
            for alv in soup.div.find_all('div', recursive=False):
......@@ -46,3 +54,46 @@ class Command(legacylogin.Command):
                # NOTE(review): the hunk between the diff marker above and
                # this point is elided in the scraped view; the three lines
                # below are the tail of a per-document loop created in the
                # hidden part, so their exact nesting depth is presumed.
                doc.meeting = meeting
                filefield_from_url(doc.file, doc_url)
                doc.save()
        print("Migrating general meetings complete")

        # --- 2. Policy documents and annual/financial reports ---------
        print("Migrating the policy documents")
        url = "https://thalia.nu/association/documents"
        documentpage = self.session.get(url)
        soup = BeautifulSoup(documentpage.text, 'lxml')
        wrapper = soup(attrs={'class': 'policywrapper'})[0]
        uls = wrapper.find_all('ul', recursive=False)
        # First <ul> holds the policy plans, the second the yearly reports;
        # the two lists are walked pairwise per year.
        policies = uls[0].find_all('li', recursive=False)
        reports = uls[1].find_all('li', recursive=False)
        for policy, report in zip(policies, reports):
            year = policy.find('h2').find(text=True)
            print("Migrating {}".format(year))
            # Headings look like "Beleidsplan 'XX": recover the full year
            # from the two-digit suffix (90..99 -> 19xx, otherwise 20xx).
            year = int(year.replace("Beleidsplan '", '')[:2])
            year += (19 if year >= 90 else 20) * 100
            obj, cr = AssociationDocumentsYear.objects.get_or_create(year=year)
            obj.year = year  # redundant: get_or_create already set year
            # (FileField target, scraped <a> tag or None) pairs.
            files = [(obj.policy_document,
                      policy.find(attrs={'class': 'overlay-icon-link'})),
                     (obj.annual_report,
                      report.find(attrs={'class': 'overlay-icon-link'})),
                     (obj.financial_report,
                      report.find(attrs={'class': 'overlay-icon-euro'}))]
            for filefield, url in files:
                if url is not None:
                    url = "https://thalia.nu" + url['href']
                    file = ContentFile(requests.get(url).content)
                    # File names are ignored when serving files anyway
                    filefield.save(os.path.basename(url), file)
        print("Migrating policy documents complete")

        # --- 3. Miscellaneous documents -------------------------------
        # Re-uses the soup of the documents page fetched in step 2.
        print("Migrating the miscellaneous documents")
        container = soup(attrs={'class': 'generalcontainer'})[0]
        documents = container.find_all('li', recursive=False)
        for document in documents:
            name = document.find('h2').find(text=True)
            print("Migrating {}".format(name))
            obj, cr = MiscellaneousDocument.objects.get_or_create(name=name)
            url = document.find(attrs={'class': 'overlay-icon-link'})
            if url is not None:
                url = "https://thalia.nu" + url['href']
                filefield_from_url(obj.file, url)
        print("Migrating miscellaneous documents complete")
import os
import requests
from bs4 import BeautifulSoup
from django.core.files.base import ContentFile
from documents.models import MiscellaneousDocument
from utils.management.commands import legacylogin
def filefield_from_url(filefield, url):
    """Fetch *url* and store the response body on *filefield*.

    The file is saved under the last path component of the URL.
    """
    response = requests.get(url)
    filefield.save(os.path.basename(url), ContentFile(response.content))
class Command(legacylogin.Command):
    """Import miscellaneous documents by scraping the legacy website."""

    help = "Scrapes the Miscellaneous Documents from the old Thalia website"

    def handle(self, *args, **options):
        # Parent command performs the legacy-site login (sets self.session).
        super().handle(*args, **options)
        page = self.session.get("https://thalia.nu/association/documents")
        soup = BeautifulSoup(page.text, 'lxml')
        # The miscellaneous documents live in the "generalcontainer" list.
        listing = soup(attrs={'class': 'generalcontainer'})[0]
        for item in listing.find_all('li', recursive=False):
            title = item.find('h2').find(text=True)
            document, _ = MiscellaneousDocument.objects.get_or_create(
                name=title)
            link = item.find(attrs={'class': 'overlay-icon-link'})
            # Some entries have no download link; skip those.
            if link is not None:
                filefield_from_url(document.file,
                                   "https://thalia.nu" + link['href'])
from django.core.files.base import ContentFile
from utils.management.commands import legacylogin
from documents.models import AssociationDocumentsYear
from bs4 import BeautifulSoup
import requests
import os
class Command(legacylogin.Command):
    """Import yearly policy plans and reports from the legacy website."""

    help = "Scrapes the policy documents from the old Thalia website"

    def handle(self, *args, **options):
        # Parent command performs the legacy-site login (sets self.session).
        super().handle(*args, **options)
        page = self.session.get("https://thalia.nu/association/documents")
        soup = BeautifulSoup(page.text, 'lxml')
        wrapper = soup(attrs={'class': 'policywrapper'})[0]
        # First <ul> holds the policy plans, the second the yearly reports.
        lists = wrapper.find_all('ul', recursive=False)
        policy_items = lists[0].find_all('li', recursive=False)
        report_items = lists[1].find_all('li', recursive=False)
        for policy, report in zip(policy_items, report_items):
            heading = policy.find('h2').find(text=True)
            # Headings look like "Beleidsplan 'XX": recover the full year
            # from the two-digit suffix (90..99 -> 19xx, otherwise 20xx).
            short_year = int(heading.replace("Beleidsplan '", '')[:2])
            year = (19 if short_year >= 90 else 20) * 100 + short_year
            docs_year, _ = AssociationDocumentsYear.objects.get_or_create(
                year=year)
            docs_year.year = year
            # (FileField target, scraped <a> tag or None) pairs.
            targets = [(docs_year.policy_document,
                        policy.find(attrs={'class': 'overlay-icon-link'})),
                       (docs_year.annual_report,
                        report.find(attrs={'class': 'overlay-icon-link'})),
                       (docs_year.financial_report,
                        report.find(attrs={'class': 'overlay-icon-euro'}))]
            for filefield, link in targets:
                if link is None:
                    continue
                full_url = "https://thalia.nu" + link['href']
                contents = ContentFile(requests.get(full_url).content)
                # File names are ignored when serving files anyway
                filefield.save(os.path.basename(full_url), contents)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment