Unverified Commit 83d3a342 authored by Thom Wiggers's avatar Thom Wiggers 📐
Browse files

Remove old migrations

Thanks, you've served us well.

Closes #266
parent b4c518a3
import os
import requests
from bs4 import BeautifulSoup
from django.core.files.base import ContentFile
from django.utils import timezone
from django.utils.timezone import datetime
from documents.models import (AssociationDocumentsYear, GeneralMeeting,
GeneralMeetingDocument, MiscellaneousDocument)
from utils.management.commands import legacylogin
def filefield_from_url(filefield, url):
    """Download ``url`` and store the response body in ``filefield``.

    The stored file is named after the last path component of ``url``.
    The HTTP status of the response is not inspected, so whatever body the
    server returns is what ends up being saved.
    """
    response = requests.get(url)
    target_name = os.path.basename(url)
    filefield.save(target_name, ContentFile(response.content))
class Command(legacylogin.Command):
    """Copy documents from the old Thalia website into the new models.

    Three kinds of documents are handled, in this order: general meetings
    (with their minutes and attached documents), the yearly policy
    documents / reports, and the miscellaneous documents.

    Pages are fetched through ``self.session``, which this class does not
    create itself -- presumably the parent ``legacylogin.Command.handle``
    sets it up (TODO confirm).
    """

    help = "Scrapes the documents from the old Thalia website"

    def handle(self, *args, **options):
        """Run the parent handler, then the three migration steps."""
        super().handle(*args, **options)

        self._migrate_general_meetings()
        # Policy and miscellaneous documents are read from the same page,
        # so the parsed page is handed from one step to the next.
        documents_soup = self._migrate_policy_documents()
        self._migrate_miscellaneous_documents(documents_soup)

    def _migrate_general_meetings(self):
        """Create a GeneralMeeting (plus files) for every listed meeting."""
        print("Migrating the general meetings")
        year_url = "https://thalia.nu/ajax/alvyearview?year={}"

        for year in range(1990, 2017):
            print("Migrating {}".format(year))
            markup = self.session.get(year_url.format(year)).text
            year_soup = BeautifulSoup(markup, 'lxml')

            for alv in year_soup.div.find_all('div', recursive=False):
                meeting = GeneralMeeting()

                # The page only shows day and month; the year comes from
                # the listing that is being requested.
                day_month = alv.find(attrs={'class': 'gw-go-coinb'}).text
                when = datetime.strptime(
                    '{} {}'.format(day_month.strip(), year), '%d %b %Y')
                # Dates before September are moved to the next calendar
                # year -- presumably a listing covers an association year
                # that starts in September (TODO confirm).
                if when.month < 9:
                    when = datetime(year + 1, when.month, when.day)

                meeting.datetime = timezone.make_aware(
                    when, timezone.get_current_timezone())
                meeting.location = alv.find('p').text
                meeting.save()

                footer = alv.find('div', {'class': 'gw-go-footer'})
                minutes_link = footer.find('a')
                if minutes_link is not None:
                    filefield_from_url(
                        meeting.minutes,
                        'https://thalia.nu' + minutes_link['href'])

                for item in alv.find_all('li'):
                    attachment = GeneralMeetingDocument()
                    item_url = 'https://thalia.nu' + item.find('a')['href']
                    attachment.meeting = meeting
                    filefield_from_url(attachment.file, item_url)
                    attachment.save()

        print("Migrating general meetings complete")

    def _migrate_policy_documents(self):
        """Store policy plans and reports; return the parsed documents page."""
        print("Migrating the policy documents")
        page = self.session.get("https://thalia.nu/association/documents")
        soup = BeautifulSoup(page.text, 'lxml')

        wrapper = soup(attrs={'class': 'policywrapper'})[0]
        lists = wrapper.find_all('ul', recursive=False)
        policies = lists[0].find_all('li', recursive=False)
        reports = lists[1].find_all('li', recursive=False)

        # Policies and reports are paired purely by their position.
        for policy, report in zip(policies, reports):
            heading = policy.find('h2').find(text=True)
            print("Migrating {}".format(heading))

            # The heading carries a two-digit year; 90-99 is mapped to
            # 19xx, everything else to 20xx.
            short_year = int(heading.replace("Beleidsplan '", '')[:2])
            year = short_year + (1900 if short_year >= 90 else 2000)

            obj, _created = AssociationDocumentsYear.objects.get_or_create(
                year=year)
            obj.year = year

            link_class = {'class': 'overlay-icon-link'}
            euro_class = {'class': 'overlay-icon-euro'}
            targets = [
                (obj.policy_document, policy.find(attrs=link_class)),
                (obj.annual_report, report.find(attrs=link_class)),
                (obj.financial_report, report.find(attrs=euro_class)),
            ]
            for filefield, link in targets:
                if link is None:
                    continue
                # File names are ignored when serving files anyway
                filefield_from_url(
                    filefield, "https://thalia.nu" + link['href'])

        print("Migrating policy documents complete")
        return soup

    def _migrate_miscellaneous_documents(self, soup):
        """Store every miscellaneous document listed in ``soup``."""
        print("Migrating the miscellaneous documents")
        container = soup(attrs={'class': 'generalcontainer'})[0]

        for item in container.find_all('li', recursive=False):
            name = item.find('h2').find(text=True)
            print("Migrating {}".format(name))
            obj, _created = MiscellaneousDocument.objects.get_or_create(
                name=name)
            link = item.find(attrs={'class': 'overlay-icon-link'})
            if link is not None:
                filefield_from_url(
                    obj.file, "https://thalia.nu" + link['href'])

        print("Migrating miscellaneous documents complete")
import json
import os
import requests
from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ImproperlyConfigured, PermissionDenied
from django.core.files.base import ContentFile
from django.utils.dateparse import parse_date
from django.utils.translation import activate
from education.models import Category, Course, Exam, Summary
from utils.management.commands import legacylogin
def filefield_from_url(filefield, url):
    """Fetch ``url`` and save its body into ``filefield``.

    The file name passed to the field is the basename of ``url``. No check
    is made on the HTTP status code of the download.
    """
    payload = requests.get(url).content
    filefield.save(os.path.basename(url), ContentFile(payload))
class Command(legacylogin.Command):
    """Import categories, courses, summaries and exams from the old site.

    The data is read as one JSON document from the legacy education API,
    authenticated with ``settings.MIGRATION_KEY``. Legacy identifiers are
    translated to new primary keys through in-memory maps built while
    importing.
    """

    help = 'Scrapes the education data from the old Thalia website'

    @staticmethod
    def _uploader_for(username):
        """Return the user called ``username``, or the user with pk 1."""
        try:
            return User.objects.get(username=username)
        except User.DoesNotExist:
            return User.objects.get(pk=1)

    def handle(self, *args, **options):
        """Optionally wipe existing objects, then import everything.

        Raises ImproperlyConfigured when no MIGRATION_KEY is set and
        PermissionDenied when the API rejects the key.
        """
        activate('en')

        if not settings.MIGRATION_KEY:
            raise ImproperlyConfigured("MIGRATION_KEY not specified")

        url = "https://thalia.nu/index.php/onderwijs/api?apikey={}".format(
            settings.MIGRATION_KEY
        )

        answer = input(
            'Do you want to delete all existing objects? (type yes or no) ')
        if answer == 'yes':
            for model in (Summary, Exam, Course, Category):
                model.objects.all().delete()

        body = requests.Session().get(url).text
        if 'invalid api key' in body:
            raise PermissionDenied('Invalid API key')
        data = json.loads(body)

        # legacy category key -> new Category pk
        category_map = {}
        print('Importing categories')
        for legacy_key, name in data['categories'].items():
            category = Category()
            category.name_nl = name
            category.name_en = name
            category.save()
            category_map[legacy_key] = category.pk
        print('Importing categories complete')

        # legacy course key -> new Course pk
        course_map = {}
        print('Importing courses')
        for legacy_key, entry in data['courses'].items():
            course = Course()
            # The legacy data has a single name/shorthand; it is used for
            # both languages.
            course.name_nl = course.name_en = entry['name']
            course.course_code = entry['course_code']
            course.shorthand_nl = entry['course_shorthand']
            course.shorthand_en = entry['course_shorthand']
            course.ec = int(entry['e_c_t_s'])
            course.period = entry['quarter'].replace(' en ', ' & ')
            course.since = entry['since']
            # An "until" of 0 leaves the field unset.
            if int(entry['until']) != 0:
                course.until = entry['until']
            course.save()

            for category_key in entry['categories']:
                category_pk = category_map[str(category_key)]
                course.categories.add(Category.objects.get(pk=category_pk))
            course_map[legacy_key] = course.pk

        # Predecessors can only be linked once every course has a pk.
        print('Combining courses with predecessors')
        for legacy_key, entry in data['courses'].items():
            course = Course.objects.get(pk=course_map[legacy_key])
            try:
                for predecessor_key in entry['predecessors']:
                    if predecessor_key != 0:
                        predecessor = Course.objects.get(
                            pk=course_map[str(predecessor_key)])
                        course.old_courses.add(predecessor)
            except KeyError:
                # Covers both a missing 'predecessors' entry and an unknown
                # predecessor key; the latter also skips the remaining
                # predecessors of this course.
                pass
            course.save()
        print('Importing courses complete')

        print('Importing summaries')
        for entry in data['summaries'].values():
            summary = Summary()
            summary.name = entry['name']
            summary.author = (
                entry['author'] if entry['author'] is not None else '')
            summary.year = int(entry['year'])
            summary.uploader_date = parse_date(entry['uploader_date'])
            summary.accepted = entry['accepted'] == '1'
            summary.course = Course.objects.get(
                pk=course_map[str(entry['course_id'])])
            summary.uploader = self._uploader_for(entry['uploader'])
            filefield_from_url(summary.file, entry['file_url'])
            summary.save()
        print('Importing summaries complete')

        # Legacy numeric exam type -> new type value; unknown codes fall
        # back to 'document'.
        type_map = {
            0: 'document',
            1: 'exam',
            2: 'partial',
            3: 'resit',
            5: 'practice'
        }

        print('Importing exams')
        for entry in data['exams'].values():
            exam = Exam()
            exam.name = entry['name'] if entry['name'] is not None else ''
            exam.accepted = entry['accepted'] == '1'
            exam.course = Course.objects.get(
                pk=course_map[str(entry['course_id'])])
            exam.type = type_map.get(int(entry['type']), 'document')
            exam.exam_date = parse_date(entry['date'])
            exam.uploader_date = parse_date(entry['uploader_date'])
            exam.uploader = self._uploader_for(entry['uploader'])
            filefield_from_url(exam.file, entry['file_url'])
            exam.save()
        print('Importing exams complete')
import json
import re
from datetime import datetime
import requests
from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ImproperlyConfigured
from django.core.management.base import BaseCommand
from django.utils import timezone
import events.models as events_models
import members.models as members_models
# Maps the string ``data_type`` code of a legacy extra field to the matching
# RegistrationInformationField type constant.
FIELD_DATA_TYPES = {
    '0': events_models.RegistrationInformationField.TEXT_FIELD,
    '1': events_models.RegistrationInformationField.INTEGER_FIELD,
    '2': events_models.RegistrationInformationField.BOOLEAN_FIELD,
}
def naive_to_aware(date_string):
    """Parse a '%Y-%m-%d %H:%M:%S' string into an aware datetime.

    The parsed value is attached to Django's current time zone.
    """
    parsed = datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S')
    current_tz = timezone.get_current_timezone()
    # NOTE(review): ``localize`` looks like the pytz tzinfo API -- confirm
    # the configured time zone objects provide it.
    return current_tz.localize(parsed)
class Command(BaseCommand):
    """Import events, registrations and extra registration fields.

    Everything is read as one JSON document from the legacy events API,
    authenticated with ``settings.MIGRATION_KEY``. Legacy ids are mapped to
    the primary keys of the newly created objects so that later sections of
    the payload can refer back to earlier ones.
    """

    help = 'Migrate the events from the old website.'

    @staticmethod
    def _as_flag(raw):
        """Interpret a legacy flag value as a boolean.

        Other flags in this payload are decoded with ``int()`` first, which
        suggests numeric strings; a plain ``bool('0')`` would be True.
        Values that cannot be converted to an integer fall back to their
        ordinary truthiness, so ``None`` and ``''`` are False.
        """
        try:
            return bool(int(raw))
        except (TypeError, ValueError):
            return bool(raw)

    def handle(self, *args, **options):
        """Download the legacy data and create the corresponding objects.

        Raises ImproperlyConfigured when no MIGRATION_KEY is set. Download
        or JSON decoding problems are reported on stdout and abort the
        command without raising.
        """
        if not settings.MIGRATION_KEY:
            raise ImproperlyConfigured('MIGRATION_KEY not specified')

        events_api_url = 'https://thalia.nu/events/api/?apikey={}'.format(
            settings.MIGRATION_KEY)

        print('[*]Getting events json data')
        try:
            response = requests.get(events_api_url,
                                    headers={'User-Agent': 'The Donald'})
        except requests.RequestException:
            print('[!]Could not get {}'.format(events_api_url))
            return

        try:
            data = response.json()
        except json.decoder.JSONDecodeError:
            print('[!]No json data found')
            return

        event_fields_translations = {
            # name in data : name in model
            'title': 'title',
            'description': 'description',
            'location': 'location',
            'start_date': 'start',
            'end_date': 'end',
            'member_price': 'price',
            'thalia_costs': 'cost',
            'begin_registration': 'registration_start',
            'end_registration': 'registration_end',
            'end_cancel': 'cancel_deadline',
            'registration_not_needed_message': 'no_registration_message',
        }
        multilingual_fields = (
            'title', 'description', 'location', 'no_registration_message')
        datetime_fields = (
            'start', 'end', 'registration_start', 'registration_end',
            'cancel_deadline')
        decimal_fields = ('price', 'cost')

        # legacy id -> new primary key
        activity_map = {}
        registration_map = {}
        information_field_map = {}

        print('[*]Parsing event data.')
        # Event
        for event_data in data['events']:
            new_event = events_models.Event(
                published=bool(int(event_data['public'])),
                max_participants=int(event_data['registration_limit']),
            )

            for concrete_field, django_field in (
                    event_fields_translations.items()):
                concrete_data = event_data[concrete_field]

                # MultilingualField
                if django_field in multilingual_fields:
                    # Shorten the title *value* to 100 characters (the old
                    # code sliced the field name, which had no effect).
                    if (django_field == 'title'
                            and isinstance(concrete_data, str)):
                        concrete_data = concrete_data[:100]
                    # The legacy value is stored for both languages.
                    for language_code in ('en', 'nl'):
                        django_multilingualfield = '{}_{}'.format(
                            django_field, language_code)
                        if not hasattr(new_event, django_multilingualfield):
                            print('[!]Could neither find {} nor {}'.format(
                                django_field, django_multilingualfield))
                            return
                        setattr(new_event, django_multilingualfield,
                                concrete_data)
                # DateTimeField (empty values are left unset)
                elif concrete_data and django_field in datetime_fields:
                    setattr(new_event, django_field,
                            naive_to_aware(concrete_data))
                # DecimalField
                elif django_field in decimal_fields:
                    if re.match(r'[-+]?\d*\.?\d+$', concrete_data):
                        setattr(new_event, django_field,
                                float(concrete_data))
                    else:
                        # TODO: is 0 the right value?
                        setattr(new_event, django_field, 0)

            new_event.save()
            activity_map[event_data['id']] = new_event.pk

        print('[*]Parsing registration field data.')
        # RegistrationInformationField
        for field_data in data['extra_fields']:
            # The legacy name/explanation is used for both languages.
            field_name = field_data['field_name'][:100]
            new_registration_information_field = \
                events_models.RegistrationInformationField(
                    name_en=field_name,
                    name_nl=field_name,
                    description_en=field_data['field_explanation'],
                    description_nl=field_data['field_explanation'],
                    type=FIELD_DATA_TYPES[field_data['data_type']],
                    required=True,
                    event=events_models.Event.objects.get(
                        pk=activity_map[field_data['activity_id']]
                    ),
                )
            new_registration_information_field.save()
            information_field_map[field_data['field_id']] = \
                new_registration_information_field.pk

        print('[*]Parsing registration data.')
        # Registration
        for registration_data in data['registrations']:
            new_registration = events_models.Registration(
                name=registration_data['name'],
                date=naive_to_aware(registration_data['date']),
                # Decode the flag numerically; bool('0') would be True.
                paid=self._as_flag(registration_data['paid']),
                event=events_models.Event.objects.get(
                    pk=activity_map[registration_data['activity_id']]
                ),
            )

            # Only link a member when the username matches a known user;
            # otherwise the registration keeps just its name.
            username = registration_data['username']
            registration_user = None
            if username:
                registration_user = User.objects.filter(
                    username=username).first()
            if registration_user is not None:
                new_registration.member = members_models.Member.objects.get(
                    user=registration_user)

            cancelled_date = registration_data['canceled']
            if cancelled_date:
                new_registration.date_cancelled = naive_to_aware(
                    cancelled_date)

            new_registration.save()
            registration_map[registration_data['id']] = new_registration.pk

        print('[*]Parsing registration field info data.')
        # fields info
        field_model = events_models.RegistrationInformationField
        for field_info_data in data['extra_info']:
            registration_field = field_model.objects.get(
                pk=information_field_map[field_info_data['field_id']]
            )
            parameters = {
                'registration': events_models.Registration.objects.get(
                    pk=registration_map[field_info_data['registration_id']]),
                'field': registration_field,
            }

            if registration_field.type == field_model.TEXT_FIELD:
                new_registration_information = \
                    events_models.TextRegistrationInformation(
                        value=field_info_data['value'],
                        **parameters
                    )
            elif registration_field.type == field_model.BOOLEAN_FIELD:
                # The old code tested a local that was always False, so
                # every boolean answer was stored as False.
                new_registration_information = \
                    events_models.BooleanRegistrationInformation(
                        value=self._as_flag(field_info_data['value']),
                        **parameters)
            # registration_field.type == INTEGER_FIELD:
            else:
                new_registration_information = \
                    events_models.IntegerRegistrationInformation(
                        value=field_info_data['value'] or 0,
                        **parameters)

            new_registration_information.save()
import requests
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.management.base import BaseCommand
from activemembers.models import Board, Committee
from mailinglists.models import MailingList, VerbatimAddress
from members.models import Member
class Command(BaseCommand):
    """Import mailing lists from the legacy mail export API.

    The list index is fetched first; every numeric line in it is treated as
    a list id whose details are fetched separately. A detail response is a
    line-based document: name, prefix, archived flag and moderated flag,
    followed by a groups section, a users section and a verbatim-address
    section separated by '-' lines.
    """

    help = "Migrates mailinglists. This should be done after migrating members"

    def handle(self, *args, **options):
        """Create or update every mailing list found in the export.

        Raises ImproperlyConfigured when no MIGRATION_KEY is set, and a
        plain Exception when the index contains a line with a ':'.
        """
        if not settings.MIGRATION_KEY:
            raise ImproperlyConfigured("MIGRATION_KEY not specified")

        index_url = ("https://oud.thalia.nu/api/export_mail.php"
                     "?apikey={}&lists").format(settings.MIGRATION_KEY)

        for list_id in requests.get(index_url).text.split('\n'):
            if ':' in list_id:
                raise Exception("Turns out we actually used aliasses.")
            if not list_id.isnumeric():
                continue

            detail_url = ("https://oud.thalia.nu/api/export_mail.php"
                          "?apikey={}&list={}").format(
                              settings.MIGRATION_KEY, list_id)
            # The first line and the last two lines of the response are
            # discarded.
            detail = requests.get(detail_url).text.split('\n')[1:-2]
            name, prefix, arch, mod, *rest = detail

            mlist, _created = MailingList.objects.get_or_create(name=name)
            mlist.prefix = prefix
            # NOTE(review): bool() of any non-empty string is True; confirm
            # the export uses an empty string for "off".
            mlist.archived = bool(arch)
            mlist.moderated = bool(mod)

            # Groups section: a group name, then its usernames, closed by a
            # '=' line; repeated until the first '-' separator.
            first_separator = rest.index('-')
            groups = []
            current_members = None
            for entry in rest[:first_separator]:
                if current_members is None:
                    current_members = []
                    groups.append((entry, current_members))
                elif entry == '=':
                    current_members = None
                else:
                    current_members.append(entry)

            rest = rest[first_separator + 1:]
            second_separator = rest.index('-')
            usernames = rest[:second_separator]
            verbatims = rest[second_separator + 1:]

            mlist.save()

            for username in usernames:
                mlist.members.add(
                    Member.objects.get(user__username=username))

            for group_name, group_usernames in groups:
                try:
                    mlist.committees.add(
                        Committee.objects.get(name_nl=group_name))
                except Committee.DoesNotExist:
                    try:
                        mlist.committees.add(
                            Board.objects.get(name_nl=group_name))
                    except Committee.DoesNotExist:
                        # Neither lookup matched: subscribe the group's
                        # users individually instead.
                        print("[{}] Did not find group '{}'".format(
                            name, group_name))
                        print(" Adding individual users: {}".format(
                            ', '.join(group_usernames)))
                        for username in group_usernames:
                            mlist.members.add(
                                Member.objects.get(user__username=username))

            for address in verbatims:
                VerbatimAddress(address=address, mailinglist=mlist).save()

            mlist.save()
import json
import os
import requests
from bs4 import BeautifulSoup
from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ImproperlyConfigured
from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand
from django.utils.dateparse import parse_date
from django.utils.translation import activate
from activemembers.models import (Board, Committee,
CommitteeMembership, Mentorship)
from members.models import Member, Membership
def imagefield_from_url(imagefield, url):
    """Download ``url`` and store the response body in ``imagefield``.

    The stored name is the basename of ``url`` with everything from its
    last '?' onwards removed. The HTTP status of the download is not
    checked.
    """
    filename = os.path.basename(url)
    if '?' in filename:
        # Keep only the part before the final '?'.
        filename = filename.rpartition('?')[0]
    downloaded = requests.get(url).content
    imagefield.save(filename, ContentFile(downloaded))