Commit 1732593e authored by Joost Rijneveld's avatar Joost Rijneveld
Browse files

Merge branch 'remove_migrations' into 'master'

Remove old migrations

Closes #266

See merge request !296
parents b4c518a3 cb09e0c2
......@@ -29,8 +29,6 @@ RUN apk add --no-cache \
zlib \
freetype \
lcms2 \
libxml2 \
libxslt \
libffi \
ghostscript \
......@@ -44,8 +42,6 @@ RUN apk add --no-cache --virtual .builddeps \
freetype-dev \
lcms2-dev \
libwebp-dev \
libxml2-dev \
libxslt-dev \
libffi-dev \
linux-headers \
git \
......@@ -59,12 +55,10 @@ WORKDIR /usr/src/app
# install python requirements
# Only the requirement lists are copied into the image at this point; the
# pip step below reads nothing else. (Presumably this keeps the dependency
# layer cacheable -- the later COPY of the sources is not visible here.)
COPY requirements.txt /usr/src/app/
COPY production-requirements.txt /usr/src/app/
COPY migration-requirements.txt /usr/src/app/
COPY dev-requirements.txt /usr/src/app/
# Install every requirement list in one pip invocation; --no-cache-dir keeps
# pip's download cache out of the image.
RUN pip install --no-cache-dir \
-r requirements.txt \
-r production-requirements.txt \
-r migration-requirements.txt \
-r dev-requirements.txt
# Remove the build-time packages that were installed under the `.builddeps`
# virtual package name.
RUN apk del .builddeps
# Data migrations
The old website of Study Association Thalia contained a lot of data.
Before we can launch the new website, all old data has to be migrated
to the new website.
The migration instructions for each part of the website are shown below.
## Active members
This is taken care of during member migration.
## Documents
Make sure you have valid login credentials for the old Thalia website.
To migrate all documents, execute `python manage.py migratedocuments`
and enter the required login information.
## Education
Make sure that the API key is available as `MIGRATION_KEY` in the Django settings.
To migrate the education data, make sure that members have been migrated.
Then simply execute `python manage.py migrateeducation`.
## Events
Make sure that the API key is available as `MIGRATION_KEY` in the Django settings.
To migrate the events, make sure that members, committees and boards
have been migrated. Then simply execute `python manage.py migrateevents`.
## Mailing lists
Make sure that the API key is available as `MIGRATION_KEY` in the Django settings.
To migrate the mailing lists, make sure that members, committees and boards
have been migrated. Then simply execute `python manage.py migratelists`.
## Members
To migrate members, committees, boards, memberships of committees and boards,
as well as introduction mentorships, execute `python manage.py migratemembers`.
Unfortunately, not all the data is complete. Most notably, some begin-dates are
missing. This will need to be fixed manually. These have been set to 1970,
as start dates are not optional.
This migration typically takes a few minutes.
## Newsletters
Make sure that the API key is available as `MIGRATION_KEY` in the Django settings.
To migrate the newsletters, simply execute `python manage.py migratenewsletters`.
## Partners
To migrate partners, partner events, vacancy categories and vacancies,
execute `python manage.py migratepartners`.
## Photos
Make sure the album directories exist somewhere on the server. Then,
for each album, run `python manage.py importphotos <album dir>`.
This needs to be done per album to avoid having to store two copies of all
albums at the same time. Consider doing several albums at once by using
your shell to loop over a part of the set (e.g. divide them over directories).
## Pizzas
## Thabloids
Simply execute `python manage.py migratethabloids` to migrate all Thabloids.
## Merchandise
Make sure you have valid login credentials for the old Thalia website.
To migrate the merchandise, execute `python manage.py migratemerchandise` and
enter the required login information.
## Wiki
Copy the `/data` folder to the new installation.
import os
import requests
from bs4 import BeautifulSoup
from django.core.files.base import ContentFile
from django.utils import timezone
from django.utils.timezone import datetime
from documents.models import (AssociationDocumentsYear, GeneralMeeting,
GeneralMeetingDocument, MiscellaneousDocument)
from import legacylogin
def filefield_from_url(filefield, url):
    """Download ``url`` and store the response body in ``filefield``.

    :param filefield: file attribute of a model instance (callers pass e.g.
        ``meeting.minutes`` or ``doc.file``); its ``save()`` receives the
        downloaded content.
    :param url: location of the file to fetch. Its last path component is
        used as the name under which the content is saved.
    """
    file = ContentFile(requests.get(url).content)
    # Django's FieldFile.save() also saves the owning model instance by
    # default, so callers do not need a separate save for the file itself.
    filefield.save(os.path.basename(url), file)
# Management command that scrapes three kinds of documents from the old
# website: general meetings (with minutes and attachments), the yearly policy
# documents/reports, and miscellaneous documents.
# NOTE(review): this listing has lost its indentation, its URL literals are
# empty and several statements are cut off mid-expression; restore the file
# from version control before relying on it.
class Command(legacylogin.Command):
help = "Scrapes the documents from the old Thalia website"
def handle(self, *args, **options):
# Run the parent command first. self.session is used below -- presumably the
# parent's handle() logs in and provides it (TODO confirm in legacylogin).
super().handle(*args, **options)
print("Migrating the general meetings")
url = "{}"
# One page is requested per year, from 1990 up to and including 2016.
for year in range(1990, 2017):
print("Migrating {}".format(year))
src = self.session.get(url.format(year)).text
soup = BeautifulSoup(src, 'lxml')
# Every direct child <div> of the page's first <div> is handled as one
# general meeting.
for alv in soup.div.find_all('div', recursive=False):
meeting = GeneralMeeting()
datetext = alv.find(attrs={'class': 'gw-go-coinb'}).text
# The scraped text is parsed as day + abbreviated month; the loop's year is
# appended so strptime gets a complete date.
date = datetime.strptime(datetext.strip() + ' ' + str(year),
'%d %b %Y')
# Dates in months before September are moved to year+1 -- presumably because
# a page covers an association year spanning two calendar years.
# NOTE(review): the next two statements are truncated in this listing.
if date.month < 9:
date = datetime(year+1, date.month,
date = timezone.make_aware(date,
meeting.datetime = date
meeting.location = alv.find('p').text
# The minutes link is optional; it is only downloaded when the footer has an <a>.
minutes = alv.find('div', {'class': 'gw-go-footer'}).find('a')
if minutes is not None:
minutes_url = '' + minutes['href']
filefield_from_url(meeting.minutes, minutes_url)
# Every <li> inside the meeting block is treated as an attached document.
for document in alv.find_all('li'):
doc_url = '' + document.find('a')['href']
doc = GeneralMeetingDocument()
doc.meeting = meeting
filefield_from_url(doc.file, doc_url)
print("Migrating general meetings complete")
print("Migrating the policy documents")
url = ""
documentpage = self.session.get(url)
soup = BeautifulSoup(documentpage.text, 'lxml')
wrapper = soup(attrs={'class': 'policywrapper'})[0]
# The wrapper's first <ul> holds the policy entries and the second the
# report entries; they are paired up positionally by zip() below.
uls = wrapper.find_all('ul', recursive=False)
policies = uls[0].find_all('li', recursive=False)
reports = uls[1].find_all('li', recursive=False)
for policy, report in zip(policies, reports):
year = policy.find('h2').find(text=True)
print("Migrating {}".format(year))
# Strip the "Beleidsplan '" prefix from the heading and keep the two-digit year.
year = int(year.replace("Beleidsplan '", '')[:2])
# Two-digit years >= 90 become 19YY, all others 20YY.
year += (19 if year >= 90 else 20) * 100
obj, cr = AssociationDocumentsYear.objects.get_or_create(year=year)
obj.year = year
# Pairs of (target file field, link element).
# NOTE(review): the field names of the last two pairs are missing here.
files = [(obj.policy_document,
policy.find(attrs={'class': 'overlay-icon-link'})),
report.find(attrs={'class': 'overlay-icon-link'})),
report.find(attrs={'class': 'overlay-icon-euro'}))]
for filefield, url in files:
# Links that were not found on the page are skipped.
if url is not None:
url = "" + url['href']
file = ContentFile(requests.get(url).content)
# File names are ignored when serving files anyway, file)
print("Migrating policy documents complete")
print("Migrating the miscellaneous documents")
# Reuses the soup of the policy-documents page fetched above.
container = soup(attrs={'class': 'generalcontainer'})[0]
documents = container.find_all('li', recursive=False)
for document in documents:
name = document.find('h2').find(text=True)
print("Migrating {}".format(name))
# get_or_create keyed on the name, so an existing document with that name is reused.
obj, cr = MiscellaneousDocument.objects.get_or_create(name=name)
url = document.find(attrs={'class': 'overlay-icon-link'})
if url is not None:
url = "" + url['href']
filefield_from_url(obj.file, url)
print("Migrating miscellaneous documents complete")
import json
import os
import requests
from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ImproperlyConfigured, PermissionDenied
from django.core.files.base import ContentFile
from django.utils.dateparse import parse_date
from django.utils.translation import activate
from education.models import Category, Course, Exam, Summary
from import legacylogin
def filefield_from_url(filefield, url):
    """Download ``url`` and store the response body in ``filefield``.

    :param filefield: file attribute of a model instance (callers pass e.g.
        ``summary.file`` or ``exam.file``); its ``save()`` receives the
        downloaded content.
    :param url: location of the file to fetch. Its last path component is
        used as the name under which the content is saved.
    """
    file = ContentFile(requests.get(url).content)
    # Django's FieldFile.save() also saves the owning model instance by
    # default, so callers do not need a separate save for the file itself.
    filefield.save(os.path.basename(url), file)
# Management command that imports categories, courses (including their
# predecessors), summaries and exams from a JSON export of the old website.
# NOTE(review): this listing has lost its indentation and many statement
# fragments (URL literal, attribute targets, `try:` lines, save calls);
# restore the file from version control before relying on it.
class Command(legacylogin.Command):
help = 'Scrapes the education data from the old Thalia website'
def handle(self, *args, **options):
# Refuse to run when no MIGRATION_KEY is configured in the settings.
if not settings.MIGRATION_KEY:
raise ImproperlyConfigured("MIGRATION_KEY not specified")
url = "{}".format(
# Asks interactively whether existing objects should be removed first.
# NOTE(review): the statements that act on a 'yes' answer are missing here.
input_val = input(
'Do you want to delete all existing objects? (type yes or no) ')
if input_val == 'yes':
session = requests.Session()
src = session.get(url).text
# A response body containing 'invalid api key' is treated as a rejected key.
if 'invalid api key' in src:
raise PermissionDenied('Invalid API key')
data = json.loads(src)
# Filled per key of data['categories'].
# NOTE(review): the assigned value is missing; course_map below is later
# read as "key -> primary key", so this presumably stores primary keys too.
category_map = {}
print('Importing categories')
for key in data['categories']:
name = data['categories'][key]
id = Category()
# The export has a single name; it is copied to both language variants.
id.name_nl = name
id.name_en = name
category_map[key] =
print('Importing categories complete')
course_map = {}
print('Importing courses')
for key in data['courses']:
src = data['courses'][key]
course = Course()
# Name and shorthand are likewise copied to both language variants.
course.name_nl = src['name']
course.name_en = src['name']
course.course_code = src['course_code']
course.shorthand_nl = src['course_shorthand']
course.shorthand_en = src['course_shorthand'] = int(src['e_c_t_s'])
# ' en ' (Dutch for ' and ') in the quarter text is replaced with ' & '.
course.period = src['quarter'].replace(' en ', ' & ')
course.since = src['since']
# An 'until' of 0 leaves the field unset.
if int(src['until']) != 0:
course.until = src['until']
for id in src['categories']:
course_map[key] =
# Second pass over the courses: predecessors are looked up through
# course_map, which is only complete once the loop above has finished.
print('Combining courses with predecessors')
for key in data['courses']:
src = data['courses'][key]
course = Course.objects.get(pk=course_map[key])
for id in src['predecessors']:
if id == 0:
# course_map is keyed by the JSON object's (string) keys, hence str(id).
old_course = Course.objects.get(pk=course_map[str(id)])
except KeyError:
print('Importing courses complete')
print('Importing summaries')
for key in data['summaries']:
src = data['summaries'][key]
summary = Summary() = src['name'] = '' if src['author'] is None else src['author']
summary.year = int(src['year'])
summary.uploader_date = parse_date(src['uploader_date'])
# 'accepted' is compared against the string '1'.
summary.accepted = src['accepted'] == '1'
course_id = str(src['course_id'])
summary.course = Course.objects.get(pk=course_map[course_id])
# When no user with the uploader's username exists, the user with pk=1 is
# used instead.
summary.uploader = User.objects.get(username=src['uploader'])
except User.DoesNotExist:
summary.uploader = User.objects.get(pk=1)
filefield_from_url(summary.file, src['file_url'])
print('Importing summaries complete')
print('Importing exams')
for key in data['exams']:
src = data['exams'][key]
exam = Exam() = '' if src['name'] is None else src['name']
exam.accepted = src['accepted'] == '1'
course_id = str(src['course_id'])
exam.course = Course.objects.get(pk=course_map[course_id])
# Numeric type codes from the export mapped to type names; codes not listed
# here (4 is absent) fall back to 'document' in the lookup below.
type_map = {
0: 'document',
1: 'exam',
2: 'partial',
3: 'resit',
5: 'practice'
exam.type = type_map.get(int(src['type']), 'document')
exam.exam_date = parse_date(src['date'])
exam.uploader_date = parse_date(src['uploader_date'])
# Same uploader fallback as for summaries: pk=1 when the username is unknown.
exam.uploader = User.objects.get(username=src['uploader'])
except User.DoesNotExist:
exam.uploader = User.objects.get(pk=1)
filefield_from_url(exam.file, src['file_url'])
print('Importing exams complete')
import json
import re
from datetime import datetime
import requests
from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ImproperlyConfigured
from import BaseCommand
from django.utils import timezone
import events.models as events_models
import members.models as members_models
# Entries mapping the string codes '0', '1' and '2' to the TEXT, INTEGER and
# BOOLEAN type constants of RegistrationInformationField.
# NOTE(review): the lines that name and open/close this mapping are missing
# from this listing.
'0': events_models.RegistrationInformationField.TEXT_FIELD,
'1': events_models.RegistrationInformationField.INTEGER_FIELD,
'2': events_models.RegistrationInformationField.BOOLEAN_FIELD,
def naive_to_aware(date_string):
    """Parse a ``'%Y-%m-%d %H:%M:%S'`` string into an aware datetime.

    The parsed value is localized to Django's current time zone.
    """
    parsed = datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S')
    current_tz = timezone.get_current_timezone()
    return current_tz.localize(parsed)
class Command(BaseCommand):
help = 'Migrate the events from the old website.'
def handle(self, *args, **options):
if not settings.MIGRATION_KEY:
raise ImproperlyConfigured('MIGRATION_KEY not specified')
events_api_url = '{}'.format(
print('[*]Getting events json data')
response = requests.get(events_api_url,
headers={'User-Agent': 'The Donald'})
except requests.RequestException:
print('[!]Could not get {}'.format(events_api_url))
data = response.json()
except json.decoder.JSONDecodeError:
print('[!]No json data found')
event_fields_translations = {
# name in data : name in model
'title': 'title',
'description': 'description',
'location': 'location',
'start_date': 'start',
'end_date': 'end',
'member_price': 'price',
'thalia_costs': 'cost',
'begin_registration': 'registration_start',
'end_registration': 'registration_end',
'end_cancel': 'cancel_deadline',
'registration_not_needed_message': 'no_registration_message',
activity_map = {}
registration_map = {}
information_field_map = {}
print('[*]Parsing event data.')
# Event
for event_data in data['events']:
new_event = events_models.Event(
for concrete_field in event_fields_translations:
django_field = event_fields_translations[concrete_field]
concrete_data = event_data[concrete_field]
# MultilingualField
if django_field in (
'title', 'description', 'location',
for language_code in ('en', 'nl'):
django_multilingualfield = '{}_{}'.format(
django_field, language_code)
if not hasattr(new_event, django_multilingualfield):
print('[!]Could neither find {} nor {}'.format(
django_field, django_multilingualfield))
if django_field == 'title':
django_field = django_field[:100]
setattr(new_event, django_multilingualfield,
# DateTimeField
elif concrete_data and django_field in (
'start', 'end', 'registration_start',
setattr(new_event, django_field,
# DecimalField
elif django_field in ('price', 'cost'):
if re.match(r'[-+]?\d*\.?\d+$', concrete_data):
setattr(new_event, django_field, float(concrete_data))
# TODO: is 0 the right value?
setattr(new_event, django_field, 0)
activity_map[event_data['id']] =
print('[*]Parsing registration field data.')
# RegistrationInformationField
for field_data in data['extra_fields']:
new_registration_information_field = \
information_field_map[field_data['field_id']] = \
print('[*]Parsing registration data.')
# Registration
for registration_data in data['registrations']:
new_registration = events_models.Registration(
username = registration_data['username']
if registration_data['username'] and User.objects.filter(
registration_user = User.objects.get(username=username)
new_registration.member = members_models.Member.objects.get(
cancelled_date = registration_data['canceled']
if cancelled_date:
new_registration.date_cancelled = naive_to_aware(
registration_map[registration_data['id']] =
print('[*]Parsing registration field info data.')
# fields info
for field_info_data in data['extra_info']:
registration_field = events_models.RegistrationInformationField.\
parameters = {
'registration': events_models.Registration.objects.get(
'field': registration_field,
if registration_field.type == events_models.\
new_registration_information = events_models. \
elif registration_field.type == events_models.\
value = False
if value and bool(int(field_info_data['value'])):
value = True
new_registration_information = \
# registration_field.type == INTEGER_FIELD:
new_registration_information = \
value=field_info_data['value'] or 0,