# This is a Python
script to transform Church website user data output by Shepherd's Staff
# into a form that
can be input as new member data to the Import/Export plugin on the church
website
# The program
assumes the packages openpyxl, itertools, pygtrie and os are installed.
# The input expected
by this program is an Excel worksheet ("Input_worksheet.xlsx") with
# one worksheet,
row sorted by Number and containing the following header values:
# Household_name
# First
#
First_or_preferred
# Cell
# Personal_E_mail
# Work_E_mail
# Number
# Received
# Street_Address
#
City,state,postal_code
# To obtain this
file, data from Shepherd's Staff is transferred to an Excel Spreadsheet, Then
# the following
header value substitutions are made: Email(1st) -> Personal_E_mail;
Email(2nd) -> Work_E_mail;
# Line 1 ->
Street_Address. Furthermore, blanks in all header values are changed to
"_".
# This program
assumes the use of an Excel spreadsheet ("Current_user_data.xlsx")
with one worksheet
# that contains the
current values of the church website user data output using the export function
# of the
Import/Export plugin. The export function creates a CSV file, which must be
opened
# by Excel. After
this, the rows are sorted by the values in column 1 (user_login) and saved.
# Only the first
column of this spreadsheet is used (user_login).
#
# Each household
name in the input is compared against the user login values in the current user
# data file. If
there are matches, then the name in the input is suffixed with an integer to
make
# it unique. If no
current user data spreadsheet is supplied, this step is skipped.
# Note that the
files Input_worksheet.xlsx and Current_user_data.xlsx must reside in the
# same directory as
this Python script. The output spreadsheet is also created in this directory.
# The result of the
script is an output worksheet ("Output_worksheet.xlsx") with the
following
# header values:
# user_login
# user_email
# last_name
# mo_home_phone
# mo_home_email
# mo_family_list
# mo_street_address
# mo_city
# mo_state
# mo_zip
# mo_phone_list
# mo_email_list
# mo_image
# household_id
# In order to
install the users described in the input worksheet on the web site, the output
# Excel spreadsheet
must be saved in CSV formate, which is then used as the target of the
# import function
of the import/export plugin.
from openpyxl import load_workbook
from openpyxl import Workbook
from itertools import groupby
import pygtrie
import os
import pandas as pd
I_workbook = load_workbook(filename="Input_worksheet.xlsx")
i_sheet = I_workbook.active
O_workbook = Workbook()
o_sheet = O_workbook.active
if (os.path.exists("Current_user_data.xlsx")):
U_workbook = load_workbook(filename="Current_user_data.xlsx")
u_sheet = U_workbook.active
# First write the
header row to the output file
o_sheet.cell(row=1, column=1).value = "user_login"
o_sheet.cell(row=1, column=2).value = "user_email"
o_sheet.cell(row=1, column=3).value = "last_name"
o_sheet.cell(row=1, column=4).value = "mo_home_phone"
o_sheet.cell(row=1, column=5).value = "mo_home_email"
o_sheet.cell(row=1, column=6).value = "mo_family_list"
o_sheet.cell(row=1, column=7).value = "mo_street_address"
o_sheet.cell(row=1, column=8).value = "mo_city"
o_sheet.cell(row=1, column=9).value = "mo_state"
o_sheet.cell(row=1, column=10).value = "mo_zip"
o_sheet.cell(row=1, column=11).value = "mo_phone_list"
o_sheet.cell(row=1, column=12).value = "mo_email_list"
o_sheet.cell(row=1, column=13).value = "mo_image"
o_sheet.cell(row=1, column=14).value = "household_id"
o_sheet.cell(row=1, column=15).value = "email"
# Initialize
variables for the first iteration of the following loop
current_number = i_sheet["G2"].value
household_id = current_number
username = i_sheet["A2"].value
last_name = username
user_email = ""
email = ""
mo_home_phone = ""
mo_home_email = ""
mo_image = ""
mo_street_address = i_sheet["I2"].value
CityStateZip = i_sheet["J2"].value
CityStateZip = CityStateZip.replace(' ',',')
CityStateZip = CityStateZip.replace(',,',',')
CityStateZipSplit = CityStateZip.split(',')
mo_city = CityStateZipSplit[0]
mo_state = CityStateZipSplit[1]
mo_zip = CityStateZipSplit[2]
mo_family_list = ""
mo_phone_list = ""
mo_email_list = ""
current_output_row = 2
# Massage the data
in the next input worksheet row so it is suitable for output
for row in i_sheet.iter_rows(min_row=2):
First = row[1].value
First_or_preferred = row[2].value
if(First_or_preferred == None):
First_or_preferred = ""
Cell = row[3].value
if(Cell == None):
Cell = ""
Personal_E_mail = row[4].value
if(Personal_E_mail == None):
Personal_E_mail = ""
Work_E_mail = row[5].value
if(Work_E_mail == None):
Work_E_mail = ""
Number = row[6].value
if (current_number != Number):
o_sheet.insert_rows(idx=current_output_row+1)
o_sheet.cell(row=current_output_row, column=1).value = username
if(mo_email_list != ""):
burst_email_list = mo_email_list.split(',')
first_email = burst_email_list[0]
first_email_burst = first_email.split('(')
first_email = first_email_burst[0]
o_sheet.cell(row=current_output_row, column=2).value = first_email
else:
burst_family_list = mo_family_list.split(',')
first_part = burst_family_list[0]
first_part_burst = first_part.split('(')
first_part = first_part_burst[0]
dummy_email_address = "mountoliveusers" + first_part + "_" + last_name + "@gmail.com"
o_sheet.cell(row=current_output_row, column=2).value = dummy_email_address
o_sheet.cell(row=current_output_row, column=3).value = last_name
o_sheet.cell(row=current_output_row, column=4).value = mo_home_phone
o_sheet.cell(row=current_output_row, column=5).value = mo_home_email
o_sheet.cell(row=current_output_row, column=6).value = mo_family_list
o_sheet.cell(row=current_output_row, column=7).value = mo_street_address
o_sheet.cell(row=current_output_row, column=8).value = mo_city
o_sheet.cell(row=current_output_row, column=9).value = mo_state
o_sheet.cell(row=current_output_row, column=10).value = mo_zip
o_sheet.cell(row=current_output_row, column=11).value = mo_phone_list
o_sheet.cell(row=current_output_row, column=12).value = mo_email_list
o_sheet.cell(row=current_output_row, column=13).value = mo_image
o_sheet.cell(row=current_output_row, column=14).value = household_id
o_sheet.cell(row=current_output_row, column=15).value = email
# Get ready for the next iteration of
the loop
current_number = Number
current_output_row = current_output_row + 1
mo_family_list = ""
mo_phone_list = ""
mo_email_list = ""
username = row[0].value
last_name = username
mo_street_address = row[8].value
CityStateZip = row[9].value
email = ""
mo_home_phone = ""
mo_home_email = ""
if(CityStateZip != None):
CityStateZip = CityStateZip.replace(' ',',')
CityStateZip = CityStateZip.replace(',,', ',')
CityStateZipSplit = CityStateZip.split(',')
mo_city = CityStateZipSplit[0]
mo_state = CityStateZipSplit[1]
mo_zip = CityStateZipSplit[2]
else:
mo_city = ""
mo_state = ""
mo_zip = ""
mo_image = ""
household_id = Number
if(First_or_preferred != ""):
FirstModified = First + "(" + First_or_preferred + ")"
else:
FirstModified = First
if(mo_family_list != ""):
mo_family_list = mo_family_list + ', ' + FirstModified
else:
mo_family_list = FirstModified
if(Cell != ""):
if(mo_phone_list != ""):
mo_phone_list = mo_phone_list + ', ' + Cell + '(' + FirstModified + ')'
else:
mo_phone_list = Cell + '(' + First + ')'
if(Personal_E_mail != ""):
if(mo_email_list != ""):
mo_email_list = mo_email_list + ', ' + Personal_E_mail + '(' + FirstModified + ')'
else:
mo_email_list = Personal_E_mail + '(' + FirstModified + ')'
if (Work_E_mail != ""):
if(mo_email_list != ""):
mo_email_list = mo_email_list + ', ' + Work_E_mail + '(' + FirstModified + ')'
else:
mo_email_list = Work_E_mail + '(' + FirstModified + ')'
# If we have
processed the last row, save the last values
o_sheet.insert_rows(idx=current_output_row + 1)
o_sheet.cell(row=current_output_row, column=1).value = username
if (mo_email_list != ""):
burst_email_list = mo_email_list.split(',')
first_email = burst_email_list[0]
first_email_burst = first_email.split('(')
first_email = first_email_burst[0]
o_sheet.cell(row=current_output_row, column=2).value = first_email
else:
burst_family_list = mo_family_list.split(',')
first_part = burst_family_list[0]
first_part_burst = first_part.split('(')
first_part = first_part_burst[0]
dummy_email_address = "mountoliveusers" + first_part + "_" + last_name + "@gmail.com"
o_sheet.cell(row=current_output_row, column=2).value = dummy_email_address
o_sheet.cell(row=current_output_row, column=3).value = last_name
o_sheet.cell(row=current_output_row, column=4).value = mo_home_phone
o_sheet.cell(row=current_output_row, column=5).value = mo_home_email
o_sheet.cell(row=current_output_row, column=6).value = mo_family_list
o_sheet.cell(row=current_output_row, column=7).value = mo_street_address
o_sheet.cell(row=current_output_row, column=8).value = mo_city
o_sheet.cell(row=current_output_row, column=9).value = mo_state
o_sheet.cell(row=current_output_row, column=10).value = mo_zip
o_sheet.cell(row=current_output_row, column=11).value = mo_phone_list
o_sheet.cell(row=current_output_row, column=12).value = mo_email_list
o_sheet.cell(row=current_output_row, column=13).value = mo_image
o_sheet.cell(row=current_output_row, column=14).value = household_id
o_sheet.cell(row=current_output_row, column=15).value = email
O_workbook.save(filename="Output_worksheet.xlsx")
# Sort the
output_worksheet by user_login. Used Pandas here because openpyxl doesn't
support
# sorting in place.
In hindsight, I would have used Pandas for everything if I realized this
# from the get-go
O_dataframe = pd.read_excel('Output_worksheet.xlsx')
O_dataframe.sort_values(by=['user_login'], inplace=True)
O_dataframe.to_excel('Output_worksheet.xlsx', index=False)
# Get the sorted
worksheet to process
O_workbook = load_workbook(filename="Output_worksheet.xlsx")
o_sheet = O_workbook.active
# transform last
names in the input file when duplicates exist and output the result
# to the output
file.
# split_text provided
in
#
https://stackoverflow.com/questions/12409894/fast-way-to-split-alpha-and-numeric-chars-in-a-python-string
def split_text(s):
for k, g in groupby(s, str.isalpha):
yield ''.join(g)
# create a trie to
hold user_login names split at numeric suffix
id_trie = pygtrie.StringTrie()
if (os.path.exists("Current_user_data.xlsx")):
# for each user_login id in
Current_user_data, split it into alpha and numeric
# parts and store them in id_trie
for u_row in u_sheet.iter_rows(min_row=2):
user_login = u_row[0].value
split_user_login = list(split_text(user_login))
if (len(split_user_login) == 1):
id_trie[split_user_login[0] + "/"] = split_user_login[0]
else:
id_trie[split_user_login[0] + "/" + split_user_login[1]] = split_user_login[0] + "/" + split_user_login[1]
# For each
user_login identifier in O_workbook, see if it already exists in id_trie.
# If not and the
next user_login is the same value, add a version with suffix "1".
# Otherwise, add
the user_login to the trie as a singleton subtree without suffix.
# If a suffixed
version already exists, find the version with the largest valued suffix and
# add a version
with suffix one greater to id_trie. This algorithm depends on the
# Output worksheet
being sorted by user_login.
for i in range(2, o_sheet.max_row + 1):
cell_obj = o_sheet.cell(row=i, column=1)
user_login = cell_obj.value
if (id_trie.has_node(user_login) != pygtrie.Trie.HAS_SUBTRIE):
next_cell_obj = o_sheet.cell(row=i+1, column=1)
next_user_login = next_cell_obj.value
if(user_login == next_user_login):
id_trie[user_login + '/' + '1'] = user_login + '1'
o_sheet.cell(row=i, column=1).value = user_login + '1'
else:
id_trie[user_login + '/'] = user_login
o_sheet.cell(row=i, column=1).value = user_login
else:
list_of_user_logins = id_trie.values(user_login)
suffix = len(list_of_user_logins) + 1
id_trie[user_login + '/' + str(suffix)] = user_login + '/' + str(suffix)
o_sheet.cell(row=i, column=1).value = user_login + str(suffix)
O_workbook.save(filename="Output_worksheet.xlsx")
'''
# debug
# list the values
in id_trie
id_trie.enable_sorting(enable=True)
print(sorted(id_trie.items()))
# debug end
'''
# Press the green
button in the gutter to run the script.
if __name__ == '__main__':
exit()