# This is a Python script to transform Church website user data output by Shepherd's Staff

# into a form that can be input as new member data to the Import/Export plugin on the church website

# The program assumes the packages openpyxl, itertools, pygtrie and os are installed.

 

# The input expected by this program is an Excel worksheet ("Input_worksheet.xlsx") with

# one worksheet, row sorted by Number and containing the following header values:

 

# Household_name

# First

# First_or_preferred

# Cell

# Personal_E_mail

# Work_E_mail

# Number

# Received

# Street_Address

# City,state,postal_code

 

# To obtain this file, data from Shepherd's Staff is transferred to an Excel Spreadsheet, Then

# the following header value substitutions are made: Email(1st) -> Personal_E_mail; Email(2nd) -> Work_E_mail;

# Line 1 -> Street_Address. Furthermore, blanks in all header values are changed to "_".

 

# This program assumes the use of an Excel spreadsheet ("Current_user_data.xlsx") with one worksheet

# that contains the current values of the church website user data output using the export function

# of the Import/Export plugin. The export function creates a CSV file, which must be opened

# by Excel. After this, the rows are sorted by the values in column 1 (user_login) and saved.

# Only the first column of this spreadsheet is used (user_login).

#

# Each household name in the input is compared against the user login values in the current user

# data file. If there are matches, then the name in the input is suffixed with an integer to make

# it unique. If no current user data spreadsheet is supplied, this step is skipped.

 

# Note that the files Input_worksheet.xlsx and Current_user_data.xlsx must reside in the

# same directory as this Python script. The output spreadsheet is also created in this directory.

 

# The result of the script is an output worksheet ("Output_worksheet.xlsx") with the following

# header values:

 

# user_login

# user_email

# last_name

# mo_home_phone

# mo_home_email

# mo_family_list

# mo_street_address

# mo_city

# mo_state

# mo_zip

# mo_phone_list

# mo_email_list

# mo_image

# household_id

 

# In order to install the users described in the input worksheet on the web site, the output

# Excel spreadsheet must be saved in CSV formate, which is then used as the target of the

# import function of the import/export plugin.

 

from openpyxl import load_workbook

from openpyxl import Workbook

from itertools import groupby

import pygtrie

import os

import pandas as pd

 

I_workbook = load_workbook(filename="Input_worksheet.xlsx")

i_sheet = I_workbook.active

O_workbook = Workbook()

o_sheet = O_workbook.active

if (os.path.exists("Current_user_data.xlsx")):

    U_workbook = load_workbook(filename="Current_user_data.xlsx")

    u_sheet = U_workbook.active

 

# First write the header row to the output file

 

o_sheet.cell(row=1, column=1).value = "user_login"

o_sheet.cell(row=1, column=2).value = "user_email"

o_sheet.cell(row=1, column=3).value = "last_name"

o_sheet.cell(row=1, column=4).value = "mo_home_phone"

o_sheet.cell(row=1, column=5).value = "mo_home_email"

o_sheet.cell(row=1, column=6).value = "mo_family_list"

o_sheet.cell(row=1, column=7).value = "mo_street_address"

o_sheet.cell(row=1, column=8).value = "mo_city"

o_sheet.cell(row=1, column=9).value = "mo_state"

o_sheet.cell(row=1, column=10).value = "mo_zip"

o_sheet.cell(row=1, column=11).value = "mo_phone_list"

o_sheet.cell(row=1, column=12).value = "mo_email_list"

o_sheet.cell(row=1, column=13).value = "mo_image"

o_sheet.cell(row=1, column=14).value = "household_id"

o_sheet.cell(row=1, column=15).value = "email"

 

# Initialize variables for the first iteration of the following loop

 

current_number = i_sheet["G2"].value

household_id = current_number

username = i_sheet["A2"].value

last_name = username

user_email = ""

email = ""

mo_home_phone = ""

mo_home_email = ""

mo_image = ""

mo_street_address = i_sheet["I2"].value

CityStateZip = i_sheet["J2"].value

CityStateZip = CityStateZip.replace(' ',',')

CityStateZip = CityStateZip.replace(',,',',')

CityStateZipSplit = CityStateZip.split(',')

mo_city = CityStateZipSplit[0]

mo_state = CityStateZipSplit[1]

mo_zip = CityStateZipSplit[2]

mo_family_list = ""

mo_phone_list = ""

mo_email_list = ""

current_output_row = 2

 

# Massage the data in the next input worksheet row so it is suitable for output

 

for row in i_sheet.iter_rows(min_row=2):

 

    First = row[1].value

    First_or_preferred = row[2].value

    if(First_or_preferred == None):

        First_or_preferred = ""

    Cell = row[3].value

    if(Cell == None):

        Cell = ""

    Personal_E_mail = row[4].value

    if(Personal_E_mail == None):

        Personal_E_mail = ""

    Work_E_mail = row[5].value

    if(Work_E_mail == None):

        Work_E_mail = ""

    Number = row[6].value

 

    if (current_number != Number):

 

        o_sheet.insert_rows(idx=current_output_row+1)

        o_sheet.cell(row=current_output_row, column=1).value = username

 

        if(mo_email_list != ""):

            burst_email_list = mo_email_list.split(',')

            first_email = burst_email_list[0]

            first_email_burst = first_email.split('(')

            first_email = first_email_burst[0]

            o_sheet.cell(row=current_output_row, column=2).value = first_email

        else:

            burst_family_list = mo_family_list.split(',')

            first_part = burst_family_list[0]

            first_part_burst = first_part.split('(')

            first_part = first_part_burst[0]

            dummy_email_address = "mountoliveusers" + first_part + "_" + last_name + "@gmail.com"

            o_sheet.cell(row=current_output_row, column=2).value = dummy_email_address

 

        o_sheet.cell(row=current_output_row, column=3).value = last_name

        o_sheet.cell(row=current_output_row, column=4).value = mo_home_phone

        o_sheet.cell(row=current_output_row, column=5).value = mo_home_email

        o_sheet.cell(row=current_output_row, column=6).value = mo_family_list

        o_sheet.cell(row=current_output_row, column=7).value = mo_street_address

        o_sheet.cell(row=current_output_row, column=8).value = mo_city

        o_sheet.cell(row=current_output_row, column=9).value = mo_state

        o_sheet.cell(row=current_output_row, column=10).value = mo_zip

        o_sheet.cell(row=current_output_row, column=11).value = mo_phone_list

        o_sheet.cell(row=current_output_row, column=12).value = mo_email_list

        o_sheet.cell(row=current_output_row, column=13).value = mo_image

        o_sheet.cell(row=current_output_row, column=14).value = household_id

        o_sheet.cell(row=current_output_row, column=15).value = email

       

        # Get ready for the next iteration of the loop

 

        current_number = Number

        current_output_row = current_output_row + 1

        mo_family_list = ""

        mo_phone_list = ""

        mo_email_list = ""

        username = row[0].value

        last_name = username

        mo_street_address = row[8].value

        CityStateZip = row[9].value

        email = ""

        mo_home_phone = ""

        mo_home_email = ""

        if(CityStateZip != None):

            CityStateZip = CityStateZip.replace(' ',',')

            CityStateZip = CityStateZip.replace(',,', ',')

            CityStateZipSplit = CityStateZip.split(',')

            mo_city = CityStateZipSplit[0]

            mo_state = CityStateZipSplit[1]

            mo_zip = CityStateZipSplit[2]

        else:

            mo_city = ""

            mo_state = ""

            mo_zip = ""

        mo_image = ""

        household_id = Number

 

    if(First_or_preferred != ""):

        FirstModified = First + "(" + First_or_preferred + ")"

    else:

        FirstModified = First

    if(mo_family_list != ""):

        mo_family_list = mo_family_list + ', ' + FirstModified

    else:

        mo_family_list = FirstModified

    if(Cell != ""):

        if(mo_phone_list != ""):

            mo_phone_list = mo_phone_list + ', ' + Cell + '(' + FirstModified + ')'

        else:

            mo_phone_list = Cell + '(' + First + ')'

    if(Personal_E_mail != ""):

        if(mo_email_list != ""):

            mo_email_list = mo_email_list + ', ' + Personal_E_mail + '(' + FirstModified + ')'

        else:

            mo_email_list = Personal_E_mail + '(' + FirstModified + ')'

    if (Work_E_mail != ""):

        if(mo_email_list != ""):

            mo_email_list = mo_email_list + ', ' + Work_E_mail + '(' + FirstModified + ')'

        else:

            mo_email_list = Work_E_mail + '(' + FirstModified + ')'

 

# If we have processed the last row, save the last values

 

o_sheet.insert_rows(idx=current_output_row + 1)

o_sheet.cell(row=current_output_row, column=1).value = username

 

if (mo_email_list != ""):

    burst_email_list = mo_email_list.split(',')

    first_email = burst_email_list[0]

    first_email_burst = first_email.split('(')

    first_email = first_email_burst[0]

    o_sheet.cell(row=current_output_row, column=2).value = first_email

else:

    burst_family_list = mo_family_list.split(',')

    first_part = burst_family_list[0]

    first_part_burst = first_part.split('(')

    first_part = first_part_burst[0]

    dummy_email_address = "mountoliveusers" + first_part + "_" + last_name + "@gmail.com"

    o_sheet.cell(row=current_output_row, column=2).value = dummy_email_address

 

o_sheet.cell(row=current_output_row, column=3).value = last_name

o_sheet.cell(row=current_output_row, column=4).value = mo_home_phone

o_sheet.cell(row=current_output_row, column=5).value = mo_home_email

o_sheet.cell(row=current_output_row, column=6).value = mo_family_list

o_sheet.cell(row=current_output_row, column=7).value = mo_street_address

o_sheet.cell(row=current_output_row, column=8).value = mo_city

o_sheet.cell(row=current_output_row, column=9).value = mo_state

o_sheet.cell(row=current_output_row, column=10).value = mo_zip

o_sheet.cell(row=current_output_row, column=11).value = mo_phone_list

o_sheet.cell(row=current_output_row, column=12).value = mo_email_list

o_sheet.cell(row=current_output_row, column=13).value = mo_image

o_sheet.cell(row=current_output_row, column=14).value = household_id

o_sheet.cell(row=current_output_row, column=15).value = email

 

O_workbook.save(filename="Output_worksheet.xlsx")

 

# Sort the output_worksheet by user_login. Used Pandas here because openpyxl doesn't support

# sorting in place. In hindsight, I would have used Pandas for everything if I realized this

# from the get-go

 

O_dataframe = pd.read_excel('Output_worksheet.xlsx')

O_dataframe.sort_values(by=['user_login'], inplace=True)

O_dataframe.to_excel('Output_worksheet.xlsx', index=False)

 

# Get the sorted worksheet to process

 

O_workbook = load_workbook(filename="Output_worksheet.xlsx")

o_sheet = O_workbook.active

 

# transform last names in the input file when duplicates exist and output the result

# to the output file.

 

# split_text provided in

# https://stackoverflow.com/questions/12409894/fast-way-to-split-alpha-and-numeric-chars-in-a-python-string

 

def split_text(s):

    for k, g in groupby(s, str.isalpha):

        yield ''.join(g)

 

# create a trie to hold user_login names split at numeric suffix

 

id_trie = pygtrie.StringTrie()

 

if (os.path.exists("Current_user_data.xlsx")):

 

    # for each user_login id in Current_user_data, split it into alpha and numeric

    # parts and store them in id_trie

 

    for u_row in u_sheet.iter_rows(min_row=2):

        user_login = u_row[0].value

        split_user_login = list(split_text(user_login))

        if (len(split_user_login) == 1):

            id_trie[split_user_login[0] + "/"] = split_user_login[0]

        else:

            id_trie[split_user_login[0] + "/" + split_user_login[1]] = split_user_login[0] + "/" + split_user_login[1]

 

# For each user_login identifier in O_workbook, see if it already exists in id_trie.

# If not and the next user_login is the same value, add a version with suffix "1".

# Otherwise, add the user_login to the trie as a singleton subtree without suffix.

# If a suffixed version already exists, find the version with the largest valued suffix and

# add a version with suffix one greater to id_trie. This algorithm depends on the

# Output worksheet being sorted by user_login.

 

for i in range(2, o_sheet.max_row + 1):

    cell_obj = o_sheet.cell(row=i, column=1)

    user_login = cell_obj.value

    if (id_trie.has_node(user_login) != pygtrie.Trie.HAS_SUBTRIE):

        next_cell_obj = o_sheet.cell(row=i+1, column=1)

        next_user_login = next_cell_obj.value

        if(user_login == next_user_login):

            id_trie[user_login + '/' + '1'] = user_login + '1'

            o_sheet.cell(row=i, column=1).value = user_login + '1'

        else:

            id_trie[user_login + '/'] = user_login

            o_sheet.cell(row=i, column=1).value = user_login

    else:

        list_of_user_logins = id_trie.values(user_login)

        suffix = len(list_of_user_logins) + 1

        id_trie[user_login + '/' + str(suffix)] = user_login + '/' + str(suffix)

        o_sheet.cell(row=i, column=1).value = user_login + str(suffix)

 

O_workbook.save(filename="Output_worksheet.xlsx")

 

'''

 

# debug

 

# list the values in id_trie

 

id_trie.enable_sorting(enable=True)

print(sorted(id_trie.items()))

 

# debug end

 

'''

 

# Press the green button in the gutter to run the script.

if __name__ == '__main__':

 

    exit()