import pandas as pd
import csv


#Input and output locations
INPUT_PATH = 'INPUT.csv'
OUTPUT_PATH = 'OUTPUT.csv'


def read_input(path):
    """Read the input CSV at *path*, skipping malformed lines.

    Returns the parsed pandas DataFrame.
    """
    options = dict(sep=",", encoding='utf-8', low_memory=False)
    try:
        #'on_bad_lines' replaced 'error_bad_lines' (removed in pandas 2.0)
        return pd.read_csv(path, on_bad_lines='skip', **options)
    except TypeError:
        #Older pandas (< 1.3) only knows the legacy keyword
        return pd.read_csv(path, error_bad_lines=False, **options)


def flag_repetitions(df):
    """Return one bool per row of *df*, in row order.

    A row is flagged True when the row directly before it has the same
    CustomerID, the same Activity and the same service_detail_EN. Values are
    compared as stripped strings. The first row is always False, and an empty
    frame gives an empty list.

    *df* is expected to be sorted already so that rows of one customer are
    adjacent and in chronological order.
    """
    def cleaned(column):
        #str() first so numbers and missing values compare as text
        return [str(value).strip() for value in df[column]]

    #Keep the fields separate: joining Activity and service_detail_EN into one
    #string would treat ("ab", "c") and ("a", "bc") as the same activity
    keys = list(zip(cleaned("CustomerID"),
                    cleaned("Activity"),
                    cleaned("service_detail_EN")))
    if not keys:
        return []

    #The first row has no predecessor, so it can never be a repetition
    return [False] + [current == previous
                      for previous, current in zip(keys, keys[1:])]


if __name__ == "__main__":
    #Read the data
    df = read_input(INPUT_PATH)

    #Sort on CustomerID and startTimestamp
    df = df.sort_values(by=["CustomerID", "startTimestamp"], ascending=[True, True])

    #Add the repetition flags as a column to the dataframe
    df["isRepetition"] = flag_repetitions(df)

    #Save the data to a file
    df.to_csv(OUTPUT_PATH, quoting=csv.QUOTE_ALL, encoding='utf-8')