Notice
Recent Posts
Recent Comments
Link
«   2026/04   »
1 2 3 4
5 6 7 8 9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30
Tags
more
Archives
Today
Total
관리 메뉴

인턴기록지

pull_updated_data.py 업데이트된 데이터 뽑기 본문

Python

pull_updated_data.py 업데이트된 데이터 뽑기

인턴신분현경이 2020. 11. 13. 11:42

자전거 대여를 7/30일 18:00 시에 빌렸는데 8/1 01:00에 데이터를 pull 할 때까지 반납이 되지 않았다면

8월데이터에는 rental 기록만 있을 것이다.

이런 경우 9월에 데이터를 pull해오면 return 8/2 12:00 에 반납되었다는 기록이 있다.

 

위와 같이 데이터가 다음에 pull한 파일에서 update된 데이터들을 찾아 db에 반영해주는 코드를 작성하였다.

 

#!/usr/bin/python3
import os
import datetime
import csv
from configparser import ConfigParser
import psycopg2
#db update function
import update_data as ud

## pull log 비교해서  update된 데이터만 뽑기 ##

FOLDER = os.path.abspath('/Users/hklee/digitaltwin/hk/update_data_9')

# ewl_pulllog pull_time 가져오는 함수
def get_pulltime(cursor, file_name):
    if file_name.find('ewl_repair') >= 0:
        pull_query = "SELECT pull_time FROM ewl_pulllog where file_name like 'ewl_repair%';"

    # 대여반납일때
    elif file_name.find('ewl_rental_history') >= 0:
        pull_query = "SELECT pull_time FROM ewl_pulllog where file_name like 'ewl_rental_return%';"

    cursor.execute(pull_query)
    pull_time = cursor.fetchall()
    # print(pull_time[0][0])
    pull_time = datetime.datetime.strptime(str(pull_time[0][0]), "%Y-%m-%d %H:%M:%S")

    return pull_time

# update data 가져오기
def load_update_data(file_name, time_value):
    filepath = os.path.join(FOLDER, file_name)

    try:

        datafile = open(filepath, mode='r', encoding='utf-8')
        csv_reader = csv.reader(datafile)

        data_rows = list(csv_reader)
        
        # 일시 컬럼 index구해주기
        if file_name.find('ewl_repair') >= 0:
            out_time_column_index = data_rows[0].index("등록_일시")
            in_time_column_index = data_rows[0].index("완료_일시")
            file_type = 1
        elif file_name.find('ewl_rental_history') >= 0:
            out_time_column_index = data_rows[0].index("대여_일시")
            in_time_column_index = data_rows[0].index("반납_일시")
            file_type = 2
        # print("timecolumn index", out_time_column_index, in_time_column_index)

        columns = data_rows.pop(0)
        # num_rows = len(data_rows)
        # print("num rows ",num_rows)
        # print("pull_time : ",time_value)
        update_data_rows = []
        update_data_rows.append(columns)
        # print(update_data_rows)

        for row in data_rows:
            # out data o in data o
            if row[out_time_column_index] != '' and row[in_time_column_index] != '':
                # repair
                # out data 가 pulltime보다 작고 in data가 pulltime보다 큰 경우 update
                if file_type == 1 :
                    vs_time = datetime.datetime.strptime(row[out_time_column_index], "%Y-%m-%d")
                    vs_time_in = datetime.datetime.strptime(row[in_time_column_index], "%Y-%m-%d")
                    if vs_time < datetime.datetime(time_value.year, time_value.month, time_value.day, 00, 00, 00) and vs_time_in >= time_value :
                        update_data_rows.append(row)
                # rental_return
                else:
                    time_value = datetime.datetime(time_value.year, time_value.month, time_value.day, 00, 20, 00)
                    vs_time = datetime.datetime.strptime(row[out_time_column_index], "%Y-%m-%d %H:%M")
                    vs_time_in = datetime.datetime.strptime(row[in_time_column_index], "%Y-%m-%d %H:%M")
                    if vs_time < time_value and vs_time_in >= time_value :
                        update_data_rows.append(row)
            # out data o in data x
            elif row[out_time_column_index] != '' and row[in_time_column_index] == '':
                continue
            # out x in o -> 재배치 일때 만
            else:
                print('남겨진 것', row)

        if file_type == 1 :
            new_file = open(FOLDER+'\\repair_update_data.txt', mode='at', encoding='UTF-8')
        elif file_type == 2 :
            new_file = open(FOLDER+'\\rental_return_update_data.txt', mode='at', encoding='UTF-8')

        for row in update_data_rows:
            for ix in range(len(row)):
                if ix == len(row)-1:
                    new_file.write(row[ix])
                else:
                    if file_type == 1 and ix == 4 :
                        new_file.write('"')
                        new_file.writelines(row[ix])
                        new_file.write('"')
                        new_file.write(',')
                    else:
                        new_file.write(row[ix])
                        new_file.write(',')
            new_file.write('\n')
        new_file.close()

        datafile.close()
        # update_data.py 의 insert함수를 이용해주기 위해 update 된 데이터를 return
        return update_data_rows

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

def config(filename='database.ini', section='database'):
    # create a parser
    parser = ConfigParser()
    # read config file
    parser.read(filename)

    # get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return db


if __name__ == '__main__':
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # read connection parameters
        params = config()
        print(params)
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)

        # create a cursor
        cur = conn.cursor()

        for filename in os.listdir(FOLDER):
            print('\n')
            print(filename)
            if filename.find('rental_history') >= 0 : #or filename.find('ewl_repair') >= 0:
                time_value = get_pulltime(cur, filename)
                print("value: ",time_value)
                update_rows = load_update_data(filename, time_value)
                # update_data.py 함수를 이용해 데이터 update
                ud.update_tuple(cur, conn, update_rows)

        cur.close()

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

    finally:
        if conn is not None:
            conn.close()
            print('Database connection closed.')