import os
import gzip
import shutil
# Define input and output base directories
input_base_dir = "data"  # Folder where the compressed files are stored
output_base_dir = "unzip"  # Destination folder for decompressed files


def decompress_tree(input_dir=None, output_dir=None):
    """Decompress every ``*.parquet.gzip`` file under *input_dir* into *output_dir*.

    The directory layout below *input_dir* is mirrored below *output_dir*,
    with the text ``year=`` in the relative directory path rewritten to
    ``year_`` (e.g. ``year=2025`` becomes ``year_2025``). Each output file
    keeps the input file's name minus the trailing ``.gzip`` extension.
    Files whose names do not end with ``.parquet.gzip`` are ignored.

    Args:
        input_dir: Root folder to search. Defaults to the module-level
            ``input_base_dir`` when ``None``.
        output_dir: Root folder to write into; created on demand. Defaults
            to the module-level ``output_base_dir`` when ``None``.

    Returns:
        A list of the output file paths that were written, in the order
        they were processed.
    """
    # Resolve defaults at call time so later changes to the module-level
    # settings are honoured.
    if input_dir is None:
        input_dir = input_base_dir
    if output_dir is None:
        output_dir = output_base_dir

    gzip_suffix = ".gzip"
    written = []

    # Walk through all directories
    for root, _, files in os.walk(input_dir):
        for file in files:
            # Process only .parquet.gzip files
            if not file.endswith(".parquet.gzip"):
                continue

            input_file_path = os.path.join(root, file)

            # Extract the relative path (year and month structure)
            relative_path = os.path.relpath(root, input_dir)

            # Convert "year=2025" to "year_2025"
            new_relative_path = relative_path.replace("year=", "year_")

            # Create output directory structure
            target_dir = os.path.join(output_dir, new_relative_path)
            os.makedirs(target_dir, exist_ok=True)

            # Strip only the trailing ".gzip". A plain str.replace would also
            # remove ".gzip" occurring earlier in the name and could make two
            # different inputs collide on the same output file.
            output_name = file[: -len(gzip_suffix)]
            output_file_path = os.path.join(target_dir, output_name)

            # Decompress and save as .parquet
            with gzip.open(input_file_path, "rb") as f_in, open(
                output_file_path, "wb"
            ) as f_out:
                shutil.copyfileobj(f_in, f_out)

            print(f"✅ Converted: {input_file_path} → {output_file_path}")
            written.append(output_file_path)

    return written


if __name__ == "__main__":
    decompress_tree()
    print("🎉 All files have been decompressed and organized!")
# '비공개' 카테고리의 다른 글
# vuejs) frontend 입문기 (0) | 2025.02.24 |
# ---|