398 lines
16 KiB
Python
398 lines
16 KiB
Python
|
|
#!/usr/bin/env python
|
|||
|
|
"""
|
|||
|
|
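Complete data migration script from Location to Location2025 (with statistical verification).

Features:
- Complete data migration covering all fields
- Real-time statistical verification
- Data quality checks
- Comparison of the data before and after migration
- Detailed report generation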
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import django
|
|||
|
|
from collections import defaultdict, Counter
|
|||
|
|
|
|||
|
|
# Django configuration: point Django at the settings module (unless the
# environment already names one) and initialise it. This runs before the
# model imports that follow it in the file.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
django.setup()
|
|||
|
|
|
|||
|
|
from rog.models import Location, Location2025, NewEvent2
|
|||
|
|
from django.contrib.auth import get_user_model
|
|||
|
|
from django.contrib.gis.geos import Point
|
|||
|
|
|
|||
|
|
def analyze_source_data():
    """Print and return pre-migration statistics for the ``Location`` table.

    Counts the total rows, rows with a non-empty ``group``, rows with a
    ``geom`` value, rows with both ``longitude`` and ``latitude`` set, and,
    for a fixed list of fields, how many rows carry data in each field.
    Fields that the ``Location`` class does not expose (per ``hasattr``)
    are skipped.

    Returns:
        dict: keys ``'total'``, ``'with_group'``, ``'without_group'``,
        ``'with_geom'``, ``'with_lat_lng'`` (all ints) and ``'fields'``
        (a dict mapping field name to the number of rows with data).
    """
    print('=== 移行前データ分析 ===')

    source = Location.objects
    total_locations = source.count()
    print(f'総Location件数: {total_locations}件')

    # Group statistics: "with group" means neither NULL nor the empty string.
    with_group = source.exclude(group__isnull=True).exclude(group='').count()
    without_group = total_locations - with_group
    print(f'groupありLocation: {with_group}件')
    print(f'groupなしLocation: {without_group}件')

    # Coordinate statistics.
    with_geom = source.exclude(geom__isnull=True).count()
    with_lat_lng = (
        source.exclude(longitude__isnull=True)
        .exclude(latitude__isnull=True)
        .count()
    )
    print(f'geom座標あり: {with_geom}件')
    print(f'lat/lng座標あり: {with_lat_lng}件')

    # Per-field statistics. Each group pairs its field names with the rule
    # that selects the rows considered to "have data" for one field.
    field_groups = (
        (
            # Text fields: populated when not NULL and not ''.
            ('photos', 'videos', 'remark', 'tags', 'evaluation_value',
             'sub_loc_id', 'subcategory'),
            lambda name: source.exclude(**{f'{name}__isnull': True}).exclude(**{name: ''}),
        ),
        (
            # Numeric fields: populated when not NULL and not 0.
            ('checkin_point', 'buy_point'),
            lambda name: source.exclude(**{f'{name}__isnull': True}).exclude(**{name: 0}),
        ),
        (
            # Boolean fields: populated when True.
            ('hidden_location',),
            lambda name: source.filter(**{name: True}),
        ),
    )

    fields_stats = {}
    for names, rows_with_data in field_groups:
        for name in names:
            if not hasattr(Location, name):
                continue
            populated = rows_with_data(name).count()
            fields_stats[name] = populated
            print(f'{name}データあり: {populated}件')

    return {
        'total': total_locations,
        'with_group': with_group,
        'without_group': without_group,
        'with_geom': with_geom,
        'with_lat_lng': with_lat_lng,
        'fields': fields_stats,
    }
|
|||
|
|
|
|||
|
|
def validate_migration_data(source_stats):
    """Print and return post-migration statistics for ``Location2025``.

    Args:
        source_stats: Statistics of the source table. This function does not
            read the argument; it is part of the existing call signature.

    Returns:
        dict: keys ``'total'``, ``'fields'`` (source field name -> number of
        migrated rows with data in the mapped target field),
        ``'with_location'``, ``'with_lat_lng'``, ``'with_event'`` and
        ``'with_cp_name'``.
    """
    print('\n=== 移行後データ検証 ===')

    target = Location2025.objects
    total_migrated = target.count()
    print(f'移行完了件数: {total_migrated}件')

    # Field verification: source field name -> target field name.
    field_mapping = {
        'photos': 'photos',
        'videos': 'videos',
        'remark': 'remark',
        'tags': 'tags',
        'evaluation_value': 'evaluation_value',
        'hidden_location': 'hidden_location',
        'sub_loc_id': 'sub_loc_id',
        'subcategory': 'subcategory',
    }

    migrated_stats = {}
    for source_field, target_field in field_mapping.items():
        if source_field == 'hidden_location':
            # Boolean flag: count only the rows where it is set.
            populated = target.filter(**{target_field: True})
        else:
            # Other fields: count rows that are neither NULL nor ''.
            populated = (
                target.exclude(**{f'{target_field}__isnull': True})
                .exclude(**{target_field: ''})
            )
        count = populated.count()
        migrated_stats[source_field] = count
        print(f'{target_field}データあり: {count}件')

    # Coordinate verification.
    with_location = target.exclude(location__isnull=True).count()
    with_lat_lng = (
        target.exclude(longitude__isnull=True)
        .exclude(latitude__isnull=True)
        .count()
    )
    print(f'location座標あり: {with_location}件')
    print(f'lat/lng座標あり: {with_lat_lng}件')

    # Required-field verification.
    with_event = target.exclude(event__isnull=True).count()
    with_cp_name = target.exclude(cp_name__isnull=True).exclude(cp_name='').count()
    print(f'eventリンクあり: {with_event}件')
    print(f'cp_nameあり: {with_cp_name}件')

    return {
        'total': total_migrated,
        'fields': migrated_stats,
        'with_location': with_location,
        'with_lat_lng': with_lat_lng,
        'with_event': with_event,
        'with_cp_name': with_cp_name,
    }
|
|||
|
|
|
|||
|
|
def generate_comparison_report(source_stats, migrated_stats):
    """Print a before/after comparison of the migration statistics.

    Args:
        source_stats: dict with an integer ``'total'`` and a ``'fields'``
            dict mapping field name to the source row count with data.
        migrated_stats: dict of the same shape describing the migrated data.

    Returns:
        None. The report is written to standard output.

    Only fields present in both ``'fields'`` dicts are reported. A zero
    denominator (an empty source table, or a field with no source data) is
    reported as ``N/A`` instead of raising ``ZeroDivisionError``.
    """
    print('\n=== 移行前後比較レポート ===')

    source_total = source_stats['total']
    migrated_total = migrated_stats['total']

    print('総件数比較:')
    print(f' 移行前: {source_total:,}件')
    print(f' 移行後: {migrated_total:,}件')
    if source_total > 0:
        print(f' 移行率: {(migrated_total / source_total * 100):.1f}%')
    else:
        # Nothing to migrate: a rate is undefined, so do not divide by zero.
        print(' 移行率: N/A')

    print('\nフィールド別データ保持率:')
    migrated_fields = migrated_stats['fields']
    for field, source_count in source_stats['fields'].items():
        if field not in migrated_fields:
            continue
        migrated_count = migrated_fields[field]
        if source_count > 0:
            retention_rate = migrated_count / source_count * 100
            print(f' {field}: {migrated_count:,}/{source_count:,}件 ({retention_rate:.1f}%)')
        else:
            print(f' {field}: {migrated_count:,}/0件 (N/A)')
|
|||
|
|
|
|||
|
|
def analyze_event_distribution():
    """Print and return how ``Location2025`` rows are distributed over events.

    Every row is tallied under the label ``"<event_code> (<event_name>)"``;
    rows without an event are tallied under ``"No Code (No Event)"``. The
    number of distinct labels and the ten largest groups are printed.

    Returns:
        dict: label -> number of rows, in first-seen order.
    """
    print('\n=== イベント別分布分析 ===')

    tally = Counter()
    for row in Location2025.objects.select_related('event'):
        if row.event:
            code, name = row.event.event_code, row.event.event_name
        else:
            code, name = 'No Code', 'No Event'
        tally[f"{code} ({name})"] += 1

    # Largest groups first; ties keep their first-seen order.
    ranked = tally.most_common()

    print(f'総イベント数: {len(ranked)}件')
    print('上位イベント:')
    for position, (label, rows) in enumerate(ranked[:10], 1):
        print(f' {position:2d}. {label}: {rows:,}件')

    return dict(tally)
|
|||
|
|
|
|||
|
|
def sample_data_verification():
    """Print a few representative ``Location2025`` rows for manual inspection.

    Up to three samples are picked: the first row with photo data, the first
    row with a remark, and the first row whose ``cp_point`` exceeds 50. When
    none of these exist, the first row of the table (if any) is shown
    instead. Long text values are shortened to 30 characters for display.
    """
    print('\n=== サンプルデータ検証 ===')

    def truncate_text(text, max_len=30):
        """Return ``text`` cut to ``max_len`` chars plus '...', or '(空)' if empty."""
        if not text:
            return '(空)'
        if len(text) > max_len:
            return text[:max_len] + '...'
        return text

    records = Location2025.objects

    # One candidate query per data pattern; each contributes at most one row.
    candidates = (
        ('写真データあり', records.filter(photos__isnull=False).exclude(photos='')),
        ('詳細説明あり', records.filter(remark__isnull=False).exclude(remark='')),
        ('高ポイント', records.filter(cp_point__gt=50)),
    )
    samples = []
    for label, queryset in candidates:
        row = queryset.first()
        if row:
            samples.append((label, row))

    # Fall back to any row when no pattern matched.
    if not samples:
        fallback = records.first()
        if fallback:
            samples.append(('通常データ', fallback))

    # Attributes printed verbatim, then attributes printed truncated.
    plain_attributes = (
        ('CP番号', 'cp_number'),
        ('CP名', 'cp_name'),
        ('CPポイント', 'cp_point'),
        ('フォトポイント', 'photo_point'),
        ('sub_loc_id', 'sub_loc_id'),
        ('subcategory', 'subcategory'),
    )
    truncated_attributes = (
        ('写真', 'photos'),
        ('動画', 'videos'),
        ('詳細', 'remark'),
        ('タグ', 'tags'),
        ('評価値', 'evaluation_value'),
    )

    for sample_type, sample in samples[:3]:
        print(f'\n【{sample_type}サンプル】')
        for caption, attribute in plain_attributes:
            print(f' {caption}: {getattr(sample, attribute)}')
        for caption, attribute in truncated_attributes:
            print(f' {caption}: {truncate_text(getattr(sample, attribute))}')
        print(f' 隠し: {sample.hidden_location}')
        print(f' イベント: {sample.event.event_name if sample.event else "None"}')
|
|||
|
|
|
|||
|
|
def _find_matching_event(group, event_code_map):
    """Return the first event whose code is a substring of ``group``, else None."""
    # NOTE(review): substring matching takes the first hit in map order, so a
    # code contained in another code (or a group naming several events) is
    # resolved by iteration order — confirm this is the intended rule.
    for event_code, event in event_code_map.items():
        if event_code in group:
            return event
    return None


def _to_point(location):
    """Return a ``Point`` for ``location``: first geom member, else lng/lat, else None."""
    # Per the original comment this converts a MultiPoint to a Point by taking
    # its first member.
    if location.geom and len(location.geom) > 0:
        first_point = location.geom[0]
        return Point(first_point.x, first_point.y)
    # NOTE(review): truthiness test means a 0.0 coordinate counts as missing.
    if location.longitude and location.latitude:
        return Point(location.longitude, location.latitude)
    return None


def _build_location2025_defaults(location, default_user):
    """Map one source ``Location`` row to the ``defaults`` of a ``Location2025`` row."""
    checkin_point = int(location.checkin_point) if location.checkin_point else 0
    return {
        'cp_name': location.location_name or '',
        'sub_loc_id': location.sub_loc_id or '',
        'subcategory': location.subcategory or '',
        'latitude': location.latitude or 0.0,
        'longitude': location.longitude or 0.0,
        'location': _to_point(location),
        'cp_point': checkin_point,
        # NOTE(review): photo_point is filled from checkin_point, exactly like
        # cp_point — confirm this is intended and not a copy/paste slip.
        'photo_point': checkin_point,
        'buy_point': int(location.buy_point) if location.buy_point else 0,
        'checkin_radius': location.checkin_radius or 100.0,
        'auto_checkin': location.auto_checkin or False,
        'shop_closed': location.shop_closed or False,
        'shop_shutdown': location.shop_shutdown or False,
        'opening_hours': '',
        'address': location.address or '',
        'phone': location.phone or '',
        'website': '',
        'description': location.remark or '',
        # Additional fields.
        'photos': location.photos or '',
        'videos': location.videos or '',
        'remark': location.remark or '',
        'tags': location.tags or '',
        'evaluation_value': location.evaluation_value or '',
        'hidden_location': location.hidden_location or False,
        # Administrative information.
        'is_active': True,
        'sort_order': 0,
        'csv_source_file': 'migration_from_location',
        'created_by': default_user,
        'updated_by': default_user,
    }


def _quality_score(migrated_stats):
    """Return a 0-100 score: 25 points for each of four completeness checks."""
    total = migrated_stats['total']
    checks = (
        # Every row is linked to an event.
        migrated_stats['with_event'] == total,
        # At least 95% of rows have a CP name.
        migrated_stats['with_cp_name'] >= total * 0.95,
        # At least 80% of rows have photo data.
        migrated_stats['fields']['photos'] >= total * 0.8,
        # At least 80% of rows have a remark.
        migrated_stats['fields']['remark'] >= total * 0.8,
    )
    return 25 * sum(checks)


def main():
    """Run the full Location -> Location2025 migration and print its reports.

    Steps: analyse the source table, delete all existing ``Location2025``
    rows, build an ``event_code`` -> event map from ``NewEvent2``, migrate
    every ``Location`` whose ``group`` contains a known event code (one row
    per ``(cp, event)`` pair), then print the verification, comparison,
    distribution and sample reports followed by a final summary.

    Rows without a group, without a matching event, or duplicating an already
    processed ``(cp, event)`` pair are counted as skipped. Any exception
    raised while handling a row is printed and counted as an error; the loop
    then continues with the next row.
    """
    User = get_user_model()
    # First user in the table is recorded as creator/updater (may be None if
    # the user table is empty).
    default_user = User.objects.first()

    print('='*60)
    print('Location → Location2025 完全移行スクリプト(統計検証付き)')
    print('='*60)

    # 1. Analyse the data before migration.
    source_stats = analyze_source_data()

    # 2. Delete the existing Location2025 data.
    print('\n=== 既存データクリア ===')
    deleted_count = Location2025.objects.count()
    Location2025.objects.all().delete()
    print(f'削除済み: {deleted_count}件')

    # 3. Build the event_code map from NewEvent2.
    print('\n=== Event Code マッピング ===')
    events = NewEvent2.objects.filter(event_code__isnull=False).exclude(event_code='')
    event_code_map = {event.event_code: event for event in events}
    print(f'有効なevent_code数: {len(event_code_map)}件')

    # 4. Run the migration.
    print('\n=== データ移行実行 ===')
    processed_combinations = set()
    migrated_count = 0
    skipped_count = 0
    error_count = 0

    for location in Location.objects.all():
        try:
            # Skip rows with an empty group.
            if not location.group:
                skipped_count += 1
                continue

            # Skip rows whose group contains no known event_code.
            matched_event = _find_matching_event(location.group, event_code_map)
            if matched_event is None:
                skipped_count += 1
                continue

            # Migrate each (cp_number, event) combination only once.
            combination_key = (location.cp, matched_event.id)
            if combination_key in processed_combinations:
                skipped_count += 1
                continue
            processed_combinations.add(combination_key)

            _, created = Location2025.objects.update_or_create(
                cp_number=location.cp,
                event=matched_event,
                defaults=_build_location2025_defaults(location, default_user),
            )

            if created:
                migrated_count += 1
                if migrated_count % 100 == 0:
                    print(f'進捗: {migrated_count:,}件完了')

        except Exception as e:
            # Per-row boundary: report the failure and keep migrating.
            print(f'❌ エラー: CP {location.cp} - {str(e)}')
            error_count += 1

    # 5. Migration result summary.
    print('\n=== 移行結果サマリー ===')
    print(f'移行完了: {migrated_count:,}件')
    print(f'スキップ: {skipped_count:,}件')
    print(f'エラー: {error_count:,}件')
    print(f'総処理: {migrated_count + skipped_count + error_count:,}件')

    # 6. Verify the migrated data.
    migrated_stats = validate_migration_data(source_stats)

    # 7. Before/after comparison report.
    generate_comparison_report(source_stats, migrated_stats)

    # 8. Distribution per event.
    event_distribution = analyze_event_distribution()

    # 9. Sample data verification.
    sample_data_verification()

    # 10. Final verification summary.
    print('\n' + '='*60)
    print('🎯 移行完了検証サマリー')
    print('='*60)

    # Both rates share one guard so an empty source table cannot raise
    # ZeroDivisionError.
    source_total = source_stats['total']
    if source_total > 0:
        success_rate = migrated_count / source_total * 100
        error_rate = error_count / source_total * 100
    else:
        success_rate = 0
        error_rate = 0
    print(f'✅ 総移行成功率: {success_rate:.1f}% ({migrated_count:,}/{source_total:,}件)')
    print(f'✅ エラー率: {error_rate:.1f}% ({error_count:,}件)')
    print(f'✅ 最終Location2025件数: {Location2025.objects.count():,}件')
    print(f'✅ 対応イベント数: {len(event_distribution)}件')

    # Data quality score.
    quality_score = _quality_score(migrated_stats)
    print(f'✅ データ品質スコア: {quality_score}/100点')

    if quality_score >= 90:
        print('🏆 優秀:本格運用準備完了')
    elif quality_score >= 70:
        print('🥉 良好:運用可能レベル')
    elif quality_score >= 50:
        print('⚠️ 要改善:一部データ補完推奨')
    else:
        print('❌ 要対応:データ品質に課題あり')

    print('\n✅ 全フィールド対応の完全データ移行が正常に完了しました')
|
|||
|
|
|
|||
|
|
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|