# Path: blob/master/configs/instruct-pix2pix.yaml
# 2447 views
# File modified by authors of InstructPix2Pix from original (https://github.com/CompVis/stable-diffusion).
# See more details in LICENSE.
#
# NOTE(review): indentation was reconstructed from a whitespace-collapsed copy
# of this file; the nesting follows the usual latent-diffusion config layout
# (sub-configs live under model.params) -- confirm against the consumer.

# Model section. Each `target` is a dotted path and `params` holds the
# settings that belong to it (presumably constructor arguments -- confirm
# against the code that loads this file).
model:
  base_learning_rate: 1.0e-04
  target: modules.models.diffusion.ddpm_edit.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: edited
    cond_stage_key: edit
    # image_size: 64
    # image_size: 32
    image_size: 16
    channels: 4
    cond_stage_trainable: false  # Note: different from the one we trained before
    conditioning_key: hybrid
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false

    # NOTE(review): the inline comment says 10000 warmup steps, but
    # warm_up_steps below is [ 0 ] and f_max == f_min == 1. -- confirm which
    # one is intended.
    scheduler_config:  # 10000 warmup steps
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [ 0 ]
        cycle_lengths: [ 10000000000000 ]  # incredibly large number to prevent corner cases
        f_start: [ 1.e-6 ]
        f_max: [ 1. ]
        f_min: [ 1. ]

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32  # unused
        # NOTE(review): in_channels is twice `channels`/z_channels (4);
        # presumably latent + concatenated conditioning for the `hybrid`
        # conditioning_key -- confirm.
        in_channels: 8
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: false
        legacy: false

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

# Data section. Only a `validation` dataset is declared in this file.
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 128
    num_workers: 1
    wrap: false
    validation:
      target: edit_dataset.EditDataset
      params:
        path: data/clip-filtered-dataset
        cache_dir: data/
        cache_name: data_10k
        split: val
        min_text_sim: 0.2
        min_image_sim: 0.75
        min_direction_sim: 0.2
        max_samples_per_prompt: 1
        # min/max resize and crop are all 512 here.
        min_resize_res: 512
        max_resize_res: 512
        crop_res: 512
        output_as_edit: false
        real_input: true